From b1538fab5f78eaf3ae3e0eec0ff1e39a5cffdfc7 Mon Sep 17 00:00:00 2001 From: "Lucille(Xulu) Chu" Date: Sun, 20 Oct 2024 10:34:03 -0500 Subject: [PATCH] feat: Implemented a new sql `explain analyze graphical` (#16543) * feat: Add support for `Explain Analyze Graphical` statement in sql * refactor: refactoring graphical by partial method * chore: run lin * refactor: Simplify return of DataBlock vector * chore(test): add json struct validation for the explain analyze graphical result * feat(test): explain analyze graphical * feat(test): rewrite explain analyze graphical test * fix: explain analyze graphical test * fix: remove an extra space from the explain profile result file --- src/query/ast/src/ast/statements/explain.rs | 2 + src/query/ast/src/ast/statements/statement.rs | 10 ++- src/query/ast/src/parser/statement.rs | 25 ++++++-- src/query/ast/src/parser/token.rs | 12 ++++ .../src/interpreters/interpreter_explain.rs | 64 ++++++++++++++++++- .../src/interpreters/interpreter_factory.rs | 10 ++- src/query/sql/src/planner/binder/binder.rs | 4 +- .../sql/src/planner/optimizer/optimizer.rs | 7 +- src/query/sql/src/planner/plans/plan.rs | 1 + .../02_query/02_0009_explain_profile.result | 4 ++ .../02_query/02_0009_explain_profile.sh | 39 +++++++++++ 11 files changed, 166 insertions(+), 12 deletions(-) create mode 100644 tests/suites/1_stateful/02_query/02_0009_explain_profile.result create mode 100755 tests/suites/1_stateful/02_query/02_0009_explain_profile.sh diff --git a/src/query/ast/src/ast/statements/explain.rs b/src/query/ast/src/ast/statements/explain.rs index 2664c6572e54..432becb30baa 100644 --- a/src/query/ast/src/ast/statements/explain.rs +++ b/src/query/ast/src/ast/statements/explain.rs @@ -37,6 +37,8 @@ pub enum ExplainKind { // Explain analyze plan AnalyzePlan, + + Graphical, } #[derive(Debug, Clone, PartialEq, Eq, Drive, DriveMut)] diff --git a/src/query/ast/src/ast/statements/statement.rs b/src/query/ast/src/ast/statements/statement.rs index 86c5b965dc96..1a6539512c3d 100644 --- a/src/query/ast/src/ast/statements/statement.rs +++ b/src/query/ast/src/ast/statements/statement.rs @@ -46,6 +46,7 @@ pub enum Statement { ExplainAnalyze { // if partial is true, only scan/filter/join will be shown. partial: bool, + graphical: bool, query: Box, }, @@ -408,12 +409,19 @@ impl Display for Statement { ExplainKind::AnalyzePlan => write!(f, " ANALYZE")?, ExplainKind::Join => write!(f, " JOIN")?, ExplainKind::Memo(_) => write!(f, " MEMO")?, + ExplainKind::Graphical => write!(f, " GRAPHICAL")?, } write!(f, " {query}")?; } - Statement::ExplainAnalyze { partial, query } => { + Statement::ExplainAnalyze { + partial, + graphical, + query, + } => { if *partial { write!(f, "EXPLAIN ANALYZE PARTIAL {query}")?; + } else if *graphical { + write!(f, "EXPLAIN ANALYZE GRAPHICAL {query}")?; } else { write!(f, "EXPLAIN ANALYZE {query}")?; } diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 683d9f46df7e..ef2bd4e00032 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -75,6 +75,7 @@ pub fn statement_body(i: Input) -> IResult { Some(TokenKind::RAW) => ExplainKind::Raw, Some(TokenKind::OPTIMIZED) => ExplainKind::Optimized, Some(TokenKind::MEMO) => ExplainKind::Memo("".to_string()), + Some(TokenKind::GRAPHICAL) => ExplainKind::Graphical, None => ExplainKind::Plan, _ => unreachable!(), }, @@ -85,11 +86,25 @@ pub fn statement_body(i: Input) -> IResult { ); let explain_analyze = map( rule! { - EXPLAIN ~ ANALYZE ~ PARTIAL? ~ #statement - }, - |(_, _, partial, statement)| Statement::ExplainAnalyze { - partial: partial.is_some(), - query: Box::new(statement.stmt), + EXPLAIN ~ ANALYZE ~ (PARTIAL|GRAPHICAL)? ~ #statement + }, + |(_, _, opt_partial_or_graphical, statement)| { + let (partial, graphical) = match opt_partial_or_graphical { + Some(Token { + kind: TokenKind::PARTIAL, + .. + }) => (true, false), + Some(Token { + kind: TokenKind::GRAPHICAL, + .. + }) => (false, true), + _ => (false, false), + }; + Statement::ExplainAnalyze { + partial, + graphical, + query: Box::new(statement.stmt), + } }, ); diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 70641dc974db..a24ddc213e09 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -65,6 +65,16 @@ impl<'a> Tokenizer<'a> { prev_token: None, } } + + pub fn contains_token(query: &str, target_kind: TokenKind) -> bool { + let mut tokenizer = Tokenizer::new(query); + while let Some(Ok(token)) = tokenizer.next() { + if token.kind == target_kind { + return true; + } + } + false + } } impl<'a> Iterator for Tokenizer<'a> { @@ -1204,6 +1214,8 @@ pub enum TokenKind { VARIABLE, #[token("VERBOSE", ignore(ascii_case))] VERBOSE, + #[token("GRAPHICAL", ignore(ascii_case))] + GRAPHICAL, #[token("VIEW", ignore(ascii_case))] VIEW, #[token("VIEWS", ignore(ascii_case))] diff --git a/src/query/service/src/interpreters/interpreter_explain.rs b/src/query/service/src/interpreters/interpreter_explain.rs index fbf195de57f6..b94b5704211f 100644 --- a/src/query/service/src/interpreters/interpreter_explain.rs +++ b/src/query/service/src/interpreters/interpreter_explain.rs @@ -12,11 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeMap; use std::collections::HashMap; use std::sync::Arc; use databend_common_ast::ast::ExplainKind; use databend_common_ast::ast::FormatTreeNode; +use databend_common_base::runtime::profile::get_statistics_desc; +use databend_common_base::runtime::profile::ProfileDesc; +use databend_common_base::runtime::profile::ProfileStatisticsName; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -36,6 +40,8 @@ use databend_common_sql::MetadataRef; use databend_common_storages_result_cache::gen_result_cache_key; use databend_common_storages_result_cache::ResultCacheReader; use databend_common_users::UserApiProvider; +use serde::Serialize; +use serde_json; use super::InsertMultiTableInterpreter; use super::InterpreterFactory; @@ -60,9 +66,17 @@ pub struct ExplainInterpreter { config: ExplainConfig, kind: ExplainKind, partial: bool, + graphical: bool, plan: Plan, } +#[derive(Serialize)] +pub struct GraphicalProfiles { + query_id: String, + profiles: Vec, + statistics_desc: Arc>, +} + #[async_trait::async_trait] impl Interpreter for ExplainInterpreter { fn name(&self) -> &str { @@ -155,7 +169,7 @@ impl Interpreter for ExplainInterpreter { ))?, }, - ExplainKind::AnalyzePlan => match &self.plan { + ExplainKind::AnalyzePlan | ExplainKind::Graphical => match &self.plan { Plan::Query { s_expr, metadata, @@ -259,6 +273,7 @@ impl ExplainInterpreter { kind: ExplainKind, config: ExplainConfig, partial: bool, + graphical: bool, ) -> Result { Ok(ExplainInterpreter { ctx, @@ -266,6 +281,7 @@ impl ExplainInterpreter { kind, config, partial, + graphical, }) } @@ -377,6 +393,40 @@ impl ExplainInterpreter { Ok(vec![DataBlock::new_from_columns(vec![formatted_plan])]) } + fn graphical_profiles_to_datablocks(profiles: GraphicalProfiles) -> Vec { + let json_string = serde_json::to_string_pretty(&profiles) + .unwrap_or_else(|_| "Failed to format profiles".to_string()); + + let line_split_result: Vec<&str> = json_string.lines().collect(); + let formatted_block = StringType::from_data(line_split_result); + + vec![DataBlock::new_from_columns(vec![formatted_block])] + } + + #[async_backtrace::framed] + async fn explain_analyze_graphical( + &self, + s_expr: &SExpr, + metadata: &MetadataRef, + required: ColumnSet, + ignore_result: bool, + ) -> Result { + let query_ctx = self.ctx.clone(); + + let mut builder = PhysicalPlanBuilder::new(metadata.clone(), self.ctx.clone(), true); + let plan = builder.build(s_expr, required).await?; + let build_res = build_query_pipeline(&self.ctx, &[], &plan, ignore_result).await?; + + // Drain the data + let query_profiles = self.execute_and_get_profiles(build_res)?; + + Ok(GraphicalProfiles { + query_id: query_ctx.get_id(), + profiles: query_profiles.values().cloned().collect(), + statistics_desc: get_statistics_desc(), + }) + } + #[async_backtrace::framed] async fn explain_analyze( &self, @@ -395,11 +445,21 @@ impl ExplainInterpreter { let result = if self.partial { format_partial_tree(&plan, metadata, &query_profiles)?.format_pretty()? } else { - plan.format(metadata.clone(), query_profiles)? + plan.format(metadata.clone(), query_profiles.clone())? .format_pretty()? }; let line_split_result: Vec<&str> = result.lines().collect(); let formatted_plan = StringType::from_data(line_split_result); + + if self.graphical { + let profiles = GraphicalProfiles { + query_id: self.ctx.clone().get_id(), + profiles: query_profiles.clone().values().cloned().collect(), + statistics_desc: get_statistics_desc(), + }; + return Ok(Self::graphical_profiles_to_datablocks(profiles)); + } + Ok(vec![DataBlock::new_from_columns(vec![formatted_plan])]) } diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs index f9b27e7efbf1..773f82788c82 100644 --- a/src/query/service/src/interpreters/interpreter_factory.rs +++ b/src/query/service/src/interpreters/interpreter_factory.rs @@ -125,6 +125,7 @@ impl InterpreterFactory { kind.clone(), config.clone(), false, + false, )?)), Plan::ExplainAst { formatted_string } => Ok(Arc::new(ExplainInterpreter::try_create( ctx, @@ -132,6 +133,7 @@ impl InterpreterFactory { ExplainKind::Ast(formatted_string.clone()), ExplainConfig::default(), false, + false, )?)), Plan::ExplainSyntax { formatted_sql } => Ok(Arc::new(ExplainInterpreter::try_create( ctx, @@ -139,13 +141,19 @@ impl InterpreterFactory { ExplainKind::Syntax(formatted_sql.clone()), ExplainConfig::default(), false, + false, )?)), - Plan::ExplainAnalyze { partial, plan } => Ok(Arc::new(ExplainInterpreter::try_create( + Plan::ExplainAnalyze { + graphical, + partial, + plan, + } => Ok(Arc::new(ExplainInterpreter::try_create( ctx, *plan.clone(), ExplainKind::AnalyzePlan, ExplainConfig::default(), *partial, + *graphical, )?)), Plan::CopyIntoTable(copy_plan) => Ok(Arc::new(CopyIntoTableInterpreter::try_create( diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 575c4bcdab63..628f49639094 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -211,9 +211,9 @@ impl<'a> Binder { self.bind_explain(bind_context, kind, options, query).await? } - Statement::ExplainAnalyze {partial, query } => { + Statement::ExplainAnalyze {partial, graphical, query } => { let plan = self.bind_statement(bind_context, query).await?; - Plan::ExplainAnalyze { partial: *partial, plan: Box::new(plan) } + Plan::ExplainAnalyze { partial: *partial, graphical: *graphical, plan: Box::new(plan) } } Statement::ShowFunctions { show_options } => { diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 26d85bcebfb2..2591ddfbc84a 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -231,8 +231,13 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result } } }, - Plan::ExplainAnalyze { plan, partial } => Ok(Plan::ExplainAnalyze { + Plan::ExplainAnalyze { + plan, partial, + graphical, + } => Ok(Plan::ExplainAnalyze { + partial, + graphical, plan: Box::new(Box::pin(optimize(opt_ctx, *plan)).await?), }), Plan::CopyIntoLocation(CopyIntoLocationPlan { diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index d2df531f5be7..0a3efacdc753 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -176,6 +176,7 @@ pub enum Plan { }, ExplainAnalyze { partial: bool, + graphical: bool, plan: Box, }, diff --git a/tests/suites/1_stateful/02_query/02_0009_explain_profile.result b/tests/suites/1_stateful/02_query/02_0009_explain_profile.result new file mode 100644 index 000000000000..0f42683633c2 --- /dev/null +++ b/tests/suites/1_stateful/02_query/02_0009_explain_profile.result @@ -0,0 +1,4 @@ +2 +0 +true +0 diff --git a/tests/suites/1_stateful/02_query/02_0009_explain_profile.sh b/tests/suites/1_stateful/02_query/02_0009_explain_profile.sh new file mode 100755 index 000000000000..0a2ad18bb861 --- /dev/null +++ b/tests/suites/1_stateful/02_query/02_0009_explain_profile.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +response=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d '{"sql": "explain analyze graphical select 1"}') + +data=$(echo $response | jq -r '.data') + +json_string=$(echo "$data" | jq -r '.[][]') +profiles=$(echo "$json_string" | jq -r '.profiles') + +profile_count=$(echo "$profiles" | jq length) +# Check the number of profiles +echo $profile_count + +# Initialize memory_usage, error_count, cpu_time +memory_usage=0 +error_count=0 +cpu_time=0 + +# Loop through profiles and calculate statistics +for i in $(seq 0 $((profile_count - 1))); do + profile=$(echo "$profiles" | jq ".[$i]") + statistics=$(echo "$profile" | jq '.statistics') + errors=$(echo "$profile" | jq '.errors') + + # Check if statistics has enough data (17 elements) + if [ "$(echo "$statistics" | jq length)" -ge 17 ]; then + memory_usage=$((memory_usage + $(echo "$statistics" | jq '.[16]'))) + cpu_time=$((cpu_time + $(echo "$statistics" | jq '.[0]'))) + fi + + + # Count errors + error_count=$((error_count + $(echo "$errors" | jq length))) +done + + +echo $memory_usage +echo "$( [ "$cpu_time" -gt 0 ] && echo true || echo false )" +echo $error_count +