From be39b462785deef7cb2170ba39e3e3c4d1ae6b60 Mon Sep 17 00:00:00 2001
From: zhyass
Date: Wed, 20 Sep 2023 19:55:06 +0800
Subject: [PATCH 01/18] fix: add column panic (#12946)

* fix add column panic

* Add sqllogic test
---
 src/meta/app/src/schema/table.rs              | 23 +++++++++--------
 .../interpreters/interpreter_table_create.rs  | 11 ++++++--
 .../interpreter_table_modify_column.rs        |  1 +
 .../05_0028_ddl_alter_table_add_drop_column   | 25 +++++++++++++++++++
 4 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/src/meta/app/src/schema/table.rs b/src/meta/app/src/schema/table.rs
index e2eb77c6296f..38e5c5f352f5 100644
--- a/src/meta/app/src/schema/table.rs
+++ b/src/meta/app/src/schema/table.rs
@@ -243,22 +243,14 @@ pub struct TableMeta {
 }
 
 impl TableMeta {
-    pub fn add_columns(&mut self, fields: &[TableField], field_comments: &[String]) -> Result<()> {
-        let mut new_schema = self.schema.as_ref().to_owned();
-        new_schema.add_columns(fields)?;
-        self.schema = Arc::new(new_schema);
-        field_comments.iter().for_each(|c| {
-            self.field_comments.push(c.to_owned());
-        });
-        Ok(())
-    }
-
     pub fn add_column(
         &mut self,
         field: &TableField,
         comment: &str,
         index: FieldIndex,
     ) -> Result<()> {
+        self.fill_field_comments();
+
         let mut new_schema = self.schema.as_ref().to_owned();
         new_schema.add_column(field, index)?;
         self.schema = Arc::new(new_schema);
@@ -267,12 +259,23 @@ impl TableMeta {
     }
 
     pub fn drop_column(&mut self, column: &str) -> Result<()> {
+        self.fill_field_comments();
+
         let mut new_schema = self.schema.as_ref().to_owned();
         let index = new_schema.drop_column(column)?;
         self.field_comments.remove(index);
         self.schema = Arc::new(new_schema);
         Ok(())
     }
+
+    /// Fix the field comments to avoid a panic on add/drop column.
+    pub fn fill_field_comments(&mut self) {
+        let num_fields = self.schema.num_fields();
+        // If the field comments are inconsistent with the schema, fill them with empty strings.
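+        // Such a mismatch typically comes from a meta written without comments,
+        // e.g. by `CREATE TABLE ... AS SELECT`, so indexing by field position panicked.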
+ if self.field_comments.len() < num_fields { + self.field_comments = vec!["".to_string(); num_fields]; + } + } } impl TableInfo { diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index 3317efc99459..f574ef453e3f 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs @@ -243,6 +243,11 @@ impl CreateTableInterpreter { } is_valid_column(field.name())?; } + let field_comments = if self.plan.field_comments.is_empty() { + vec!["".to_string(); fields.len()] + } else { + self.plan.field_comments.clone() + }; let schema = TableSchemaRefExt::create(fields); let mut table_meta = TableMeta { @@ -252,7 +257,7 @@ impl CreateTableInterpreter { part_prefix: self.plan.part_prefix.clone(), options: self.plan.options.clone(), default_cluster_key: None, - field_comments: self.plan.field_comments.clone(), + field_comments, drop_on: None, statistics: if let Some(stat) = statistics { stat @@ -325,6 +330,8 @@ impl CreateTableInterpreter { number_of_segments: Some(snapshot.segments.len() as u64), number_of_blocks: Some(snapshot.summary.block_count), }; + + let field_comments = vec!["".to_string(); snapshot.schema.num_fields()]; let table_meta = TableMeta { schema: Arc::new(snapshot.schema.clone()), engine: self.plan.engine.to_string(), @@ -332,7 +339,7 @@ impl CreateTableInterpreter { part_prefix: self.plan.part_prefix.clone(), options, default_cluster_key: None, - field_comments: self.plan.field_comments.clone(), + field_comments, drop_on: None, statistics: stat, ..Default::default() diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index a4d13a5c2a82..322fdd41a5e6 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -238,6 +238,7 @@ impl ModifyTableColumnInterpreter { } let mut table_info = table.get_table_info().clone(); + table_info.meta.fill_field_comments(); for (field, comment) in field_and_comments { let column = &field.name.to_string(); let data_type = &field.data_type; diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column index a657a1a8171e..b5889011da0b 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column +++ b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column @@ -232,5 +232,30 @@ SELECT a,c FROM `05_0028_at_t0_3` order by a statement ok ALTER TABLE `05_0028_at_t0_3` ADD COLUMN d int not null AFTER c +statement ok +CREATE TABLE `05_0028_at_t0_4` AS SELECT a,d FROM `05_0028_at_t0_3` + +statement ok +ALTER TABLE `05_0028_at_t0_4` ADD COLUMN e int COMMENT 'end' + +statement ok +ALTER TABLE `05_0028_at_t0_4` MODIFY COLUMN d uint64 COMMENT 'middle' + +query TT +SHOW CREATE TABLE `05_0028_at_t0_4` +---- +05_0028_at_t0_4 CREATE TABLE `05_0028_at_t0_4` ( `a` FLOAT, `d` BIGINT UNSIGNED NULL COMMENT 'middle', `e` INT NULL COMMENT 'end' ) ENGINE=FUSE + +query IIT +SELECT * FROM `05_0028_at_t0_4` order by a +---- +0.1 0 NULL +0.2 0 NULL +0.3 0 NULL +0.4 0 NULL + statement ok DROP TABLE IF EXISTS `05_0028_at_t0_3` + +statement ok +DROP TABLE IF EXISTS `05_0028_at_t0_4` From 49cb14d6486fdcfae16a026d085cd6c114c2c045 Mon Sep 17 00:00:00 2001 
From: TCeason <33082201+TCeason@users.noreply.github.com>
Date: Wed, 20 Sep 2023 19:55:42 +0800
Subject: [PATCH 02/18] feat(sqlsmith): add table function (#12942)

* add table function

* optimize code
---
 .../service/src/table_functions/srf/range.rs  |   6 +-
 src/tests/sqlsmith/src/runner.rs              |   5 +-
 src/tests/sqlsmith/src/sql_gen/func.rs        |   2 +-
 src/tests/sqlsmith/src/sql_gen/query.rs       | 123 +++++++++++++++++-
 4 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/src/query/service/src/table_functions/srf/range.rs b/src/query/service/src/table_functions/srf/range.rs
index 54e5620fd6f4..58b376585356 100644
--- a/src/query/service/src/table_functions/srf/range.rs
+++ b/src/query/service/src/table_functions/srf/range.rs
@@ -68,9 +68,9 @@ impl RangeTable {
         validate_args(&table_args.positioned, table_func_name)?;
 
         let data_type = match &table_args.positioned[0] {
-            Scalar::Number(_) => Int64Type::data_type(),
-            Scalar::Timestamp(_) => TimestampType::data_type(),
-            Scalar::Date(_) => DateType::data_type(),
+            Scalar::Number(_) => DataType::Number(NumberDataType::Int64),
+            Scalar::Timestamp(_) => DataType::Timestamp,
+            Scalar::Date(_) => DataType::Date,
             other => {
                 return Err(ErrorCode::BadArguments(format!(
                     "Unsupported data type for generate_series: {:?}",
diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs
index 5640fb89e63d..af09186a5b9c 100644
--- a/src/tests/sqlsmith/src/runner.rs
+++ b/src/tests/sqlsmith/src/runner.rs
@@ -32,7 +32,7 @@ use rand::SeedableRng;
 use crate::sql_gen::SqlGenerator;
 use crate::sql_gen::Table;
 
-const KNOWN_ERRORS: [&str; 27] = [
+const KNOWN_ERRORS: [&str; 30] = [
     // Errors caused by illegal parameters
     "Overflow on date YMD",
     "timestamp is out of range",
@@ -62,6 +62,9 @@ const KNOWN_ERRORS: [&str; 27] = [
     "The arguments of AggregateRetention should be an expression which returns a Boolean result",
     "AggregateWindowFunnelFunction does not support type",
     "nth_value should count from 1",
+    "start must be less than or equal to end when step is positive vice versa",
+    "Expected Number, Date or Timestamp type, but got",
+    "Unsupported data type for generate_series",
 ];
 
 pub struct Runner {
diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs
index 82388e03dbe0..22209fb959cf 100644
--- a/src/tests/sqlsmith/src/sql_gen/func.rs
+++ b/src/tests/sqlsmith/src/sql_gen/func.rs
@@ -283,7 +283,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
                 (name, vec![], args_type)
             }
             DataType::Decimal(_) => {
-                let decimal = vec!["to_float64", "to_folat32", "to_decimal", "try_to_decimal"];
+                let decimal = vec!["to_float64", "to_float32", "to_decimal", "try_to_decimal"];
                 let name = decimal[self.rng.gen_range(0..=3)].to_string();
                 if name == "to_decimal" || name == "try_to_decimal" {
                     let args_type = vec![self.gen_data_type(); 1];
diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs
index c3fb249d960f..f9bb81cbcfd6 100644
--- a/src/tests/sqlsmith/src/sql_gen/query.rs
+++ b/src/tests/sqlsmith/src/sql_gen/query.rs
@@ -28,6 +28,10 @@ use common_ast::ast::SelectTarget;
 use common_ast::ast::SetExpr;
 use common_ast::ast::TableReference;
 use common_expression::types::DataType;
+use common_expression::types::NumberDataType;
+use common_expression::TableDataType;
+use common_expression::TableField;
+use common_expression::TableSchemaRefExt;
 use rand::Rng;
 
 use crate::sql_gen::Column;
@@ -296,21 +300,29 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
     }
 
     fn gen_from(&mut self) -> Vec<TableReference> {
-        match self.rng.gen_range(0..=9) {
+        let mut table_refs = vec![];
+        // TODO: generate more table references
+        // let table_ref_num = self.rng.gen_range(1..=3);
+        match self.rng.gen_range(0..=10) {
             0..=7 => {
                 let i = self.rng.gen_range(0..self.tables.len());
                 let table_ref = self.gen_table_ref(self.tables[i].clone());
-                vec![table_ref]
+                table_refs.push(table_ref);
             }
             // join
             8..=9 => {
                 self.is_join = true;
                 let join = self.gen_join_table_ref();
-                vec![join]
+                table_refs.push(join);
+            }
+            10 => {
+                let table_func = self.gen_table_func();
+                table_refs.push(table_func);
             }
             // TODO
             _ => unreachable!(),
         }
+        table_refs
     }
 
     fn gen_table_ref(&mut self, table: Table) -> TableReference {
@@ -336,6 +348,111 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
         }
     }
 
+    // Only test:
+    // [numbers, numbers_mt, numbers_local, generate_series, range]
+    // No need to test:
+    // [fuse_snapshot, fuse_segment, fuse_block, fuse_column, fuse_statistic, clustering_information,
+    // sync_crash_me, async_crash_me, infer_schema, list_stage,
+    // ai_to_sql, execute_background_job, license_info, suggested_background_tasks, tenant_quota]
+    fn gen_table_func(&mut self) -> TableReference {
+        let tbl_func = [
+            "numbers",
+            "numbers_mt",
+            "numbers_local",
+            "generate_series",
+            "range",
+        ];
+        let name = tbl_func[self.rng.gen_range(0..=4)];
+
+        match name {
+            "numbers" | "numbers_mt" | "numbers_local" => {
+                let table = Table {
+                    name: name.to_string(),
+                    schema: TableSchemaRefExt::create(vec![TableField::new(
+                        "number",
+                        TableDataType::Number(NumberDataType::UInt64),
+                    )]),
+                };
+                self.bound_table(table);
+                TableReference::TableFunction {
+                    span: None,
+                    name: Identifier::from_name(name),
+                    params: vec![Expr::Literal {
+                        span: None,
+                        lit: Literal::UInt64(self.rng.gen_range(0..=10)),
+                    }],
+                    named_params: vec![],
+                    alias: None,
+                }
+            }
+            "generate_series" | "range" => {
+                let mut gen_expr = || -> (TableDataType, Expr) {
+                    let idx = self.rng.gen_range(0..=2);
+                    match idx {
+                        0 => {
+                            let arg = Expr::Literal {
+                                span: None,
+                                lit: Literal::UInt64(self.rng.gen_range(0..=1000000)),
+                            };
+                            (TableDataType::Date, Expr::FunctionCall {
+                                span: None,
+                                distinct: false,
+                                name: Identifier::from_name("to_date".to_string()),
+                                args: vec![arg],
+                                params: vec![],
+                                window: None,
+                                lambda: None,
+                            })
+                        }
+                        1 => {
+                            let arg = Expr::Literal {
+                                span: None,
+                                lit: Literal::UInt64(self.rng.gen_range(0..=10000000000000)),
+                            };
+                            (TableDataType::Timestamp, Expr::FunctionCall {
+                                span: None,
+                                distinct: false,
+                                name: Identifier::from_name("to_timestamp".to_string()),
+                                args: vec![arg],
+                                params: vec![],
+                                window: None,
+                                lambda: None,
+                            })
+                        }
+                        2 => (
+                            TableDataType::Number(NumberDataType::Int64),
+                            Expr::Literal {
+                                span: None,
+                                lit: Literal::UInt64(self.rng.gen_range(0..=1000)),
+                            },
+                        ),
+                        _ => unreachable!(),
+                    }
+                };
+                let (ty1, param1) = gen_expr();
+                let (_, param2) = gen_expr();
+                let table = Table {
+                    name: name.to_string(),
+                    schema: TableSchemaRefExt::create(vec![TableField::new(name, ty1)]),
+                };
+                let (_, param3) = gen_expr();
+                self.bound_table(table);
+
+                TableReference::TableFunction {
+                    span: None,
+                    name: Identifier::from_name(name),
+                    params: if self.rng.gen_bool(0.5) {
+                        vec![param1, param2]
+                    } else {
+                        vec![param1, param2, param3]
+                    },
+                    named_params: vec![],
+                    alias: None,
+                }
+            }
+            _ => unreachable!(),
+        }
+    }
+
     fn gen_join_table_ref(&mut self) -> TableReference {
         let i = self.rng.gen_range(0..self.tables.len());
         let j = if i == self.tables.len() - 1 { 0 } else { i + 1 };
From 2add53d0b329a4df6290b2b49a0ea2860f5246ff Mon Sep 17 00:00:00 2001
From: Yang Xiufeng
Date: Wed, 20 Sep 2023 21:00:44 +0800
Subject: [PATCH 03/18] fix: add session info in final response. (#12947)

---
 .../src/servers/http/v1/query/http_query.rs   | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/query/service/src/servers/http/v1/query/http_query.rs b/src/query/service/src/servers/http/v1/query/http_query.rs
index 5ec81be59e31..e97489cfd41b 100644
--- a/src/query/service/src/servers/http/v1/query/http_query.rs
+++ b/src/query/service/src/servers/http/v1/query/http_query.rs
@@ -387,28 +387,35 @@ impl HttpQuery {
     pub async fn get_response_page(&self, page_no: usize) -> Result<HttpQueryResponseInternal> {
         let data = Some(self.get_page(page_no).await?);
         let state = self.get_state().await;
-        let session_conf = self.request.session.clone().unwrap_or_default();
-        let session_conf = if let Some(affect) = &state.affect {
-            Some(session_conf.apply_affect(affect))
+        let session = self.request.session.clone().unwrap_or_default();
+        let session = if let Some(affect) = &state.affect {
+            Some(session.apply_affect(affect))
         } else {
-            Some(session_conf)
+            Some(session)
         };
         Ok(HttpQueryResponseInternal {
             data,
             state,
-            session: session_conf,
+            session,
             session_id: self.session_id.clone(),
         })
     }
 
     #[async_backtrace::framed]
     pub async fn get_response_state_only(&self) -> HttpQueryResponseInternal {
+        let state = self.get_state().await;
+        let session = self.request.session.clone().unwrap_or_default();
+        let session = if let Some(affect) = &state.affect {
+            Some(session.apply_affect(affect))
+        } else {
+            Some(session)
+        };
         HttpQueryResponseInternal {
             data: None,
             session_id: self.session_id.clone(),
-            state: self.get_state().await,
-            session: None,
+            state,
+            session,
         }
     }
 
From 321b99b26e00623a9c1a83236e2be9ef093047ec Mon Sep 17 00:00:00 2001
From: baishen
Date: Wed, 20 Sep 2023 21:45:00 +0800
Subject: [PATCH 04/18] fix(query): fix add/subtract datetime with big integer panic (#12940)

* fix(query): fix add/subtract datetime with big integer panic

* fix

* fix domain
---
 src/query/expression/src/utils/date_helper.rs |   7 +
 src/query/functions/src/scalars/datetime.rs   | 102 ++++++++----
 .../functions/tests/it/scalars/datetime.rs    |  18 +++
 .../tests/it/scalars/testdata/datetime.txt    | 130 ++++++++++++++++++
 .../it/scalars/testdata/function_list.txt     |   4 +
 .../02_function/02_0012_function_datetimes    |  35 +++++
 6 files changed, 268 insertions(+), 28 deletions(-)

diff --git a/src/query/expression/src/utils/date_helper.rs b/src/query/expression/src/utils/date_helper.rs
index 528af75adfb7..0722f0097f9c 100644
--- a/src/query/expression/src/utils/date_helper.rs
+++ b/src/query/expression/src/utils/date_helper.rs
@@ -417,6 +417,7 @@ pub struct ToYYYYMMDD;
 pub struct ToYYYYMMDDHH;
 pub struct ToYYYYMMDDHHMMSS;
 pub struct ToYear;
+pub struct ToQuarter;
 pub struct ToMonth;
 pub struct ToDayOfYear;
 pub struct ToDayOfMonth;
@@ -464,6 +465,12 @@ impl ToNumber<u16> for ToYear {
     }
 }
 
+impl ToNumber<u8> for ToQuarter {
+    fn to_number(dt: &DateTime<Tz>) -> u8 {
+        (dt.month0() / 3 + 1) as u8
+    }
+}
+
 impl ToNumber<u8> for ToMonth {
     fn to_number(dt: &DateTime<Tz>) -> u8 {
         dt.month() as u8
diff --git a/src/query/functions/src/scalars/datetime.rs b/src/query/functions/src/scalars/datetime.rs
index 62d2a4a3a85d..03b40a8f5024 100644
--- a/src/query/functions/src/scalars/datetime.rs
+++ b/src/query/functions/src/scalars/datetime.rs
@@ -909,6 +909,13 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) {
             ToNumberImpl::eval_date::<ToYear, u16>(val, ctx.func_ctx.tz)
         }),
     );
+    registry.register_passthrough_nullable_1_arg::<DateType, NumberType<u8>, _, _>(
"to_quarter", + |_, _| FunctionDomain::Full, + vectorize_1_arg::(|val, ctx| { + ToNumberImpl::eval_date::(val, ctx.func_ctx.tz) + }), + ); registry.register_passthrough_nullable_1_arg::( "to_month", |_, _| FunctionDomain::Full, @@ -973,6 +980,13 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) { ToNumberImpl::eval_timestamp::(val, ctx.func_ctx.tz) }), ); + registry.register_passthrough_nullable_1_arg::( + "to_quarter", + |_, _| FunctionDomain::Full, + vectorize_1_arg::(|val, ctx| { + ToNumberImpl::eval_timestamp::(val, ctx.func_ctx.tz) + }), + ); registry.register_passthrough_nullable_1_arg::( "to_month", |_, _| FunctionDomain::Full, @@ -1027,23 +1041,31 @@ fn register_to_number_functions(registry: &mut FunctionRegistry) { } fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "plus", |_, lhs, rhs| { (|| { - let lm = lhs.max; - let ln = lhs.min; - let rm: i32 = num_traits::cast::cast(rhs.max)?; - let rn: i32 = num_traits::cast::cast(rhs.min)?; + let lm: i64 = num_traits::cast::cast(lhs.max)?; + let ln: i64 = num_traits::cast::cast(lhs.min)?; + let rm = rhs.max; + let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_add(rn)?, - max: lm.checked_add(rm)?, + min: check_date(ln + rn).ok()?, + max: check_date(lm + rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a + (b as i32), + vectorize_with_builder_2_arg::(|a, b, output, ctx| { + match check_date((a as i64) + b) { + Ok(v) => output.push(v), + Err(err) => { + ctx.set_error(output.len(), err); + output.push(0); + } + } + }), ); registry.register_2_arg::( @@ -1065,7 +1087,7 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { |a, b, _| a + b, ); - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "plus", |_, lhs, rhs| { (|| { @@ -1074,13 +1096,21 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { let rm = rhs.max; let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_add(rn)?, - max: lm.checked_add(rm)?, + min: check_timestamp(ln + rn).ok()?, + max: check_timestamp(lm + rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a + b, + vectorize_with_builder_2_arg::( + |a, b, output, ctx| match check_timestamp(a + b) { + Ok(v) => output.push(v), + Err(err) => { + ctx.set_error(output.len(), err); + output.push(0); + } + }, + ), ); registry.register_2_arg::( @@ -1101,23 +1131,31 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) { |a, b, _| a + b, ); - registry.register_2_arg::( + registry.register_passthrough_nullable_2_arg::( "minus", |_, lhs, rhs| { (|| { - let lm = lhs.max; - let ln = lhs.min; - let rm: i32 = num_traits::cast::cast(rhs.max)?; - let rn: i32 = num_traits::cast::cast(rhs.min)?; + let lm: i64 = num_traits::cast::cast(lhs.max)?; + let ln: i64 = num_traits::cast::cast(lhs.min)?; + let rm = rhs.max; + let rn = rhs.min; Some(FunctionDomain::Domain(SimpleDomain:: { - min: ln.checked_sub(rm)?, - max: lm.checked_sub(rn)?, + min: check_date(ln - rn).ok()?, + max: check_date(lm - rm).ok()?, })) })() - .unwrap_or(FunctionDomain::Full) + .unwrap_or(FunctionDomain::MayThrow) }, - |a, b, _| a - b as i32, + vectorize_with_builder_2_arg::(|a, b, output, ctx| { + match check_date((a as i64) - b) { + Ok(v) => output.push(v), + Err(err) => { + ctx.set_error(output.len(), err); + output.push(0); + } + } 
+        }),
     );
 
     registry.register_2_arg::(
         "minus",
         |_, lhs, rhs| {
             (|| {
                 let lm = lhs.max;
                 let ln = lhs.min;
                 let rm = rhs.max;
                 let rn = rhs.min;
                 Some(FunctionDomain::Domain(SimpleDomain:: {
                     min: ln.checked_sub(rm)?,
                     max: lm.checked_sub(rn)?,
                 }))
             })()
             .unwrap_or(FunctionDomain::Full)
         },
         |a, b, _| a - b,
     );
 
-    registry.register_2_arg::<TimestampType, Int64Type, TimestampType, _, _>(
+    registry.register_passthrough_nullable_2_arg::<TimestampType, Int64Type, TimestampType, _, _>(
         "minus",
         |_, lhs, rhs| {
             (|| {
@@ -1149,13 +1187,21 @@ fn register_timestamp_add_sub(registry: &mut FunctionRegistry) {
                 let rn = rhs.min;
 
                 Some(FunctionDomain::Domain(SimpleDomain::<i64> {
-                    min: ln.checked_sub(rm)?,
-                    max: lm.checked_sub(rn)?,
+                    min: check_timestamp(ln - rn).ok()?,
+                    max: check_timestamp(lm - rm).ok()?,
                 }))
             })()
-            .unwrap_or(FunctionDomain::Full)
+            .unwrap_or(FunctionDomain::MayThrow)
         },
-        |a, b, _| a - b,
+        vectorize_with_builder_2_arg::<TimestampType, Int64Type, TimestampType>(
+            |a, b, output, ctx| match check_timestamp(a - b) {
+                Ok(v) => output.push(v),
+                Err(err) => {
+                    ctx.set_error(output.len(), err);
+                    output.push(0);
+                }
+            },
+        ),
     );
 
     registry.register_2_arg::(
diff --git a/src/query/functions/tests/it/scalars/datetime.rs b/src/query/functions/tests/it/scalars/datetime.rs
index e672a6a095bb..8f5fb8ccc028 100644
--- a/src/query/functions/tests/it/scalars/datetime.rs
+++ b/src/query/functions/tests/it/scalars/datetime.rs
@@ -108,10 +108,14 @@ fn test_date_add_subtract(file: &mut impl Write) {
     run_ast(file, "add_years(to_date(0), 100)", &[]);
     run_ast(file, "add_months(to_date(0), 100)", &[]);
     run_ast(file, "add_days(to_date(0), 100)", &[]);
+    run_ast(file, "add(to_date(0), 100)", &[]);
+    run_ast(file, "add(to_date(0), 10000000)", &[]);
     run_ast(file, "subtract_years(to_date(0), 100)", &[]);
     run_ast(file, "subtract_quarters(to_date(0), 100)", &[]);
     run_ast(file, "subtract_months(to_date(0), 100)", &[]);
     run_ast(file, "subtract_days(to_date(0), 100)", &[]);
+    run_ast(file, "subtract(to_date(0), 100)", &[]);
+    run_ast(file, "subtract(to_date(0), 10000000)", &[]);
     run_ast(file, "add_years(a, b)", &[
         ("a", DateType::from_data(vec![-100, 0, 100])),
         ("b", Int32Type::from_data(vec![1, 2, 3])),
@@ -155,6 +159,8 @@ fn test_timestamp_add_subtract(file: &mut impl Write) {
     run_ast(file, "add_hours(to_timestamp(0), 100)", &[]);
     run_ast(file, "add_minutes(to_timestamp(0), 100)", &[]);
     run_ast(file, "add_seconds(to_timestamp(0), 100)", &[]);
+    run_ast(file, "add(to_timestamp(0), 100000000000000)", &[]);
+    run_ast(file, "add(to_timestamp(0), 1000000000000000000)", &[]);
     run_ast(file, "subtract_years(to_timestamp(0), 100)", &[]);
     run_ast(file, "subtract_quarters(to_timestamp(0), 100)", &[]);
     run_ast(file, "subtract_months(to_timestamp(0), 100)", &[]);
@@ -162,6 +168,8 @@ fn test_timestamp_add_subtract(file: &mut impl Write) {
     run_ast(file, "subtract_hours(to_timestamp(0), 100)", &[]);
     run_ast(file, "subtract_minutes(to_timestamp(0), 100)", &[]);
     run_ast(file, "subtract_seconds(to_timestamp(0), 100)", &[]);
+    run_ast(file, "subtract(to_timestamp(0), 100000000000000)", &[]);
+    run_ast(file, "subtract(to_timestamp(0), 1000000000000000000)", &[]);
     run_ast(file, "add_years(a, b)", &[
         ("a", TimestampType::from_data(vec![-100, 0, 100])),
         ("b", Int32Type::from_data(vec![1, 2, 3])),
@@ -462,6 +470,7 @@ fn test_to_number(file: &mut impl Write) {
     run_ast(file, "to_yyyymmdd(to_date(18875))", &[]);
     run_ast(file, "to_yyyymmddhhmmss(to_date(18875))", &[]);
     run_ast(file, "to_year(to_date(18875))", &[]);
+    run_ast(file, "to_quarter(to_date(18875))", &[]);
     run_ast(file, "to_month(to_date(18875))", &[]);
     run_ast(file, "to_day_of_year(to_date(18875))", &[]);
     run_ast(file, "to_day_of_month(to_date(18875))", &[]);
@@ -482,6 +491,10 @@ fn test_to_number(file: &mut impl Write) {
         "a",
         DateType::from_data(vec![-100, 0, 100]),
     )]);
+    
run_ast(file, "to_quarter(a)", &[( + "a", + DateType::from_data(vec![-100, 0, 100]), + )]); run_ast(file, "to_month(a)", &[( "a", DateType::from_data(vec![-100, 0, 100]), @@ -504,6 +517,7 @@ fn test_to_number(file: &mut impl Write) { run_ast(file, "to_yyyymmdd(to_timestamp(1630812366))", &[]); run_ast(file, "to_yyyymmddhhmmss(to_timestamp(1630812366))", &[]); run_ast(file, "to_year(to_timestamp(1630812366))", &[]); + run_ast(file, "to_quarter(to_timestamp(1630812366))", &[]); run_ast(file, "to_month(to_timestamp(1630812366))", &[]); run_ast(file, "to_day_of_year(to_timestamp(1630812366))", &[]); run_ast(file, "to_day_of_month(to_timestamp(1630812366))", &[]); @@ -527,6 +541,10 @@ fn test_to_number(file: &mut impl Write) { "a", TimestampType::from_data(vec![-100, 0, 100]), )]); + run_ast(file, "to_quarter(a)", &[( + "a", + TimestampType::from_data(vec![-100, 0, 100]), + )]); run_ast(file, "to_month(a)", &[( "a", TimestampType::from_data(vec![-100, 0, 100]), diff --git a/src/query/functions/tests/it/scalars/testdata/datetime.txt b/src/query/functions/tests/it/scalars/testdata/datetime.txt index 699082a4c25a..2b42bc1f3d04 100644 --- a/src/query/functions/tests/it/scalars/testdata/datetime.txt +++ b/src/query/functions/tests/it/scalars/testdata/datetime.txt @@ -355,6 +355,23 @@ output domain : {100..=100} output : '1970-04-11' +ast : add(to_date(0), 100) +raw expr : add(to_date(0), 100) +checked expr : plus(to_date(to_int64(0_u8)), to_int64(100_u8)) +optimized expr : 100 +output type : Date +output domain : {100..=100} +output : '1970-04-11' + + +error: + --> SQL:1:1 + | +1 | add(to_date(0), 10000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^ date is out of range while evaluating function `plus('1970-01-01', 10000000)` + + + ast : subtract_years(to_date(0), 100) raw expr : subtract_years(to_date(0), 100) checked expr : subtract_years(to_date(to_int64(0_u8)), to_int64(100_u8)) @@ -391,6 +408,23 @@ output domain : {-100..=-100} output : '1969-09-23' +ast : subtract(to_date(0), 100) +raw expr : subtract(to_date(0), 100) +checked expr : minus(to_date(to_int64(0_u8)), to_int64(100_u8)) +optimized expr : -100 +output type : Date +output domain : {-100..=-100} +output : '1969-09-23' + + +error: + --> SQL:1:1 + | +1 | subtract(to_date(0), 10000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ date is out of range while evaluating function `minus('1970-01-01', 10000000)` + + + ast : add_years(a, b) raw expr : add_years(a::Date, b::Int32) checked expr : add_years(a, to_int64(b)) @@ -646,6 +680,23 @@ output domain : {100000000..=100000000} output : '1970-01-01 00:01:40.000000' +ast : add(to_timestamp(0), 100000000000000) +raw expr : add(to_timestamp(0), 100000000000000) +checked expr : plus(to_timestamp(to_int64(0_u8)), to_int64(100000000000000_u64)) +optimized expr : 100000000000000 +output type : Timestamp +output domain : {100000000000000..=100000000000000} +output : '1973-03-03 09:46:40.000000' + + +error: + --> SQL:1:1 + | +1 | add(to_timestamp(0), 1000000000000000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ timestamp is out of range while evaluating function `plus('1970-01-01 00:00:00.000000', 1000000000000000000)` + + + ast : subtract_years(to_timestamp(0), 100) raw expr : subtract_years(to_timestamp(0), 100) checked expr : subtract_years(to_timestamp(to_int64(0_u8)), to_int64(100_u8)) @@ -709,6 +760,23 @@ output domain : {-100000000..=-100000000} output : '1969-12-31 23:58:20.000000' +ast : subtract(to_timestamp(0), 100000000000000) +raw expr : subtract(to_timestamp(0), 100000000000000) +checked expr : 
minus(to_timestamp(to_int64(0_u8)), to_int64(100000000000000_u64)) +optimized expr : -100000000000000 +output type : Timestamp +output domain : {-100000000000000..=-100000000000000} +output : '1966-10-31 14:13:20.000000' + + +error: + --> SQL:1:1 + | +1 | subtract(to_timestamp(0), 1000000000000000000) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ timestamp is out of range while evaluating function `minus('1970-01-01 00:00:00.000000', 1000000000000000000)` + + + ast : add_years(a, b) raw expr : add_years(a::Timestamp, b::Int32) checked expr : add_years(a, to_int64(b)) @@ -2507,6 +2575,15 @@ output domain : {2021..=2021} output : 2021 +ast : to_quarter(to_date(18875)) +raw expr : to_quarter(to_date(18875)) +checked expr : to_quarter(to_date(to_int64(18875_u16))) +optimized expr : 3_u8 +output type : UInt8 +output domain : {3..=3} +output : 3 + + ast : to_month(to_date(18875)) raw expr : to_month(to_date(18875)) checked expr : to_month(to_date(to_int64(18875_u16))) @@ -2631,6 +2708,28 @@ evaluation (internal): +--------+----------------------------+ +ast : to_quarter(a) +raw expr : to_quarter(a::Date) +checked expr : to_quarter(a) +evaluation: ++--------+--------------+-----------+ +| | a | Output | ++--------+--------------+-----------+ +| Type | Date | UInt8 | +| Domain | {-100..=100} | {0..=255} | +| Row 0 | '1969-09-23' | 3 | +| Row 1 | '1970-01-01' | 1 | +| Row 2 | '1970-04-11' | 2 | ++--------+--------------+-----------+ +evaluation (internal): ++--------+------------------+ +| Column | Data | ++--------+------------------+ +| a | [-100, 0, 100] | +| Output | UInt8([3, 1, 2]) | ++--------+------------------+ + + ast : to_month(a) raw expr : to_month(a::Date) checked expr : to_month(a) @@ -2755,6 +2854,15 @@ output domain : {2021..=2021} output : 2021 +ast : to_quarter(to_timestamp(1630812366)) +raw expr : to_quarter(to_timestamp(1630812366)) +checked expr : to_quarter(to_timestamp(to_int64(1630812366_u32))) +optimized expr : 3_u8 +output type : UInt8 +output domain : {3..=3} +output : 3 + + ast : to_month(to_timestamp(1630812366)) raw expr : to_month(to_timestamp(1630812366)) checked expr : to_month(to_timestamp(to_int64(1630812366_u32))) @@ -2906,6 +3014,28 @@ evaluation (internal): +--------+----------------------------+ +ast : to_quarter(a) +raw expr : to_quarter(a::Timestamp) +checked expr : to_quarter(a) +evaluation: ++--------+------------------------------+-----------+ +| | a | Output | ++--------+------------------------------+-----------+ +| Type | Timestamp | UInt8 | +| Domain | {-100..=100} | {0..=255} | +| Row 0 | '1969-12-31 23:59:59.999900' | 4 | +| Row 1 | '1970-01-01 00:00:00.000000' | 1 | +| Row 2 | '1970-01-01 00:00:00.000100' | 1 | ++--------+------------------------------+-----------+ +evaluation (internal): ++--------+------------------+ +| Column | Data | ++--------+------------------+ +| a | [-100, 0, 100] | +| Output | UInt8([4, 1, 1]) | ++--------+------------------+ + + ast : to_month(a) raw expr : to_month(a::Timestamp) checked expr : to_month(a) diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 9f2d1d74966e..e898b283a63b 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3362,6 +3362,10 @@ Functions overloads: 3 to_month(Timestamp NULL) :: UInt8 NULL 0 to_nullable(NULL) :: NULL 1 to_nullable(T0 NULL) :: T0 NULL +0 to_quarter(Date) :: UInt8 +1 to_quarter(Date 
NULL) :: UInt8 NULL
+2 to_quarter(Timestamp) :: UInt8
+3 to_quarter(Timestamp NULL) :: UInt8 NULL
 0 to_second(Timestamp) :: UInt8
 1 to_second(Timestamp NULL) :: UInt8 NULL
 0 to_start_of_day(Timestamp) :: Timestamp
diff --git a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes
index 76b1133e7b04..b795556fdfe0 100644
--- a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes
+++ b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes
@@ -212,6 +212,21 @@ select today() + 1 = tomorrow()
 ----
 1
 
+query B
+select to_date('2023-01-01') + 100 = to_date('2023-04-11')
+----
+1
+
+query B
+select to_date('2023-01-01') - 100 = to_date('2022-09-23')
+----
+1
+
+statement error 1001
+select to_date('2023-01-01') + 100000000
+
+statement error 1001
+select to_date('2023-01-01') - 100000000
 
 
 query B
@@ -643,6 +658,21 @@ select add_seconds(to_datetime(1582970400000000), cast(61, INT32))
 ----
 2020-02-29 10:01:01.000000
 
+query T
+select to_datetime('2023-01-01 00:00:00') + 10000000000
+----
+2023-01-01 02:46:40.000000
+
+query T
+select to_datetime('2023-01-01 00:00:00') - 10000000000
+----
+2022-12-31 21:13:20.000000
+
+statement error 1001
+select to_datetime('2023-01-01 00:00:00') + 1000000000000000000
+
+statement error 1001
+select to_datetime('2023-01-01 00:00:00') - 1000000000000000000
 
 query I
 select to_month(to_datetime(1633081817000000))
@@ -781,6 +811,11 @@ select to_year(to_datetime(1646404329000000)) = 2022
 ----
 1
 
+query B
+select to_quarter(to_datetime(1646404329000000)) = 1
+----
+1
+
 query T
 select date_add(QUARTER, 1, to_date('2018-01-02'))
 ----
From 7c1a8a86689befcd41e25809671d541ea15e5c2e Mon Sep 17 00:00:00 2001
From: Yang Xiufeng
Date: Wed, 20 Sep 2023 23:19:16 +0800
Subject: [PATCH 05/18] feat: stage file pattern match the whole string after
 prefix. (#12935)

---
 src/common/storage/src/stage.rs               |  8 ++++---
 src/query/ast/src/ast/format/ast_format.rs    |  4 ++--
 src/query/ast/src/ast/statements/statement.rs |  4 ++--
 src/query/ast/src/parser/statement.rs         |  2 +-
 src/query/ast/src/visitors/visitor.rs         |  2 +-
 src/query/ast/src/visitors/visitor_mut.rs     |  2 +-
 src/query/ast/tests/it/testdata/statement.txt |  4 ++--
 src/query/sql/src/planner/binder/binder.rs    |  9 +++++++-
 tests/sqllogictests/suites/stage/list_stage   |  4 ++--
 .../suites/stage/options/pattern              | 23 +++++++++++++++++++
 .../00_0009_remove_internal_stage.result      |  1 +
 11 files changed, 48 insertions(+), 15 deletions(-)
 create mode 100644 tests/sqllogictests/suites/stage/options/pattern

diff --git a/src/common/storage/src/stage.rs b/src/common/storage/src/stage.rs
index fb809a3eaa8d..fdf714284494 100644
--- a/src/common/storage/src/stage.rs
+++ b/src/common/storage/src/stage.rs
@@ -92,7 +92,7 @@ pub struct StageFilesInfo {
 impl StageFilesInfo {
     fn get_pattern(&self) -> Result<Option<Regex>> {
         match &self.pattern {
-            Some(pattern) => match Regex::new(pattern) {
+            Some(pattern) => match Regex::new(&format!("^{pattern}$")) {
                 Ok(r) => Ok(Some(r)),
                 Err(e) => Err(ErrorCode::SyntaxException(format!(
                     "Pattern format invalid, got:{}, error:{:?}",
@@ -206,6 +206,7 @@ impl StageFilesInfo {
         first_only: bool,
         max_files: usize,
     ) -> Result<Vec<StageFileInfo>> {
+        let prefix_len = if path == "/" { 0 } else { path.len() };
         let root_meta = operator.stat(path).await;
         match root_meta {
             Ok(meta) => match meta.mode() {
@@ -233,7 +234,7 @@ impl StageFilesInfo {
         let mut limit: usize = 0;
         while let Some(obj) = list.try_next().await? {
            let meta = operator.metadata(&obj, StageFileInfo::meta_query()).await?;
-            if check_file(obj.path(), meta.mode(), &pattern) {
+            if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) {
                 files.push(StageFileInfo::new(obj.path().to_string(), &meta));
                 if first_only {
                     return Ok(files);
@@ -263,6 +264,7 @@ fn blocking_list_files_with_pattern(
     first_only: bool,
     max_files: usize,
 ) -> Result<Vec<StageFileInfo>> {
+    let prefix_len = if path == "/" { 0 } else { path.len() };
     let operator = operator.blocking();
     let root_meta = operator.stat(path);
 
@@ -293,7 +295,7 @@ fn blocking_list_files_with_pattern(
     for obj in list {
         let obj = obj?;
         let meta = operator.metadata(&obj, StageFileInfo::meta_query())?;
-        if check_file(obj.path(), meta.mode(), &pattern) {
+        if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) {
             files.push(StageFileInfo::new(obj.path().to_string(), &meta));
             if first_only {
                 return Ok(files);
diff --git a/src/query/ast/src/ast/format/ast_format.rs b/src/query/ast/src/ast/format/ast_format.rs
index 27ffead1343b..da521cc648ad 100644
--- a/src/query/ast/src/ast/format/ast_format.rs
+++ b/src/query/ast/src/ast/format/ast_format.rs
@@ -2179,10 +2179,10 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
         self.children.push(node);
     }
 
-    fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast str) {
+    fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast Option<String>) {
         let location_format_ctx = AstFormatContext::new(format!("Location {}", location));
         let location_child = FormatTreeNode::new(location_format_ctx);
-        let pattern_format_ctx = AstFormatContext::new(format!("Pattern {}", pattern));
+        let pattern_format_ctx = AstFormatContext::new(format!("Pattern {:?}", pattern));
         let pattern_child = FormatTreeNode::new(pattern_format_ctx);
 
         let name = "ListStage".to_string();
diff --git a/src/query/ast/src/ast/statements/statement.rs b/src/query/ast/src/ast/statements/statement.rs
index 5e4f7ba6de3b..8ae30da9c555 100644
--- a/src/query/ast/src/ast/statements/statement.rs
+++ b/src/query/ast/src/ast/statements/statement.rs
@@ -186,7 +186,7 @@ pub enum Statement {
     },
     ListStage {
         location: String,
-        pattern: String,
+        pattern: Option<String>,
     },
 
     // UserDefinedFileFormat
@@ -452,7 +452,7 @@ impl Display for Statement {
             Statement::AlterUDF(stmt) => write!(f, "{stmt}")?,
             Statement::ListStage { location, pattern } => {
                 write!(f, "LIST @{location}")?;
-                if !pattern.is_empty() {
+                if let Some(pattern) = pattern {
                     write!(f, " PATTERN = '{pattern}'")?;
                 }
             }
diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs
index e44629718c70..fd6c453694dd 100644
--- a/src/query/ast/src/parser/statement.rs
+++ b/src/query/ast/src/parser/statement.rs
@@ -1054,7 +1054,7 @@ pub fn statement(i: Input) -> IResult<Statement> {
         },
         |(_, location, opt_pattern)| Statement::ListStage {
             location,
-            pattern: opt_pattern.map(|v| v.2).unwrap_or_default(),
+            pattern: opt_pattern.map(|v| v.2),
         },
     );
 
diff --git a/src/query/ast/src/visitors/visitor.rs b/src/query/ast/src/visitors/visitor.rs
index 329b844de4dd..520dddc74a68 100644
--- a/src/query/ast/src/visitors/visitor.rs
+++ b/src/query/ast/src/visitors/visitor.rs
@@ -531,7 +531,7 @@ pub trait Visitor<'ast>: Sized {
 
     fn visit_remove_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {}
 
-    fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {}
+    fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast Option<String>) {}
 
     fn visit_create_file_format(
         &mut self,
diff --git a/src/query/ast/src/visitors/visitor_mut.rs b/src/query/ast/src/visitors/visitor_mut.rs
index 401e39b26e6e..bee45ee26889 100644
--- a/src/query/ast/src/visitors/visitor_mut.rs
+++ b/src/query/ast/src/visitors/visitor_mut.rs
@@ -546,7 +546,7 @@ pub trait VisitorMut: Sized {
 
     fn visit_remove_stage(&mut self, _location: &mut String, _pattern: &mut String) {}
 
-    fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut String) {}
+    fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut Option<String>) {}
 
     fn visit_create_file_format(
         &mut self,
diff --git a/src/query/ast/tests/it/testdata/statement.txt b/src/query/ast/tests/it/testdata/statement.txt
index 2cfa725c03f4..af9681a85cf0 100644
--- a/src/query/ast/tests/it/testdata/statement.txt
+++ b/src/query/ast/tests/it/testdata/statement.txt
@@ -7120,7 +7120,7 @@ LIST @stage_a
 ---------- AST ------------
 ListStage {
     location: "stage_a",
-    pattern: "",
+    pattern: None,
 }
 
 
@@ -7131,7 +7131,7 @@ LIST @~
 ---------- AST ------------
 ListStage {
     location: "~",
-    pattern: "",
+    pattern: None,
 }
 
 
diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs
index 34f62c7d4f94..2a8dc4f466f4 100644
--- a/src/query/sql/src/planner/binder/binder.rs
+++ b/src/query/sql/src/planner/binder/binder.rs
@@ -346,7 +346,14 @@ impl<'a> Binder {
 
             // Stages
             Statement::ShowStages => self.bind_rewrite_to_query(bind_context, "SELECT name, stage_type, number_of_files, creator, comment FROM system.stages ORDER BY name", RewriteKind::ShowStages).await?,
-            Statement::ListStage { location, pattern } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}', pattern => '{pattern}')").as_str(), RewriteKind::ListStage).await?,
+            Statement::ListStage { location, pattern } => {
+                let pattern = if let Some(pattern) = pattern {
+                    format!(", pattern => '{pattern}'")
+                } else {
+                    "".to_string()
+                };
+                self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}'{pattern})").as_str(), RewriteKind::ListStage).await?
+            },
             Statement::DescribeStage { stage_name } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM system.stages WHERE name = '{stage_name}'").as_str(), RewriteKind::DescribeStage).await?,
             Statement::CreateStage(stmt) => self.bind_create_stage(stmt).await?,
             Statement::DropStage {
diff --git a/tests/sqllogictests/suites/stage/list_stage b/tests/sqllogictests/suites/stage/list_stage
index 3ec8ca80af94..f67a81a5dec7 100644
--- a/tests/sqllogictests/suites/stage/list_stage
+++ b/tests/sqllogictests/suites/stage/list_stage
@@ -8,12 +8,12 @@ parquet/multi_page/multi_page_3.parquet 4020 NULL
 parquet/multi_page/multi_page_4.parquet 6636 NULL
 
 query
-select name, size, creator from list_stage(location => '@data/parquet/', pattern => 'complex[.]*')
+select name, size, creator from list_stage(location => '@data/parquet/', pattern => 'complex[.].*')
 ----
 parquet/complex.parquet 92762 NULL
 
 query
-select name, size, creator from list_stage(location => '@data/', pattern => 'parquet/complex[.]*')
+select name, size, creator from list_stage(location => '@data/', pattern => 'parquet/complex[.].*')
 ----
 parquet/complex.parquet 92762 NULL
 
diff --git a/tests/sqllogictests/suites/stage/options/pattern b/tests/sqllogictests/suites/stage/options/pattern
new file mode 100644
index 000000000000..74d25f0df3fb
--- /dev/null
+++ b/tests/sqllogictests/suites/stage/options/pattern
@@ -0,0 +1,23 @@
+# the following 2 cases show that `pattern` only matches the sub-path (suffix) after the 'parquet/' prefix
+# wrong case
+query
+select name from list_stage(location => '@data/parquet/', pattern => 'parquet/.*_page_1.*') order by name
+----
+
+# right case
+query
+select name from list_stage(location => '@data/parquet/', pattern => 'multi_page/.*_page_1.*') order by name
+----
+parquet/multi_page/multi_page_1.parquet
+
+
+# the following 2 cases show that `pattern` needs to match the whole suffix; it is anchored with '^' and '$'
+# wrong case
+query
+select name from list_stage(location => '@data/parquet/', pattern => 'multi_page_1') order by name
+----
+
+query
+select name from list_stage(location => '@data/parquet/', pattern => '.*multi_page_1.*') order by name
+----
+parquet/multi_page/multi_page_1.parquet
diff --git a/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result b/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result
index b22c26728a3e..4218f58997c2 100644
--- a/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result
+++ b/tests/suites/1_stateful/00_stage/00_0009_remove_internal_stage.result
@@ -16,3 +16,4 @@ ontime_200.csv.zst
 dir/ontime_200.csv
 ontime_200.csv
 ontime_200.csv.zst
+dir/ontime_200.csv
From b4f9763533eba88ddb4d2d550b53e40b59a92d1f Mon Sep 17 00:00:00 2001
From: zhyass
Date: Thu, 21 Sep 2023 11:14:37 +0800
Subject: [PATCH 06/18] fix: purge oom (#12950)

* fix purge oom

* add unit test
---
 .../tests/it/storages/fuse/meta/snapshot.rs   | 33 +++++++++++++++++++
 .../common/table-meta/src/meta/mod.rs         |  2 ++
 .../common/table-meta/src/meta/v2/snapshot.rs |  2 +-
 src/query/storages/fuse/src/operations/gc.rs  |  8 ++++-
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/query/service/tests/it/storages/fuse/meta/snapshot.rs b/src/query/service/tests/it/storages/fuse/meta/snapshot.rs
index 43e14d08abd9..46f94418b49e 100644
--- a/src/query/service/tests/it/storages/fuse/meta/snapshot.rs
+++ b/src/query/service/tests/it/storages/fuse/meta/snapshot.rs
@@ -12,9 +12,13 @@
 // See the License for the specific language governing permissions and
 // 
limitations under the License. +use std::collections::HashMap; use std::ops::Add; use common_expression::TableSchema; +use storages_common_table_meta::meta::testing::StatisticsV0; +use storages_common_table_meta::meta::testing::TableSnapshotV1; +use storages_common_table_meta::meta::testing::TableSnapshotV2; use storages_common_table_meta::meta::TableSnapshot; use uuid::Uuid; @@ -74,3 +78,32 @@ fn snapshot_timestamp_time_skew_tolerance() { let prev_ts = prev.timestamp.unwrap(); assert!(current_ts > prev_ts) } + +#[test] +fn test_snapshot_v1_to_v4() { + let summary = StatisticsV0 { + row_count: 0, + block_count: 0, + perfect_block_count: 0, + uncompressed_byte_size: 0, + compressed_byte_size: 0, + index_size: 0, + col_stats: HashMap::new(), + }; + let v1 = TableSnapshotV1::new( + Uuid::new_v4(), + &None, + None, + Default::default(), + summary, + vec![], + None, + None, + ); + assert!(v1.timestamp.is_some()); + + let v4: TableSnapshot = TableSnapshotV2::from(v1.clone()).into(); + assert_eq!(v4.format_version, v1.format_version()); + assert_eq!(v4.snapshot_id, v1.snapshot_id); + assert_eq!(v4.timestamp, v1.timestamp); +} diff --git a/src/query/storages/common/table-meta/src/meta/mod.rs b/src/query/storages/common/table-meta/src/meta/mod.rs index 7ba2f446ddfb..724a22165773 100644 --- a/src/query/storages/common/table-meta/src/meta/mod.rs +++ b/src/query/storages/common/table-meta/src/meta/mod.rs @@ -50,6 +50,8 @@ pub use versions::Versioned; // - export meta encoding to benchmarking tests pub mod testing { pub use super::format::MetaEncoding; + pub use super::v0::statistics::Statistics as StatisticsV0; + pub use super::v1::TableSnapshot as TableSnapshotV1; pub use super::v2::SegmentInfo as SegmentInfoV2; pub use super::v2::TableSnapshot as TableSnapshotV2; pub use super::v3::SegmentInfo as SegmentInfoV3; diff --git a/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs b/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs index e7759854ad78..5bcd1affc5b2 100644 --- a/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs +++ b/src/query/storages/common/table-meta/src/meta/v2/snapshot.rs @@ -127,7 +127,7 @@ impl From for TableSnapshot { // carries the format_version of snapshot being converted. 
format_version: s.format_version,
             snapshot_id: s.snapshot_id,
-            timestamp: None,
+            timestamp: s.timestamp,
             prev_snapshot_id: s.prev_snapshot_id,
             schema,
             summary,
diff --git a/src/query/storages/fuse/src/operations/gc.rs b/src/query/storages/fuse/src/operations/gc.rs
index 70e2a015a0e9..c15195f14841 100644
--- a/src/query/storages/fuse/src/operations/gc.rs
+++ b/src/query/storages/fuse/src/operations/gc.rs
@@ -63,6 +63,12 @@ impl FuseTable {
             }
         }
         let root_snapshot_info = root_snapshot_info_op.unwrap();
+        if root_snapshot_info.snapshot_lite.timestamp.is_none() {
+            return Err(ErrorCode::StorageOther(format!(
+                "gc: snapshot timestamp is none, snapshot location: {}",
+                root_snapshot_info.snapshot_location
+            )));
+        }
 
         let snapshots_io = SnapshotsIO::create(ctx.clone(), self.operator.clone());
         let location_gen = self.meta_location_generator();
@@ -116,7 +122,7 @@ impl FuseTable {
         let mut segments_to_be_purged = HashSet::new();
         let mut ts_to_be_purged = HashSet::new();
         for s in snapshots.into_iter() {
-            if s.timestamp >= base_timestamp {
+            if s.timestamp.is_some() && s.timestamp >= base_timestamp {
                 remain_snapshots.push(s);
                 continue;
             }
From 25270d876e4d51832e6a361b6794110b651dcf61 Mon Sep 17 00:00:00 2001
From: Yang Xiufeng
Date: Thu, 21 Sep 2023 12:06:58 +0800
Subject: [PATCH 07/18] docs: note about pattern in copy. (#12951)

* docs: note about pattern in copy.

* Update dml-copy-into-table.md

---------

Co-authored-by: BohuTANG
---
 docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md      | 6 +++++-
 .../doc/15-sql-functions/112-table-functions/list_stage.md  | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md
index 5448cf61f869..53f14d49bc95 100644
--- a/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md
+++ b/docs/doc/14-sql-commands/10-dml/dml-copy-into-table.md
@@ -184,10 +184,14 @@ externalLocation ::=
 
 Specify a list of one or more files names (separated by commas) to be loaded.
 
-### PATTERN = 'regex_pattern'
+### PATTERN = '<regex_pattern>'
 
 A [PCRE2](https://www.pcre.org/current/doc/html/)-based regular expression pattern string, enclosed in single quotes, specifying the file names to match. Click [here](#loading-data-with-pattern-matching) to see an example. For PCRE2 syntax, see http://www.pcre.org/current/doc/html/pcre2syntax.html.
 
+:::note
+Suppose there is a file `@<stage>/<path>/<file>`; to include it, `<regex_pattern>` needs to match `^<path>/<file>$`.
+:::
+
 ### FILE_FORMAT
 
 See [Input & Output File Formats](../../13-sql-reference/50-file-format-options.md).
diff --git a/docs/doc/15-sql-functions/112-table-functions/list_stage.md b/docs/doc/15-sql-functions/112-table-functions/list_stage.md
index e5ddaeaff1ef..9843e95ea0d7 100644
--- a/docs/doc/15-sql-functions/112-table-functions/list_stage.md
+++ b/docs/doc/15-sql-functions/112-table-functions/list_stage.md
@@ -36,10 +36,15 @@ externalStage ::= @<external_stage_name>[/<path>]
 
 userStage ::= @~[/<path>]
 ```
+
+### PATTERN
+
+See [COPY INTO table](/14-sql-commands/10-dml/dml-copy-into-table.md).
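+For example, `list_stage(location => '@data/parquet/', pattern => 'complex[.].*')`
+matches the staged file `parquet/complex.parquet`: the pattern is applied to the
+path suffix after the `parquet/` prefix and must match that whole suffix.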
+
+
 ## Examples
 
 ```sql
-SELECT * FROM list_stage(location => '@my_stage/', pattern => '.log');
+SELECT * FROM list_stage(location => '@my_stage/', pattern => '.*[.]log');
 +----------------+------+------------------------------------+-------------------------------+---------+
 | name           | size | md5                                | last_modified                 | creator |
 +----------------+------+------------------------------------+-------------------------------+---------+
From 7d268dbe3b9da663f8fb8ac7dfbcc018bcb0cdd4 Mon Sep 17 00:00:00 2001
From: Yijun Zhao
Date: Thu, 21 Sep 2023 13:30:06 +0800
Subject: [PATCH 08/18] fix reviewer comments (#12948)

---
 src/query/functions/src/scalars/geo.rs        | 55 ++++++++++++++-----
 .../query/02_function/02_0060_function_geo    | 22 ++++++++
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/src/query/functions/src/scalars/geo.rs b/src/query/functions/src/scalars/geo.rs
index b7a8d4482b5c..4e9f79214fc5 100644
--- a/src/query/functions/src/scalars/geo.rs
+++ b/src/query/functions/src/scalars/geo.rs
@@ -198,7 +198,8 @@ pub fn register(registry: &mut FunctionRegistry) {
 
     // point in ellipses
     registry.register_function_factory("point_in_ellipses", |_, args_type| {
-        if args_type.len() < 6 {
+        // The input parameters must be 2+4*n, where n is the number of ellipses.
+        if args_type.len() < 6 || (args_type.len() - 2) % 4 != 0 {
             return None;
         }
         Some(Arc::new(Function {
@@ -221,20 +222,28 @@ pub fn register(registry: &mut FunctionRegistry) {
             return None;
         }
 
-        let (arg1, arg2) = if args_type.len() == 2 {
+        let (arg1, arg2) = {
             let arg1 = match args_type.get(0)? {
-                DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()],
+                DataType::Tuple(tys) => {
+                    if tys.len() == 2 {
+                        vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                    } else {
+                        return None;
+                    }
+                }
                 _ => return None,
             };
             let arg2 = match args_type.get(1)? {
                 DataType::Array(box DataType::Tuple(tys)) => {
-                    vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                    if tys.len() == 2 {
+                        vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                    } else {
+                        return None;
+                    }
                 }
                 _ => return None,
             };
             (arg1, arg2)
-        } else {
-            (vec![], vec![])
         };
 
         Some(Arc::new(Function {
@@ -260,20 +269,28 @@ pub fn register(registry: &mut FunctionRegistry) {
             return None;
         }
 
-        let (arg1, arg2) = if args_type.len() == 2 {
+        let (arg1, arg2) = {
             let arg1 = match args_type.get(0)? {
-                DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()],
+                DataType::Tuple(tys) => {
+                    if tys.len() == 2 {
+                        vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                    } else {
+                        return None;
+                    }
+                }
                 _ => return None,
             };
             let arg2 = match args_type.get(1)? {
                 DataType::Array(box DataType::Array(box DataType::Tuple(tys))) => {
-                    vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                    if tys.len() == 2 {
+                        vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                    } else {
+                        return None;
+                    }
                 }
                 _ => return None,
             };
             (arg1, arg2)
-        } else {
-            (vec![], vec![])
         };
 
         Some(Arc::new(Function {
@@ -302,20 +319,30 @@ pub fn register(registry: &mut FunctionRegistry) {
         let mut args = vec![];
 
         let arg1 = match args_type.get(0)? {
-            DataType::Tuple(tys) => vec![DataType::Number(NumberDataType::Float64); tys.len()],
+            DataType::Tuple(tys) => {
+                if tys.len() == 2 {
+                    vec![DataType::Number(NumberDataType::Float64); tys.len()]
+                } else {
+                    return None;
+                }
+            }
             _ => return None,
         };
         args.push(DataType::Tuple(arg1));
 
        let arg2: Vec<DataType> = match args_type.get(1)?
{ DataType::Array(box DataType::Tuple(tys)) => { - vec![DataType::Number(NumberDataType::Float64); tys.len()] + if tys.len() == 2 { + vec![DataType::Number(NumberDataType::Float64); tys.len()] + } else { + return None; + } } _ => return None, }; - (0..args_type.len() - 1) + (1..args_type.len()) .for_each(|_| args.push(DataType::Array(Box::new(DataType::Tuple(arg2.clone()))))); Some(Arc::new(Function { diff --git a/tests/sqllogictests/suites/query/02_function/02_0060_function_geo b/tests/sqllogictests/suites/query/02_function/02_0060_function_geo index e2a2f4bab549..c9970bf5db3d 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0060_function_geo +++ b/tests/sqllogictests/suites/query/02_function/02_0060_function_geo @@ -46,11 +46,33 @@ select geohash_encode(-5.60302734375, 42.593994140625) ---- ezs42d000000 +# form 1 query T select point_in_polygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) ---- 1 +# form 2 +query T +select point_in_polygon((1., 1.), [[(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]]) +---- +0 + +# form 3 +query T +select point_in_polygon((2.5, 2.5), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]) +---- +1 + +statement error 1065 +select point_in_polygon((3,), [(6, 0), (8, 4)]) + +statement error 1065 +select point_in_polygon((1.,), [[(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]]) + +statement error 1065 +select point_in_polygon((2.5,), [(4., 0.), (8., 4.), (4., 8.), (0., 4.)], [(3., 3.), (3., 5.), (5., 5.), (5., 3.)]) + query T select great_circle_angle(-2181569507.9714413, 15253014773.129665, 0.5823419941455749, 0.5823419941455749) ---- From fc6b17e159e0319b2f789a4a93992eab3e3fe56b Mon Sep 17 00:00:00 2001 From: soyeric128 Date: Thu, 21 Sep 2023 13:59:56 +0800 Subject: [PATCH 09/18] Update 99-ansi-sql.md (#12949) --- docs/doc/13-sql-reference/99-ansi-sql.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/doc/13-sql-reference/99-ansi-sql.md b/docs/doc/13-sql-reference/99-ansi-sql.md index 25beb5c1e079..7967c20450a6 100644 --- a/docs/doc/13-sql-reference/99-ansi-sql.md +++ b/docs/doc/13-sql-reference/99-ansi-sql.md @@ -95,7 +95,7 @@ Databend aims to conform to the SQL standard, with particular support for ISO/IE | E121-17 | WITH HOLD cursors | No | | | **E131** | **Null value support (nulls in lieu of values)** | Yes | | | **E141** | **Basic integrity constraints** | No | | -| E141-01 | NOT NULL constraints | Yes | Default in Databend: All columns are non-nullable (NOT NULL). | +| E141-01 | NOT NULL constraints | Yes | Default in Databend: All columns are nullable. | | E141-02 | UNIQUE constraint of NOT NULL columns | No | | | E141-03 | PRIMARY KEY constraints | No | | | E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | No | | From 7a376d89af5e1bda8c489a4453de7875a06bf08e Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 21 Sep 2023 15:20:20 +0800 Subject: [PATCH 10/18] fix: update session on each resp in sqllogic http client. 
(#12952)
---
 tests/sqllogictests/src/client/http_client.rs | 70 +++++++++----------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/tests/sqllogictests/src/client/http_client.rs b/tests/sqllogictests/src/client/http_client.rs
index c2014c435446..3256cb0d7657 100644
--- a/tests/sqllogictests/src/client/http_client.rs
+++ b/tests/sqllogictests/src/client/http_client.rs
@@ -77,29 +77,13 @@ impl HttpClient {
         let start = Instant::now();
         let url = "http://127.0.0.1:8000/v1/query".to_string();
 
-        let mut response = self.response(sql, &url, true).await?;
-        // Set session from response to client
-        // Then client will same session for different queries.
-
-        if response.session.is_some() {
-            self.session = response.session.clone();
-        }
-
-        if let Some(error) = response.error {
-            return Err(format_error(error).into());
-        }
-
-        let rows = response.data;
-        let mut parsed_rows = parser_rows(&rows)?;
+        let mut parsed_rows = vec![];
+        let mut response =
+            self.handle_response(self.post_query(sql, &url).await?, &mut parsed_rows)?;
         while let Some(next_uri) = response.next_uri {
-            let mut url = "http://127.0.0.1:8000".to_string();
-            url.push_str(&next_uri);
-            response = self.response(sql, &url, false).await?;
-            if let Some(error) = response.error {
-                return Err(format_error(error).into());
-            }
-            let rows = response.data;
-            parsed_rows.append(&mut parser_rows(&rows)?);
+            let url = format!("http://127.0.0.1:8000{next_uri}");
+            response =
+                self.handle_response(self.poll_query_result(&url).await?, &mut parsed_rows)?;
         }
         // Todo: add types to compare
         let mut types = vec![];
@@ -120,27 +104,32 @@ impl HttpClient {
         })
     }
 
+    fn handle_response(
+        &mut self,
+        response: QueryResponse,
+        parsed_rows: &mut Vec<Vec<String>>,
+    ) -> Result<QueryResponse> {
+        if response.session.is_some() {
+            self.session = response.session.clone();
+        }
+        if let Some(error) = response.error {
+            Err(format_error(error).into())
+        } else {
+            parsed_rows.append(&mut parser_rows(&response.data)?);
+            Ok(response)
+        }
+    }
+
     // Send request and get response by json format
-    async fn response(&mut self, sql: &str, url: &str, post: bool) -> Result<QueryResponse> {
+    async fn post_query(&self, sql: &str, url: &str) -> Result<QueryResponse> {
         let mut query = HashMap::new();
         query.insert("sql", serde_json::to_value(sql)?);
         if let Some(session) = &self.session {
             query.insert("session", serde_json::to_value(session)?);
         }
-        if post {
-            return Ok(self
-                .client
-                .post(url)
-                .json(&query)
-                .basic_auth("root", Some(""))
-                .send()
-                .await?
-                .json::<QueryResponse>()
-                .await?);
-        }
         Ok(self
             .client
-            .get(url)
+            .post(url)
             .json(&query)
             .basic_auth("root", Some(""))
             .send()
             .await?
             .json::<QueryResponse>()
             .await?)
     }
+
+    async fn poll_query_result(&self, url: &str) -> Result<QueryResponse> {
+        Ok(self
+            .client
+            .get(url)
+            .basic_auth("root", Some(""))
+            .send()
+            .await?
+            .json::<QueryResponse>()
+            .await?)
+ } } From 9ca40ec8367e33cbb6d27794efa2ad002ef366a2 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 21 Sep 2023 16:32:08 +0800 Subject: [PATCH 11/18] ci: add nextest to rust-tools.txt (#12961) --- scripts/setup/rust-tools.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/setup/rust-tools.txt b/scripts/setup/rust-tools.txt index 1df282bb138f..5d68657a1398 100644 --- a/scripts/setup/rust-tools.txt +++ b/scripts/setup/rust-tools.txt @@ -2,3 +2,4 @@ cargo-audit@0.17.6 cargo-machete@0.5.0 taplo-cli@0.8.1 typos-cli@1.16.3 +nextest@0.9.58 From e7b3d00ae0dec5684f5985a8fa802f6295099161 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 21 Sep 2023 16:39:22 +0800 Subject: [PATCH 12/18] feat(sqlsmith): Support generate subquery and with clause (#12956) --- src/query/functions/src/scalars/vector.rs | 36 +++- src/tests/sqlsmith/src/sql_gen/expr.rs | 14 +- src/tests/sqlsmith/src/sql_gen/query.rs | 181 ++++++++++++++++-- .../sqlsmith/src/sql_gen/sql_generator.rs | 5 + 4 files changed, 212 insertions(+), 24 deletions(-) diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index cf041d642256..be4efbcecdaf 100644 --- a/src/query/functions/src/scalars/vector.rs +++ b/src/query/functions/src/scalars/vector.rs @@ -90,8 +90,23 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - let data = std::str::from_utf8(data).unwrap(); + let data = match std::str::from_utf8(data) { + Ok(data) => data, + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid data: {:?}", String::from_utf8_lossy(data)), + ); + output.push(vec![F32::from(0.0)].into()); + return; + } + }; + if ctx.func_ctx.openai_api_key.is_empty() { + ctx.set_error(output.len(), "openai_api_key is empty".to_string()); + output.push(vec![F32::from(0.0)].into()); + return; + } let api_base = ctx.func_ctx.openai_api_embedding_base_url.clone(); let api_key = ctx.func_ctx.openai_api_key.clone(); let api_version = ctx.func_ctx.openai_api_version.clone(); @@ -140,7 +155,24 @@ pub fn register(registry: &mut FunctionRegistry) { } } - let data = std::str::from_utf8(data).unwrap(); + let data = match std::str::from_utf8(data) { + Ok(data) => data, + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid data: {:?}", String::from_utf8_lossy(data)), + ); + output.put_str(""); + output.commit_row(); + return; + } + }; + if ctx.func_ctx.openai_api_key.is_empty() { + ctx.set_error(output.len(), "openai_api_key is empty".to_string()); + output.put_str(""); + output.commit_row(); + return; + } let api_base = ctx.func_ctx.openai_api_chat_base_url.clone(); let api_key = ctx.func_ctx.openai_api_key.clone(); let api_version = ctx.func_ctx.openai_api_version.clone(); diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs index 77bc5f9a880f..9290cd72ba6f 100644 --- a/src/tests/sqlsmith/src/sql_gen/expr.rs +++ b/src/tests/sqlsmith/src/sql_gen/expr.rs @@ -45,6 +45,14 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } + pub(crate) fn gen_simple_expr(&mut self, ty: &DataType) -> Expr { + if self.rng.gen_bool(0.6) { + self.gen_column(ty) + } else { + self.gen_scalar_value(ty) + } + } + fn gen_column(&mut self, ty: &DataType) -> Expr { for bound_column in &self.bound_columns { if bound_column.data_type == *ty { @@ -389,7 +397,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } 7 => { let not = self.rng.gen_bool(0.5); - let subquery = self.gen_subquery(); + let (subquery, _) = self.gen_subquery(false); Expr::Exists { span: None, not, @@ -404,7 +412,7 @@ impl<'a, R: 
Rng> SqlGenerator<'a, R> { 3 => Some(SubqueryModifier::Some), _ => unreachable!(), }; - let subquery = self.gen_subquery(); + let (subquery, _) = self.gen_subquery(true); Expr::Subquery { span: None, modifier, @@ -415,7 +423,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { let expr_ty = self.gen_simple_data_type(); let expr = self.gen_expr(&expr_ty); let not = self.rng.gen_bool(0.5); - let subquery = self.gen_subquery(); + let (subquery, _) = self.gen_subquery(true); Expr::InSubquery { span: None, expr: Box::new(expr), diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index f9bb81cbcfd6..cd3d652eb7cc 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -26,12 +26,18 @@ use common_ast::ast::Query; use common_ast::ast::SelectStmt; use common_ast::ast::SelectTarget; use common_ast::ast::SetExpr; +use common_ast::ast::TableAlias; use common_ast::ast::TableReference; +use common_ast::ast::With; +use common_ast::ast::CTE; +use common_expression::infer_schema_type; use common_expression::types::DataType; use common_expression::types::NumberDataType; use common_expression::TableDataType; use common_expression::TableField; +use common_expression::TableSchemaRef; use common_expression::TableSchemaRefExt; +use rand::distributions::Alphanumeric; use rand::Rng; use crate::sql_gen::Column; @@ -40,10 +46,12 @@ use crate::sql_gen::Table; impl<'a, R: Rng> SqlGenerator<'a, R> { pub(crate) fn gen_query(&mut self) -> Query { - self.bound_columns.clear(); + self.cte_tables.clear(); self.bound_tables.clear(); + self.bound_columns.clear(); self.is_join = false; + let with = self.gen_with(); let body = self.gen_set_expr(); let limit = self.gen_limit(); let offset = self.gen_offset(limit.len()); @@ -51,8 +59,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { Query { span: None, - // TODO - with: None, + with, body, order_by, limit, @@ -61,7 +68,10 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } - pub(crate) fn gen_subquery(&mut self) -> Query { + // Scalar, IN / NOT IN, ANY / SOME / ALL Subquery must return only one column + // EXISTS / NOT EXISTS Subquery can return any columns + pub(crate) fn gen_subquery(&mut self, one_column: bool) -> (Query, TableSchemaRef) { + let current_cte_tables = mem::take(&mut self.cte_tables); let current_bound_tables = mem::take(&mut self.bound_tables); let current_bound_columns = mem::take(&mut self.bound_columns); let current_is_join = self.is_join; @@ -70,13 +80,101 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { self.bound_columns = vec![]; self.is_join = false; - let query = self.gen_query(); + // Only generate simple subquery + // TODO: complex subquery + let from = self.gen_from(); + + let len = if one_column { + 1 + } else { + self.rng.gen_range(1..=5) + }; + + let name: String = (0..3) + .map(|_| self.rng.sample(Alphanumeric) as char) + .collect(); + let mut fields = Vec::with_capacity(len); + let mut select_list = Vec::with_capacity(len); + for i in 0..len { + let ty = self.gen_simple_data_type(); + let expr = self.gen_simple_expr(&ty); + let col_name = format!("c{}{}", name, i); + let table_type = infer_schema_type(&ty).unwrap(); + let field = TableField::new(&col_name, table_type); + fields.push(field); + let alias = Identifier::from_name(col_name); + let target = SelectTarget::AliasedExpr { + expr: Box::new(expr), + alias: Some(alias), + }; + select_list.push(target); + } + let schema = TableSchemaRefExt::create(fields); + let select = SelectStmt { + span: None, + hints: None, + distinct: false, + 
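            // select_list holds the aliased simple expressions generated above,
            // matching `schema` field-for-field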
select_list, + from, + selection: None, + group_by: None, + having: None, + window_list: None, + }; + let body = SetExpr::Select(Box::new(select)); + + let query = Query { + span: None, + with: None, + body, + order_by: vec![], + limit: vec![], + offset: None, + ignore_result: false, + }; + + self.cte_tables = current_cte_tables; self.bound_tables = current_bound_tables; self.bound_columns = current_bound_columns; self.is_join = current_is_join; - query + (query, schema) + } + + fn gen_with(&mut self) -> Option { + if self.rng.gen_bool(0.8) { + return None; + } + + let len = self.rng.gen_range(1..=3); + let mut ctes = Vec::with_capacity(len); + for _ in 0..len { + let cte = self.gen_cte(); + ctes.push(cte); + } + + Some(With { + span: None, + recursive: false, + ctes, + }) + } + + fn gen_cte(&mut self) -> CTE { + let (subquery, schema) = self.gen_subquery(false); + + let (table, alias) = self.gen_subquery_table(schema); + self.cte_tables.push(table); + + let materialized = self.rng.gen_bool(0.5); + + CTE { + span: None, + alias, + materialized, + query: Box::new(subquery), + } } fn gen_set_expr(&mut self) -> SetExpr { @@ -304,17 +402,21 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { // TODO: generate more table reference // let table_ref_num = self.rng.gen_range(1..=3); match self.rng.gen_range(0..=10) { - 0..=7 => { - let i = self.rng.gen_range(0..self.tables.len()); - let table_ref = self.gen_table_ref(self.tables[i].clone()); + 0..=6 => { + let (table_ref, _) = self.gen_table_ref(); table_refs.push(table_ref); } // join - 8..=9 => { + 7..=8 => { self.is_join = true; let join = self.gen_join_table_ref(); table_refs.push(join); } + // subquery + 9 => { + let subquery = self.gen_subquery_table_ref(); + table_refs.push(subquery); + } 10 => { let table_func = self.gen_table_func(); table_refs.push(table_func); @@ -325,12 +427,21 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { table_refs } - fn gen_table_ref(&mut self, table: Table) -> TableReference { + fn gen_table_ref(&mut self) -> (TableReference, TableSchemaRef) { + let len = self.tables.len() + self.cte_tables.len(); + let i = self.rng.gen_range(0..len); + + let table = if i < self.tables.len() { + self.tables[i].clone() + } else { + self.cte_tables[len - i - 1].clone() + }; + let schema = table.schema.clone(); let table_name = Identifier::from_name(table.name.clone()); self.bound_table(table); - TableReference::Table { + let table_ref = TableReference::Table { span: None, // TODO catalog: None, @@ -345,7 +456,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { pivot: None, // TODO unpivot: None, - } + }; + (table_ref, schema) } // Only test: @@ -453,11 +565,10 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { _ => unreachable!(), } } + fn gen_join_table_ref(&mut self) -> TableReference { - let i = self.rng.gen_range(0..self.tables.len()); - let j = if i == self.tables.len() - 1 { 0 } else { i + 1 }; - let left_table = self.gen_table_ref(self.tables[i].clone()); - let right_table = self.gen_table_ref(self.tables[j].clone()); + let (left_table, left_schema) = self.gen_table_ref(); + let (right_table, right_schema) = self.gen_table_ref(); let op = match self.rng.gen_range(0..=8) { 0 => JoinOperator::Inner, @@ -479,8 +590,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { JoinCondition::On(Box::new(expr)) } 1 => { - let left_fields = self.tables[i].schema.fields(); - let right_fields = self.tables[j].schema.fields(); + let left_fields = left_schema.fields(); + let right_fields = right_schema.fields(); let mut names = Vec::new(); for left_field in left_fields { @@ -534,6 
+645,19 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { TableReference::Join { span: None, join } } + fn gen_subquery_table_ref(&mut self) -> TableReference { + let (subquery, schema) = self.gen_subquery(false); + + let (table, alias) = self.gen_subquery_table(schema); + self.bound_table(table); + + TableReference::Subquery { + span: None, + subquery: Box::new(subquery), + alias: Some(alias), + } + } + fn gen_selection(&mut self) -> Option { match self.rng.gen_range(0..=9) { 0..=5 => Some(self.gen_expr(&DataType::Boolean)), @@ -545,6 +669,25 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } } + fn gen_subquery_table(&mut self, schema: TableSchemaRef) -> (Table, TableAlias) { + let name: String = (0..4) + .map(|_| self.rng.sample(Alphanumeric) as char) + .collect(); + let table_name = format!("t{}", name); + let mut columns = Vec::with_capacity(schema.num_fields()); + for field in schema.fields() { + let column = Identifier::from_name(field.name.clone()); + columns.push(column); + } + let alias = TableAlias { + name: Identifier::from_name(table_name.clone()), + columns, + }; + let table = Table::new(table_name, schema); + + (table, alias) + } + fn bound_table(&mut self, table: Table) { for (i, field) in table.schema.fields().iter().enumerate() { let column = Column { diff --git a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs index cc65518ee091..cfc50082fbde 100644 --- a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs +++ b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs @@ -41,6 +41,7 @@ pub(crate) struct Column { pub(crate) struct SqlGenerator<'a, R: Rng> { pub(crate) tables: Vec, + pub(crate) cte_tables: Vec
<Table>, pub(crate) bound_tables: Vec<Table>
, pub(crate) bound_columns: Vec, pub(crate) is_join: bool, @@ -54,6 +55,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { let mut scalar_func_sigs = Vec::new(); for (name, func_list) in BUILTIN_FUNCTIONS.funcs.iter() { // Ignore unsupported binary functions, avoid parse binary operator failure + // Ignore ai functions, avoid timeouts on http calls if name == "div" || name == "and" || name == "or" @@ -61,6 +63,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { || name == "like" || name == "regexp" || name == "rlike" + || name == "ai_embedding_vector" + || name == "ai_text_completion" { continue; } @@ -71,6 +75,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { SqlGenerator { tables: vec![], + cte_tables: vec![], bound_tables: vec![], bound_columns: vec![], is_join: false, From fa7be804837d6c2f55033d5154f277563120655d Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 21 Sep 2023 18:17:50 +0800 Subject: [PATCH 13/18] chore: move parquet_rs uts to `databend-query::it` (#12954) * refact: move parquet_rs uts to `databend-query::it` * fix: oops, remove dev-dependency `databend-query` from `common-storage-parquet` * clean up --- Cargo.lock | 2 +- src/query/service/Cargo.toml | 1 + src/query/service/tests/it/main.rs | 1 + .../tests/it/parquet_rs/data.rs | 0 .../tests/it/parquet_rs/mod.rs | 0 .../tests/it/parquet_rs/prune_pages.rs | 0 .../tests/it/parquet_rs/prune_row_groups.rs | 0 .../tests/it/parquet_rs/utils.rs | 0 src/query/storages/parquet/Cargo.toml | 1 - src/query/storages/parquet/tests/it/main.rs | 15 --------------- 10 files changed, 3 insertions(+), 17 deletions(-) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/data.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/mod.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/prune_pages.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/prune_row_groups.rs (100%) rename src/query/{storages/parquet => service}/tests/it/parquet_rs/utils.rs (100%) delete mode 100644 src/query/storages/parquet/tests/it/main.rs diff --git a/Cargo.lock b/Cargo.lock index 521ff607751d..d78802ff087b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2920,7 +2920,6 @@ dependencies = [ "common-pipeline-sources", "common-sql", "common-storage", - "databend-query", "ethnum", "futures", "log", @@ -3890,6 +3889,7 @@ dependencies = [ "ordered-float 3.7.0", "p256 0.13.0", "parking_lot 0.12.1", + "parquet", "paste", "petgraph", "pin-project-lite", diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 7d69c3cb242d..a60e0c79653c 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -141,6 +141,7 @@ once_cell = "1.15.0" opendal = { workspace = true } opensrv-mysql = { version = "0.4.1", features = ["tls"] } parking_lot = "0.12.1" +parquet = { version = "46.0.0", features = ["async"] } paste = "1.0.9" petgraph = "0.6.2" pin-project-lite = "0.2.9" diff --git a/src/query/service/tests/it/main.rs b/src/query/service/tests/it/main.rs index 1bf22b18d2ac..30f478e714a4 100644 --- a/src/query/service/tests/it/main.rs +++ b/src/query/service/tests/it/main.rs @@ -28,6 +28,7 @@ mod databases; mod frame; mod interpreters; mod metrics; +mod parquet_rs; mod pipelines; mod servers; mod sessions; diff --git a/src/query/storages/parquet/tests/it/parquet_rs/data.rs b/src/query/service/tests/it/parquet_rs/data.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/data.rs rename to src/query/service/tests/it/parquet_rs/data.rs diff --git 
a/src/query/storages/parquet/tests/it/parquet_rs/mod.rs b/src/query/service/tests/it/parquet_rs/mod.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/mod.rs rename to src/query/service/tests/it/parquet_rs/mod.rs diff --git a/src/query/storages/parquet/tests/it/parquet_rs/prune_pages.rs b/src/query/service/tests/it/parquet_rs/prune_pages.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/prune_pages.rs rename to src/query/service/tests/it/parquet_rs/prune_pages.rs diff --git a/src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/prune_row_groups.rs rename to src/query/service/tests/it/parquet_rs/prune_row_groups.rs diff --git a/src/query/storages/parquet/tests/it/parquet_rs/utils.rs b/src/query/service/tests/it/parquet_rs/utils.rs similarity index 100% rename from src/query/storages/parquet/tests/it/parquet_rs/utils.rs rename to src/query/service/tests/it/parquet_rs/utils.rs diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml index 9c46e8d4c2d6..803026960075 100644 --- a/src/query/storages/parquet/Cargo.toml +++ b/src/query/storages/parquet/Cargo.toml @@ -44,5 +44,4 @@ typetag = "0.2.3" [dev-dependencies] common-sql = { path = "../../sql" } -databend-query = { path = "../../service" } tempfile = "3.4.0" diff --git a/src/query/storages/parquet/tests/it/main.rs b/src/query/storages/parquet/tests/it/main.rs deleted file mode 100644 index bff09cbf2b3c..000000000000 --- a/src/query/storages/parquet/tests/it/main.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -mod parquet_rs; From fdbbcea80657f55503bc81e34dd9d5462198d564 Mon Sep 17 00:00:00 2001 From: soyeric128 Date: Thu, 21 Sep 2023 18:18:13 +0800 Subject: [PATCH 14/18] docs: udf (#12938) * added * added * added * Update ddl-create-function.md * format --- .../00-ddl/50-udf/_category_.json | 6 +- .../00-ddl/50-udf/ddl-alter-function.md | 27 ++-- .../00-ddl/50-udf/ddl-create-function.md | 118 ++++++++++++++++- .../00-ddl/50-udf/ddl-drop-function.md | 6 +- .../14-sql-commands/00-ddl/50-udf/index.md | 125 ++++++++++++++++++ 5 files changed, 262 insertions(+), 20 deletions(-) create mode 100644 docs/doc/14-sql-commands/00-ddl/50-udf/index.md diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json b/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json index fccc400f3f6b..0b9b999caf48 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/_category_.json @@ -1,7 +1,3 @@ { - "label": "User-Defined Function", - "link": { - "type": "generated-index", - "slug": "/sql-commands/ddl/udf" - } + "label": "User-Defined Function" } \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md index 1a727c2b02d2..a196a7689aa9 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-alter-function.md @@ -3,22 +3,33 @@ title: ALTER FUNCTION description: Modifies the properties for an existing user-defined function. --- +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Alters a user-defined function. ## Syntax ```sql -CREATE FUNCTION AS ([ argname ]) -> '' +-- Alter UDF created with lambda expression +ALTER FUNCTION [IF NOT EXISTS] + AS () -> + [DESC=''] + +-- Alter UDF created with UDF server +ALTER FUNCTION [IF NOT EXISTS] + AS () RETURNS LANGUAGE + HANDLER = '' ADDRESS = '' + [DESC=''] ``` ## Examples ```sql +CREATE FUNCTION a_plus_3 AS (a) -> a+3+3; ALTER FUNCTION a_plus_3 AS (a) -> a+3; -SELECT a_plus_3(2); -+---------+ -| (2 + 3) | -+---------+ -| 5 | -+---------+ -``` +CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815'; +ALTER FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd_new' ADDRESS = 'http://0.0.0.0:8815'; +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md index 6e3d91a01412..303eff27943b 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-create-function.md @@ -3,20 +3,44 @@ title: CREATE FUNCTION description: Create a new user-defined scalar function. --- +import FunctionDescription from '@site/src/components/FunctionDescription'; + -## CREATE FUNCTION - -Creates a new UDF (user-defined function), the UDF can contain an SQL expression. +Creates a user-defined function. ## Syntax ```sql -CREATE FUNCTION [ IF NOT EXISTS ] AS ([ argname ]) -> '' +-- Create with lambda expression +CREATE FUNCTION [IF NOT EXISTS] + AS () -> + [DESC=''] + + +-- Create with UDF server +CREATE FUNCTION [IF NOT EXISTS] + AS () RETURNS LANGUAGE + HANDLER = '' ADDRESS = '' + [DESC=''] ``` +| Parameter | Description | +|-----------------------|---------------------------------------------------------------------------------------------------| +| `` | The name of the function. 
| +| `` | The lambda expression or code snippet defining the function's behavior. | +| `DESC=''` | Description of the UDF.| +| `<`| A list of input parameter names. Separated by comma.| +| `<`| A list of input parameter types. Separated by comma.| +| `` | The return type of the function. | +| `LANGUAGE` | Specifies the language used to write the function. Available values: `python`. | +| `HANDLER = ''` | Specifies the name of the function's handler. | +| `ADDRESS = ''` | Specifies the address of the UDF server. | + ## Examples +### Creating UDF with Lambda Expression + ```sql CREATE FUNCTION a_plus_3 AS (a) -> a+3; @@ -53,3 +77,89 @@ DROP FUNCTION get_v2; DROP TABLE json_table; ``` + +### Creating UDF with UDF Server (Python) + +This example demonstrates how to enable and configure a UDF server in Python: + +1. Enable UDF server support by adding the following parameters to the [query] section in the [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) configuration file. + +```toml title='databend-query.toml' +[query] +... +enable_udf_server = true +# List the allowed UDF server addresses, separating multiple addresses with commas. +# For example, ['http://0.0.0.0:8815', 'http://example.com'] +udf_server_allow_list = ['http://0.0.0.0:8815'] +... +``` + +2. Define your function. This code defines and runs a UDF server in Python, which exposes a custom function *gcd* for calculating the greatest common divisor of two integers and allows remote execution of this function: + +:::note +The SDK package is not yet available. Prior to its release, please download the 'udf.py' file from https://github.com/datafuselabs/databend/blob/main/tests/udf-server/udf.py and ensure it is saved in the same directory as this Python script. This step is essential for the code to function correctly. +::: + +```python title='udf_server.py' +from udf import * + +@udf( + input_types=["INT", "INT"], + result_type="INT", + skip_null=True, +) +def gcd(x: int, y: int) -> int: + while y != 0: + (x, y) = (y, x % y) + return x + +if __name__ == '__main__': + # create a UDF server listening at '0.0.0.0:8815' + server = UdfServer("0.0.0.0:8815") + # add defined functions + server.add_function(gcd) + # start the UDF server + server.serve() +``` + +`@udf` is a decorator used for defining UDFs in Databend, supporting the following parameters: + +| Parameter | Description | +|--------------|-----------------------------------------------------------------------------------------------------| +| input_types | A list of strings or Arrow data types that specify the input data types. | +| result_type | A string or an Arrow data type that specifies the return value type. | +| name | An optional string specifying the function name. If not provided, the original name will be used. | +| io_threads | Number of I/O threads used per data chunk for I/O bound functions. | +| skip_null | A boolean value specifying whether to skip NULL values. If set to True, NULL values will not be passed to the function, and the corresponding return value is set to NULL. Default is False. 
| + +This table illustrates the correspondence between Databend data types and their corresponding Python equivalents: + +| Databend Type | Python Type | +|-----------------------|-----------------------| +| BOOLEAN | bool | +| TINYINT (UNSIGNED) | int | +| SMALLINT (UNSIGNED) | int | +| INT (UNSIGNED) | int | +| BIGINT (UNSIGNED) | int | +| FLOAT | float | +| DOUBLE | float | +| DECIMAL | decimal.Decimal | +| DATE | datetime.date | +| TIMESTAMP | datetime.datetime | +| VARCHAR | str | +| VARIANT | any | +| MAP(K,V) | dict | +| ARRAY(T) | list[T] | +| TUPLE(T...) | tuple(T...) | + +3. Run the Python file to start the UDF server: + +```shell +python3 udf_server.py +``` + +4. Register the function *gcd* with the [CREATE FUNCTION](ddl-create-function.md) in Databend: + +```sql +CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815'ï¼› +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md index b93365d5f630..5650295b770d 100644 --- a/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/ddl-drop-function.md @@ -4,12 +4,12 @@ description: Drop an existing user-defined function. --- -Drop an existing user-defined function. +Drops a user-defined function. ## Syntax ```sql -DROP FUNCTION [IF EXISTS] +DROP FUNCTION [IF EXISTS] ``` ## Examples @@ -19,4 +19,4 @@ DROP FUNCTION a_plus_3; SELECT a_plus_3(2); ERROR 1105 (HY000): Code: 2602, Text = Unknown Function a_plus_3 (while in analyze select projection). -``` +``` \ No newline at end of file diff --git a/docs/doc/14-sql-commands/00-ddl/50-udf/index.md b/docs/doc/14-sql-commands/00-ddl/50-udf/index.md new file mode 100644 index 000000000000..27ceaed90510 --- /dev/null +++ b/docs/doc/14-sql-commands/00-ddl/50-udf/index.md @@ -0,0 +1,125 @@ +--- +title: User-Defined Function +--- +import IndexOverviewList from '@site/src/components/IndexOverviewList'; + +## What are UDFs? + +User-Defined Functions (UDFs) enable you to define their own custom operations to process data within Databend. They are typically written using lambda expressions or implemented via a UDF server with programming languages such as Python and are executed as part of Databend's query processing pipeline. Advantages of using UDFs include: + +- Customized Data Transformations: UDFs empower you to perform data transformations that may not be achievable through built-in Databend functions alone. This customization is particularly valuable for handling unique data formats or business logic. + +- Performance Optimization: UDFs provide the flexibility to define and fine-tune your own custom functions, enabling you to optimize data processing to meet precise performance requirements. This means you can tailor the code for maximum efficiency, ensuring that your data processing tasks run as efficiently as possible. + +- Code Reusability: UDFs can be reused across multiple queries, saving time and effort in coding and maintaining data processing logic. + +## Managing UDFs + +To manage UDFs in Databend, use the following commands: + + + +## Usage Examples + +This section demonstrates two UDF implementation methods within Databend: one by creating UDFs with lambda expressions and the other by utilizing UDF servers in conjunction with Python. For additional examples of defining UDFs in various programming languages, see [CREATE FUNCTION](ddl-create-function.md). 
+ +### UDF Implementation with Lambda Expression + +This example implements a UDF named *a_plus_3* using a lambda expression: + +```sql +CREATE FUNCTION a_plus_3 AS (a) -> a+3; + +SELECT a_plus_3(2); ++---------+ +| (2 + 3) | ++---------+ +| 5 | ++---------+ +``` + +### UDF Implementation via UDF Server + +This example demonstrates how to enable and configure a UDF server in Python: + +1. Enable UDF server support by adding the following parameters to the [query] section in the [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) configuration file. + +```toml title='databend-query.toml' +[query] +... +enable_udf_server = true +# List the allowed UDF server addresses, separating multiple addresses with commas. +# For example, ['http://0.0.0.0:8815', 'http://example.com'] +udf_server_allow_list = ['http://0.0.0.0:8815'] +... +``` + +2. Define your function. This code defines and runs a UDF server in Python, which exposes a custom function *gcd* for calculating the greatest common divisor of two integers and allows remote execution of this function: + +:::note +The SDK package is not yet available. Prior to its release, please download the 'udf.py' file from https://github.com/datafuselabs/databend/blob/main/tests/udf-server/udf.py and ensure it is saved in the same directory as this Python script. This step is essential for the code to function correctly. +::: + +```python title='udf_server.py' +from udf import * + +@udf( + input_types=["INT", "INT"], + result_type="INT", + skip_null=True, +) +def gcd(x: int, y: int) -> int: + while y != 0: + (x, y) = (y, x % y) + return x + +if __name__ == '__main__': + # create a UDF server listening at '0.0.0.0:8815' + server = UdfServer("0.0.0.0:8815") + # add defined functions + server.add_function(gcd) + # start the UDF server + server.serve() +``` + +`@udf` is a decorator used for defining UDFs in Databend, supporting the following parameters: + +| Parameter | Description | +|--------------|-----------------------------------------------------------------------------------------------------| +| input_types | A list of strings or Arrow data types that specify the input data types. | +| result_type | A string or an Arrow data type that specifies the return value type. | +| name | An optional string specifying the function name. If not provided, the original name will be used. | +| io_threads | Number of I/O threads used per data chunk for I/O bound functions. | +| skip_null | A boolean value specifying whether to skip NULL values. If set to True, NULL values will not be passed to the function, and the corresponding return value is set to NULL. Default is False. | + +This table illustrates the correspondence between Databend data types and their corresponding Python equivalents: + +| Databend Type | Python Type | +|-----------------------|-----------------------| +| BOOLEAN | bool | +| TINYINT (UNSIGNED) | int | +| SMALLINT (UNSIGNED) | int | +| INT (UNSIGNED) | int | +| BIGINT (UNSIGNED) | int | +| FLOAT | float | +| DOUBLE | float | +| DECIMAL | decimal.Decimal | +| DATE | datetime.date | +| TIMESTAMP | datetime.datetime | +| VARCHAR | str | +| VARIANT | any | +| MAP(K,V) | dict | +| ARRAY(T) | list[T] | +| TUPLE(T...) | tuple(T...) | + +3. Run the Python file to start the UDF server: + +```shell +python3 udf_server.py +``` + +4. 
Register the function *gcd* with the [CREATE FUNCTION](ddl-create-function.md) in Databend: + +```sql +CREATE FUNCTION gcd (INT, INT) RETURNS INT LANGUAGE python HANDLER = 'gcd' ADDRESS = 'http://0.0.0.0:8815'ï¼› +``` \ No newline at end of file From 510a6dae02af3c8f5571c0186f66a8c695a7517a Mon Sep 17 00:00:00 2001 From: JackTan25 <60096118+JackTan25@users.noreply.github.com> Date: Thu, 21 Sep 2023 18:23:13 +0800 Subject: [PATCH 15/18] feat: support star "*" for merge into (#12906) * add test first * support * and add more tests * fix clippy * fix check * fix check --- .../ast/src/ast/statements/merge_into.rs | 19 +- src/query/ast/src/parser/statement.rs | 85 ++++-- .../interpreters/interpreter_merge_into.rs | 5 +- .../sql/src/planner/binder/merge_into.rs | 256 +++++++++++++----- .../processor_merge_into_not_matched.rs | 3 +- .../base/09_fuse_engine/09_0026_merge_into | 56 ++++ 6 files changed, 327 insertions(+), 97 deletions(-) diff --git a/src/query/ast/src/ast/statements/merge_into.rs b/src/query/ast/src/ast/statements/merge_into.rs index 83291fa1b8ea..caa273e4ebe2 100644 --- a/src/query/ast/src/ast/statements/merge_into.rs +++ b/src/query/ast/src/ast/statements/merge_into.rs @@ -52,7 +52,10 @@ impl Display for MergeUpdateExpr { #[derive(Debug, Clone, PartialEq)] pub enum MatchOperation { - Update { update_list: Vec }, + Update { + update_list: Vec, + is_star: bool, + }, Delete, } @@ -66,6 +69,7 @@ pub struct MatchedClause { pub struct InsertOperation { pub columns: Option>, pub values: Vec, + pub is_star: bool, } #[derive(Debug, Clone, PartialEq)] @@ -116,9 +120,16 @@ impl Display for MergeIntoStmt { write!(f, " THEN ")?; match &match_clause.operation { - MatchOperation::Update { update_list } => { - write!(f, " UPDATE SET ")?; - write_comma_separated_list(f, update_list)?; + MatchOperation::Update { + update_list, + is_star, + } => { + if *is_star { + write!(f, " UPDATE * ")?; + } else { + write!(f, " UPDATE SET ")?; + write_comma_separated_list(f, update_list)?; + } } MatchOperation::Delete => { write!(f, " DELETE ")?; diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index fd6c453694dd..c3bc3e1f4856 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -2267,40 +2267,75 @@ fn match_operation(i: Input) -> IResult { rule! { UPDATE ~ SET ~ ^#comma_separated_list1(merge_update_expr) }, - |(_, _, update_list)| MatchOperation::Update { update_list }, + |(_, _, update_list)| MatchOperation::Update { + update_list, + is_star: false, + }, + ), + map( + rule! { + UPDATE ~ "*" + }, + |(_, _)| MatchOperation::Update { + update_list: Vec::new(), + is_star: true, + }, ), ))(i) } pub fn unmatch_clause(i: Input) -> IResult { - map( - rule! { - WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ ( "(" ~ ^#comma_separated_list1(ident) ~ ^")" )? - ~ VALUES ~ ^#row_values - }, - |(_, _, _, expr_op, _, _, columns_op, _, values)| { - let selection = match expr_op { - Some(e) => Some(e.1), - None => None, - }; - match columns_op { - Some(columns) => MergeOption::Unmatch(UnmatchedClause { - insert_operation: InsertOperation { - columns: Some(columns.1), - values, - }, - selection, - }), - None => MergeOption::Unmatch(UnmatchedClause { + alt(( + map( + rule! { + WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ ( "(" ~ ^#comma_separated_list1(ident) ~ ^")" )? 
+ ~ VALUES ~ ^#row_values + }, + |(_, _, _, expr_op, _, _, columns_op, _, values)| { + let selection = match expr_op { + Some(e) => Some(e.1), + None => None, + }; + match columns_op { + Some(columns) => MergeOption::Unmatch(UnmatchedClause { + insert_operation: InsertOperation { + columns: Some(columns.1), + values, + is_star: false, + }, + selection, + }), + None => MergeOption::Unmatch(UnmatchedClause { + insert_operation: InsertOperation { + columns: None, + values, + is_star: false, + }, + selection, + }), + } + }, + ), + map( + rule! { + WHEN ~ NOT ~ MATCHED ~ (AND ~ ^#expr)? ~ THEN ~ INSERT ~ "*" + }, + |(_, _, _, expr_op, _, _, _)| { + let selection = match expr_op { + Some(e) => Some(e.1), + None => None, + }; + MergeOption::Unmatch(UnmatchedClause { insert_operation: InsertOperation { columns: None, - values, + values: Vec::new(), + is_star: true, }, selection, - }), - } - }, - )(i) + }) + }, + ), + ))(i) } pub fn add_column_option(i: Input) -> IResult { diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs index b3590091cd62..d9f2142f5fc2 100644 --- a/src/query/service/src/interpreters/interpreter_merge_into.rs +++ b/src/query/service/src/interpreters/interpreter_merge_into.rs @@ -179,6 +179,7 @@ impl MergeIntoInterpreter { } else { None }; + let mut values_exprs = Vec::::with_capacity(item.values.len()); for scalar_expr in &item.values { @@ -208,6 +209,7 @@ impl MergeIntoInterpreter { } else { None }; + // update let update_list = if let Some(update_list) = &item.update { // use update_plan to get exprs @@ -224,7 +226,7 @@ impl MergeIntoInterpreter { let col_indices = if item.condition.is_none() { vec![] } else { - // we don't need to real col_indices here, just give a + // we don't need real col_indices here, just give a // dummy index, that's ok. vec![DUMMY_COL_INDEX] }; @@ -235,7 +237,6 @@ impl MergeIntoInterpreter { col_indices, Some(join_output_schema.num_fields()), )?; - let update_list = update_list .iter() .map(|(idx, remote_expr)| { diff --git a/src/query/sql/src/planner/binder/merge_into.rs b/src/query/sql/src/planner/binder/merge_into.rs index b5f967d65c83..987192f7c266 100644 --- a/src/query/sql/src/planner/binder/merge_into.rs +++ b/src/query/sql/src/planner/binder/merge_into.rs @@ -29,6 +29,7 @@ use common_catalog::plan::InternalColumnType; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; +use common_expression::FieldIndex; use common_expression::TableSchemaRef; use common_expression::ROW_ID_COL_NAME; use indexmap::IndexMap; @@ -38,15 +39,18 @@ use crate::binder::Binder; use crate::binder::InternalColumnBinding; use crate::normalize_identifier; use crate::optimizer::SExpr; +use crate::plans::BoundColumnRef; use crate::plans::MatchedEvaluator; use crate::plans::MergeInto; use crate::plans::Plan; use crate::plans::UnmatchedEvaluator; use crate::BindContext; +use crate::ColumnBindingBuilder; use crate::ColumnEntry; use crate::IndexType; use crate::ScalarBinder; use crate::ScalarExpr; +use crate::Visibility; // implementation of merge into for now: // use an left outer join for target_source and source. 
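// the star shorthand bound below looks like (tables as in the new sqllogictest):
//   merge into t1 using (select * from t2 as t2) on t1.a = t2.a
//   when matched then update * when not matched then insert *;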
@@ -124,8 +128,66 @@ impl Binder { .await?; // add all left source columns for read + // todo: (JackTan25) do column prune after finish "split expr for target and source" let mut columns_set = left_context.column_set(); + let update_columns_star = if self.has_star_clause(&matched_clauses, &unmatched_clauses) { + // when there are "update *"/"insert *", we need to get the index of correlated columns in source. + let default_target_table_schema = table.schema().remove_computed_fields(); + let mut update_columns = HashMap::with_capacity( + default_target_table_schema + .remove_computed_fields() + .num_fields(), + ); + let source_output_columns = &left_context.columns; + // we use Vec as the value, because if there could be duplicate names + let mut name_map = HashMap::>::new(); + for column in source_output_columns { + name_map + .entry(column.column_name.clone()) + .or_insert_with(|| vec![]) + .push(column.index); + } + + for (field_idx, field) in default_target_table_schema.fields.iter().enumerate() { + let index = match name_map.get(field.name()) { + None => { + return Err(ErrorCode::SemanticError( + format!("can't find {} in source output", field.name).to_string(), + )); + } + Some(indices) => { + if indices.len() != 1 { + return Err(ErrorCode::SemanticError( + format!( + "there should be only one {} in source output,but we get {}", + field.name, + indices.len() + ) + .to_string(), + )); + } else { + indices[0] + } + } + }; + let column = ColumnBindingBuilder::new( + field.name.to_string(), + index, + Box::new(field.data_type().into()), + Visibility::Visible, + ) + .build(); + let col = ScalarExpr::BoundColumnRef(BoundColumnRef { span: None, column }); + update_columns.insert(field_idx, col); + } + Some(update_columns) + } else { + None + }; + + // Todo: (JackTan25) Maybe we can remove bind target_table + // when the target table has been binded in bind_merge_into_source // bind table for target table let (mut target_expr, mut right_context) = self .bind_single_table(&mut left_context, &target_table) @@ -193,6 +255,7 @@ impl Binder { .union(&scalar_binder.bind(join_expr).await?.0.used_columns()) .cloned() .collect(); + let column_entries = self.metadata.read().columns_by_table_index(table_index); let mut field_index_map = HashMap::::new(); // if true, read all columns of target table @@ -204,6 +267,7 @@ impl Binder { field_index_map.insert(idx, used_idx.to_string()); } } + // bind matched clause columns and add update fields and exprs for clause in &matched_clauses { matched_evaluators.push( @@ -212,6 +276,7 @@ impl Binder { clause, &mut columns_set, table_schema.clone(), + update_columns_star.clone(), ) .await?, ); @@ -225,6 +290,7 @@ impl Binder { clause, &mut columns_set, table_schema.clone(), + update_columns_star.clone(), ) .await?, ); @@ -252,6 +318,7 @@ impl Binder { clause: &MatchedClause, columns: &mut HashSet, schema: TableSchemaRef, + update_columns_star: Option>, ) -> Result { let condition = if let Some(expr) = &clause.selection { let (scalar_expr, _) = scalar_binder.bind(expr).await?; @@ -263,42 +330,54 @@ impl Binder { None }; - if let MatchOperation::Update { update_list } = &clause.operation { - let mut update_columns = HashMap::with_capacity(update_list.len()); - for update_expr in update_list { - let (scalar_expr, _) = scalar_binder.bind(&update_expr.expr).await?; - let col_name = - normalize_identifier(&update_expr.name, &self.name_resolution_ctx).name; - let index = schema.index_of(&col_name)?; - - if update_columns.contains_key(&index) { - return 
Err(ErrorCode::BadArguments(format!( - "Multiple assignments in the single statement to column `{}`", - col_name - ))); - } - - let field = schema.field(index); - if field.computed_expr().is_some() { - return Err(ErrorCode::BadArguments(format!( - "The value specified for computed column '{}' is not allowed", - field.name() - ))); + if let MatchOperation::Update { + update_list, + is_star, + } = &clause.operation + { + if *is_star { + Ok(MatchedEvaluator { + condition, + update: update_columns_star, + }) + } else { + let mut update_columns = HashMap::with_capacity(update_list.len()); + for update_expr in update_list { + let (scalar_expr, _) = scalar_binder.bind(&update_expr.expr).await?; + let col_name = + normalize_identifier(&update_expr.name, &self.name_resolution_ctx).name; + let index = schema.index_of(&col_name)?; + + if update_columns.contains_key(&index) { + return Err(ErrorCode::BadArguments(format!( + "Multiple assignments in the single statement to column `{}`", + col_name + ))); + } + + let field = schema.field(index); + if field.computed_expr().is_some() { + return Err(ErrorCode::BadArguments(format!( + "The value specified for computed column '{}' is not allowed", + field.name() + ))); + } + + if matches!(scalar_expr, ScalarExpr::SubqueryExpr(_)) { + return Err(ErrorCode::Internal( + "update_list in update clause does not support subquery temporarily", + )); + } + update_columns.insert(index, scalar_expr.clone()); } - if matches!(scalar_expr, ScalarExpr::SubqueryExpr(_)) { - return Err(ErrorCode::Internal( - "update_list in update clause does not support subquery temporarily", - )); - } - update_columns.insert(index, scalar_expr.clone()); + Ok(MatchedEvaluator { + condition, + update: Some(update_columns), + }) } - - Ok(MatchedEvaluator { - condition, - update: Some(update_columns), - }) } else { + // delete Ok(MatchedEvaluator { condition, update: None, @@ -312,6 +391,7 @@ impl Binder { clause: &UnmatchedClause, columns: &mut HashSet, table_schema: TableSchemaRef, + update_columns_star: Option>, ) -> Result { let condition = if let Some(expr) = &clause.selection { let (scalar_expr, _) = scalar_binder.bind(expr).await?; @@ -322,42 +402,59 @@ impl Binder { } else { None }; - - if clause.insert_operation.values.is_empty() { - return Err(ErrorCode::SemanticError( - "Values lists must have at least one row".to_string(), - )); - } - - let mut values = Vec::with_capacity(clause.insert_operation.values.len()); - - // we need to get source schema, and use it for filling columns. - let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() { - self.schema_project(&table_schema, &fields)? 
+ if clause.insert_operation.is_star { + let default_schema = table_schema.remove_computed_fields(); + let mut values = Vec::with_capacity(default_schema.num_fields()); + let update_columns_star = update_columns_star.unwrap(); + for idx in 0..default_schema.num_fields() { + values.push(update_columns_star.get(&idx).unwrap().clone()); + } + Ok(UnmatchedEvaluator { + source_schema: Arc::new(Arc::new(default_schema).into()), + condition, + values, + }) } else { - table_schema.clone() - }; - - for (idx, expr) in clause.insert_operation.values.iter().enumerate() { - let (mut scalar_expr, _) = scalar_binder.bind(expr).await?; - // type cast - scalar_expr = wrap_cast_scalar( - &scalar_expr, - &scalar_expr.data_type()?, - &DataType::from(source_schema.field(idx).data_type()), - )?; + if clause.insert_operation.values.is_empty() { + return Err(ErrorCode::SemanticError( + "Values lists must have at least one row".to_string(), + )); + } - values.push(scalar_expr.clone()); - for idx in scalar_expr.used_columns() { - columns.insert(idx); + let mut values = Vec::with_capacity(clause.insert_operation.values.len()); + + // we need to get source schema, and use it for filling columns. + let source_schema = if let Some(fields) = clause.insert_operation.columns.clone() { + self.schema_project(&table_schema, &fields)? + } else { + table_schema.clone() + }; + if clause.insert_operation.values.len() != source_schema.num_fields() { + return Err(ErrorCode::SemanticError( + "insert columns and values are not matched".to_string(), + )); + } + for (idx, expr) in clause.insert_operation.values.iter().enumerate() { + let (mut scalar_expr, _) = scalar_binder.bind(expr).await?; + // type cast + scalar_expr = wrap_cast_scalar( + &scalar_expr, + &scalar_expr.data_type()?, + &DataType::from(source_schema.field(idx).data_type()), + )?; + + values.push(scalar_expr.clone()); + for idx in scalar_expr.used_columns() { + columns.insert(idx); + } } - } - Ok(UnmatchedEvaluator { - source_schema: Arc::new(source_schema.into()), - condition, - values, - }) + Ok(UnmatchedEvaluator { + source_schema: Arc::new(source_schema.into()), + condition, + values, + }) + } } fn find_column_index( @@ -378,7 +475,36 @@ impl Binder { fn has_update(&self, matched_clauses: &Vec) -> bool { for clause in matched_clauses { - if let MatchOperation::Update { update_list: _ } = clause.operation { + if let MatchOperation::Update { + update_list: _, + is_star: _, + } = clause.operation + { + return true; + } + } + false + } + + fn has_star_clause( + &self, + matched_clauses: &Vec, + unmatched_clauses: &Vec, + ) -> bool { + for item in matched_clauses { + if let MatchOperation::Update { + update_list: _, + is_star, + } = item.operation + { + if is_star { + return true; + } + } + } + + for item in unmatched_clauses { + if item.insert_operation.is_star { return true; } } diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs index b316fb4a64f1..c8e4d55cb095 100644 --- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs +++ b/src/query/storages/fuse/src/operations/merge_into/processors/processor_merge_into_not_matched.rs @@ -34,7 +34,7 @@ use common_storage::metrics::merge_into::metrics_inc_merge_into_append_blocks_co use itertools::Itertools; use crate::operations::merge_into::mutator::SplitByExprMutator; - +// (source_schema,condition,values_exprs) type 
UnMatchedExprs = Vec<(DataSchemaRef, Option, Vec)>; struct InsertDataBlockMutation { @@ -65,6 +65,7 @@ impl MergeIntoNotMatchedProcessor { for (idx, item) in unmatched.iter().enumerate() { let eval_projections: HashSet = (input_schema.num_fields()..input_schema.num_fields() + item.2.len()).collect(); + println!("data_schema: {:?}", item.0.clone()); data_schemas.insert(idx, item.0.clone()); ops.push(InsertDataBlockMutation { op: BlockOperator::Map { diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into index 0381d8a835b9..d42d90d7ae16 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into @@ -191,5 +191,61 @@ select * from t1 order by a,b,c; ---- 1 b1 c_5 +## test star for merge into +statement ok +truncate table t1; + +statement ok +truncate table t2; + +query I +select count(*) from t1; +---- +0 + +query I +select count(*) from t2; +---- +0 + +statement ok +insert into t1 values(1,'b1','c1'),(2,'b2','c2'); + +query TTT +select * from t1 order by a,b,c; +---- +1 b1 c1 +2 b2 c2 + +statement ok +insert into t2 values(1,'b3','c3'),(3,'b4','c4'); + +query TTT +select * from t2 order by a,b,c; +---- +1 b3 c3 +3 b4 c4 + +## test insert columns mismatch +statement error 1065 +merge into t1 using (select * from t2 as t2) on t1.a = t2.a when not matched then insert values(t2.a,t2.c); + +statement ok +merge into t1 using (select * from t2 as t2) on t1.a = t2.a when matched then update * when not matched then insert *; + +query TTT +select * from t1 order by a,b,c; +---- +1 b3 c3 +2 b2 c2 +3 b4 c4 + +## test multi same name for star +statement error 1065 +merge into t1 using (select a,b,c,a from t2 as t2) on t1.a = t2.a when matched then update *; + +statement error 1065 +merge into t1 using (select a,b,c,a,b from t2 as t2) on t1.a = t2.a when not matched then insert *; + statement ok set enable_experimental_merge_into = 0; \ No newline at end of file From 80d9e783b40a408aacea65a86288705397dc90e1 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Thu, 21 Sep 2023 23:20:50 +0800 Subject: [PATCH 16/18] chore(sqlsmith): support select stmt having && modify some err msg (#12959) * chore(sqlsmith): support select stmt having && modify some err msg * fix conversation * sqlsmith support generate window_list * fix ci err --- src/query/expression/src/type_check.rs | 7 +- .../sql/src/planner/semantic/type_check.rs | 93 +++++++++++++------ src/tests/sqlsmith/src/sql_gen/func.rs | 26 +++++- src/tests/sqlsmith/src/sql_gen/query.rs | 37 ++++++-- .../sqlsmith/src/sql_gen/sql_generator.rs | 2 + .../02_0000_function_aggregate_mix | 2 +- 6 files changed, 126 insertions(+), 41 deletions(-) diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 412c0284fa4f..9c1ee5830be4 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -282,12 +282,7 @@ pub fn check_number( ErrorCode::InvalidArgument(format!("Expect {}, but got {}", T::data_type(), origin_ty)) .set_span(span) }), - _ => Err(ErrorCode::InvalidArgument(format!( - "Expect {}, but got {}", - T::data_type(), - origin_ty - )) - .set_span(span)), + _ => Err(ErrorCode::InvalidArgument("Need constant number.").set_span(span)), } } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 
187400e9d297..f248b154e0ad 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -54,6 +54,7 @@ use common_expression::types::NumberDataType; use common_expression::types::NumberScalar; use common_expression::ColumnIndex; use common_expression::ConstantFolder; +use common_expression::Expr as EExpr; use common_expression::FunctionContext; use common_expression::FunctionKind; use common_expression::RawExpr; @@ -1410,19 +1411,28 @@ impl<'a> TypeChecker<'a> { arg_types: &[DataType], ) -> Result { if args.is_empty() || args.len() > 3 { - return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), - )); + return Err(ErrorCode::InvalidArgument(format!( + "Function {:?} only support 1 to 3 arguments", + func_name + ))); } let offset = if args.len() >= 2 { let off = args[1].as_expr()?; - Some(check_number::<_, i64>( - off.span(), - &self.func_ctx, - &off, - &BUILTIN_FUNCTIONS, - )?) + match off { + EExpr::Constant { .. } => Some(check_number::<_, i64>( + off.span(), + &self.func_ctx, + &off, + &BUILTIN_FUNCTIONS, + )?), + _ => { + return Err(ErrorCode::InvalidArgument(format!( + "The second argument to the function {:?} must be a constant", + func_name + ))); + } + } } else { None }; @@ -1473,9 +1483,10 @@ impl<'a> TypeChecker<'a> { Ok(match func_name { "first_value" | "first" => { if args.len() != 1 { - return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), - )); + return Err(ErrorCode::InvalidArgument(format!( + "The function {:?} must take one argument", + func_name + ))); } let return_type = arg_types[0].wrap_nullable(); WindowFuncType::NthValue(NthValueFunction { @@ -1486,9 +1497,10 @@ impl<'a> TypeChecker<'a> { } "last_value" | "last" => { if args.len() != 1 { - return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), - )); + return Err(ErrorCode::InvalidArgument(format!( + "The function {:?} must take one argument", + func_name + ))); } let return_type = arg_types[0].wrap_nullable(); WindowFuncType::NthValue(NthValueFunction { @@ -1501,17 +1513,24 @@ impl<'a> TypeChecker<'a> { // nth_value if args.len() != 2 { return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), + "The function nth_value must take two arguments".to_string(), )); } let return_type = arg_types[0].wrap_nullable(); let n_expr = args[1].as_expr()?; - let n = check_number::<_, u64>( - n_expr.span(), - &self.func_ctx, - &n_expr, - &BUILTIN_FUNCTIONS, - )?; + let n = match n_expr { + EExpr::Constant { .. } => check_number::<_, u64>( + n_expr.span(), + &self.func_ctx, + &n_expr, + &BUILTIN_FUNCTIONS, + )?, + _ => { + return Err(ErrorCode::InvalidArgument( + "The count of `nth_value` must be constant positive integer", + )); + } + }; if n == 0 { return Err(ErrorCode::InvalidArgument( "nth_value should count from 1".to_string(), @@ -1534,12 +1553,21 @@ impl<'a> TypeChecker<'a> { ) -> Result { if args.len() != 1 { return Err(ErrorCode::InvalidArgument( - "Argument number is invalid".to_string(), + "Function ntile can only take one argument".to_string(), )); } let n_expr = args[0].as_expr()?; let return_type = DataType::Number(NumberDataType::UInt64); - let n = check_number::<_, u64>(n_expr.span(), &self.func_ctx, &n_expr, &BUILTIN_FUNCTIONS)?; + let n = match n_expr { + EExpr::Constant { .. } => { + check_number::<_, u64>(n_expr.span(), &self.func_ctx, &n_expr, &BUILTIN_FUNCTIONS)? 
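+                // only a bind-time constant can be folded into the bucket count here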
+ } + _ => { + return Err(ErrorCode::InvalidArgument( + "The argument of `ntile` must be constant".to_string(), + )); + } + }; if n == 0 { return Err(ErrorCode::InvalidArgument( "ntile buckets must be greater than 0".to_string(), @@ -1981,7 +2009,7 @@ impl<'a> TypeChecker<'a> { ) .await } - _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, month, day, hour, minute, second]".to_string()).set_span(span)), + _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, quarter, month, day, hour, minute, second]".to_string()).set_span(span)), } } @@ -2247,7 +2275,20 @@ impl<'a> TypeChecker<'a> { let box (scalar, _) = self.resolve(args[0]).await?; let expr = scalar.as_expr()?; - check_number::<_, i64>(span, &self.func_ctx, &expr, &BUILTIN_FUNCTIONS)? + match expr { + EExpr::Constant { .. } => check_number::<_, i64>( + span, + &self.func_ctx, + &expr, + &BUILTIN_FUNCTIONS, + )?, + _ => { + return Some(Err(ErrorCode::BadArguments( + "last_query_id argument only support constant", + ) + .set_span(span))); + } + } } }; diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs index 22209fb959cf..76b8f6fcaf61 100644 --- a/src/tests/sqlsmith/src/sql_gen/func.rs +++ b/src/tests/sqlsmith/src/sql_gen/func.rs @@ -21,6 +21,7 @@ use common_ast::ast::Window; use common_ast::ast::WindowFrame; use common_ast::ast::WindowFrameBound; use common_ast::ast::WindowFrameUnits; +use common_ast::ast::WindowRef; use common_ast::ast::WindowSpec; use common_expression::types::DataType; use common_expression::types::DecimalDataType::Decimal128; @@ -615,6 +616,27 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_window(&mut self) -> Option { + if self.rng.gen_bool(0.2) && !self.windows_name.is_empty() { + let len = self.windows_name.len(); + let name = if len == 1 { + self.windows_name[0].to_string() + } else { + self.windows_name[self.rng.gen_range(0..=len - 1)].to_string() + }; + Some(Window::WindowReference(WindowRef { + window_name: Identifier { + name, + quote: None, + span: None, + }, + })) + } else { + let window_spec = self.gen_window_spec(); + Some(Window::WindowSpec(window_spec)) + } + } + + pub(crate) fn gen_window_spec(&mut self) -> WindowSpec { let ty = self.gen_data_type(); let expr1 = self.gen_scalar_value(&ty); let expr2 = self.gen_scalar_value(&ty); @@ -633,7 +655,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { nulls_first: Some(true), }, ]; - Some(Window::WindowSpec(WindowSpec { + WindowSpec { existing_window_name: None, partition_by: vec![expr3, expr4], order_by, @@ -646,7 +668,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { end_bound: WindowFrameBound::CurrentRow, }) }, - })) + } } fn gen_func( diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index cd3d652eb7cc..20b61ecd2572 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -28,6 +28,7 @@ use common_ast::ast::SelectTarget; use common_ast::ast::SetExpr; use common_ast::ast::TableAlias; use common_ast::ast::TableReference; +use common_ast::ast::WindowDefinition; use common_ast::ast::With; use common_ast::ast::CTE; use common_expression::infer_schema_type; @@ -289,28 +290,52 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { } fn gen_select(&mut self) -> SelectStmt { + self.windows_name.clear(); let from = self.gen_from(); let group_by = self.gen_group_by(); self.group_by = group_by.clone(); + let window_list = self.gen_window_list(); + if let Some(window_list) = 
window_list { + for window in window_list { + self.windows_name.push(window.name.name) + } + } let select_list = self.gen_select_list(&group_by); let selection = self.gen_selection(); SelectStmt { span: None, // TODO hints: None, - // TODO - distinct: false, + distinct: self.rng.gen_bool(0.7), select_list, from, selection, group_by, - // TODO - having: None, - // TODO - window_list: None, + having: self.gen_selection(), + window_list: self.gen_window_list(), } } + fn gen_window_list(&mut self) -> Option> { + if self.rng.gen_bool(0.1) { + let mut res = vec![]; + for _ in 0..self.rng.gen_range(1..3) { + let name: String = (0..4) + .map(|_| self.rng.sample(Alphanumeric) as char) + .collect(); + let window_name = format!("w_{}", name); + let spec = self.gen_window_spec(); + let window_def = WindowDefinition { + name: Identifier::from_name(window_name), + spec, + }; + res.push(window_def); + } + return Some(res); + } + None + } + fn gen_group_by(&mut self) -> Option { if self.rng.gen_bool(0.8) { return None; diff --git a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs index cfc50082fbde..b7a9b00001e8 100644 --- a/src/tests/sqlsmith/src/sql_gen/sql_generator.rs +++ b/src/tests/sqlsmith/src/sql_gen/sql_generator.rs @@ -48,6 +48,7 @@ pub(crate) struct SqlGenerator<'a, R: Rng> { pub(crate) scalar_func_sigs: Vec, pub(crate) rng: &'a mut R, pub(crate) group_by: Option, + pub(crate) windows_name: Vec, } impl<'a, R: Rng> SqlGenerator<'a, R> { @@ -82,6 +83,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { scalar_func_sigs, rng, group_by: None, + windows_name: vec![], } } } diff --git a/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix b/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix index 8cce74ab1dfb..62d74fdf39dd 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix +++ b/tests/sqllogictests/suites/query/02_function/02_0000_function_aggregate_mix @@ -323,7 +323,7 @@ select group_array_moving_avg(k), group_array_moving_avg(2)(v) from aggr; ---- [0.09090909090909091,0.2727272727272727,0.45454545454545453,0.6363636363636364,0.8181818181818182,1.0,1.1818181818181819,1.3636363636363635,1.5454545454545454,1.7272727272727273,1.9090909090909092] [5.0,10.0,10.0,10.0,15.0,20.0,22.5,27.5,30.0,30.0,30.0] -statement error Expect UInt64, but got String +statement error Need constant number SELECT group_array_moving_sum('x')(-1130932975.87767); query TTT From 2d61b2f6cee34977598e247226b9c4f7950334e3 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 21 Sep 2023 17:46:12 -0700 Subject: [PATCH 17/18] feat(query): support decimal to int (#12967) * feat(query): support decimal to int * feat(query): test --- src/query/expression/src/types/decimal.rs | 14 + src/query/functions/src/scalars/arithmetic.rs | 137 ++++----- src/query/functions/src/scalars/decimal.rs | 167 ++++++++++- .../it/scalars/testdata/function_list.txt | 278 ++++++++++-------- .../11_data_type/11_0006_data_type_decimal | 11 + 5 files changed, 415 insertions(+), 192 deletions(-) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 6e0375d65f5f..4c1f07904483 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -22,6 +22,7 @@ use common_exception::Result; use enum_as_inner::EnumAsInner; use ethnum::i256; use itertools::Itertools; +use num_traits::NumCast; use num_traits::ToPrimitive; use 
serde::Deserialize; use serde::Serialize; @@ -285,6 +286,8 @@ pub trait Decimal: fn to_float32(self, scale: u8) -> f32; fn to_float64(self, scale: u8) -> f64; + fn to_int(self, scale: u8) -> Option; + fn try_downcast_column(column: &Column) -> Option<(Buffer, DecimalSize)>; fn try_downcast_builder<'a>(builder: &'a mut ColumnBuilder) -> Option<&'a mut Vec>; @@ -414,6 +417,11 @@ impl Decimal for i128 { self as f64 / div } + fn to_int(self, scale: u8) -> Option { + let div = 10i128.checked_pow(scale as u32)?; + num_traits::cast(self / div) + } + fn to_scalar(self, size: DecimalSize) -> DecimalScalar { DecimalScalar::Decimal128(self, size) } @@ -563,6 +571,12 @@ impl Decimal for i256 { self.as_f64() / div } + fn to_int(self, scale: u8) -> Option { + let div = i256::from(10).checked_pow(scale as u32)?; + let (h, l) = (self / div).into_words(); + if h > 0 { None } else { l.to_int(scale) } + } + fn to_scalar(self, size: DecimalSize) -> DecimalScalar { DecimalScalar::Decimal256(self, size) } diff --git a/src/query/functions/src/scalars/arithmetic.rs b/src/query/functions/src/scalars/arithmetic.rs index 129b6f76c990..6238364b4881 100644 --- a/src/query/functions/src/scalars/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic.rs @@ -74,6 +74,7 @@ use num_traits::AsPrimitive; use super::arithmetic_modulo::vectorize_modulo; use super::decimal::register_decimal_to_float32; use super::decimal::register_decimal_to_float64; +use super::decimal::register_decimal_to_int; use crate::scalars::decimal::register_decimal_arithmetic; pub fn register(registry: &mut FunctionRegistry) { @@ -582,92 +583,96 @@ pub fn register_number_to_number(registry: &mut FunctionRegistry) { let name = format!("to_{dest_type}").to_lowercase(); if src_type.can_lossless_cast_to(*dest_type) { registry.register_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - debug_assert!(!overflowing); - FunctionDomain::Domain(domain) - }, - |val, _| { - val.as_() - }, - ); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + debug_assert!(!overflowing); + FunctionDomain::Domain(domain) + }, + |val, _| { + val.as_() + }, + ); } else { registry.register_passthrough_nullable_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - if overflowing { - FunctionDomain::MayThrow - } else { - FunctionDomain::Domain(domain) - } - }, - vectorize_with_builder_1_arg::, NumberType>( - move |val, output, ctx| { - match num_traits::cast::cast(val) { - Some(val) => output.push(val), - None => { - ctx.set_error(output.len(),"number overflowed"); - output.push(DEST_TYPE::default()); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + if overflowing { + FunctionDomain::MayThrow + } else { + FunctionDomain::Domain(domain) + } }, - } - } - ), - ); + vectorize_with_builder_1_arg::, NumberType>( + move |val, output, ctx| { + match num_traits::cast::cast(val) { + Some(val) => output.push(val), + None => { + ctx.set_error(output.len(),"number overflowed"); + output.push(DEST_TYPE::default()); + }, + } + } + ), + ); } let name = format!("try_to_{dest_type}").to_lowercase(); if src_type.can_lossless_cast_to(*dest_type) { registry.register_combine_nullable_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - debug_assert!(!overflowing); - FunctionDomain::Domain(NullableDomain { - has_null: false, - value: Some(Box::new( - domain, - )), - }) - }, - 
vectorize_1_arg::, NullableType>>(|val, _| { - Some(val.as_()) - }) - ); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + debug_assert!(!overflowing); + FunctionDomain::Domain(NullableDomain { + has_null: false, + value: Some(Box::new( + domain, + )), + }) + }, + vectorize_1_arg::, NullableType>>(|val, _| { + Some(val.as_()) + }) + ); } else { registry.register_combine_nullable_1_arg::, NumberType, _, _>( - &name, - |_, domain| { - let (domain, overflowing) = domain.overflow_cast(); - FunctionDomain::Domain(NullableDomain { - has_null: overflowing, - value: Some(Box::new( - domain, - )), - }) - }, - vectorize_with_builder_1_arg::, NullableType>>( - |val, output, _| { - if let Some(new_val) = num_traits::cast::cast(val) { - output.push(new_val); - } else { - output.push_null(); - } - } - ), - ); + &name, + |_, domain| { + let (domain, overflowing) = domain.overflow_cast(); + FunctionDomain::Domain(NullableDomain { + has_null: overflowing, + value: Some(Box::new( + domain, + )), + }) + }, + vectorize_with_builder_1_arg::, NullableType>>( + |val, output, _| { + if let Some(new_val) = num_traits::cast::cast(val) { + output.push(new_val); + } else { + output.push_null(); + } + } + ), + ); } } }), NumberClass::Decimal128 => { - // todo(youngsofun): add decimal try_cast and decimal to int + // todo(youngsofun): add decimal try_cast and decimal to int and float if matches!(dest_type, NumberDataType::Float32) { register_decimal_to_float32(registry); } if matches!(dest_type, NumberDataType::Float64) { register_decimal_to_float64(registry); } + + with_number_mapped_type!(|DEST_TYPE| match dest_type { + NumberDataType::DEST_TYPE => register_decimal_to_int::(registry), + }) } NumberClass::Decimal256 => { // already registered in Decimal128 branch diff --git a/src/query/functions/src/scalars/decimal.rs b/src/query/functions/src/scalars/decimal.rs index 8538e9c480cf..ee5ae09d755e 100644 --- a/src/query/functions/src/scalars/decimal.rs +++ b/src/query/functions/src/scalars/decimal.rs @@ -627,7 +627,7 @@ pub fn register(registry: &mut FunctionRegistry) { } pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) { - registry.register_function_factory("to_float64", |_params, args_type| { + let factory = |_params: &[usize], args_type: &[DataType]| { if args_type.len() != 1 { return None; } @@ -638,7 +638,7 @@ pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) { return None; } - Some(Arc::new(Function { + let function = Function { signature: FunctionSignature { name: "to_float64".to_string(), args_type: vec![arg_type.clone()], @@ -661,12 +661,32 @@ pub(crate) fn register_decimal_to_float64(registry: &mut FunctionRegistry) { }), eval: Box::new(move |args, tx| decimal_to_float64(&args[0], arg_type.clone(), tx)), }, - })) + }; + + Some(function) + }; + + registry.register_function_factory("to_float64", move |params, args_type| { + Some(Arc::new(factory(params, args_type)?)) + }); + registry.register_function_factory("to_float64", move |params, args_type| { + let f = factory(params, args_type)?; + Some(Arc::new(f.passthrough_nullable())) + }); + registry.register_function_factory("try_to_float64", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float64".to_string(); + Some(Arc::new(f.error_to_null())) + }); + registry.register_function_factory("try_to_float64", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float64".to_string(); + 
Some(Arc::new(f.error_to_null().passthrough_nullable())) }); } pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) { - registry.register_function_factory("to_float32", |_params, args_type| { + let factory = |_params: &[usize], args_type: &[DataType]| { if args_type.len() != 1 { return None; } @@ -676,7 +696,7 @@ pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) { return None; } - Some(Arc::new(Function { + let function = Function { signature: FunctionSignature { name: "to_float32".to_string(), args_type: vec![arg_type.clone()], @@ -699,7 +719,79 @@ pub(crate) fn register_decimal_to_float32(registry: &mut FunctionRegistry) { }), eval: Box::new(move |args, tx| decimal_to_float32(&args[0], arg_type.clone(), tx)), }, - })) + }; + + Some(function) + }; + + registry.register_function_factory("to_float32", move |params, args_type| { + Some(Arc::new(factory(params, args_type)?)) + }); + registry.register_function_factory("to_float32", move |params, args_type| { + let f = factory(params, args_type)?; + Some(Arc::new(f.passthrough_nullable())) + }); + registry.register_function_factory("try_to_float32", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float32".to_string(); + Some(Arc::new(f.error_to_null())) + }); + registry.register_function_factory("try_to_float32", move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = "try_to_float32".to_string(); + Some(Arc::new(f.error_to_null().passthrough_nullable())) + }); +} + +pub(crate) fn register_decimal_to_int(registry: &mut FunctionRegistry) { + if T::data_type().is_float() { + return; + } + let name = format!("to_{}", T::data_type().to_string().to_lowercase()); + let try_name = format!("try_to_{}", T::data_type().to_string().to_lowercase()); + + let factory = |_params: &[usize], args_type: &[DataType]| { + if args_type.len() != 1 { + return None; + } + + let name = format!("to_{}", T::data_type().to_string().to_lowercase()); + let arg_type = args_type[0].remove_nullable(); + if !arg_type.is_decimal() { + return None; + } + + let function = Function { + signature: FunctionSignature { + name, + args_type: vec![arg_type.clone()], + return_type: DataType::Number(T::data_type()), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::MayThrow), + eval: Box::new(move |args, tx| decimal_to_int::(&args[0], arg_type.clone(), tx)), + }, + }; + + Some(function) + }; + + registry.register_function_factory(&name, move |params, args_type| { + Some(Arc::new(factory(params, args_type)?)) + }); + registry.register_function_factory(&name, move |params, args_type| { + let f = factory(params, args_type)?; + Some(Arc::new(f.passthrough_nullable())) + }); + registry.register_function_factory(&try_name, move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = format!("try_to_{}", T::data_type().to_string().to_lowercase()); + Some(Arc::new(f.error_to_null())) + }); + registry.register_function_factory(&try_name, move |params, args_type| { + let mut f = factory(params, args_type)?; + f.signature.name = format!("try_to_{}", T::data_type().to_string().to_lowercase()); + Some(Arc::new(f.error_to_null().passthrough_nullable())) }); } @@ -1309,3 +1401,66 @@ fn decimal_to_float32( Value::Column(result) } } + +fn decimal_to_int( + arg: &ValueRef, + from_type: DataType, + ctx: &mut EvalContext, +) -> Value { + let mut is_scalar = false; + let column = match arg { + ValueRef::Column(column) 
=> column.clone(), + ValueRef::Scalar(s) => { + is_scalar = true; + let builder = ColumnBuilder::repeat(s, 1, &from_type); + builder.build() + } + }; + + let from_type = from_type.as_decimal().unwrap(); + + let result = match from_type { + DecimalDataType::Decimal128(_) => { + let (buffer, from_size) = i128::try_downcast_column(&column).unwrap(); + + let mut values = Vec::with_capacity(ctx.num_rows); + + for (i, x) in buffer.iter().enumerate() { + let x = x.to_int(from_size.scale); + match x { + Some(x) => values.push(x), + None => { + ctx.set_error(i, "decimal cast to int overflow"); + values.push(T::default()) + } + } + } + + NumberType::::upcast_column(Buffer::from(values)) + } + + DecimalDataType::Decimal256(_) => { + let (buffer, from_size) = i256::try_downcast_column(&column).unwrap(); + let mut values = Vec::with_capacity(ctx.num_rows); + + for (i, x) in buffer.iter().enumerate() { + let x = x.to_int(from_size.scale); + match x { + Some(x) => values.push(x), + None => { + ctx.set_error(i, "decimal cast to int overflow"); + values.push(T::default()) + } + } + } + NumberType::::upcast_column(Buffer::from(values)) + } + }; + + if is_scalar { + let scalar = result.index(0).unwrap(); + Value::Scalar(scalar.to_owned()) + } else { + Value::Column(result) + } +} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index e898b283a63b..cc2fc8b735b7 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -3219,10 +3219,11 @@ Functions overloads: 18 to_float32(Int64) :: Float32 19 to_float32(Int64 NULL) :: Float32 NULL 20 to_float32 FACTORY -21 to_float32(Float64) :: Float32 -22 to_float32(Float64 NULL) :: Float32 NULL -23 to_float32(Boolean) :: Float32 -24 to_float32(Boolean NULL) :: Float32 NULL +21 to_float32 FACTORY +22 to_float32(Float64) :: Float32 +23 to_float32(Float64 NULL) :: Float32 NULL +24 to_float32(Boolean) :: Float32 +25 to_float32(Boolean NULL) :: Float32 NULL 0 to_float64(Variant) :: Float64 1 to_float64(Variant NULL) :: Float64 NULL 2 to_float64(String) :: Float64 @@ -3244,10 +3245,11 @@ Functions overloads: 18 to_float64(Int64) :: Float64 19 to_float64(Int64 NULL) :: Float64 NULL 20 to_float64 FACTORY -21 to_float64(Float32) :: Float64 -22 to_float64(Float32 NULL) :: Float64 NULL -23 to_float64(Boolean) :: Float64 -24 to_float64(Boolean NULL) :: Float64 NULL +21 to_float64 FACTORY +22 to_float64(Float32) :: Float64 +23 to_float64(Float32 NULL) :: Float64 NULL +24 to_float64(Boolean) :: Float64 +25 to_float64(Boolean NULL) :: Float64 NULL 0 to_hour(Timestamp) :: UInt8 1 to_hour(Timestamp NULL) :: UInt8 NULL 0 to_int16(Variant) :: Int16 @@ -3268,12 +3270,14 @@ Functions overloads: 15 to_int16(UInt64 NULL) :: Int16 NULL 16 to_int16(Int64) :: Int16 17 to_int16(Int64 NULL) :: Int16 NULL -18 to_int16(Float32) :: Int16 -19 to_int16(Float32 NULL) :: Int16 NULL -20 to_int16(Float64) :: Int16 -21 to_int16(Float64 NULL) :: Int16 NULL -22 to_int16(Boolean) :: Int16 -23 to_int16(Boolean NULL) :: Int16 NULL +18 to_int16 FACTORY +19 to_int16 FACTORY +20 to_int16(Float32) :: Int16 +21 to_int16(Float32 NULL) :: Int16 NULL +22 to_int16(Float64) :: Int16 +23 to_int16(Float64 NULL) :: Int16 NULL +24 to_int16(Boolean) :: Int16 +25 to_int16(Boolean NULL) :: Int16 NULL 0 to_int32(Variant) :: Int32 1 to_int32(Variant NULL) :: Int32 NULL 2 to_int32(String) :: Int32 @@ -3292,12 +3296,14 @@ Functions overloads: 15 
to_int32(UInt64 NULL) :: Int32 NULL 16 to_int32(Int64) :: Int32 17 to_int32(Int64 NULL) :: Int32 NULL -18 to_int32(Float32) :: Int32 -19 to_int32(Float32 NULL) :: Int32 NULL -20 to_int32(Float64) :: Int32 -21 to_int32(Float64 NULL) :: Int32 NULL -22 to_int32(Boolean) :: Int32 -23 to_int32(Boolean NULL) :: Int32 NULL +18 to_int32 FACTORY +19 to_int32 FACTORY +20 to_int32(Float32) :: Int32 +21 to_int32(Float32 NULL) :: Int32 NULL +22 to_int32(Float64) :: Int32 +23 to_int32(Float64 NULL) :: Int32 NULL +24 to_int32(Boolean) :: Int32 +25 to_int32(Boolean NULL) :: Int32 NULL 0 to_int64(Variant) :: Int64 1 to_int64(Variant NULL) :: Int64 NULL 2 to_int64(String) :: Int64 @@ -3316,16 +3322,18 @@ Functions overloads: 15 to_int64(Int32 NULL) :: Int64 NULL 16 to_int64(UInt64) :: Int64 17 to_int64(UInt64 NULL) :: Int64 NULL -18 to_int64(Float32) :: Int64 -19 to_int64(Float32 NULL) :: Int64 NULL -20 to_int64(Float64) :: Int64 -21 to_int64(Float64 NULL) :: Int64 NULL -22 to_int64(Boolean) :: Int64 -23 to_int64(Boolean NULL) :: Int64 NULL -24 to_int64(Date) :: Int64 -25 to_int64(Date NULL) :: Int64 NULL -26 to_int64(Timestamp) :: Int64 -27 to_int64(Timestamp NULL) :: Int64 NULL +18 to_int64 FACTORY +19 to_int64 FACTORY +20 to_int64(Float32) :: Int64 +21 to_int64(Float32 NULL) :: Int64 NULL +22 to_int64(Float64) :: Int64 +23 to_int64(Float64 NULL) :: Int64 NULL +24 to_int64(Boolean) :: Int64 +25 to_int64(Boolean NULL) :: Int64 NULL +26 to_int64(Date) :: Int64 +27 to_int64(Date NULL) :: Int64 NULL +28 to_int64(Timestamp) :: Int64 +29 to_int64(Timestamp NULL) :: Int64 NULL 0 to_int8(Variant) :: Int8 1 to_int8(Variant NULL) :: Int8 NULL 2 to_int8(String) :: Int8 @@ -3344,12 +3352,14 @@ Functions overloads: 15 to_int8(UInt64 NULL) :: Int8 NULL 16 to_int8(Int64) :: Int8 17 to_int8(Int64 NULL) :: Int8 NULL -18 to_int8(Float32) :: Int8 -19 to_int8(Float32 NULL) :: Int8 NULL -20 to_int8(Float64) :: Int8 -21 to_int8(Float64 NULL) :: Int8 NULL -22 to_int8(Boolean) :: Int8 -23 to_int8(Boolean NULL) :: Int8 NULL +18 to_int8 FACTORY +19 to_int8 FACTORY +20 to_int8(Float32) :: Int8 +21 to_int8(Float32 NULL) :: Int8 NULL +22 to_int8(Float64) :: Int8 +23 to_int8(Float64 NULL) :: Int8 NULL +24 to_int8(Boolean) :: Int8 +25 to_int8(Boolean NULL) :: Int8 NULL 0 to_minute(Timestamp) :: UInt8 1 to_minute(Timestamp NULL) :: UInt8 NULL 0 to_monday(Date) :: Date @@ -3467,12 +3477,14 @@ Functions overloads: 15 to_uint16(UInt64 NULL) :: UInt16 NULL 16 to_uint16(Int64) :: UInt16 17 to_uint16(Int64 NULL) :: UInt16 NULL -18 to_uint16(Float32) :: UInt16 -19 to_uint16(Float32 NULL) :: UInt16 NULL -20 to_uint16(Float64) :: UInt16 -21 to_uint16(Float64 NULL) :: UInt16 NULL -22 to_uint16(Boolean) :: UInt16 -23 to_uint16(Boolean NULL) :: UInt16 NULL +18 to_uint16 FACTORY +19 to_uint16 FACTORY +20 to_uint16(Float32) :: UInt16 +21 to_uint16(Float32 NULL) :: UInt16 NULL +22 to_uint16(Float64) :: UInt16 +23 to_uint16(Float64 NULL) :: UInt16 NULL +24 to_uint16(Boolean) :: UInt16 +25 to_uint16(Boolean NULL) :: UInt16 NULL 0 to_uint32(Variant) :: UInt32 1 to_uint32(Variant NULL) :: UInt32 NULL 2 to_uint32(String) :: UInt32 @@ -3491,12 +3503,14 @@ Functions overloads: 15 to_uint32(UInt64 NULL) :: UInt32 NULL 16 to_uint32(Int64) :: UInt32 17 to_uint32(Int64 NULL) :: UInt32 NULL -18 to_uint32(Float32) :: UInt32 -19 to_uint32(Float32 NULL) :: UInt32 NULL -20 to_uint32(Float64) :: UInt32 -21 to_uint32(Float64 NULL) :: UInt32 NULL -22 to_uint32(Boolean) :: UInt32 -23 to_uint32(Boolean NULL) :: UInt32 NULL +18 to_uint32 FACTORY +19 to_uint32 FACTORY +20 
to_uint32(Float32) :: UInt32 +21 to_uint32(Float32 NULL) :: UInt32 NULL +22 to_uint32(Float64) :: UInt32 +23 to_uint32(Float64 NULL) :: UInt32 NULL +24 to_uint32(Boolean) :: UInt32 +25 to_uint32(Boolean NULL) :: UInt32 NULL 0 to_uint64(Variant) :: UInt64 1 to_uint64(Variant NULL) :: UInt64 NULL 2 to_uint64(String) :: UInt64 @@ -3515,12 +3529,14 @@ Functions overloads: 15 to_uint64(Int32 NULL) :: UInt64 NULL 16 to_uint64(Int64) :: UInt64 17 to_uint64(Int64 NULL) :: UInt64 NULL -18 to_uint64(Float32) :: UInt64 -19 to_uint64(Float32 NULL) :: UInt64 NULL -20 to_uint64(Float64) :: UInt64 -21 to_uint64(Float64 NULL) :: UInt64 NULL -22 to_uint64(Boolean) :: UInt64 -23 to_uint64(Boolean NULL) :: UInt64 NULL +18 to_uint64 FACTORY +19 to_uint64 FACTORY +20 to_uint64(Float32) :: UInt64 +21 to_uint64(Float32 NULL) :: UInt64 NULL +22 to_uint64(Float64) :: UInt64 +23 to_uint64(Float64 NULL) :: UInt64 NULL +24 to_uint64(Boolean) :: UInt64 +25 to_uint64(Boolean NULL) :: UInt64 NULL 0 to_uint8(Variant) :: UInt8 1 to_uint8(Variant NULL) :: UInt8 NULL 2 to_uint8(String) :: UInt8 @@ -3539,12 +3555,14 @@ Functions overloads: 15 to_uint8(UInt64 NULL) :: UInt8 NULL 16 to_uint8(Int64) :: UInt8 17 to_uint8(Int64 NULL) :: UInt8 NULL -18 to_uint8(Float32) :: UInt8 -19 to_uint8(Float32 NULL) :: UInt8 NULL -20 to_uint8(Float64) :: UInt8 -21 to_uint8(Float64 NULL) :: UInt8 NULL -22 to_uint8(Boolean) :: UInt8 -23 to_uint8(Boolean NULL) :: UInt8 NULL +18 to_uint8 FACTORY +19 to_uint8 FACTORY +20 to_uint8(Float32) :: UInt8 +21 to_uint8(Float32 NULL) :: UInt8 NULL +22 to_uint8(Float64) :: UInt8 +23 to_uint8(Float64 NULL) :: UInt8 NULL +24 to_uint8(Boolean) :: UInt8 +25 to_uint8(Boolean NULL) :: UInt8 NULL 0 to_unix_timestamp(Timestamp) :: Int64 1 to_unix_timestamp(Timestamp NULL) :: Int64 NULL 0 to_variant(T0) :: Variant @@ -3681,10 +3699,12 @@ Functions overloads: 17 try_to_float32(UInt64 NULL) :: Float32 NULL 18 try_to_float32(Int64) :: Float32 NULL 19 try_to_float32(Int64 NULL) :: Float32 NULL -20 try_to_float32(Float64) :: Float32 NULL -21 try_to_float32(Float64 NULL) :: Float32 NULL -22 try_to_float32(Boolean) :: Float32 NULL -23 try_to_float32(Boolean NULL) :: Float32 NULL +20 try_to_float32 FACTORY +21 try_to_float32 FACTORY +22 try_to_float32(Float64) :: Float32 NULL +23 try_to_float32(Float64 NULL) :: Float32 NULL +24 try_to_float32(Boolean) :: Float32 NULL +25 try_to_float32(Boolean NULL) :: Float32 NULL 0 try_to_float64(Variant) :: Float64 NULL 1 try_to_float64(Variant NULL) :: Float64 NULL 2 try_to_float64(String) :: Float64 NULL @@ -3705,10 +3725,12 @@ Functions overloads: 17 try_to_float64(UInt64 NULL) :: Float64 NULL 18 try_to_float64(Int64) :: Float64 NULL 19 try_to_float64(Int64 NULL) :: Float64 NULL -20 try_to_float64(Float32) :: Float64 NULL -21 try_to_float64(Float32 NULL) :: Float64 NULL -22 try_to_float64(Boolean) :: Float64 NULL -23 try_to_float64(Boolean NULL) :: Float64 NULL +20 try_to_float64 FACTORY +21 try_to_float64 FACTORY +22 try_to_float64(Float32) :: Float64 NULL +23 try_to_float64(Float32 NULL) :: Float64 NULL +24 try_to_float64(Boolean) :: Float64 NULL +25 try_to_float64(Boolean NULL) :: Float64 NULL 0 try_to_int16(Variant) :: Int16 NULL 1 try_to_int16(Variant NULL) :: Int16 NULL 2 try_to_int16(String) :: Int16 NULL @@ -3727,12 +3749,14 @@ Functions overloads: 15 try_to_int16(UInt64 NULL) :: Int16 NULL 16 try_to_int16(Int64) :: Int16 NULL 17 try_to_int16(Int64 NULL) :: Int16 NULL -18 try_to_int16(Float32) :: Int16 NULL -19 try_to_int16(Float32 NULL) :: Int16 NULL -20 try_to_int16(Float64) 
:: Int16 NULL -21 try_to_int16(Float64 NULL) :: Int16 NULL -22 try_to_int16(Boolean) :: Int16 NULL -23 try_to_int16(Boolean NULL) :: Int16 NULL +18 try_to_int16 FACTORY +19 try_to_int16 FACTORY +20 try_to_int16(Float32) :: Int16 NULL +21 try_to_int16(Float32 NULL) :: Int16 NULL +22 try_to_int16(Float64) :: Int16 NULL +23 try_to_int16(Float64 NULL) :: Int16 NULL +24 try_to_int16(Boolean) :: Int16 NULL +25 try_to_int16(Boolean NULL) :: Int16 NULL 0 try_to_int32(Variant) :: Int32 NULL 1 try_to_int32(Variant NULL) :: Int32 NULL 2 try_to_int32(String) :: Int32 NULL @@ -3751,12 +3775,14 @@ Functions overloads: 15 try_to_int32(UInt64 NULL) :: Int32 NULL 16 try_to_int32(Int64) :: Int32 NULL 17 try_to_int32(Int64 NULL) :: Int32 NULL -18 try_to_int32(Float32) :: Int32 NULL -19 try_to_int32(Float32 NULL) :: Int32 NULL -20 try_to_int32(Float64) :: Int32 NULL -21 try_to_int32(Float64 NULL) :: Int32 NULL -22 try_to_int32(Boolean) :: Int32 NULL -23 try_to_int32(Boolean NULL) :: Int32 NULL +18 try_to_int32 FACTORY +19 try_to_int32 FACTORY +20 try_to_int32(Float32) :: Int32 NULL +21 try_to_int32(Float32 NULL) :: Int32 NULL +22 try_to_int32(Float64) :: Int32 NULL +23 try_to_int32(Float64 NULL) :: Int32 NULL +24 try_to_int32(Boolean) :: Int32 NULL +25 try_to_int32(Boolean NULL) :: Int32 NULL 0 try_to_int64(Variant) :: Int64 NULL 1 try_to_int64(Variant NULL) :: Int64 NULL 2 try_to_int64(String) :: Int64 NULL @@ -3775,16 +3801,18 @@ Functions overloads: 15 try_to_int64(Int32 NULL) :: Int64 NULL 16 try_to_int64(UInt64) :: Int64 NULL 17 try_to_int64(UInt64 NULL) :: Int64 NULL -18 try_to_int64(Float32) :: Int64 NULL -19 try_to_int64(Float32 NULL) :: Int64 NULL -20 try_to_int64(Float64) :: Int64 NULL -21 try_to_int64(Float64 NULL) :: Int64 NULL -22 try_to_int64(Boolean) :: Int64 NULL -23 try_to_int64(Boolean NULL) :: Int64 NULL -24 try_to_int64(Date) :: Int64 NULL -25 try_to_int64(Date NULL) :: Int64 NULL -26 try_to_int64(Timestamp) :: Int64 NULL -27 try_to_int64(Timestamp NULL) :: Int64 NULL +18 try_to_int64 FACTORY +19 try_to_int64 FACTORY +20 try_to_int64(Float32) :: Int64 NULL +21 try_to_int64(Float32 NULL) :: Int64 NULL +22 try_to_int64(Float64) :: Int64 NULL +23 try_to_int64(Float64 NULL) :: Int64 NULL +24 try_to_int64(Boolean) :: Int64 NULL +25 try_to_int64(Boolean NULL) :: Int64 NULL +26 try_to_int64(Date) :: Int64 NULL +27 try_to_int64(Date NULL) :: Int64 NULL +28 try_to_int64(Timestamp) :: Int64 NULL +29 try_to_int64(Timestamp NULL) :: Int64 NULL 0 try_to_int8(Variant) :: Int8 NULL 1 try_to_int8(Variant NULL) :: Int8 NULL 2 try_to_int8(String) :: Int8 NULL @@ -3803,12 +3831,14 @@ Functions overloads: 15 try_to_int8(UInt64 NULL) :: Int8 NULL 16 try_to_int8(Int64) :: Int8 NULL 17 try_to_int8(Int64 NULL) :: Int8 NULL -18 try_to_int8(Float32) :: Int8 NULL -19 try_to_int8(Float32 NULL) :: Int8 NULL -20 try_to_int8(Float64) :: Int8 NULL -21 try_to_int8(Float64 NULL) :: Int8 NULL -22 try_to_int8(Boolean) :: Int8 NULL -23 try_to_int8(Boolean NULL) :: Int8 NULL +18 try_to_int8 FACTORY +19 try_to_int8 FACTORY +20 try_to_int8(Float32) :: Int8 NULL +21 try_to_int8(Float32 NULL) :: Int8 NULL +22 try_to_int8(Float64) :: Int8 NULL +23 try_to_int8(Float64 NULL) :: Int8 NULL +24 try_to_int8(Boolean) :: Int8 NULL +25 try_to_int8(Boolean NULL) :: Int8 NULL 0 try_to_string(Variant) :: String NULL 1 try_to_string(Variant NULL) :: String NULL 2 try_to_string(UInt8) :: String NULL @@ -3863,12 +3893,14 @@ Functions overloads: 15 try_to_uint16(UInt64 NULL) :: UInt16 NULL 16 try_to_uint16(Int64) :: UInt16 NULL 17 
try_to_uint16(Int64 NULL) :: UInt16 NULL -18 try_to_uint16(Float32) :: UInt16 NULL -19 try_to_uint16(Float32 NULL) :: UInt16 NULL -20 try_to_uint16(Float64) :: UInt16 NULL -21 try_to_uint16(Float64 NULL) :: UInt16 NULL -22 try_to_uint16(Boolean) :: UInt16 NULL -23 try_to_uint16(Boolean NULL) :: UInt16 NULL +18 try_to_uint16 FACTORY +19 try_to_uint16 FACTORY +20 try_to_uint16(Float32) :: UInt16 NULL +21 try_to_uint16(Float32 NULL) :: UInt16 NULL +22 try_to_uint16(Float64) :: UInt16 NULL +23 try_to_uint16(Float64 NULL) :: UInt16 NULL +24 try_to_uint16(Boolean) :: UInt16 NULL +25 try_to_uint16(Boolean NULL) :: UInt16 NULL 0 try_to_uint32(Variant) :: UInt32 NULL 1 try_to_uint32(Variant NULL) :: UInt32 NULL 2 try_to_uint32(String) :: UInt32 NULL @@ -3887,12 +3919,14 @@ Functions overloads: 15 try_to_uint32(UInt64 NULL) :: UInt32 NULL 16 try_to_uint32(Int64) :: UInt32 NULL 17 try_to_uint32(Int64 NULL) :: UInt32 NULL -18 try_to_uint32(Float32) :: UInt32 NULL -19 try_to_uint32(Float32 NULL) :: UInt32 NULL -20 try_to_uint32(Float64) :: UInt32 NULL -21 try_to_uint32(Float64 NULL) :: UInt32 NULL -22 try_to_uint32(Boolean) :: UInt32 NULL -23 try_to_uint32(Boolean NULL) :: UInt32 NULL +18 try_to_uint32 FACTORY +19 try_to_uint32 FACTORY +20 try_to_uint32(Float32) :: UInt32 NULL +21 try_to_uint32(Float32 NULL) :: UInt32 NULL +22 try_to_uint32(Float64) :: UInt32 NULL +23 try_to_uint32(Float64 NULL) :: UInt32 NULL +24 try_to_uint32(Boolean) :: UInt32 NULL +25 try_to_uint32(Boolean NULL) :: UInt32 NULL 0 try_to_uint64(Variant) :: UInt64 NULL 1 try_to_uint64(Variant NULL) :: UInt64 NULL 2 try_to_uint64(String) :: UInt64 NULL @@ -3911,12 +3945,14 @@ Functions overloads: 15 try_to_uint64(Int32 NULL) :: UInt64 NULL 16 try_to_uint64(Int64) :: UInt64 NULL 17 try_to_uint64(Int64 NULL) :: UInt64 NULL -18 try_to_uint64(Float32) :: UInt64 NULL -19 try_to_uint64(Float32 NULL) :: UInt64 NULL -20 try_to_uint64(Float64) :: UInt64 NULL -21 try_to_uint64(Float64 NULL) :: UInt64 NULL -22 try_to_uint64(Boolean) :: UInt64 NULL -23 try_to_uint64(Boolean NULL) :: UInt64 NULL +18 try_to_uint64 FACTORY +19 try_to_uint64 FACTORY +20 try_to_uint64(Float32) :: UInt64 NULL +21 try_to_uint64(Float32 NULL) :: UInt64 NULL +22 try_to_uint64(Float64) :: UInt64 NULL +23 try_to_uint64(Float64 NULL) :: UInt64 NULL +24 try_to_uint64(Boolean) :: UInt64 NULL +25 try_to_uint64(Boolean NULL) :: UInt64 NULL 0 try_to_uint8(Variant) :: UInt8 NULL 1 try_to_uint8(Variant NULL) :: UInt8 NULL 2 try_to_uint8(String) :: UInt8 NULL @@ -3935,12 +3971,14 @@ Functions overloads: 15 try_to_uint8(UInt64 NULL) :: UInt8 NULL 16 try_to_uint8(Int64) :: UInt8 NULL 17 try_to_uint8(Int64 NULL) :: UInt8 NULL -18 try_to_uint8(Float32) :: UInt8 NULL -19 try_to_uint8(Float32 NULL) :: UInt8 NULL -20 try_to_uint8(Float64) :: UInt8 NULL -21 try_to_uint8(Float64 NULL) :: UInt8 NULL -22 try_to_uint8(Boolean) :: UInt8 NULL -23 try_to_uint8(Boolean NULL) :: UInt8 NULL +18 try_to_uint8 FACTORY +19 try_to_uint8 FACTORY +20 try_to_uint8(Float32) :: UInt8 NULL +21 try_to_uint8(Float32 NULL) :: UInt8 NULL +22 try_to_uint8(Float64) :: UInt8 NULL +23 try_to_uint8(Float64 NULL) :: UInt8 NULL +24 try_to_uint8(Boolean) :: UInt8 NULL +25 try_to_uint8(Boolean NULL) :: UInt8 NULL 0 try_to_variant(T0) :: Variant NULL 1 try_to_variant(T0 NULL) :: Variant NULL 0 tuple FACTORY diff --git a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal index b9db0fedde04..65908c4e1e45 100644 --- 
a/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal +++ b/tests/sqllogictests/suites/base/11_data_type/11_0006_data_type_decimal @@ -942,5 +942,16 @@ select sum(-b) as s from t group by a order by s; -0.01683589 0.01683589 +statement ok +truncate table t; + +statement ok +insert into t values(1, 355327908309); + +query TTT +select to_uint64(b), b::uint64, b from t; +---- +355327908309 355327908309 355327908309.00000000 + statement ok drop table t From 14819b3d95660d8ba03d92c2dfd97ef27e28dbd4 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 21 Sep 2023 18:38:08 -0700 Subject: [PATCH 18/18] feat(query): support inverted_filter to omit the filter executor (#12934) * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): support inverted_filter to omit the filter executor * feat(query): address comments --- src/common/storage/src/metrics/common.rs | 30 +++++ src/common/storage/src/metrics/mod.rs | 1 + src/query/catalog/src/plan/pushdown.rs | 10 +- src/query/catalog/src/table.rs | 8 -- src/query/expression/src/expression.rs | 8 +- .../service/src/interpreters/common/mod.rs | 1 + .../service/src/interpreters/common/util.rs | 25 +++++ .../src/interpreters/interpreter_delete.rs | 38 +------ .../interpreter_table_recluster.rs | 24 +++- .../table_functions/numbers/numbers_table.rs | 2 +- .../tests/it/parquet_rs/prune_row_groups.rs | 2 +- .../service/tests/it/storages/fuse/pruning.rs | 10 +- src/query/sql/src/executor/format.rs | 4 +- .../sql/src/executor/physical_plan_builder.rs | 62 +++++----- src/query/sql/src/executor/table_read_plan.rs | 11 +- .../sql/src/planner/expression_parser.rs | 21 +++- .../storages/fuse/src/operations/delete.rs | 14 +-- .../processors/processor_matched_split.rs | 1 + .../fuse/src/operations/read_partitions.rs | 4 +- .../storages/fuse/src/operations/update.rs | 20 +++- .../storages/fuse/src/pruning/fuse_pruner.rs | 13 ++- .../storages/hive/hive/src/hive_table.rs | 14 +-- src/query/storages/iceberg/src/table.rs | 9 +- .../src/parquet2/parquet_table/partition.rs | 24 +++- .../storages/parquet/src/parquet2/pruning.rs | 9 +- .../src/parquet_rs/parquet_reader/reader.rs | 106 ++++++++++++------ .../parquet_rs/parquet_reader/row_group.rs | 4 + .../src/parquet_rs/parquet_table/partition.rs | 12 +- .../parquet/src/parquet_rs/partition.rs | 1 + .../parquet/src/parquet_rs/pruning.rs | 52 ++++++--- .../storages/system/src/columns_table.rs | 2 +- src/query/storages/system/src/tables_table.rs | 2 +- .../suites/base/03_common/03_0025_delete_from | 11 ++ 33 files changed, 363 insertions(+), 192 deletions(-) create mode 100644 src/common/storage/src/metrics/common.rs diff --git a/src/common/storage/src/metrics/common.rs b/src/common/storage/src/metrics/common.rs new file mode 100644 index 000000000000..2596ceaf82b7 --- /dev/null +++ b/src/common/storage/src/metrics/common.rs @@ -0,0 +1,30 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_metrics::register_counter; +use common_metrics::Counter; +use lazy_static::lazy_static; + +lazy_static! { + static ref OMIT_FILTER_ROWGROUPS: Counter = register_counter("omit_filter_rowgroups"); + static ref OMIT_FILTER_ROWS: Counter = register_counter("omit_filter_rows"); +} + +pub fn metrics_inc_omit_filter_rowgroups(c: u64) { + OMIT_FILTER_ROWGROUPS.inc_by(c); +} + +pub fn metrics_inc_omit_filter_rows(c: u64) { + OMIT_FILTER_ROWS.inc_by(c); +} diff --git a/src/common/storage/src/metrics/mod.rs b/src/common/storage/src/metrics/mod.rs index 890e46a7413d..7d5d075ca8d4 100644 --- a/src/common/storage/src/metrics/mod.rs +++ b/src/common/storage/src/metrics/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod common; pub mod copy; pub mod merge_into; mod storage_metrics; diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs index 318e3a873171..e74e9bf227b8 100644 --- a/src/query/catalog/src/plan/pushdown.rs +++ b/src/query/catalog/src/plan/pushdown.rs @@ -77,9 +77,9 @@ pub struct PushDownInfo { /// The difference with `projection` is the removal of the source columns /// which were only used to generate virtual columns. pub output_columns: Option, - /// Optional filter expression plan + /// Optional filter and reverse filter expression plan /// Assumption: expression's data type must be `DataType::Boolean`. - pub filter: Option>, + pub filters: Option, pub is_deterministic: bool, /// Optional prewhere information /// used for prewhere optimization @@ -96,6 +96,12 @@ pub struct PushDownInfo { pub agg_index: Option, } +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct Filters { + pub filter: RemoteExpr, + pub inverted_filter: RemoteExpr, +} + /// TopK is a wrapper for topk push down items. /// We only take the first column in order_by as the topk column. #[derive(Debug, Clone)] diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 6a59816a15bd..e78036bcc467 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -465,14 +465,6 @@ pub struct NavigationDescriptor { pub point: NavigationPoint, } -#[derive(Debug, Clone)] -pub struct DeletionFilters { - // the filter expression for the deletion - pub filter: RemoteExpr, - // just "not(filter)" - pub inverted_filter: RemoteExpr, -} - use std::collections::HashMap; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default)] diff --git a/src/query/expression/src/expression.rs b/src/query/expression/src/expression.rs index a9fd71538fd9..1762063491ab 100644 --- a/src/query/expression/src/expression.rs +++ b/src/query/expression/src/expression.rs @@ -124,14 +124,17 @@ pub enum Expr { /// /// The remote node will recover the `Arc` pointer within `FunctionCall` by looking /// up the function registry with the `FunctionID`. 
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Educe, Serialize, Deserialize)] +#[educe(PartialEq, Eq, Hash)] pub enum RemoteExpr { Constant { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, scalar: Scalar, data_type: DataType, }, ColumnRef { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, id: Index, data_type: DataType, @@ -140,12 +143,14 @@ pub enum RemoteExpr { display_name: String, }, Cast { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, is_try: bool, expr: Box>, dest_type: DataType, }, FunctionCall { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, id: FunctionID, generics: Vec, @@ -153,6 +158,7 @@ pub enum RemoteExpr { return_type: DataType, }, UDFServerCall { + #[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))] span: Span, func_name: String, server_addr: String, diff --git a/src/query/service/src/interpreters/common/mod.rs b/src/query/service/src/interpreters/common/mod.rs index 75c0539a6c74..96a74433eea2 100644 --- a/src/query/service/src/interpreters/common/mod.rs +++ b/src/query/service/src/interpreters/common/mod.rs @@ -25,5 +25,6 @@ pub use refresh_aggregating_index::hook_refresh_agg_index; pub use refresh_aggregating_index::RefreshAggIndexDesc; pub use table::check_referenced_computed_columns; pub use util::check_deduplicate_label; +pub use util::create_push_down_filters; pub use self::metrics::*; diff --git a/src/query/service/src/interpreters/common/util.rs b/src/query/service/src/interpreters/common/util.rs index 886fe1a58d1d..b57fcf183a92 100644 --- a/src/query/service/src/interpreters/common/util.rs +++ b/src/query/service/src/interpreters/common/util.rs @@ -14,11 +14,17 @@ use std::sync::Arc; +use common_catalog::plan::Filters; use common_catalog::table_context::TableContext; use common_exception::Result; +use common_expression::type_check::check_function; +use common_functions::BUILTIN_FUNCTIONS; use common_meta_kvapi::kvapi::KVApi; use common_users::UserApiProvider; +use crate::sql::executor::cast_expr_to_non_null_boolean; +use crate::sql::ScalarExpr; + /// Checks if a duplicate label exists in the meta store. /// /// # Arguments @@ -41,3 +47,22 @@ pub async fn check_deduplicate_label(ctx: Arc) -> Result } } } + +pub fn create_push_down_filters(scalar: &ScalarExpr) -> Result { + let filter = cast_expr_to_non_null_boolean( + scalar + .as_expr()? 
+ .project_column_ref(|col| col.column_name.clone()), + )?; + + let remote_filter = filter.as_remote_expr(); + + // prepare the inverse filter expression + let remote_inverted_filter = + check_function(None, "not", &[], &[filter], &BUILTIN_FUNCTIONS)?.as_remote_expr(); + + Ok(Filters { + filter: remote_filter, + inverted_filter: remote_inverted_filter, + }) +} diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs index ff7013d260c7..716ad266faa6 100644 --- a/src/query/service/src/interpreters/interpreter_delete.rs +++ b/src/query/service/src/interpreters/interpreter_delete.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use common_base::runtime::GlobalIORuntime; use common_catalog::plan::Partitions; -use common_catalog::table::DeletionFilters; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -30,7 +29,6 @@ use common_functions::BUILTIN_FUNCTIONS; use common_meta_app::schema::CatalogInfo; use common_meta_app::schema::TableInfo; use common_sql::binder::ColumnBindingBuilder; -use common_sql::executor::cast_expr_to_non_null_boolean; use common_sql::executor::DeletePartial; use common_sql::executor::Exchange; use common_sql::executor::FragmentKind; @@ -60,6 +58,7 @@ use log::debug; use storages_common_table_meta::meta::TableSnapshot; use table_lock::TableLockHandlerWrapper; +use crate::interpreters::common::create_push_down_filters; use crate::interpreters::Interpreter; use crate::interpreters::SelectInterpreter; use crate::pipelines::executor::ExecutorSettings; @@ -164,36 +163,15 @@ impl Interpreter for DeleteInterpreter { let (filters, col_indices) = if let Some(scalar) = selection { // prepare the filter expression - let filter = cast_expr_to_non_null_boolean( - scalar - .as_expr()? - .project_column_ref(|col| col.column_name.clone()), - )? - .as_remote_expr(); - - let expr = filter.as_expr(&BUILTIN_FUNCTIONS); + let filters = create_push_down_filters(&scalar)?; + + let expr = filters.filter.as_expr(&BUILTIN_FUNCTIONS); if !expr.is_deterministic(&BUILTIN_FUNCTIONS) { return Err(ErrorCode::Unimplemented( "Delete must have deterministic predicate", )); } - // prepare the inverse filter expression - let inverted_filter = { - let inverse = ScalarExpr::FunctionCall(common_sql::planner::plans::FunctionCall { - span: None, - func_name: "not".to_string(), - params: vec![], - arguments: vec![scalar.clone()], - }); - cast_expr_to_non_null_boolean( - inverse - .as_expr()? - .project_column_ref(|col| col.column_name.clone()), - )? 
- .as_remote_expr() - }; - let col_indices: Vec = if !self.plan.subquery_desc.is_empty() { let mut col_indices = HashSet::new(); for subquery_desc in &self.plan.subquery_desc { @@ -203,13 +181,7 @@ impl Interpreter for DeleteInterpreter { } else { scalar.used_columns().into_iter().collect() }; - ( - Some(DeletionFilters { - filter, - inverted_filter, - }), - col_indices, - ) + (Some(filters), col_indices) } else { (None, vec![]) }; diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index 620d3efddbd7..98ff1891c245 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -16,9 +16,12 @@ use std::sync::Arc; use std::time::Duration; use std::time::SystemTime; +use common_catalog::plan::Filters; use common_catalog::plan::PushDownInfo; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::type_check::check_function; +use common_functions::BUILTIN_FUNCTIONS; use log::error; use log::info; use log::warn; @@ -31,6 +34,7 @@ use crate::pipelines::Pipeline; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; +use crate::sql::executor::cast_expr_to_non_null_boolean; use crate::sql::plans::ReclusterTablePlan; pub struct ReclusterTableInterpreter { @@ -68,13 +72,23 @@ impl Interpreter for ReclusterTableInterpreter { // Build extras via push down scalar let extras = if let Some(scalar) = &plan.push_downs { - let filter = scalar - .as_expr()? - .project_column_ref(|col| col.column_name.clone()) - .as_remote_expr(); + // prepare the filter expression + let filter = cast_expr_to_non_null_boolean( + scalar + .as_expr()? + .project_column_ref(|col| col.column_name.clone()), + )?; + // prepare the inverse filter expression + let inverted_filter = + check_function(None, "not", &[], &[filter.clone()], &BUILTIN_FUNCTIONS)?; + + let filters = Filters { + filter: filter.as_remote_expr(), + inverted_filter: inverted_filter.as_remote_expr(), + }; Some(PushDownInfo { - filter: Some(filter), + filters: Some(filters), ..PushDownInfo::default() }) } else { diff --git a/src/query/service/src/table_functions/numbers/numbers_table.rs b/src/query/service/src/table_functions/numbers/numbers_table.rs index 02d76448e692..c188bc7a2332 100644 --- a/src/query/service/src/table_functions/numbers/numbers_table.rs +++ b/src/query/service/src/table_functions/numbers/numbers_table.rs @@ -137,7 +137,7 @@ impl Table for NumbersTable { let mut limit = None; if let Some(extras) = &push_downs { - if extras.limit.is_some() && extras.filter.is_none() && extras.order_by.is_empty() { + if extras.limit.is_some() && extras.filters.is_none() && extras.order_by.is_empty() { // It is allowed to have an error when we can't get sort columns from the expression. For // example 'select number from numbers(10) order by number+4 limit 10', the column 'number+4' // doesn't exist in the numbers table. 
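Every call site above follows the same pattern: the predicate that is pushed down travels together with its negation, so mutation pruning can separate blocks where every row matches (those are the ones the inverted filter prunes, and they can be rewritten or dropped wholesale without running the filter executor) from blocks that match only partially. A minimal sketch of that pairing, assuming only the `Filters` struct and the helpers already shown in this patch:

// Sketch only: this mirrors `create_push_down_filters` above. `expr` is a
// scalar expression already projected onto plain column names.
fn pair_with_inverse(expr: Expr) -> Result<Filters> {
    // Pushed-down predicates must be non-null booleans.
    let filter = cast_expr_to_non_null_boolean(expr)?;
    // If `not(filter)` prunes a block, every row in that block satisfies
    // `filter`, so the block can be handled as a whole.
    let inverted = check_function(None, "not", &[], &[filter.clone()], &BUILTIN_FUNCTIONS)?;
    Ok(Filters {
        filter: filter.as_remote_expr(),
        inverted_filter: inverted.as_remote_expr(),
    })
}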
diff --git a/src/query/service/tests/it/parquet_rs/prune_row_groups.rs b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs index 4103d3a01b60..9fbd651cd2f5 100644 --- a/src/query/service/tests/it/parquet_rs/prune_row_groups.rs +++ b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs @@ -57,7 +57,7 @@ async fn test_impl(scenario: Scenario, predicate: &str, expected_rgs: Vec ) .unwrap(); - let rgs = pruner.prune_row_groups(&parquet_meta, None).unwrap(); + let (rgs, _) = pruner.prune_row_groups(&parquet_meta, None).unwrap(); assert_eq!( expected_rgs, rgs, diff --git a/src/query/service/tests/it/storages/fuse/pruning.rs b/src/query/service/tests/it/storages/fuse/pruning.rs index 2b27124a8f0e..dfd0b5498ca4 100644 --- a/src/query/service/tests/it/storages/fuse/pruning.rs +++ b/src/query/service/tests/it/storages/fuse/pruning.rs @@ -29,7 +29,7 @@ use common_expression::TableDataType; use common_expression::TableField; use common_expression::TableSchemaRef; use common_expression::TableSchemaRefExt; -use common_sql::parse_to_remote_string_expr; +use common_sql::parse_to_filters; use common_sql::plans::CreateTablePlan; use common_sql::BloomIndexColumns; use common_storages_fuse::pruning::create_segment_location_vector; @@ -172,11 +172,7 @@ async fn test_block_pruner() -> Result<()> { // nothing is pruned let e1 = PushDownInfo { - filter: Some(parse_to_remote_string_expr( - ctx.clone(), - table.clone(), - "a > 3", - )?), + filters: Some(parse_to_filters(ctx.clone(), table.clone(), "a > 3")?), ..Default::default() }; @@ -184,7 +180,7 @@ async fn test_block_pruner() -> Result<()> { let mut e2 = PushDownInfo::default(); let max_val_of_b = 6u64; - e2.filter = Some(parse_to_remote_string_expr( + e2.filters = Some(parse_to_filters( ctx.clone(), table.clone(), "a > 0 and b > 6", diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 37447a2028ff..f218b8e03e20 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -269,9 +269,9 @@ fn table_scan_to_format_tree( .as_ref() .and_then(|extras| { extras - .filter + .filters .as_ref() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .map(|filters| filters.filter.as_expr(&BUILTIN_FUNCTIONS).sql_display()) }) .unwrap_or_default(); diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 4f171e2514d7..e342abf515bb 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; use common_catalog::plan::AggIndexInfo; +use common_catalog::plan::Filters; use common_catalog::plan::PrewhereInfo; use common_catalog::plan::Projection; use common_catalog::plan::PushDownInfo; @@ -1886,37 +1887,35 @@ impl PhysicalPlanBuilder { .push_down_predicates .as_ref() .filter(|p| !p.is_empty()) - .map( - |predicates: &Vec| -> Result> { - let predicates = predicates - .iter() - .map(|p| { - Ok(p.as_expr()? - .project_column_ref(|col| col.column_name.clone())) - }) - .collect::>>()?; - - let expr = predicates - .into_iter() - .try_reduce(|lhs, rhs| { - check_function( - None, - "and_filters", - &[], - &[lhs, rhs], - &BUILTIN_FUNCTIONS, - ) - })? - .unwrap(); + .map(|predicates: &Vec| -> Result { + let predicates = predicates + .iter() + .map(|p| { + Ok(p.as_expr()? 
+ .project_column_ref(|col| col.column_name.clone())) + }) + .collect::>>()?; + + let expr = predicates + .into_iter() + .try_reduce(|lhs, rhs| { + check_function(None, "and_filters", &[], &[lhs, rhs], &BUILTIN_FUNCTIONS) + })? + .unwrap(); - let expr = cast_expr_to_non_null_boolean(expr)?; - let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS); + let expr = cast_expr_to_non_null_boolean(expr)?; + let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS); - is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS); + is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS); - Ok(expr.as_remote_expr()) - }, - ) + let inverted_filter = + check_function(None, "not", &[], &[expr.clone()], &BUILTIN_FUNCTIONS)?; + + Ok(Filters { + filter: expr.as_remote_expr(), + inverted_filter: inverted_filter.as_remote_expr(), + }) + }) .transpose()?; let prewhere_info = scan @@ -1970,12 +1969,13 @@ impl PhysicalPlanBuilder { }) }) .expect("there should be at least one predicate in prewhere"); + let filter = cast_expr_to_non_null_boolean( predicate .as_expr()? .project_column_ref(|col| col.column_name.clone()), - )? - .as_remote_expr(); + )?; + let filter = filter.as_remote_expr(); let virtual_columns = self.build_virtual_columns(&prewhere.prewhere_columns); Ok::(PrewhereInfo { @@ -2039,7 +2039,7 @@ impl PhysicalPlanBuilder { Ok(PushDownInfo { projection: Some(projection), output_columns, - filter: push_down_filter, + filters: push_down_filter, is_deterministic, prewhere: prewhere_info, limit: scan.limit, diff --git a/src/query/sql/src/executor/table_read_plan.rs b/src/query/sql/src/executor/table_read_plan.rs index d35ceb36acdf..176225bdd062 100644 --- a/src/query/sql/src/executor/table_read_plan.rs +++ b/src/query/sql/src/executor/table_read_plan.rs @@ -21,6 +21,7 @@ use common_ast::Dialect; use common_base::base::ProgressValues; use common_catalog::plan::DataSourceInfo; use common_catalog::plan::DataSourcePlan; +use common_catalog::plan::Filters; use common_catalog::plan::InternalColumn; use common_catalog::plan::PartStatistics; use common_catalog::plan::Partitions; @@ -88,9 +89,13 @@ impl ToReadDataSourcePlan for dyn Table { let catalog_info = ctx.get_catalog(&catalog).await?.info(); let (statistics, parts) = if let Some(PushDownInfo { - filter: - Some(RemoteExpr::Constant { - scalar: Scalar::Boolean(false), + filters: + Some(Filters { + filter: + RemoteExpr::Constant { + scalar: Scalar::Boolean(false), + .. + }, .. }), .. 
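With the destructuring above, a pushed-down filter that constant-folds to `false` short-circuits the read plan: the table reports empty statistics and an empty partition set without touching storage. A hedged sketch of the degenerate value being matched, using only types from this patch (building it by hand like this is purely illustrative; the planner normally produces it via constant folding):

// Sketch only: the shape recognized by the `if let` above.
let push_downs = Some(PushDownInfo {
    filters: Some(Filters {
        filter: RemoteExpr::Constant {
            span: None,
            scalar: Scalar::Boolean(false),
            data_type: DataType::Boolean,
        },
        // The negation of `false` is trivially `true`.
        inverted_filter: RemoteExpr::Constant {
            span: None,
            scalar: Scalar::Boolean(true),
            data_type: DataType::Boolean,
        },
    }),
    ..PushDownInfo::default()
});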
diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index 7779500b9407..545cf7c28df6 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -22,12 +22,14 @@ use common_ast::Dialect; use common_base::base::tokio::runtime::Handle; use common_base::base::tokio::task::block_in_place; use common_catalog::catalog::CATALOG_DEFAULT; +use common_catalog::plan::Filters; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_expression::infer_schema_type; use common_expression::infer_table_schema; +use common_expression::type_check::check_function; use common_expression::types::DataType; use common_expression::ConstantFolder; use common_expression::DataBlock; @@ -137,11 +139,11 @@ pub fn parse_exprs( Ok(exprs) } -pub fn parse_to_remote_string_expr( +pub fn parse_to_filters( ctx: Arc, table_meta: Arc, sql: &str, -) -> Result> { +) -> Result { let schema = table_meta.schema(); let exprs = parse_exprs(ctx, table_meta, sql)?; let exprs: Vec> = exprs @@ -153,7 +155,20 @@ pub fn parse_to_remote_string_expr( .collect(); if exprs.len() == 1 { - Ok(exprs[0].clone()) + let filter = exprs[0].clone(); + + let inverted_filter = check_function( + None, + "not", + &[], + &[filter.as_expr(&BUILTIN_FUNCTIONS)], + &BUILTIN_FUNCTIONS, + )?; + + Ok(Filters { + filter, + inverted_filter: inverted_filter.as_remote_expr(), + }) } else { Err(ErrorCode::BadDataValueType(format!( "Expected single expr, but got {}", diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 4a9e34049213..bf92e9ab2649 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,13 +15,13 @@ use std::sync::Arc; use common_base::base::ProgressValues; +use common_catalog::plan::Filters; use common_catalog::plan::PartInfoPtr; use common_catalog::plan::Partitions; use common_catalog::plan::PartitionsShuffleKind; use common_catalog::plan::Projection; use common_catalog::plan::PruningStatistics; use common_catalog::plan::PushDownInfo; -use common_catalog::table::DeletionFilters; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::Result; @@ -72,7 +72,7 @@ impl FuseTable { pub async fn fast_delete( &self, ctx: Arc, - filters: Option, + filters: Option, col_indices: Vec, query_row_id_col: bool, ) -> Result)>> { @@ -131,8 +131,7 @@ impl FuseTable { let (partitions, info) = self .do_mutation_block_pruning( ctx.clone(), - Some(deletion_filters.filter), - Some(deletion_filters.inverted_filter), + Some(deletion_filters), projection, &snapshot, true, @@ -280,8 +279,7 @@ impl FuseTable { pub async fn do_mutation_block_pruning( &self, ctx: Arc, - filter: Option>, - inverted_filter: Option>, + filters: Option, projection: Projection, base_snapshot: &TableSnapshot, with_origin: bool, @@ -289,7 +287,7 @@ impl FuseTable { ) -> Result<(Partitions, MutationTaskInfo)> { let push_down = Some(PushDownInfo { projection: Some(projection), - filter: filter.clone(), + filters: filters.clone(), ..PushDownInfo::default() }); @@ -304,7 +302,7 @@ impl FuseTable { let segment_locations = create_segment_location_vector(segment_locations, None); - if let Some(inverse) = inverted_filter { + if let Some(inverse) = filters.map(|f| f.inverted_filter) { // now the `block_metas` refers to the 
blocks that need to be deleted completely or partially. // // let's try pruning the blocks further to get the blocks that need to be deleted completely, so that diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs index d26ca27c9080..39e7b19d3e9c 100644 --- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs +++ b/src/query/storages/fuse/src/operations/merge_into/processors/processor_matched_split.rs @@ -243,6 +243,7 @@ impl Processor for MatchedSplitProcessor { let (stage_block, mut row_ids) = delete_mutation .delete_mutator .delete_by_expr(current_block)?; + if stage_block.is_empty() { // delete all if !row_ids.is_empty() { diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index 6ab120c22183..dbbd89ae90de 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -267,7 +267,7 @@ impl FuseTable { ) -> (PartStatistics, Partitions) { let limit = push_downs .as_ref() - .filter(|p| p.order_by.is_empty() && p.filter.is_none()) + .filter(|p| p.order_by.is_empty() && p.filters.is_none()) .and_then(|p| p.limit) .unwrap_or(usize::MAX); @@ -333,7 +333,7 @@ impl FuseTable { fn is_exact(push_downs: &Option) -> bool { push_downs .as_ref() - .map_or(true, |extra| extra.filter.is_none()) + .map_or(true, |extra| extra.filters.is_none()) } fn all_columns_partitions( diff --git a/src/query/storages/fuse/src/operations/update.rs b/src/query/storages/fuse/src/operations/update.rs index 4a740018cee5..a2b765981c83 100644 --- a/src/query/storages/fuse/src/operations/update.rs +++ b/src/query/storages/fuse/src/operations/update.rs @@ -15,10 +15,12 @@ use std::collections::BTreeMap; use std::sync::Arc; +use common_catalog::plan::Filters; use common_catalog::plan::Projection; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::Result; +use common_expression::type_check::check_function; use common_expression::types::NumberDataType; use common_expression::FieldIndex; use common_expression::RemoteExpr; @@ -241,14 +243,25 @@ impl FuseTable { ); } let remain_reader = Arc::new(remain_reader); - let (filter_expr, filter) = if let Some(remote_expr) = filter { + let (filter_expr, filters) = if let Some(remote_expr) = filter { + let reverted_expr = check_function( + None, + "not", + &[], + &[remote_expr.as_expr(&BUILTIN_FUNCTIONS)], + &BUILTIN_FUNCTIONS, + )?; + ( Arc::new(Some( remote_expr .as_expr(&BUILTIN_FUNCTIONS) .project_column_ref(|name| schema.index_of(name).unwrap()), )), - Some(remote_expr), + Some(Filters { + filter: remote_expr, + inverted_filter: reverted_expr.as_remote_expr(), + }), ) } else { (Arc::new(None), None) @@ -257,8 +270,7 @@ impl FuseTable { let (parts, part_info) = self .do_mutation_block_pruning( ctx.clone(), - filter, - None, + filters, projection, base_snapshot, false, diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index aacaca3b1689..a6e0a8a4bf4b 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -81,15 +81,18 @@ impl PruningContext { ) -> Result> { let func_ctx = ctx.get_function_context()?; - let filter_expr = push_down - .as_ref() - .and_then(|extra| 
extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+        let filter_expr = push_down.as_ref().and_then(|extra| {
+            extra
+                .filters
+                .as_ref()
+                .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS))
+        });
 
         // Limit pruner.
         // If there is an ordering or filter clause, ignore the limit, even if it has been pushed down.
         let limit = push_down
             .as_ref()
-            .filter(|p| p.order_by.is_empty() && p.filter.is_none())
+            .filter(|p| p.order_by.is_empty() && p.filters.is_none())
             .and_then(|p| p.limit);
         // Prepare the limiter. If the limit is None, an unlimited limiter is returned.
         let limit_pruner = LimiterPrunerCreator::create(limit);
@@ -378,7 +381,7 @@ impl FusePruner {
         let push_down = self.push_down.clone();
         if push_down
             .as_ref()
-            .filter(|p| !p.order_by.is_empty() && p.limit.is_some() && p.filter.is_none())
+            .filter(|p| !p.order_by.is_empty() && p.limit.is_some() && p.filters.is_none())
             .is_some()
         {
             let schema = self.table_schema.clone();
diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs
index f033d7e3adcf..afbb29cc3352 100644
--- a/src/query/storages/hive/hive/src/hive_table.rs
+++ b/src/query/storages/hive/hive/src/hive_table.rs
@@ -114,9 +114,9 @@ impl HiveTable {
 
         let filter_expression = push_downs.as_ref().and_then(|extra| {
             extra
-                .filter
+                .filters
                 .as_ref()
-                .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS))
+                .map(|filter| filter.filter.as_expr(&BUILTIN_FUNCTIONS))
         });
 
         let range_filter = match filter_expression {
@@ -242,7 +242,7 @@ impl HiveTable {
     fn is_simple_select_query(&self, plan: &DataSourcePlan) -> bool {
         // couldn't get group-by/order-by info here
         if let Some(PushDownInfo {
-            filter,
+            filters,
             limit: Some(lm),
             ..
         }) = &plan.push_downs
@@ -253,10 +253,10 @@ impl HiveTable {
 
             // filter out the partition column related expressions
             let partition_keys = self.get_partition_key_sets();
-            let columns = filter
+            let columns = filters
                 .as_ref()
                 .map(|f| {
-                    let expr = f.as_expr(&BUILTIN_FUNCTIONS);
+                    let expr = f.filter.as_expr(&BUILTIN_FUNCTIONS);
                     expr.column_refs().keys().cloned().collect::>()
                 })
                 .unwrap_or_default();
@@ -460,9 +460,9 @@ impl HiveTable {
         if let Some(partition_keys) = &self.table_options.partition_keys {
             if !partition_keys.is_empty() {
                 let filter_expression = push_downs.as_ref().and_then(|p| {
-                    p.filter
+                    p.filters
                         .as_ref()
-                        .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS))
+                        .map(|filter| filter.filter.as_expr(&BUILTIN_FUNCTIONS))
                 });
 
                 return self
diff --git a/src/query/storages/iceberg/src/table.rs b/src/query/storages/iceberg/src/table.rs
index 8468b397d12a..57d9c1a1cd75 100644
--- a/src/query/storages/iceberg/src/table.rs
+++ b/src/query/storages/iceberg/src/table.rs
@@ -209,9 +209,12 @@ impl IcebergTable {
                 ErrorCode::ReadTableDataError(format!("Cannot get current data files: {e:?}"))
             })?;
 
-        let filter = push_downs
-            .as_ref()
-            .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+        let filter = push_downs.as_ref().and_then(|extra| {
+            extra
+                .filters
+                .as_ref()
+                .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS))
+        });
 
         let schema = self.schema();
 
diff --git a/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs
index eaaf5f755f5a..50431a37d017 100644
--- a/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs
+++ b/src/query/storages/parquet/src/parquet2/parquet_table/partition.rs
@@ -78,9 +78,19 @@ impl Parquet2Table {
             project_parquet_schema(&self.arrow_schema, &self.schema_descr, &projection)?;
 
         let
schema = Arc::new(arrow_to_table_schema(projected_arrow_schema)); - let filter = push_down - .as_ref() - .and_then(|extra| extra.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS))); + let filter = push_down.as_ref().and_then(|extra| { + extra + .filters + .as_ref() + .map(|f| f.filter.as_expr(&BUILTIN_FUNCTIONS)) + }); + + let inverted_filter = push_down.as_ref().and_then(|extra| { + extra + .filters + .as_ref() + .map(|f| f.inverted_filter.as_expr(&BUILTIN_FUNCTIONS)) + }); let top_k = top_k.map(|top_k| { let offset = projected_column_nodes @@ -94,11 +104,13 @@ impl Parquet2Table { let func_ctx = ctx.get_function_context()?; let row_group_pruner = if self.read_options.prune_row_groups() { - Some(RangePrunerCreator::try_create( + let p1 = RangePrunerCreator::try_create(func_ctx.clone(), &schema, filter.as_ref())?; + let p2 = RangePrunerCreator::try_create( func_ctx.clone(), &schema, - filter.as_ref(), - )?) + inverted_filter.as_ref(), + )?; + Some((p1, p2)) } else { None }; diff --git a/src/query/storages/parquet/src/parquet2/pruning.rs b/src/query/storages/parquet/src/parquet2/pruning.rs index cbf7565df367..4c3fa5365ca3 100644 --- a/src/query/storages/parquet/src/parquet2/pruning.rs +++ b/src/query/storages/parquet/src/parquet2/pruning.rs @@ -62,8 +62,11 @@ pub struct PartitionPruner { pub schema: TableSchemaRef, pub schema_descr: SchemaDescriptor, pub schema_from: String, - /// Pruner to prune row groups. - pub row_group_pruner: Option>, + /// Pruner to prune row groups. (filter & inverted filter) + pub row_group_pruner: Option<( + Arc, + Arc, + )>, /// Pruners to prune pages. pub page_pruners: Option, /// The projected column indices. @@ -120,7 +123,7 @@ impl PartitionPruner { let row_group_stats = if no_stats { None } else if self.row_group_pruner.is_some() && !self.skip_pruning { - let pruner = self.row_group_pruner.as_ref().unwrap(); + let (pruner, _) = self.row_group_pruner.as_ref().unwrap(); // If collecting stats fails or `should_keep` is true, we still read the row group. // Otherwise, the row group will be pruned. if let Ok(row_group_stats) = diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs b/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs index 03bbfe6ea97c..51e72d9602e4 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_reader/reader.rs @@ -33,6 +33,8 @@ use common_expression::TableSchema; use common_expression::TableSchemaRef; use common_expression::TopKSorter; use common_functions::BUILTIN_FUNCTIONS; +use common_storage::metrics::common::metrics_inc_omit_filter_rowgroups; +use common_storage::metrics::common::metrics_inc_omit_filter_rows; use futures::StreamExt; use opendal::Operator; use opendal::Reader; @@ -242,30 +244,41 @@ impl ParquetRSReader { .with_projection(self.projection.clone()) .with_batch_size(self.batch_size); - // Prune row groups. - let file_meta = builder.metadata(); + let mut full_match = false; + + let file_meta = builder.metadata().clone(); + // Prune row groups. 
if let Some(pruner) = &self.pruner { - let selected_row_groups = pruner.prune_row_groups(file_meta, None)?; - let row_selection = pruner.prune_pages(file_meta, &selected_row_groups)?; + let (selected_row_groups, omits) = pruner.prune_row_groups(&file_meta, None)?; + full_match = omits.iter().all(|x| *x); + builder = builder.with_row_groups(selected_row_groups.clone()); - builder = builder.with_row_groups(selected_row_groups); - if let Some(row_selection) = row_selection { - builder = builder.with_row_selection(row_selection); + if !full_match { + let row_selection = pruner.prune_pages(&file_meta, &selected_row_groups)?; + + if let Some(row_selection) = row_selection { + builder = builder.with_row_selection(row_selection); + } + } else { + metrics_inc_omit_filter_rowgroups(file_meta.num_row_groups() as u64); + metrics_inc_omit_filter_rows(file_meta.file_metadata().num_rows() as u64); } } - if let Some(predicate) = self.predicate.as_ref() { - let projection = predicate.projection().clone(); - let predicate = predicate.clone(); - let predicate_fn = move |batch| { - predicate - .evaluate(&batch) - .map_err(|e| ArrowError::from_external_error(Box::new(e))) - }; - builder = builder.with_row_filter(RowFilter::new(vec![Box::new( - ArrowPredicateFn::new(projection, predicate_fn), - )])); + if !full_match { + if let Some(predicate) = self.predicate.as_ref() { + let projection = predicate.projection().clone(); + let predicate = predicate.clone(); + let predicate_fn = move |batch| { + predicate + .evaluate(&batch) + .map_err(|e| ArrowError::from_external_error(Box::new(e))) + }; + builder = builder.with_row_filter(RowFilter::new(vec![Box::new( + ArrowPredicateFn::new(projection, predicate_fn), + )])); + } } Ok(builder.build()?) @@ -319,29 +332,40 @@ impl ParquetRSReader { .with_batch_size(self.batch_size); // Prune row groups. 
-        let file_meta = builder.metadata();
+        let file_meta = builder.metadata().clone();
+        let mut full_match = false;
         if let Some(pruner) = &self.pruner {
-            let selected_row_groups = pruner.prune_row_groups(file_meta, None)?;
-            let row_selection = pruner.prune_pages(file_meta, &selected_row_groups)?;
+            let (selected_row_groups, omits) = pruner.prune_row_groups(&file_meta, None)?;
+
+            full_match = omits.iter().all(|x| *x);
+            builder = builder.with_row_groups(selected_row_groups.clone());
 
-            builder = builder.with_row_groups(selected_row_groups);
-            if let Some(row_selection) = row_selection {
-                builder = builder.with_row_selection(row_selection);
+            if !full_match {
+                let row_selection = pruner.prune_pages(&file_meta, &selected_row_groups)?;
+
+                if let Some(row_selection) = row_selection {
+                    builder = builder.with_row_selection(row_selection);
+                }
+            } else {
+                metrics_inc_omit_filter_rowgroups(file_meta.num_row_groups() as u64);
+                metrics_inc_omit_filter_rows(file_meta.file_metadata().num_rows() as u64);
             }
         }
 
-        if let Some(predicate) = self.predicate.as_ref() {
-            let projection = predicate.projection().clone();
-            let predicate = predicate.clone();
-            let predicate_fn = move |batch| {
-                predicate
-                    .evaluate(&batch)
-                    .map_err(|e| ArrowError::from_external_error(Box::new(e)))
-            };
-            builder = builder.with_row_filter(RowFilter::new(vec![Box::new(
-                ArrowPredicateFn::new(projection, predicate_fn),
-            )]));
+        if !full_match {
+            if let Some(predicate) = self.predicate.as_ref() {
+                let projection = predicate.projection().clone();
+                let predicate = predicate.clone();
+                let predicate_fn = move |batch| {
+                    predicate
+                        .evaluate(&batch)
+                        .map_err(|e| ArrowError::from_external_error(Box::new(e)))
+                };
+                builder = builder.with_row_filter(RowFilter::new(vec![Box::new(
+                    ArrowPredicateFn::new(projection, predicate_fn),
+                )]));
+            }
         }
 
         let reader = builder.build()?;
@@ -385,13 +409,23 @@ impl ParquetRSReader {
         });
         // TODO(parquet): cache deserialized columns to avoid deserializing them multiple times.
         let mut row_group = InMemoryRowGroup::new(&part.meta, page_locations.as_deref());
+
         let mut selection = part
            .selectors
            .as_ref()
            .map(|x| x.iter().map(RowSelector::from).collect::>())
            .map(RowSelection::from);
 
-        if let Some(predicate) = &self.predicate {
+        let mut predicate = self.predicate.as_ref();
+        if part.omit_filter {
+            predicate = None;
+            selection = None;
+
+            metrics_inc_omit_filter_rowgroups(1);
+            metrics_inc_omit_filter_rows(row_group.row_count() as u64);
+        }
+
+        if let Some(predicate) = predicate {
             // Fetch the columns used for evaluating the predicate (prewhere).
             row_group
                 .fetch(
diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs b/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs
index e95d0c849345..0ac6d50e2288 100644
--- a/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs
+++ b/src/query/storages/parquet/src/parquet_rs/parquet_reader/row_group.rs
@@ -109,6 +109,10 @@ impl<'a> InMemoryRowGroup<'a> {
         }
     }
 
+    pub fn row_count(&self) -> usize {
+        self.row_count
+    }
+
     /// Fetches the necessary column data into memory
     ///
     /// If `fetch` is called multiple times, it will only fetch the data that has not been fetched.
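[Editor's note - illustrative sketch, not part of the patch] The `omit_filter` plumbing above hinges on pruning each row group twice: once with the pushed-down predicate `p`, and once with its negation `NOT p` (the `inverted_filter` built via `check_function` earlier in this patch). If the statistics prove that `NOT p` cannot match anywhere inside a kept row group, every row in that group satisfies `p`, so the row-level `RowFilter` and page pruning can be skipped for it. A minimal self-contained model of that decision follows, with a simplified `should_keep` signature and a hypothetical `select_and_omit` helper (neither is the codebase's exact API):

// Simplified stand-ins for the pruner machinery used in this patch.
trait RangePruner {
    // True if, judging by the statistics, the row group MAY contain matching rows.
    fn should_keep(&self, stats: &RowGroupStats) -> bool;
}

struct RowGroupStats; // stand-in for per-column min/max statistics

struct KeepAll; // trivial pruner: statistics never rule anything out
impl RangePruner for KeepAll {
    fn should_keep(&self, _stats: &RowGroupStats) -> bool {
        true
    }
}

// Mirrors the shape of `prune_row_groups`: returns kept indices plus omit flags.
fn select_and_omit(
    pruner: &dyn RangePruner,          // built from `p`
    inverted_pruner: &dyn RangePruner, // built from `NOT p`
    stats: &[RowGroupStats],
) -> (Vec<usize>, Vec<bool>) {
    let mut selection = Vec::new();
    let mut omits = Vec::new();
    for (i, rg) in stats.iter().enumerate() {
        if pruner.should_keep(rg) {
            selection.push(i);
            // If `NOT p` cannot hold anywhere in this group, all rows satisfy `p`.
            omits.push(!inverted_pruner.should_keep(rg));
        }
    }
    (selection, omits)
}

fn main() {
    let stats = vec![RowGroupStats, RowGroupStats];
    let (selection, omits) = select_and_omit(&KeepAll, &KeepAll, &stats);
    assert_eq!(selection, vec![0, 1]);
    // `NOT p` may also match, so the filter is still applied to both groups.
    assert_eq!(omits, vec![false, false]);
}

When `omits` is all true (`full_match` in the reader), the patch skips `prune_pages`, the `RowFilter`, and the row selection entirely, and records the saving through the `metrics_inc_omit_filter_*` counters.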
diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs index 54c33d98d549..546b019564cb 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/partition.rs @@ -356,11 +356,16 @@ fn prune_and_generate_partitions( .. } = meta.as_ref(); part_stats.partitions_total += meta.num_row_groups(); - let rgs = pruner.prune_row_groups(meta, row_group_level_stats.as_deref())?; - let mut row_selections = pruner.prune_pages(meta, &rgs)?; + let (rgs, omits) = pruner.prune_row_groups(meta, row_group_level_stats.as_deref())?; + let mut row_selections = if omits.iter().all(|x| *x) { + None + } else { + pruner.prune_pages(meta, &rgs)? + }; + let mut rows_read = 0; // Rows read in current file. - for rg in rgs { + for (rg, omit) in rgs.into_iter().zip(omits.into_iter()) { let rg_meta = meta.row_group(rg); let num_rows = rg_meta.num_rows() as usize; // Split rows belonging to current row group. @@ -412,6 +417,7 @@ fn prune_and_generate_partitions( compressed_size, uncompressed_size, sort_min_max, + omit_filter: omit, }); } diff --git a/src/query/storages/parquet/src/parquet_rs/partition.rs b/src/query/storages/parquet/src/parquet_rs/partition.rs index 1e152583b4be..517398585d83 100644 --- a/src/query/storages/parquet/src/parquet_rs/partition.rs +++ b/src/query/storages/parquet/src/parquet_rs/partition.rs @@ -100,6 +100,7 @@ pub struct ParquetRSRowGroupPart { pub uncompressed_size: u64, pub compressed_size: u64, pub sort_min_max: Option<(Scalar, Scalar)>, + pub omit_filter: bool, } impl Eq for ParquetRSRowGroupPart {} diff --git a/src/query/storages/parquet/src/parquet_rs/pruning.rs b/src/query/storages/parquet/src/parquet_rs/pruning.rs index 6210085c302e..ffcda9935aeb 100644 --- a/src/query/storages/parquet/src/parquet_rs/pruning.rs +++ b/src/query/storages/parquet/src/parquet_rs/pruning.rs @@ -38,7 +38,10 @@ use crate::parquet_rs::statistics::convert_index_to_column_statistics; /// We can use this pruner to compute row groups and pages to skip. pub struct ParquetRSPruner { leaf_fields: Arc>, - range_pruner: Option>, + range_pruner: Option<( + Arc, + Arc, + )>, prune_row_groups: bool, prune_pages: bool, @@ -55,16 +58,19 @@ impl ParquetRSPruner { options: ParquetReadOptions, ) -> Result { // Build `RangePruner` by `filter`. 
-        let filter = push_down
-            .as_ref()
-            .and_then(|p| p.filter.as_ref().map(|f| f.as_expr(&BUILTIN_FUNCTIONS)));
+        let filter = push_down.as_ref().and_then(|p| p.filters.as_ref());
 
         let mut predicate_columns = vec![];
         let range_pruner =
             if filter.is_some() && (options.prune_row_groups() || options.prune_pages()) {
-                predicate_columns = filter
+                let filter_expr = filter.as_ref().unwrap().filter.as_expr(&BUILTIN_FUNCTIONS);
+                let inverted_filter_expr = filter
                     .as_ref()
                     .unwrap()
+                    .inverted_filter
+                    .as_expr(&BUILTIN_FUNCTIONS);
+
+                predicate_columns = filter_expr
                     .column_refs()
                     .into_keys()
                     .map(|name| {
@@ -75,8 +81,11 @@ impl ParquetRSPruner {
                     })
                     .collect::>();
                 predicate_columns.sort();
-                let pruner = RangePrunerCreator::try_create(func_ctx, &schema, filter.as_ref())?;
-                Some(pruner)
+                let pruner =
+                    RangePrunerCreator::try_create(func_ctx.clone(), &schema, Some(&filter_expr))?;
+                let inverted_pruner =
+                    RangePrunerCreator::try_create(func_ctx, &schema, Some(&inverted_filter_expr))?;
+                Some((pruner, inverted_pruner))
             } else {
                 None
             };
@@ -92,28 +101,36 @@ impl ParquetRSPruner {
 
     /// Prune row groups of a parquet file.
     ///
-    /// Return the selected row groups' indices in the meta.
+    /// Return the selected row groups' indices in the meta, plus an omit-filter flag per selected group (true means every row matches, so the filter can be skipped).
     ///
     /// If `stats` is not [None], we use these statistics to prune instead of collecting them again.
     pub fn prune_row_groups(
         &self,
         meta: &ParquetMetaData,
         stats: Option<&[StatisticsOfColumns]>,
-    ) -> Result> {
+    ) -> Result<(Vec, Vec)> {
+        let default_selection = (0..meta.num_row_groups()).collect();
+        let default_omits = vec![false; meta.num_row_groups()];
         if !self.prune_row_groups {
-            return Ok((0..meta.num_row_groups()).collect());
+            return Ok((default_selection, default_omits));
         }
+
         match &self.range_pruner {
-            None => Ok((0..meta.num_row_groups()).collect()),
-            Some(pruner) => {
+            None => Ok((default_selection, default_omits)),
+
+            Some((pruner, inverted_pruner)) => {
                 let mut selection = Vec::with_capacity(meta.num_row_groups());
+                let mut omits = Vec::with_capacity(meta.num_row_groups());
                 if let Some(row_group_stats) = stats {
                     for (i, row_group) in row_group_stats.iter().enumerate() {
                         if pruner.should_keep(row_group, None) {
                             selection.push(i);
+
+                            let omit = !inverted_pruner.should_keep(row_group, None);
+                            omits.push(omit);
                         }
                     }
-                    Ok(selection)
+                    Ok((selection, omits))
                 } else if let Some(row_group_stats) = collect_row_group_stats(
                     meta.row_groups(),
                     &self.leaf_fields,
@@ -122,11 +139,14 @@ impl ParquetRSPruner {
                     for (i, row_group) in row_group_stats.iter().enumerate() {
                         if pruner.should_keep(row_group, None) {
                             selection.push(i);
+
+                            let omit = !inverted_pruner.should_keep(row_group, None);
+                            omits.push(omit);
                         }
                     }
-                    Ok(selection)
+                    Ok((selection, omits))
                 } else {
-                    Ok((0..meta.num_row_groups()).collect())
+                    Ok((default_selection, default_omits))
                 }
             }
         }
@@ -145,7 +165,7 @@ impl ParquetRSPruner {
         }
         match &self.range_pruner {
             None => Ok(None),
-            Some(pruner) => {
+            Some((pruner, _)) => {
                 // Page-level statistics can only be used to prune if the file actually has them.
if meta.column_index().is_none() || meta.offset_index().is_none() { return Ok(None); diff --git a/src/query/storages/system/src/columns_table.rs b/src/query/storages/system/src/columns_table.rs index 1dd15dcde79a..6e3e9f7a4ca3 100644 --- a/src/query/storages/system/src/columns_table.rs +++ b/src/query/storages/system/src/columns_table.rs @@ -155,7 +155,7 @@ impl ColumnsTable { let mut databases = Vec::new(); if let Some(push_downs) = push_downs { - if let Some(filter) = push_downs.filter { + if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) { let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { diff --git a/src/query/storages/system/src/tables_table.rs b/src/query/storages/system/src/tables_table.rs index cf1dc720389b..82282abfe815 100644 --- a/src/query/storages/system/src/tables_table.rs +++ b/src/query/storages/system/src/tables_table.rs @@ -124,7 +124,7 @@ where TablesTable: HistoryAware let mut dbs = Vec::new(); if let Some(push_downs) = &push_downs { let mut db_name = Vec::new(); - if let Some(filter) = &push_downs.filter { + if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) { let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { diff --git a/tests/sqllogictests/suites/base/03_common/03_0025_delete_from b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from index 6804f58a1cf1..692e4f549964 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0025_delete_from +++ b/tests/sqllogictests/suites/base/03_common/03_0025_delete_from @@ -114,6 +114,17 @@ select count(*) = 0 from t ---- 1 +statement ok +insert into t values (1), (NULL) + +statement ok +delete from t where c >= 1 + + +query T +select * from t +---- +NULL statement ok
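[Editor's note - illustrative sketch, not part of the patch] The NULL row in the test above is presumably what guards the inverted-filter path against SQL's three-valued logic: for `c = NULL`, both `c >= 1` and `NOT (c >= 1)` evaluate to NULL rather than true, so the NULL row is neither deleted nor allowed to make a block look like a full match. A toy model of that evaluation, assuming standard SQL semantics:

// Three-valued logic for `DELETE ... WHERE c >= 1` over nullable values.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Tri {
    True,
    False,
    Null,
}

fn ge(c: Option<i64>, k: i64) -> Tri {
    match c {
        None => Tri::Null, // comparison with NULL is NULL
        Some(v) if v >= k => Tri::True,
        Some(_) => Tri::False,
    }
}

fn not(t: Tri) -> Tri {
    match t {
        Tri::True => Tri::False,
        Tri::False => Tri::True,
        Tri::Null => Tri::Null, // NOT NULL is still NULL, not true
    }
}

fn main() {
    // DELETE removes only rows where the predicate is strictly True.
    let rows = [Some(1i64), None];
    let kept: Vec<_> = rows.iter().filter(|&&c| ge(c, 1) != Tri::True).collect();
    assert_eq!(kept, vec![&None]); // only the NULL row survives, as in the test
    // The inverted predicate is not True for the NULL row either, so a block
    // containing NULLs must not be treated as "all rows match" by the
    // omit-filter optimization.
    assert_eq!(not(ge(None, 1)), Tri::Null);
}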