From 3bea2b3b1600672df80ccec6fd1e6001d21c5170 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Dec 2024 14:43:02 +0800 Subject: [PATCH 01/14] chore(query): add fuzz aggregate tests --- src/query/expression/src/values.rs | 65 ++++++++++++++----- .../common/table_option_validation.rs | 4 ++ .../common/table_meta/src/table/table_keys.rs | 2 + src/query/storages/random/src/random_table.rs | 54 +++++++++++---- .../19_fuzz/19_0001_fuzz_aggregate.result | 0 .../19_fuzz/19_0001_fuzz_aggregate.sh | 50 ++++++++++++++ 6 files changed, 144 insertions(+), 31 deletions(-) create mode 100755 tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result create mode 100755 tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 4a30ae2fbbd8..545e9a5342f2 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -173,6 +173,23 @@ pub enum Column { Geography(GeographyColumn), } +#[derive(Clone, Debug, PartialEq)] +pub struct RandomOptions { + pub seed: Option, + pub max_string_len: usize, + pub max_array_len: usize, +} + +impl Default for RandomOptions { + fn default() -> Self { + RandomOptions { + seed: None, + max_string_len: 5, + max_array_len: 3, + } + } +} + #[derive(Clone, EnumAsInner, Debug, PartialEq)] pub enum ColumnVec { Null, @@ -1196,15 +1213,21 @@ impl Column { } } - pub fn random(ty: &DataType, len: usize, seed: Option) -> Self { + pub fn random(ty: &DataType, len: usize, options: Option) -> Self { use rand::distributions::Alphanumeric; use rand::rngs::SmallRng; use rand::Rng; use rand::SeedableRng; - let mut rng = match seed { - None => SmallRng::from_entropy(), - Some(seed) => SmallRng::seed_from_u64(seed), + let mut rng = match &options { + Some(RandomOptions { + seed: Some(seed), .. + }) => SmallRng::seed_from_u64(*seed), + _ => SmallRng::from_entropy(), }; + + let max_string_len = options.as_ref().map(|opt| opt.max_string_len).unwrap_or(5); + let max_arr_len = options.as_ref().map(|opt| opt.max_array_len).unwrap_or(3); + match ty { DataType::Null => Column::Null { len }, DataType::EmptyArray => Column::EmptyArray { len }, @@ -1215,13 +1238,16 @@ impl Column { DataType::Binary => BinaryType::from_data( (0..len) .map(|_| { - let rng = match seed { - None => SmallRng::from_entropy(), - Some(seed) => SmallRng::seed_from_u64(seed), + let mut rng = match &options { + Some(RandomOptions { + seed: Some(seed), .. + }) => SmallRng::seed_from_u64(*seed), + _ => SmallRng::from_entropy(), }; + let str_len = rng.gen_range(0..=max_string_len); rng.sample_iter(&Alphanumeric) // randomly generate 5 characters. - .take(5) + .take(str_len) .map(u8::from) .collect::>() }) @@ -1230,13 +1256,16 @@ impl Column { DataType::String => StringType::from_data( (0..len) .map(|_| { - let rng = match seed { - None => SmallRng::from_entropy(), - Some(seed) => SmallRng::seed_from_u64(seed), + let mut rng = match &options { + Some(RandomOptions { + seed: Some(seed), .. + }) => SmallRng::seed_from_u64(*seed), + _ => SmallRng::from_entropy(), }; + let str_len = rng.gen_range(0..=max_string_len); rng.sample_iter(&Alphanumeric) // randomly generate 5 characters. - .take(5) + .take(str_len) .map(char::from) .collect::() }) @@ -1277,7 +1306,7 @@ impl Column { ), DataType::Interval => unimplemented!(), DataType::Nullable(ty) => NullableColumn::new_column( - Column::random(ty, len, seed), + Column::random(ty, len, options), Bitmap::from((0..len).map(|_| rng.gen_bool(0.5)).collect::>()), ), DataType::Array(inner_ty) => { @@ -1285,11 +1314,11 @@ impl Column { let mut offsets: Vec = Vec::with_capacity(len + 1); offsets.push(0); for _ in 0..len { - inner_len += rng.gen_range(0..=3); + inner_len += rng.gen_range(0..=max_arr_len) as u64; offsets.push(inner_len); } Column::Array(Box::new(ArrayColumn { - values: Column::random(inner_ty, inner_len as usize, seed), + values: Column::random(inner_ty, inner_len as usize, options), offsets: offsets.into(), })) } @@ -1298,11 +1327,11 @@ impl Column { let mut offsets: Vec = Vec::with_capacity(len + 1); offsets.push(0); for _ in 0..len { - inner_len += rng.gen_range(0..=3); + inner_len += rng.gen_range(0..=max_arr_len) as u64; offsets.push(inner_len); } Column::Map(Box::new(ArrayColumn { - values: Column::random(inner_ty, inner_len as usize, seed), + values: Column::random(inner_ty, inner_len as usize, options), offsets: offsets.into(), })) } @@ -1321,7 +1350,7 @@ impl Column { DataType::Tuple(fields) => { let fields = fields .iter() - .map(|ty| Column::random(ty, len, seed)) + .map(|ty| Column::random(ty, len, options.clone())) .collect::>(); Column::Tuple(fields) } diff --git a/src/query/service/src/interpreters/common/table_option_validation.rs b/src/query/service/src/interpreters/common/table_option_validation.rs index b16132635d8c..548039d8494f 100644 --- a/src/query/service/src/interpreters/common/table_option_validation.rs +++ b/src/query/service/src/interpreters/common/table_option_validation.rs @@ -39,6 +39,8 @@ use databend_storages_common_table_meta::table::OPT_KEY_CONNECTION_NAME; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; use databend_storages_common_table_meta::table::OPT_KEY_ENGINE; use databend_storages_common_table_meta::table::OPT_KEY_LOCATION; +use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MAX_ARRAY_LEN; +use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MAX_STRING_LEN; use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_SEED; use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_FORMAT; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_COMPRESSION; @@ -84,6 +86,8 @@ pub static CREATE_RANDOM_OPTIONS: LazyLock> = LazyLock::ne let mut r = HashSet::new(); r.insert(OPT_KEY_ENGINE); r.insert(OPT_KEY_RANDOM_SEED); + r.insert(OPT_KEY_RANDOM_MAX_STRING_LEN); + r.insert(OPT_KEY_RANDOM_MAX_ARRAY_LEN); r }); diff --git a/src/query/storages/common/table_meta/src/table/table_keys.rs b/src/query/storages/common/table_meta/src/table/table_keys.rs index 11963947ce2a..1d466df03d5a 100644 --- a/src/query/storages/common/table_meta/src/table/table_keys.rs +++ b/src/query/storages/common/table_meta/src/table/table_keys.rs @@ -50,6 +50,8 @@ pub const OPT_KEY_ENGINE_META: &str = "engine_meta"; pub const OPT_KEY_LEGACY_SNAPSHOT_LOC: &str = "snapshot_loc"; // the following are used in for random engine pub const OPT_KEY_RANDOM_SEED: &str = "seed"; +pub const OPT_KEY_RANDOM_MAX_STRING_LEN: &str = "max_string_len"; +pub const OPT_KEY_RANDOM_MAX_ARRAY_LEN: &str = "max_array_len"; pub const OPT_KEY_CLUSTER_TYPE: &str = "cluster_type"; pub const LINEAR_CLUSTER_TYPE: &str = "linear"; diff --git a/src/query/storages/random/src/random_table.rs b/src/query/storages/random/src/random_table.rs index 43b8d30b4a48..38b584ca84cf 100644 --- a/src/query/storages/random/src/random_table.rs +++ b/src/query/storages/random/src/random_table.rs @@ -29,6 +29,7 @@ use databend_common_expression::types::DataType; use databend_common_expression::BlockEntry; use databend_common_expression::Column; use databend_common_expression::DataBlock; +use databend_common_expression::RandomOptions; use databend_common_expression::TableSchemaRef; use databend_common_expression::Value; use databend_common_meta_app::schema::TableInfo; @@ -44,16 +45,29 @@ use crate::RandomPartInfo; pub struct RandomTable { table_info: TableInfo, - seed: Option, + random_options: RandomOptions, } impl RandomTable { pub fn try_create(table_info: TableInfo) -> Result> { - let seed = match table_info.meta.options.get(OPT_KEY_RANDOM_SEED) { - None => None, - Some(seed_str) => Some(seed_str.parse::()?), - }; - Ok(Box::new(Self { table_info, seed })) + let mut random_options = RandomOptions::default(); + if let Some(seed_str) = table_info.meta.options.get(OPT_KEY_RANDOM_SEED) { + let seed = seed_str.parse::()?; + random_options.seed = Some(seed); + } + + if let Some(s) = table_info.meta.options.get("max_string_len") { + random_options.max_string_len = s.parse::()?; + } + + if let Some(s) = table_info.meta.options.get("max_array_len") { + random_options.max_array_len = s.parse::()?; + } + + Ok(Box::new(Self { + table_info, + random_options, + })) } pub fn description() -> StorageDescription { @@ -131,7 +145,7 @@ impl Table for RandomTable { .iter() .map(|f| { let data_type: DataType = f.data_type().into(); - let column = Column::random(&data_type, 1, self.seed); + let column = Column::random(&data_type, 1, Some(self.random_options.clone())); BlockEntry::new(data_type.clone(), Value::Column(column)) }) .collect::>(); @@ -188,7 +202,7 @@ impl Table for RandomTable { output, output_schema.clone(), parts.rows, - self.seed, + self.random_options.clone(), )?, ); } @@ -197,7 +211,13 @@ impl Table for RandomTable { let output = OutputPort::create(); builder.add_source( output.clone(), - RandomSource::create(ctx.clone(), output, output_schema, 0, self.seed)?, + RandomSource::create( + ctx.clone(), + output, + output_schema, + 0, + self.random_options.clone(), + )?, ); } @@ -210,7 +230,7 @@ struct RandomSource { schema: TableSchemaRef, /// how many rows are needed to generate rows: usize, - seed: Option, + random_options: RandomOptions, } impl RandomSource { @@ -219,9 +239,13 @@ impl RandomSource { output: Arc, schema: TableSchemaRef, rows: usize, - seed: Option, + random_options: RandomOptions, ) -> Result { - SyncSourcer::create(ctx, output, RandomSource { schema, rows, seed }) + SyncSourcer::create(ctx, output, RandomSource { + schema, + rows, + random_options, + }) } } @@ -240,7 +264,11 @@ impl SyncSource for RandomSource { .iter() .map(|f| { let data_type = f.data_type().into(); - let value = Value::Column(Column::random(&data_type, self.rows, self.seed)); + let value = Value::Column(Column::random( + &data_type, + self.rows, + Some(self.random_options.clone()), + )); BlockEntry::new(data_type, value) }) .collect(); diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result new file mode 100755 index 000000000000..e69de29bb2d1 diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh new file mode 100755 index 000000000000..3d02c27cce4e --- /dev/null +++ b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + + +rows=1000 + + +echo """ +create or replace table agg_fuzz(a int, b string, c bool, d int, e Decimal(15, 2), f Decimal(39,2)); +create or replace table agg_fuzz_r like agg_fuzz Engine = Random max_string_len = 3 max_array_len = 2; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + + +echo """ +insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; +insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; +insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; +insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; +insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + +for m in `seq 1 3 10`; do + echo """create or replace table agg_fuzz_result1 as select a, sum(d) d , sum(e) e, sum(f) f from ( +select a, c, sum(d) d , sum(e) e, sum(f) f from ( +select a % ${m} a, b, c, sum(d) d , sum(e) e, sum(f) f from agg_fuzz group by all +) group by all +) group by all; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + + echo """create or replace table agg_fuzz_result2 as select a, sum(d) d , sum(e) e, sum(f) f from ( +select a, b, sum(d) d , sum(e) e, sum(f) f from ( +select a % ${m} a, b, c, sum(d) d , sum(e) e, sum(f) f from agg_fuzz group by all +) group by all +) group by all; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + + echo "RESULT--${m}" + ## judge the result are same + echo """ + SELECT * FROM agg_fuzz_result1 + EXCEPT + SELECT * FROM agg_fuzz_result2 + UNION ALL + SELECT * FROM agg_fuzz_result2 + EXCEPT + SELECT * FROM agg_fuzz_result1; + """ | $BENDSQL_CLIENT_CONNECT +done From a388fc8f1a7a07dca8fa2c16b353c80adc434802 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Dec 2024 17:34:44 +0800 Subject: [PATCH 02/14] chore(query): fix union projection --- src/query/service/src/schedulers/scheduler.rs | 10 +++- .../09_0005_remote_insert_into_select.test | 6 +-- .../09_fuse_engine/09_0029_random_fuzz.test | 16 +++--- tests/sqllogictests/suites/query/union.test | 27 ++++++++++ .../19_fuzz/19_0001_fuzz_aggregate.result | 7 +++ .../19_fuzz/19_0001_fuzz_aggregate.sh | 53 ++++++++++++------- .../0_stateless/19_fuzz/19_0002_fuzz_join.sh | 22 ++++++++ 7 files changed, 110 insertions(+), 31 deletions(-) create mode 100755 tests/suites/0_stateless/19_fuzz/19_0002_fuzz_join.sh diff --git a/src/query/service/src/schedulers/scheduler.rs b/src/query/service/src/schedulers/scheduler.rs index 5ab76f18a47e..44879c53768a 100644 --- a/src/query/service/src/schedulers/scheduler.rs +++ b/src/query/service/src/schedulers/scheduler.rs @@ -47,7 +47,15 @@ pub async fn build_query_pipeline( ignore_result: bool, ) -> Result { let mut build_res = build_query_pipeline_without_render_result_set(ctx, plan).await?; - if matches!(plan, PhysicalPlan::UnionAll { .. }) { + let input_schema = plan.output_schema()?; + + if matches!(plan, PhysicalPlan::UnionAll { .. }) + && result_columns.len() == input_schema.num_fields() + && result_columns + .iter() + .zip(input_schema.fields().iter()) + .all(|(r, f)| format!("{}", r.index).as_str() == f.name().as_str()) + { // Union doesn't need to add extra processor to project the result. // It will be handled in union processor. if ignore_result { diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0005_remote_insert_into_select.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0005_remote_insert_into_select.test index 48b5d929fb5d..d27bb975cb1c 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0005_remote_insert_into_select.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0005_remote_insert_into_select.test @@ -75,12 +75,12 @@ statement ok create table t2 like t1 engine = Random; statement ok -insert into t1 select repeat(a,500) from t2 limit 4; +insert into t1 select repeat(a, 500) from t2 limit 10; query I -select count() from fuse_block('db_09_0005', 't1'); +select count() > 1 from fuse_block('db_09_0005', 't1'); ---- -4 +1 statement ok DROP TABLE t1 ALL diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0029_random_fuzz.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0029_random_fuzz.test index 39d3b5f94fd7..601231112a5c 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0029_random_fuzz.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0029_random_fuzz.test @@ -7,43 +7,43 @@ drop table if exists t1 statement ok -create table rand(name String not null, id int, flag bool) Engine = random +create table rand(name String not null, id int, flag bool) Engine = random max_string_len = 5 statement ok create table t1 engine=fuse row_per_block = $RAND_30000_100000 as select * from rand limit $RAND_100000_300000 query III -select max(length(name)) = 5, min(length(name)) = 5, count(name) >= 100000 from t1 +select min(length(name)) <= 5, max(length(name)) <= 5, count(name) >= 100000 from t1 ---- 1 1 1 query III -select a is null or a = 5, b is null or b = 5 from (select max(length(name)) as a, min(length(name)) as b from t1 where id % 5 = 0) +select a is null or (a <= 5), b is null or (b <= 5) from (select min(length(name)) as a, max(length(name)) as b from t1 where id % 5 = 0) ---- 1 1 query III -select a is null or a = 5, b is null or b = 5 from (select max(length(name)) as a, min(length(name)) as b from t1 where id % 5 = 1) +select a is null or (a <= 5), b is null or (b <= 5) from (select min(length(name)) as a, max(length(name)) as b from t1 where id % 5 = 1) ---- 1 1 query III -select a is null or a = 5, b is null or b = 5 from (select max(length(name)) as a, min(length(name)) as b from t1 where id % 5 = 2) +select a is null or (a <= 5), b is null or (b <= 5) from (select min(length(name)) as a, max(length(name)) as b from t1 where id % 5 = 2) ---- 1 1 query III -select a is null or a = 5, b is null or b = 5 from (select max(length(name)) as a, min(length(name)) as b from t1 where id % 5 = 3) +select a is null or (a <= 5), b is null or (b <= 5) from (select min(length(name)) as a, max(length(name)) as b from t1 where id % 5 = 3) ---- 1 1 query III -select a is null or a = 5, b is null or b = 5 from (select max(length(name)) as a, min(length(name)) as b from t1 where id % 5 = 4) +select a is null or (a <= 5), b is null or (b <= 5) from (select min(length(name)) as a, max(length(name)) as b from t1 where id % 5 = 4) ---- 1 1 query III -select a is null or a = 5, b is null or b = 5 from (select max(length(name)) as a, min(length(name)) as b from t1 where id % 5 = 5) +select a is null or (a <= 5), b is null or (b <= 5) from (select min(length(name)) as a, max(length(name)) as b from t1 where id % 5 = 5) ---- 1 1 diff --git a/tests/sqllogictests/suites/query/union.test b/tests/sqllogictests/suites/query/union.test index 22d8099e718d..ed61084fe371 100644 --- a/tests/sqllogictests/suites/query/union.test +++ b/tests/sqllogictests/suites/query/union.test @@ -323,6 +323,30 @@ query I select * from numbers(100) union all select * from numbers(100) ignore_result; ---- +statement ok +create or replace table t1 (a int, b int); + +statement ok +create or replace table t2 (a int, b int); + +statement ok +insert into t1 values (1, 2), (2, 3); + +statement ok +insert into t2 values (1, 2), (2, 3); + +query II +select a, b from (select * from t1 where a>1 union all select * from t2 where b>2); +---- +2 3 +2 3 + +query II +select b,a from (select * from t1 where a>1 union all select * from t2 where b>2); +---- +3 2 +3 2 + statement ok create or replace table t1 (test bigint); @@ -336,3 +360,6 @@ WITH cte1 AS ( SELECT t1.test FROM t1 WHERE t1.test = 0 ) ,cte2 AS ( SELECT cte1 statement ok drop table t1; + +statement ok +drop table t2; diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result index e69de29bb2d1..3cda1d50b3ab 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result +++ b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result @@ -0,0 +1,7 @@ +Eq +Eq +Eq +Eq +Eq +Eq +Eq diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh index 3d02c27cce4e..f46e5252ae39 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh +++ b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh @@ -4,11 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -rows=1000 +rows=100000 echo """ -create or replace table agg_fuzz(a int, b string, c bool, d int, e Decimal(15, 2), f Decimal(39,2)); +create or replace table agg_fuzz(a int, b string, c bool, d variant, e int, f Decimal(15, 2), g Decimal(39,2)); create or replace table agg_fuzz_r like agg_fuzz Engine = Random max_string_len = 3 max_array_len = 2; """ | $BENDSQL_CLIENT_OUTPUT_NULL @@ -21,30 +21,45 @@ insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; insert into agg_fuzz select * from agg_fuzz_r limit ${rows}; """ | $BENDSQL_CLIENT_OUTPUT_NULL -for m in `seq 1 3 10`; do - echo """create or replace table agg_fuzz_result1 as select a, sum(d) d , sum(e) e, sum(f) f from ( -select a, c, sum(d) d , sum(e) e, sum(f) f from ( -select a % ${m} a, b, c, sum(d) d , sum(e) e, sum(f) f from agg_fuzz group by all +for m in `seq 1 3 20`; do + echo """create or replace table agg_fuzz_result1 as select a, sum(e) e, sum(f) f, sum(g) g from ( + select a, d, sum(e) e, sum(f) f, sum(g) g from ( +select a, c, d, sum(e) e, sum(f) f, sum(g) g from ( +select a % ${m} a, b, c, d, sum(e) e, sum(f) f, sum(g) g from agg_fuzz where b >= 'R' group by all +) group by all ) group by all ) group by all; """ | $BENDSQL_CLIENT_OUTPUT_NULL - echo """create or replace table agg_fuzz_result2 as select a, sum(d) d , sum(e) e, sum(f) f from ( -select a, b, sum(d) d , sum(e) e, sum(f) f from ( -select a % ${m} a, b, c, sum(d) d , sum(e) e, sum(f) f from agg_fuzz group by all + echo """create or replace table agg_fuzz_result2 as select a, sum(e) e, sum(f) f, sum(g) g from ( + select a, d, sum(e) e, sum(f) f, sum(g) g from ( +select a, c, d, sum(e) e, sum(f) f, sum(g) g from ( +select a % ${m} a, b, c, d, sum(e) e, sum(f) f, sum(g) g from agg_fuzz where b >= 'R' group by all +) group by all ) group by all ) group by all; """ | $BENDSQL_CLIENT_OUTPUT_NULL - echo "RESULT--${m}" - ## judge the result are same + ## judge the result are same, except has bugs + # echo """ + # SELECT * FROM agg_fuzz_result1 + # EXCEPT + # SELECT * FROM agg_fuzz_result2 + # UNION ALL + # SELECT * FROM agg_fuzz_result2 + # EXCEPT + # SELECT * FROM agg_fuzz_result1; + # """ | $BENDSQL_CLIENT_CONNECT + echo """ - SELECT * FROM agg_fuzz_result1 - EXCEPT - SELECT * FROM agg_fuzz_result2 - UNION ALL - SELECT * FROM agg_fuzz_result2 - EXCEPT - SELECT * FROM agg_fuzz_result1; - """ | $BENDSQL_CLIENT_CONNECT + SELECT + CASE + WHEN + (SELECT MD5(STRING_AGG(x.a::STRING, '') || STRING_AGG(x.e::STRING, '') || STRING_AGG(x.f::STRING, '') || STRING_AGG(x.g::STRING, '')) FROM (SELECT * FROM agg_fuzz_result1 ORDER BY a, e, f, g) x) = + (SELECT MD5(STRING_AGG(y.a::STRING, '') || STRING_AGG(y.e::STRING, '') || STRING_AGG(y.f::STRING, '') || STRING_AGG(y.g::STRING, '')) FROM (SELECT * FROM agg_fuzz_result1 ORDER BY a, e, f, g) y) + THEN 'Eq' + ELSE 'NotEq' + END AS comparison_result; +""" | $BENDSQL_CLIENT_CONNECT + done diff --git a/tests/suites/0_stateless/19_fuzz/19_0002_fuzz_join.sh b/tests/suites/0_stateless/19_fuzz/19_0002_fuzz_join.sh new file mode 100755 index 000000000000..e47efaea6998 --- /dev/null +++ b/tests/suites/0_stateless/19_fuzz/19_0002_fuzz_join.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +## TODO after except join get correct result + +## fuzz join +## ideas +# The logical relationships of JOIN: + +# 1. The relationship between LEFT JOIN and RIGHT JOIN: +# - All rows in LEFT JOIN should appear in FULL OUTER JOIN. +# - All rows in RIGHT JOIN should also appear in FULL OUTER JOIN. +# - FULL OUTER JOIN is the union of LEFT JOIN and RIGHT JOIN. + +# 2. The complementarity of LEFT JOIN and RIGHT JOIN: +# - If certain rows in the result of LEFT JOIN have NULLs in the right table, these rows should have no corresponding entries in RIGHT JOIN. +# - Similarly, if certain rows in the result of RIGHT JOIN have NULLs in the left table, these rows should have no corresponding entries in LEFT JOIN. + +# 3. Verifying the decomposability of FULL OUTER JOIN: +# - FULL OUTER JOIN = LEFT JOIN + RIGHT JOIN - overlapping parts. From e2be6033da9bf7d2afb62f50e21de76518deae3f Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Dec 2024 18:12:34 +0800 Subject: [PATCH 03/14] chore(query): add fuzz union --- .../19_fuzz/19_0003_fuzz_union.result | 21 +++++++ .../0_stateless/19_fuzz/19_0003_fuzz_union.sh | 56 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result create mode 100755 tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh diff --git a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result new file mode 100644 index 000000000000..6ee20096dc35 --- /dev/null +++ b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result @@ -0,0 +1,21 @@ +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq +Eq diff --git a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh new file mode 100755 index 000000000000..7d9a8ec175ce --- /dev/null +++ b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + + +rows=1000 + + +echo """ +create or replace table union_fuzz(a int, b string, c bool, d variant, e int, f Decimal(15, 2), g Decimal(39,2), h Array(String), i Array(Decimal(15, 2))); +create or replace table union_fuzz2(a int, b string, c bool, d variant, e int, f Decimal(15, 2), g Decimal(39,2), h Array(String), i Array(Decimal(15, 2))); +create or replace table union_fuzz_r like union_fuzz Engine = Random max_string_len = 5 max_array_len = 2; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + + +echo """ +insert into union_fuzz select * from union_fuzz_r limit ${rows}; +insert into union_fuzz select * from union_fuzz_r limit ${rows}; +insert into union_fuzz2 select * from union_fuzz_r limit ${rows}; +insert into union_fuzz2 select * from union_fuzz_r limit ${rows}; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + +fields=(a b c d e f g) + +# 获取数组的长度 +length=${#fields[@]} + +for ((i=0; i<$length; i++)); do + for ((j=i+1; j<$length; j++)); do + x=${fields[$i]} + y=${fields[$j]} + + # x,y + echo """create or replace table union_fuzz_result1 as + select $x, $y from union_fuzz union all select $x, $y from union_fuzz2; +""" | $BENDSQL_CLIENT_OUTPUT_NULL + + echo """create or replace table union_fuzz_result2 as + select $y, $x from (select $x, $y from union_fuzz2 union all select $x, $y from union_fuzz); +""" | $BENDSQL_CLIENT_OUTPUT_NULL + + + echo """ + SELECT + CASE + WHEN + (SELECT MD5(STRING_AGG(x.$x::STRING, '') || STRING_AGG(x.$y::STRING, '')) FROM (select * from union_fuzz_result1 order by $x, $y) x) = + (SELECT MD5(STRING_AGG(y.$x::STRING, '') || STRING_AGG(y.$y::STRING, '')) FROM (select * from union_fuzz_result2 order by $x, $y) y) + THEN 'Eq' + ELSE 'NotEq' + END AS comparison_result; +""" | $BENDSQL_CLIENT_CONNECT + + done +done From b812f57273a06a66eaf942ca482aa452c9edf1b2 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Dec 2024 22:43:36 +0800 Subject: [PATCH 04/14] chore(query): fix union --- src/query/expression/src/values.rs | 7 +++-- .../common/table_option_validation.rs | 2 ++ .../physical_plans/physical_union_all.rs | 26 +++++++++++-------- src/query/sql/src/planner/binder/select.rs | 22 +++++++++------- .../common/table_meta/src/table/table_keys.rs | 1 + src/query/storages/random/src/random_table.rs | 11 ++++++-- .../cluster/distributed_copy_into_table.test | 2 +- .../cluster/distributed_copy_into_table2.test | 2 +- tests/sqllogictests/suites/query/union.test | 12 +++++++++ 9 files changed, 59 insertions(+), 26 deletions(-) diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 545e9a5342f2..9ce27a459391 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -176,6 +176,7 @@ pub enum Column { #[derive(Clone, Debug, PartialEq)] pub struct RandomOptions { pub seed: Option, + pub min_string_len: usize, pub max_string_len: usize, pub max_array_len: usize, } @@ -184,6 +185,7 @@ impl Default for RandomOptions { fn default() -> Self { RandomOptions { seed: None, + min_string_len: 0, max_string_len: 5, max_array_len: 3, } @@ -1225,6 +1227,7 @@ impl Column { _ => SmallRng::from_entropy(), }; + let min_string_len = options.as_ref().map(|opt| opt.min_string_len).unwrap_or(0); let max_string_len = options.as_ref().map(|opt| opt.max_string_len).unwrap_or(5); let max_arr_len = options.as_ref().map(|opt| opt.max_array_len).unwrap_or(3); @@ -1244,7 +1247,7 @@ impl Column { }) => SmallRng::seed_from_u64(*seed), _ => SmallRng::from_entropy(), }; - let str_len = rng.gen_range(0..=max_string_len); + let str_len = rng.gen_range(min_string_len..=max_string_len); rng.sample_iter(&Alphanumeric) // randomly generate 5 characters. .take(str_len) @@ -1262,7 +1265,7 @@ impl Column { }) => SmallRng::seed_from_u64(*seed), _ => SmallRng::from_entropy(), }; - let str_len = rng.gen_range(0..=max_string_len); + let str_len = rng.gen_range(min_string_len..=max_string_len); rng.sample_iter(&Alphanumeric) // randomly generate 5 characters. .take(str_len) diff --git a/src/query/service/src/interpreters/common/table_option_validation.rs b/src/query/service/src/interpreters/common/table_option_validation.rs index 548039d8494f..f041c0f70d8c 100644 --- a/src/query/service/src/interpreters/common/table_option_validation.rs +++ b/src/query/service/src/interpreters/common/table_option_validation.rs @@ -41,6 +41,7 @@ use databend_storages_common_table_meta::table::OPT_KEY_ENGINE; use databend_storages_common_table_meta::table::OPT_KEY_LOCATION; use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MAX_ARRAY_LEN; use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MAX_STRING_LEN; +use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MIN_STRING_LEN; use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_SEED; use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_FORMAT; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_COMPRESSION; @@ -86,6 +87,7 @@ pub static CREATE_RANDOM_OPTIONS: LazyLock> = LazyLock::ne let mut r = HashSet::new(); r.insert(OPT_KEY_ENGINE); r.insert(OPT_KEY_RANDOM_SEED); + r.insert(OPT_KEY_RANDOM_MIN_STRING_LEN); r.insert(OPT_KEY_RANDOM_MAX_STRING_LEN); r.insert(OPT_KEY_RANDOM_MAX_ARRAY_LEN); r diff --git a/src/query/sql/src/executor/physical_plans/physical_union_all.rs b/src/query/sql/src/executor/physical_plans/physical_union_all.rs index 7c89972d2455..350cca64c718 100644 --- a/src/query/sql/src/executor/physical_plans/physical_union_all.rs +++ b/src/query/sql/src/executor/physical_plans/physical_union_all.rs @@ -63,12 +63,11 @@ impl PhysicalPlanBuilder { let metadata = self.metadata.read().clone(); let lazy_columns = metadata.lazy_columns(); required.extend(lazy_columns.clone()); - let indices: Vec<_> = union_all - .left_outputs - .iter() - .enumerate() - .filter_map(|(index, v)| required.contains(&v.0).then_some(index)) + + let indices: Vec<_> = (0..union_all.left_outputs.len()) + .filter(|i| required.contains(i)) .collect(); + let (left_required, right_required) = if indices.is_empty() { ( HashSet::from([union_all.left_outputs[0].0]), @@ -98,13 +97,18 @@ impl PhysicalPlanBuilder { let fields = union_all .left_outputs .iter() - .filter(|(index, _)| left_required.contains(index)) - .map(|(index, expr)| { - if let Some(expr) = expr { - Ok(DataField::new(&index.to_string(), expr.data_type()?)) + .enumerate() + .filter(|(_, (index, _))| left_required.contains(index)) + .map(|(i, (index, expr))| { + let data_type = if let Some(expr) = expr { + expr.data_type()? } else { - Ok(left_schema.field_with_name(&index.to_string())?.clone()) - } + left_schema + .field_with_name(&index.to_string())? + .data_type() + .clone() + }; + Ok(DataField::new(&i.to_string(), data_type)) }) .collect::>>()?; diff --git a/src/query/sql/src/planner/binder/select.rs b/src/query/sql/src/planner/binder/select.rs index 256b42fdd942..853171e5bbec 100644 --- a/src/query/sql/src/planner/binder/select.rs +++ b/src/query/sql/src/planner/binder/select.rs @@ -280,6 +280,7 @@ impl Binder { right_outputs, cte_scan_names, }; + let mut new_expr = SExpr::create_binary( Arc::new(union_plan.into()), Arc::new(left_expr), @@ -440,18 +441,10 @@ impl Binder { left_col.index, Some(ScalarExpr::CastExpr(left_coercion_expr)), )); - let column_binding = ColumnBindingBuilder::new( - left_col.column_name.clone(), - left_col.index, - Box::new(coercion_types[idx].clone()), - Visibility::Visible, - ) - .build(); - new_bind_context.add_column_binding(column_binding); } else { left_outputs.push((left_col.index, None)); - new_bind_context.add_column_binding(left_col.clone()); } + if *right_col.data_type != coercion_types[idx] { let right_coercion_expr = CastExpr { span: right_span, @@ -472,7 +465,18 @@ impl Binder { } else { right_outputs.push((right_col.index, None)); } + + let index = new_bind_context.columns.len(); + let column_binding = ColumnBindingBuilder::new( + left_col.column_name.clone(), + index, + Box::new(coercion_types[idx].clone()), + Visibility::Visible, + ) + .build(); + new_bind_context.add_column_binding(column_binding); } + Ok((new_bind_context, left_outputs, right_outputs)) } diff --git a/src/query/storages/common/table_meta/src/table/table_keys.rs b/src/query/storages/common/table_meta/src/table/table_keys.rs index 1d466df03d5a..0e4296fc3c8b 100644 --- a/src/query/storages/common/table_meta/src/table/table_keys.rs +++ b/src/query/storages/common/table_meta/src/table/table_keys.rs @@ -50,6 +50,7 @@ pub const OPT_KEY_ENGINE_META: &str = "engine_meta"; pub const OPT_KEY_LEGACY_SNAPSHOT_LOC: &str = "snapshot_loc"; // the following are used in for random engine pub const OPT_KEY_RANDOM_SEED: &str = "seed"; +pub const OPT_KEY_RANDOM_MIN_STRING_LEN: &str = "min_string_len"; pub const OPT_KEY_RANDOM_MAX_STRING_LEN: &str = "max_string_len"; pub const OPT_KEY_RANDOM_MAX_ARRAY_LEN: &str = "max_array_len"; diff --git a/src/query/storages/random/src/random_table.rs b/src/query/storages/random/src/random_table.rs index 38b584ca84cf..4ae27d95596c 100644 --- a/src/query/storages/random/src/random_table.rs +++ b/src/query/storages/random/src/random_table.rs @@ -39,6 +39,9 @@ use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_core::SourcePipeBuilder; use databend_common_pipeline_sources::SyncSource; use databend_common_pipeline_sources::SyncSourcer; +use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MAX_ARRAY_LEN; +use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MAX_STRING_LEN; +use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_MIN_STRING_LEN; use databend_storages_common_table_meta::table::OPT_KEY_RANDOM_SEED; use crate::RandomPartInfo; @@ -56,11 +59,15 @@ impl RandomTable { random_options.seed = Some(seed); } - if let Some(s) = table_info.meta.options.get("max_string_len") { + if let Some(s) = table_info.meta.options.get(OPT_KEY_RANDOM_MIN_STRING_LEN) { + random_options.min_string_len = s.parse::()?; + } + + if let Some(s) = table_info.meta.options.get(OPT_KEY_RANDOM_MAX_STRING_LEN) { random_options.max_string_len = s.parse::()?; } - if let Some(s) = table_info.meta.options.get("max_array_len") { + if let Some(s) = table_info.meta.options.get(OPT_KEY_RANDOM_MAX_ARRAY_LEN) { random_options.max_array_len = s.parse::()?; } diff --git a/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table.test b/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table.test index a5a51cdd9466..037d32575aac 100644 --- a/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table.test +++ b/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table.test @@ -6,7 +6,7 @@ statement ok create or replace stage st FILE_FORMAT = (TYPE = CSV); statement ok -create or replace table table_random(a int not null,b string not null,c string not null) ENGINE = Random; +create or replace table table_random(a int not null,b string not null,c string not null) ENGINE = Random min_string_len = 1; statement ok copy into @st from (select a,b,c from table_random limit 1000000); diff --git a/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2.test b/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2.test index d8880c9330eb..64f672842a79 100644 --- a/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2.test +++ b/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2.test @@ -23,7 +23,7 @@ statement ok create table products (id int not null, name string not null, description string not null); statement ok -create table table_random(a int not null,b string not null,c string not null) ENGINE = Random; +create table table_random(a int not null,b string not null,c string not null) ENGINE = Random min_string_len = 1; statement ok copy into @s3 from (select a,b,c from table_random limit 10); diff --git a/tests/sqllogictests/suites/query/union.test b/tests/sqllogictests/suites/query/union.test index ed61084fe371..ef31f34dd96c 100644 --- a/tests/sqllogictests/suites/query/union.test +++ b/tests/sqllogictests/suites/query/union.test @@ -256,6 +256,18 @@ SELECT 3 AS id, ---- 3 10086 1111 +query II +SELECT * FROM (SELECT 3, + user_id AS r_uid, + user_id AS w_uid + FROM test3) t +UNION +SELECT 3 AS id, + 10086 AS r_uid, + 1111 AS w_uid; +---- +3 10086 1111 + statement ok drop table test3; From 6aa5d10a78e5abeea3a88f7c72fe8379e27a0340 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Dec 2024 22:56:38 +0800 Subject: [PATCH 05/14] chore(query): fix union --- src/query/service/src/schedulers/scheduler.rs | 19 ------------------- tests/sqllogictests/suites/query/union.test | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/query/service/src/schedulers/scheduler.rs b/src/query/service/src/schedulers/scheduler.rs index 44879c53768a..726f380237aa 100644 --- a/src/query/service/src/schedulers/scheduler.rs +++ b/src/query/service/src/schedulers/scheduler.rs @@ -17,8 +17,6 @@ use std::sync::Arc; use async_trait::async_trait; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_sinks::EmptySink; use databend_common_sql::planner::query_executor::QueryExecutor; use databend_common_sql::Planner; use futures_util::TryStreamExt; @@ -49,23 +47,6 @@ pub async fn build_query_pipeline( let mut build_res = build_query_pipeline_without_render_result_set(ctx, plan).await?; let input_schema = plan.output_schema()?; - if matches!(plan, PhysicalPlan::UnionAll { .. }) - && result_columns.len() == input_schema.num_fields() - && result_columns - .iter() - .zip(input_schema.fields().iter()) - .all(|(r, f)| format!("{}", r.index).as_str() == f.name().as_str()) - { - // Union doesn't need to add extra processor to project the result. - // It will be handled in union processor. - if ignore_result { - build_res - .main_pipeline - .add_sink(|input| Ok(ProcessorPtr::create(EmptySink::create(input))))?; - } - return Ok(build_res); - } - let input_schema = plan.output_schema()?; PipelineBuilder::build_result_projection( &ctx.get_function_context()?, input_schema, diff --git a/tests/sqllogictests/suites/query/union.test b/tests/sqllogictests/suites/query/union.test index ef31f34dd96c..a5bbc707ef14 100644 --- a/tests/sqllogictests/suites/query/union.test +++ b/tests/sqllogictests/suites/query/union.test @@ -268,6 +268,20 @@ SELECT 3 AS id, ---- 3 10086 1111 +query II +select w_uid, id, id + 5 from ( +SELECT * FROM (SELECT 3 as id, + user_id AS r_uid, + user_id AS w_uid + FROM test3) t +UNION +SELECT 3 AS id, + 10086 AS r_uid, + 1111 AS w_uid +); +---- +1111 3 8 + statement ok drop table test3; From 6d2d85114074928ca685eea6b7b5f8cd78e3bcc5 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Dec 2024 23:16:52 +0800 Subject: [PATCH 06/14] chore(query): fix union --- tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh index 7d9a8ec175ce..f946d53c791f 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh +++ b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh @@ -22,8 +22,6 @@ insert into union_fuzz2 select * from union_fuzz_r limit ${rows}; """ | $BENDSQL_CLIENT_OUTPUT_NULL fields=(a b c d e f g) - -# 获取数组的长度 length=${#fields[@]} for ((i=0; i<$length; i++)); do From ec4a5eebf423d8b4f912ad364a99396c81032565 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 24 Dec 2024 15:33:57 +0800 Subject: [PATCH 07/14] chore(query): fix union --- .../physical_plans/physical_union_all.rs | 7 ++++--- src/query/sql/src/planner/binder/select.rs | 18 ++++++++++-------- src/query/sql/src/planner/plans/union_all.rs | 1 + 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/query/sql/src/executor/physical_plans/physical_union_all.rs b/src/query/sql/src/executor/physical_plans/physical_union_all.rs index 350cca64c718..41caccea8bb9 100644 --- a/src/query/sql/src/executor/physical_plans/physical_union_all.rs +++ b/src/query/sql/src/executor/physical_plans/physical_union_all.rs @@ -64,8 +64,8 @@ impl PhysicalPlanBuilder { let lazy_columns = metadata.lazy_columns(); required.extend(lazy_columns.clone()); - let indices: Vec<_> = (0..union_all.left_outputs.len()) - .filter(|i| required.contains(i)) + let indices: Vec = (0..union_all.left_outputs.len()) + .filter(|index| required.contains(&union_all.output_indexes[*index])) .collect(); let (left_required, right_required) = if indices.is_empty() { @@ -108,7 +108,8 @@ impl PhysicalPlanBuilder { .data_type() .clone() }; - Ok(DataField::new(&i.to_string(), data_type)) + let output_index = union_all.output_indexes[i]; + Ok(DataField::new(&output_index.to_string(), data_type)) }) .collect::>>()?; diff --git a/src/query/sql/src/planner/binder/select.rs b/src/query/sql/src/planner/binder/select.rs index 853171e5bbec..4059801b2005 100644 --- a/src/query/sql/src/planner/binder/select.rs +++ b/src/query/sql/src/planner/binder/select.rs @@ -260,6 +260,7 @@ impl Binder { right_context.clone(), coercion_types, )?; + if let Some(cte_name) = &cte_name { for (col, cte_col) in new_bind_context.columns.iter_mut().zip( new_bind_context @@ -275,10 +276,13 @@ impl Binder { } } + let output_indexes = new_bind_context.columns.iter().map(|x| x.index).collect(); + let union_plan = UnionAll { left_outputs, right_outputs, cte_scan_names, + output_indexes, }; let mut new_expr = SExpr::create_binary( @@ -400,7 +404,7 @@ impl Binder { #[allow(clippy::type_complexity)] #[allow(clippy::too_many_arguments)] fn coercion_union_type( - &self, + &mut self, left_span: Span, right_span: Span, left_bind_context: BindContext, @@ -414,6 +418,7 @@ impl Binder { let mut left_outputs = Vec::with_capacity(left_bind_context.columns.len()); let mut right_outputs = Vec::with_capacity(right_bind_context.columns.len()); let mut new_bind_context = BindContext::new(); + new_bind_context .cte_context .set_cte_context(right_bind_context.cte_context); @@ -466,14 +471,11 @@ impl Binder { right_outputs.push((right_col.index, None)); } - let index = new_bind_context.columns.len(); - let column_binding = ColumnBindingBuilder::new( + let column_binding = self.create_derived_column_binding( left_col.column_name.clone(), - index, - Box::new(coercion_types[idx].clone()), - Visibility::Visible, - ) - .build(); + coercion_types[idx].clone(), + None, + ); new_bind_context.add_column_binding(column_binding); } diff --git a/src/query/sql/src/planner/plans/union_all.rs b/src/query/sql/src/planner/plans/union_all.rs index 31e5828cdda4..a5079106e096 100644 --- a/src/query/sql/src/planner/plans/union_all.rs +++ b/src/query/sql/src/planner/plans/union_all.rs @@ -41,6 +41,7 @@ pub struct UnionAll { // For example: `with recursive t as (select 1 as x union all select m.x+f.x from t as m, t as f where m.x < 3) select * from t` // The `cte_scan_names` are `m` and `f` pub cte_scan_names: Vec, + pub output_indexes: Vec, } impl UnionAll { From fafd8160b5cb18d3bcbe9250b748cf58a9bff7ba Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 24 Dec 2024 21:26:40 +0800 Subject: [PATCH 08/14] chore(query): fix union --- .../rewrite/rule_push_down_filter_union.rs | 45 +++++++++++-------- .../suites/mode/cluster/exchange.test | 28 ++++++------ .../mode/standalone/explain/explain.test | 22 ++++----- .../push_down_filter_eval_scalar.test | 8 ++-- .../suites/mode/standalone/explain/union.test | 24 +++++----- .../mode/standalone/explain/window.test | 30 ++++++------- .../standalone/explain_native/explain.test | 8 ++-- .../push_down_filter_eval_scalar.test | 8 ++-- .../mode/standalone/explain_native/union.test | 16 +++---- 9 files changed, 98 insertions(+), 91 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs index eff9e73861a1..2701b85446fa 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs @@ -78,30 +78,37 @@ impl Rule for RulePushDownFilterUnion { return Ok(()); } - // Create a filter which matches union's right child. - let index_pairs: HashMap = union - .left_outputs - .iter() - .zip(union.right_outputs.iter()) - .map(|(left, right)| (left.0, right.0)) - .collect(); - let new_predicates = filter - .predicates - .iter() - .map(|predicate| replace_column_binding(&index_pairs, predicate.clone())) - .collect::>>()?; - let right_filer = Filter { - predicates: new_predicates, - }; - let mut union_left_child = union_s_expr.child(0)?.clone(); let mut union_right_child = union_s_expr.child(1)?.clone(); // Add filter to union children - union_left_child = SExpr::create_unary(Arc::new(filter.into()), Arc::new(union_left_child)); - union_right_child = - SExpr::create_unary(Arc::new(right_filer.into()), Arc::new(union_right_child)); + for (union_side, union_sexpr) in [&union.left_outputs, &union.right_outputs] + .iter() + .zip([&mut union_left_child, &mut union_right_child].iter_mut()) + { + // Create a filter which matches union's right child. + let index_pairs: HashMap = union + .output_indexes + .iter() + .zip(union_side.iter()) + .map(|(index, side)| (*index, side.0)) + .collect(); + + let new_predicates = filter + .predicates + .iter() + .map(|predicate| replace_column_binding(&index_pairs, predicate.clone())) + .collect::>>()?; + + let filter = Filter { + predicates: new_predicates, + }; + + let s = (*union_sexpr).clone(); + **union_sexpr = SExpr::create_unary(Arc::new(filter.into()), Arc::new(s)); + } + // Create a filter which matches union's right child. let result = SExpr::create_binary( Arc::new(union.into()), Arc::new(union_left_child), diff --git a/tests/sqllogictests/suites/mode/cluster/exchange.test b/tests/sqllogictests/suites/mode/cluster/exchange.test index 1b3a0595456a..e10a777723c5 100644 --- a/tests/sqllogictests/suites/mode/cluster/exchange.test +++ b/tests/sqllogictests/suites/mode/cluster/exchange.test @@ -275,19 +275,19 @@ select t1.a from t1, t3 where t1.a = t3.a ) t ---- AggregateFinal -├── output columns: [sum(a) (#4)] +├── output columns: [sum(a) (#5)] ├── group by: [] -├── aggregate functions: [sum(number)] +├── aggregate functions: [sum(a)] ├── estimated rows: 1.00 └── Exchange - ├── output columns: [sum(a) (#4)] + ├── output columns: [sum(a) (#5)] ├── exchange type: Merge └── AggregatePartial ├── group by: [] - ├── aggregate functions: [sum(number)] + ├── aggregate functions: [sum(a)] ├── estimated rows: 1.00 └── UnionAll - ├── output columns: [numbers.number (#0)] + ├── output columns: [a (#4)] ├── estimated rows: 10100.00 ├── HashJoin │ ├── output columns: [numbers.number (#0)] @@ -360,16 +360,16 @@ select sum(t1.a) from t1, t3 where t1.a = t3.a ) t ---- AggregateFinal -├── output columns: [sum(a) (#5)] +├── output columns: [sum(a) (#6)] ├── group by: [] -├── aggregate functions: [sum(number)] +├── aggregate functions: [sum(a)] ├── estimated rows: 1.00 └── AggregatePartial ├── group by: [] - ├── aggregate functions: [sum(number)] + ├── aggregate functions: [sum(a)] ├── estimated rows: 1.00 └── UnionAll - ├── output columns: [numbers.number (#0)] + ├── output columns: [a (#5)] ├── estimated rows: 10001.00 ├── Exchange │ ├── output columns: [numbers.number (#0)] @@ -456,19 +456,19 @@ select t1.a from t1 ) t ---- AggregateFinal -├── output columns: [sum(a) (#3)] +├── output columns: [sum(a) (#4)] ├── group by: [] -├── aggregate functions: [sum(number)] +├── aggregate functions: [sum(a)] ├── estimated rows: 1.00 └── Exchange - ├── output columns: [sum(a) (#3)] + ├── output columns: [sum(a) (#4)] ├── exchange type: Merge └── AggregatePartial ├── group by: [] - ├── aggregate functions: [sum(number)] + ├── aggregate functions: [sum(a)] ├── estimated rows: 1.00 └── UnionAll - ├── output columns: [numbers.number (#0)] + ├── output columns: [a (#3)] ├── estimated rows: 10010.00 ├── HashJoin │ ├── output columns: [numbers.number (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index 28971cd47a0f..e4f5fe40817b 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -475,7 +475,7 @@ query T explain select a from t1 UNION ALL select a from t2 ---- UnionAll -├── output columns: [t1.a (#0)] +├── output columns: [a (#4)] ├── estimated rows: 6.00 ├── TableScan │ ├── table: default.default.t1 @@ -750,7 +750,7 @@ Limit ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] └── estimated rows: 5.00 -query +query explain select * from t1,t2 where (t1.a > 1 or t1.b < 2) and (t1.a > 1 or t1.b < 2) ---- HashJoin @@ -785,7 +785,7 @@ HashJoin ├── push downs: [filters: [], limit: NONE] └── estimated rows: 5.00 -query +query explain select count(distinct a) from t1; ---- AggregateFinal @@ -817,7 +817,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select count_distinct(a) from t1; ---- AggregateFinal @@ -849,7 +849,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select * from (values(1, 'a'),(2, 'b')) t(c1,c2) ---- ConstantTableScan @@ -863,28 +863,28 @@ drop table t1 statement ok drop table t2 -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3'); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3') -query +query explain syntax select * from read_parquet(prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet(prune_page=>TRUE, refresh_meta_cache=>TRUE) -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3', prune_page=>TRUE, refresh_meta_cache=>TRUE) -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * @@ -897,7 +897,7 @@ drop table if exists t4 statement ok create OR REPLACE table t4(a int, b string); -query +query explain select * from t4 where a = 1 and try_cast(get(try_parse_json(b),'bb') as varchar) = 'xx'; ---- Filter @@ -920,7 +920,7 @@ drop view if exists v4 statement ok create view v4 as select a as a, try_cast(get(try_parse_json(b), 'bb') as varchar) as b from t4; -query +query explain select * from v4 where b = 'xx'; ---- EvalScalar diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test index 5c83998d444e..ccfd41742b78 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test @@ -52,16 +52,16 @@ query T explain select distinct t.sc from v2 t where t.sc = 1; ---- AggregateFinal -├── output columns: [t.id (#0)] -├── group by: [id] +├── output columns: [sc (#9)] +├── group by: [sc] ├── aggregate functions: [] ├── estimated rows: 0.00 └── AggregatePartial - ├── group by: [id] + ├── group by: [sc] ├── aggregate functions: [] ├── estimated rows: 0.00 └── UnionAll - ├── output columns: [t.id (#0)] + ├── output columns: [sc (#9)] ├── estimated rows: 0.00 ├── AggregateFinal │ ├── output columns: [t.id (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/union.test b/tests/sqllogictests/suites/mode/standalone/explain/union.test index 94c843087c79..91c1e368a5af 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/union.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/union.test @@ -26,7 +26,7 @@ query T explain select * from v where a > b ---- UnionAll -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── estimated rows: 0.80 ├── Filter │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -61,7 +61,7 @@ query T explain select * from v where a > 1 ---- UnionAll -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── estimated rows: 2.00 ├── Filter │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -96,12 +96,12 @@ query T explain select * from v limit 3 ---- Limit -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── limit: 3 ├── offset: 0 ├── estimated rows: 3.00 └── UnionAll - ├── output columns: [t1.a (#0), t1.b (#1)] + ├── output columns: [a (#4), b (#5)] ├── estimated rows: 4.00 ├── Limit │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -138,12 +138,12 @@ query T explain select * from v limit 3 offset 1 ---- Limit -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── limit: 3 ├── offset: 1 ├── estimated rows: 3.00 └── UnionAll - ├── output columns: [t1.a (#0), t1.b (#1)] + ├── output columns: [a (#4), b (#5)] ├── estimated rows: 4.00 ├── Limit │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -180,12 +180,12 @@ query T explain select * from t1 union all select * from t2 limit 1 ---- Limit -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── limit: 1 ├── offset: 0 ├── estimated rows: 1.00 └── UnionAll - ├── output columns: [t1.a (#0), t1.b (#1)] + ├── output columns: [a (#4), b (#5)] ├── estimated rows: 2.00 ├── Limit │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -223,7 +223,7 @@ query T explain select b from (select * from t1 where a>1 union all select * from t2 where b>2) ---- UnionAll -├── output columns: [t1.b (#1)] +├── output columns: [b (#5)] ├── estimated rows: 2.00 ├── Filter │ ├── output columns: [t1.b (#1)] @@ -277,7 +277,7 @@ query T explain select * from t1 where t1.a < 0 union all select * from t2 ; ---- UnionAll -├── output columns: [t1.a (#0)] +├── output columns: [a (#2)] ├── estimated rows: 10000.00 ├── EmptyResultScan └── TableScan @@ -297,9 +297,9 @@ explain select * from t1 union all select * from t2 where t2.b < 0; ---- TableScan ├── table: default.default.t1 -├── output columns: [a (#0)] +├── output columns: [] ├── read rows: 10000 -├── read size: 10.59 KiB +├── read size: 0 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/window.test b/tests/sqllogictests/suites/mode/standalone/explain/window.test index 454ab3efd904..0ebbdda0e7bc 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/window.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/window.test @@ -97,21 +97,21 @@ query T explain SELECT k, v FROM (SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank FROM ((SELECT k, v FROM Test) UNION ALL (SELECT k, v FROM Test) ) t1 ) t2 WHERE rank = 1 AND k = 12; ---- Filter -├── output columns: [test.k (#0), test.v (#1)] -├── filters: [t2.rank (#4) = 1] +├── output columns: [k (#4), v (#5)] +├── filters: [t2.rank (#6) = 1] ├── estimated rows: 0.00 └── Window - ├── output columns: [test.k (#0), test.v (#1), rank() OVER ( PARTITION BY k ORDER BY v DESC ) (#4)] + ├── output columns: [k (#4), v (#5), rank() OVER ( PARTITION BY k ORDER BY v DESC ) (#6)] ├── aggregate function: [rank] ├── partition by: [k] ├── order by: [v] ├── frame: [Range: Preceding(None) ~ CurrentRow] └── WindowPartition - ├── output columns: [test.k (#0), test.v (#1)] + ├── output columns: [k (#4), v (#5)] ├── hash keys: [k] ├── estimated rows: 0.00 └── UnionAll - ├── output columns: [test.k (#0), test.v (#1)] + ├── output columns: [k (#4), v (#5)] ├── estimated rows: 0.00 ├── Filter │ ├── output columns: [test.k (#0), test.v (#1)] @@ -145,21 +145,21 @@ query T explain SELECT k, v FROM (SELECT *, rank() OVER (PARTITION BY v ORDER BY v DESC) AS rank FROM ((SELECT k, v FROM Test) UNION ALL (SELECT k, v FROM Test) ) t1 ) t2 WHERE rank = 1 AND k = 12; ---- Filter -├── output columns: [test.k (#0), test.v (#1)] -├── filters: [t2.rank (#4) = 1, is_true(t2.k (#0) = 12)] +├── output columns: [k (#4), v (#5)] +├── filters: [rank() OVER ( PARTITION BY v ORDER BY v DESC ) (#6) = 1, is_true(t1.k (#4) = 12)] ├── estimated rows: 0.00 └── Window - ├── output columns: [test.k (#0), test.v (#1), rank() OVER ( PARTITION BY v ORDER BY v DESC ) (#4)] + ├── output columns: [k (#4), v (#5), rank() OVER ( PARTITION BY v ORDER BY v DESC ) (#6)] ├── aggregate function: [rank] ├── partition by: [v] ├── order by: [v] ├── frame: [Range: Preceding(None) ~ CurrentRow] └── WindowPartition - ├── output columns: [test.k (#0), test.v (#1)] + ├── output columns: [k (#4), v (#5)] ├── hash keys: [v] ├── estimated rows: 0.00 └── UnionAll - ├── output columns: [test.k (#0), test.v (#1)] + ├── output columns: [k (#4), v (#5)] ├── estimated rows: 0.00 ├── TableScan │ ├── table: default.test_explain_window.test @@ -185,21 +185,21 @@ query T explain SELECT k, v FROM (SELECT *, rank() OVER (ORDER BY v DESC) AS rank FROM ((SELECT k, v FROM Test) UNION ALL (SELECT k, v FROM Test) ) t1 ) t2 WHERE rank = 1 AND k = 12; ---- Filter -├── output columns: [test.k (#0), test.v (#1)] -├── filters: [t2.rank (#4) = 1, is_true(t2.k (#0) = 12)] +├── output columns: [k (#4), v (#5)] +├── filters: [rank() OVER ( ORDER BY v DESC ) (#6) = 1, is_true(t1.k (#4) = 12)] ├── estimated rows: 0.00 └── Window - ├── output columns: [test.k (#0), test.v (#1), rank() OVER ( ORDER BY v DESC ) (#4)] + ├── output columns: [k (#4), v (#5), rank() OVER ( ORDER BY v DESC ) (#6)] ├── aggregate function: [rank] ├── partition by: [] ├── order by: [v] ├── frame: [Range: Preceding(None) ~ CurrentRow] └── Sort - ├── output columns: [test.k (#0), test.v (#1)] + ├── output columns: [k (#4), v (#5)] ├── sort keys: [v DESC NULLS LAST] ├── estimated rows: 0.00 └── UnionAll - ├── output columns: [test.k (#0), test.v (#1)] + ├── output columns: [k (#4), v (#5)] ├── estimated rows: 0.00 ├── TableScan │ ├── table: default.test_explain_window.test diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test index 2d3bf614e96b..325390bb9f2b 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test @@ -310,7 +310,7 @@ query T explain syntax with cte (a, b) as (select 1, 2 union all select 3, 4) select a, b from cte ---- WITH - cte(a, b) AS + cte(a, b) AS ( SELECT 1, @@ -329,7 +329,7 @@ query T explain syntax with cte (a, b) as (values(1,2),(3,4)) select a, b from cte ---- WITH - cte(a, b) AS + cte(a, b) AS ( VALUES(1, 2), (3, 4) ) @@ -451,7 +451,7 @@ query T explain select a from t1 UNION ALL select a from t2 ---- UnionAll -├── output columns: [t1.a (#0)] +├── output columns: [a (#4)] ├── estimated rows: 6.00 ├── TableScan │ ├── table: default.default.t1 @@ -789,7 +789,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select * from (values(1, 'a'),(2, 'b')) t(c1,c2) ---- ConstantTableScan diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test index 4fc1f97e0198..e279971cf0fe 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test @@ -52,16 +52,16 @@ query T explain select distinct t.sc from v2 t where t.sc = 1; ---- AggregateFinal -├── output columns: [t.id (#0)] -├── group by: [id] +├── output columns: [sc (#9)] +├── group by: [sc] ├── aggregate functions: [] ├── estimated rows: 0.00 └── AggregatePartial - ├── group by: [id] + ├── group by: [sc] ├── aggregate functions: [] ├── estimated rows: 0.00 └── UnionAll - ├── output columns: [t.id (#0)] + ├── output columns: [sc (#9)] ├── estimated rows: 0.00 ├── AggregateFinal │ ├── output columns: [t.id (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/union.test b/tests/sqllogictests/suites/mode/standalone/explain_native/union.test index e1541caf591e..307a46209081 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/union.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/union.test @@ -26,7 +26,7 @@ query T explain select * from v where a > b ---- UnionAll -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── estimated rows: 0.80 ├── TableScan │ ├── table: default.default.t1 @@ -53,7 +53,7 @@ query T explain select * from v where a > 1 ---- UnionAll -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── estimated rows: 2.00 ├── TableScan │ ├── table: default.default.t1 @@ -80,12 +80,12 @@ query T explain select * from v limit 3 ---- Limit -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── limit: 3 ├── offset: 0 ├── estimated rows: 3.00 └── UnionAll - ├── output columns: [t1.a (#0), t1.b (#1)] + ├── output columns: [a (#4), b (#5)] ├── estimated rows: 4.00 ├── Limit │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -122,12 +122,12 @@ query T explain select * from v limit 3 offset 1 ---- Limit -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── limit: 3 ├── offset: 1 ├── estimated rows: 3.00 └── UnionAll - ├── output columns: [t1.a (#0), t1.b (#1)] + ├── output columns: [a (#4), b (#5)] ├── estimated rows: 4.00 ├── Limit │ ├── output columns: [t1.a (#0), t1.b (#1)] @@ -164,12 +164,12 @@ query T explain select * from t1 union all select * from t2 limit 1 ---- Limit -├── output columns: [t1.a (#0), t1.b (#1)] +├── output columns: [a (#4), b (#5)] ├── limit: 1 ├── offset: 0 ├── estimated rows: 1.00 └── UnionAll - ├── output columns: [t1.a (#0), t1.b (#1)] + ├── output columns: [a (#4), b (#5)] ├── estimated rows: 2.00 ├── Limit │ ├── output columns: [t1.a (#0), t1.b (#1)] From dd0a9d078271994fdb7eecb02478ad8f23e83dfa Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 24 Dec 2024 22:14:12 +0800 Subject: [PATCH 09/14] update join binder --- .../planner/binder/bind_table_reference/bind_join.rs | 11 ++++++++++- .../suites/ee/06_ee_stream/06_0000_stream.test | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs index 52117671fcb8..048eb748d6a4 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs @@ -377,7 +377,16 @@ impl Binder { let right_prop = RelExpr::with_s_expr(&right_child).derive_relational_prop()?; let mut is_lateral = false; let mut is_null_equal = Vec::new(); - if !right_prop.outer_columns.is_empty() { + + // Some outer columns maybe from union plan + let need_correlated = { + let metadata = self.metadata.read(); + metadata + .table_index_by_column_indexes(&right_prop.outer_columns) + .is_some() + }; + + if need_correlated { // If there are outer columns in right child, then the join is a correlated lateral join let mut decorrelator = SubqueryRewriter::new(self.metadata.clone(), Some(self.clone())); right_child = decorrelator.flatten_plan( diff --git a/tests/sqllogictests/suites/ee/06_ee_stream/06_0000_stream.test b/tests/sqllogictests/suites/ee/06_ee_stream/06_0000_stream.test index 437f29a4e13b..2c7bd11fd734 100644 --- a/tests/sqllogictests/suites/ee/06_ee_stream/06_0000_stream.test +++ b/tests/sqllogictests/suites/ee/06_ee_stream/06_0000_stream.test @@ -361,8 +361,15 @@ create table t4(a int, b int) statement ok insert into t4 values(1,1),(2,2),(3,3) +statement ok +explain merge into t4 using (select a, b from ( select 1 a, 2 b union all select 1 a, 3 b)) as t3 on t3.a=t4.a when matched then delete + +statement ok +explain merge into t4 using (select 1 a, 2 b union all select 1 a, 3 b) as t3 on t3.a=t4.a when matched then delete + + query II -merge into t4 using (select a, b, change$action from s3) as t3 on t3.a=t4.a when matched and change$action = 'DELETE' then delete +merge into t4 using (select a, b, change$action from s3) as t3 on t3.a=t4.a when matched and change$action = 'DELETE' then delete when not matched and change$action = 'INSERT' then insert (a,b) values(t3.a,t3.b) ---- 3 2 From 63e3f349a8a8fb1bdc5aa7390ecc553b6d469416 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 24 Dec 2024 23:43:57 +0800 Subject: [PATCH 10/14] update join binder --- .../planner/binder/bind_table_reference/bind_join.rs | 10 +--------- .../src/planner/optimizer/decorrelate/decorrelate.rs | 3 +++ .../src/planner/optimizer/decorrelate/flatten_plan.rs | 9 ++++++--- .../planner/optimizer/decorrelate/flatten_scalar.rs | 4 +++- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs index 048eb748d6a4..c380a513b1a0 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs @@ -378,15 +378,7 @@ impl Binder { let mut is_lateral = false; let mut is_null_equal = Vec::new(); - // Some outer columns maybe from union plan - let need_correlated = { - let metadata = self.metadata.read(); - metadata - .table_index_by_column_indexes(&right_prop.outer_columns) - .is_some() - }; - - if need_correlated { + if !right_prop.outer_columns.is_empty() { // If there are outer columns in right child, then the join is a correlated lateral join let mut decorrelator = SubqueryRewriter::new(self.metadata.clone(), Some(self.clone())); right_child = decorrelator.flatten_plan( diff --git a/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs b/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs index 6a3f074260fb..e97b31c7af38 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs @@ -443,6 +443,9 @@ impl SubqueryRewriter { let mut correlated_columns = correlated_columns.clone().into_iter().collect::>(); correlated_columns.sort(); for correlated_column in correlated_columns.iter() { + if !self.derived_columns.contains_key(correlated_column) { + continue; + } let metadata = self.metadata.read(); let column_entry = metadata.column(*correlated_column); let right_column = ScalarExpr::BoundColumnRef(BoundColumnRef { diff --git a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs index c208878c78fd..922d2a162d43 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs @@ -71,9 +71,12 @@ impl SubqueryRewriter { let mut metadata = self.metadata.write(); // Currently, we don't support left plan's from clause contains subquery. // Such as: select t2.a from (select a + 1 as a from t) as t2 where (select sum(a) from t as t1 where t1.a < t2.a) = 1; - let table_index = metadata - .table_index_by_column_indexes(correlated_columns) - .unwrap(); + let table_index = metadata.table_index_by_column_indexes(correlated_columns); + if table_index.is_none() { + return Ok(plan.clone()); + } + + let table_index = table_index.unwrap(); let mut data_types = Vec::with_capacity(correlated_columns.len()); let mut scalar_items = vec![]; let mut scan_columns = ColumnSet::new(); diff --git a/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs b/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs index ff242dc062e4..0ac73f1dd0af 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs @@ -34,7 +34,9 @@ impl SubqueryRewriter { match scalar { ScalarExpr::BoundColumnRef(bound_column) => { let column_binding = bound_column.column.clone(); - if correlated_columns.contains(&column_binding.index) { + if correlated_columns.contains(&column_binding.index) + && self.derived_columns.contains_key(&column_binding.index) + { let index = self.derived_columns.get(&column_binding.index).unwrap(); let metadata = self.metadata.read(); let column_entry = metadata.column(*index); From 6c982c7f87dae650afe2f41be2279ea7678064f5 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 25 Dec 2024 12:37:32 +0800 Subject: [PATCH 11/14] update join binder --- .../sql/src/planner/optimizer/decorrelate/decorrelate.rs | 3 --- .../src/planner/optimizer/decorrelate/flatten_plan.rs | 9 +++------ .../src/planner/optimizer/decorrelate/flatten_scalar.rs | 4 +--- src/query/sql/src/planner/plans/union_all.rs | 7 +------ 4 files changed, 5 insertions(+), 18 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs b/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs index e97b31c7af38..6a3f074260fb 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs @@ -443,9 +443,6 @@ impl SubqueryRewriter { let mut correlated_columns = correlated_columns.clone().into_iter().collect::>(); correlated_columns.sort(); for correlated_column in correlated_columns.iter() { - if !self.derived_columns.contains_key(correlated_column) { - continue; - } let metadata = self.metadata.read(); let column_entry = metadata.column(*correlated_column); let right_column = ScalarExpr::BoundColumnRef(BoundColumnRef { diff --git a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs index 922d2a162d43..c208878c78fd 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs @@ -71,12 +71,9 @@ impl SubqueryRewriter { let mut metadata = self.metadata.write(); // Currently, we don't support left plan's from clause contains subquery. // Such as: select t2.a from (select a + 1 as a from t) as t2 where (select sum(a) from t as t1 where t1.a < t2.a) = 1; - let table_index = metadata.table_index_by_column_indexes(correlated_columns); - if table_index.is_none() { - return Ok(plan.clone()); - } - - let table_index = table_index.unwrap(); + let table_index = metadata + .table_index_by_column_indexes(correlated_columns) + .unwrap(); let mut data_types = Vec::with_capacity(correlated_columns.len()); let mut scalar_items = vec![]; let mut scan_columns = ColumnSet::new(); diff --git a/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs b/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs index 0ac73f1dd0af..ff242dc062e4 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/flatten_scalar.rs @@ -34,9 +34,7 @@ impl SubqueryRewriter { match scalar { ScalarExpr::BoundColumnRef(bound_column) => { let column_binding = bound_column.column.clone(); - if correlated_columns.contains(&column_binding.index) - && self.derived_columns.contains_key(&column_binding.index) - { + if correlated_columns.contains(&column_binding.index) { let index = self.derived_columns.get(&column_binding.index).unwrap(); let metadata = self.metadata.read(); let column_entry = metadata.column(*index); diff --git a/src/query/sql/src/planner/plans/union_all.rs b/src/query/sql/src/planner/plans/union_all.rs index a5079106e096..221bbf3cf57c 100644 --- a/src/query/sql/src/planner/plans/union_all.rs +++ b/src/query/sql/src/planner/plans/union_all.rs @@ -98,12 +98,7 @@ impl Operator for UnionAll { let right_prop = rel_expr.derive_relational_prop_child(1)?; // Derive output columns - let mut output_columns = left_prop.output_columns.clone(); - output_columns = output_columns - .union(&right_prop.output_columns) - .cloned() - .collect(); - + let output_columns = self.output_indexes.iter().cloned().collect(); // Derive outer columns let mut outer_columns = left_prop.outer_columns.clone(); outer_columns = outer_columns From 312f2cd98a141d0c8112e267948fab0481591c62 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 25 Dec 2024 14:06:19 +0800 Subject: [PATCH 12/14] update join binder --- .../suites/mode/standalone/explain/window.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/window.test b/tests/sqllogictests/suites/mode/standalone/explain/window.test index 0ebbdda0e7bc..44f403949417 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/window.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/window.test @@ -146,7 +146,7 @@ explain SELECT k, v FROM (SELECT *, rank() OVER (PARTITION BY v ORDER BY v DESC) ---- Filter ├── output columns: [k (#4), v (#5)] -├── filters: [rank() OVER ( PARTITION BY v ORDER BY v DESC ) (#6) = 1, is_true(t1.k (#4) = 12)] +├── filters: [t2.rank (#6) = 1, is_true(t2.k (#4) = 12)] ├── estimated rows: 0.00 └── Window ├── output columns: [k (#4), v (#5), rank() OVER ( PARTITION BY v ORDER BY v DESC ) (#6)] @@ -186,7 +186,7 @@ explain SELECT k, v FROM (SELECT *, rank() OVER (ORDER BY v DESC) AS rank FROM ( ---- Filter ├── output columns: [k (#4), v (#5)] -├── filters: [rank() OVER ( ORDER BY v DESC ) (#6) = 1, is_true(t1.k (#4) = 12)] +├── filters: [t2.rank (#6) = 1, is_true(t2.k (#4) = 12)] ├── estimated rows: 0.00 └── Window ├── output columns: [k (#4), v (#5), rank() OVER ( ORDER BY v DESC ) (#6)] @@ -582,7 +582,7 @@ CompoundBlockOperator(Project) × 1 ## explain select a, sum(number - 1) over (partition by number % 3) from (select number, rank()over (partition by number % 3 order by number + 1) a ## from numbers(50)) t(number, a); query T -explain select a, sum(number - 1) over (partition by number % 3) from (select number, rank()over (partition by number % 3 order by number + 1) a +explain select a, sum(number - 1) over (partition by number % 3) from (select number, rank()over (partition by number % 3 order by number + 1) a from numbers(50)); ---- Window From 2356e40dfe98b398b45ace3fafb171bde5e435c2 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 25 Dec 2024 17:56:27 +0800 Subject: [PATCH 13/14] chore(query): update tests --- .../19_fuzz/19_0001_fuzz_aggregate.sh | 31 ++++++------------- .../0_stateless/19_fuzz/19_0003_fuzz_union.sh | 22 ++++++------- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh index f46e5252ae39..d848c046a607 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh +++ b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh @@ -40,26 +40,15 @@ select a % ${m} a, b, c, d, sum(e) e, sum(f) f, sum(g) g from agg_fuzz where b ) group by all; """ | $BENDSQL_CLIENT_OUTPUT_NULL - ## judge the result are same, except has bugs - # echo """ - # SELECT * FROM agg_fuzz_result1 - # EXCEPT - # SELECT * FROM agg_fuzz_result2 - # UNION ALL - # SELECT * FROM agg_fuzz_result2 - # EXCEPT - # SELECT * FROM agg_fuzz_result1; - # """ | $BENDSQL_CLIENT_CONNECT - echo """ - SELECT - CASE - WHEN - (SELECT MD5(STRING_AGG(x.a::STRING, '') || STRING_AGG(x.e::STRING, '') || STRING_AGG(x.f::STRING, '') || STRING_AGG(x.g::STRING, '')) FROM (SELECT * FROM agg_fuzz_result1 ORDER BY a, e, f, g) x) = - (SELECT MD5(STRING_AGG(y.a::STRING, '') || STRING_AGG(y.e::STRING, '') || STRING_AGG(y.f::STRING, '') || STRING_AGG(y.g::STRING, '')) FROM (SELECT * FROM agg_fuzz_result1 ORDER BY a, e, f, g) y) - THEN 'Eq' - ELSE 'NotEq' - END AS comparison_result; -""" | $BENDSQL_CLIENT_CONNECT - + select if(count() == 0, 'Eq', 'NotEq') from ( + SELECT * FROM agg_fuzz_result1 + EXCEPT + SELECT * FROM agg_fuzz_result2 + UNION ALL + SELECT * FROM agg_fuzz_result2 + EXCEPT + SELECT * FROM agg_fuzz_result1 + ); + """ | $BENDSQL_CLIENT_CONNECT done diff --git a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh index f946d53c791f..d0863f0072a4 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh +++ b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh @@ -38,17 +38,17 @@ for ((i=0; i<$length; i++)); do select $y, $x from (select $x, $y from union_fuzz2 union all select $x, $y from union_fuzz); """ | $BENDSQL_CLIENT_OUTPUT_NULL - - echo """ - SELECT - CASE - WHEN - (SELECT MD5(STRING_AGG(x.$x::STRING, '') || STRING_AGG(x.$y::STRING, '')) FROM (select * from union_fuzz_result1 order by $x, $y) x) = - (SELECT MD5(STRING_AGG(y.$x::STRING, '') || STRING_AGG(y.$y::STRING, '')) FROM (select * from union_fuzz_result2 order by $x, $y) y) - THEN 'Eq' - ELSE 'NotEq' - END AS comparison_result; -""" | $BENDSQL_CLIENT_CONNECT +echo """ + select if(count() == 0, 'Eq', 'NotEq') from ( + SELECT * FROM union_fuzz_result1 + EXCEPT + SELECT * FROM union_fuzz_result2 + UNION ALL + SELECT * FROM union_fuzz_result2 + EXCEPT + SELECT * FROM union_fuzz_result1 + ); + """ | $BENDSQL_CLIENT_CONNECT done done From dd26beb0debd116a3bdbc112d70bfdb9c58cb865 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 25 Dec 2024 21:46:50 +0800 Subject: [PATCH 14/14] chore(query): fix parser --- src/query/ast/src/ast/query.rs | 30 + src/query/ast/src/parser/query.rs | 5 + src/query/ast/tests/it/parser.rs | 10 + src/query/ast/tests/it/testdata/query.txt | 621 ++++++++++++++++++ .../suites/query/set_operator.test | 28 + .../19_fuzz/19_0001_fuzz_aggregate.result | 21 +- .../19_fuzz/19_0001_fuzz_aggregate.sh | 14 +- .../19_fuzz/19_0003_fuzz_union.result | 42 +- .../0_stateless/19_fuzz/19_0003_fuzz_union.sh | 14 +- 9 files changed, 749 insertions(+), 36 deletions(-) diff --git a/src/query/ast/src/ast/query.rs b/src/query/ast/src/ast/query.rs index bcf0308d78bf..12bc75bc9223 100644 --- a/src/query/ast/src/ast/query.rs +++ b/src/query/ast/src/ast/query.rs @@ -301,7 +301,18 @@ impl Display for SetExpr { write!(f, "({query})")?; } SetExpr::SetOperation(set_operation) => { + // Check if the left or right expressions are also SetOperations + let left_needs_parentheses = matches!(set_operation.left.as_ref(), SetExpr::SetOperation(left_op) if left_op.op < set_operation.op); + let right_needs_parentheses = matches!(set_operation.right.as_ref(), SetExpr::SetOperation(right_op) if right_op.op < set_operation.op); + + if left_needs_parentheses { + write!(f, "(")?; + } write!(f, "{}", set_operation.left)?; + if left_needs_parentheses { + write!(f, ")")?; + } + match set_operation.op { SetOperator::Union => { write!(f, " UNION ")?; @@ -316,7 +327,14 @@ impl Display for SetExpr { if set_operation.all { write!(f, "ALL ")?; } + // Add parentheses if necessary + if right_needs_parentheses { + write!(f, "(")?; + } write!(f, "{}", set_operation.right)?; + if right_needs_parentheses { + write!(f, ")")?; + } } SetExpr::Values { values, .. } => { write!(f, "VALUES")?; @@ -341,6 +359,18 @@ pub enum SetOperator { Intersect, } +impl PartialOrd for SetOperator { + fn partial_cmp(&self, other: &Self) -> Option { + if self == other { + Some(std::cmp::Ordering::Equal) + } else if self == &SetOperator::Intersect { + Some(std::cmp::Ordering::Greater) + } else { + Some(std::cmp::Ordering::Less) + } + } +} + /// `ORDER BY` clause #[derive(Debug, Clone, PartialEq, Drive, DriveMut)] pub struct OrderByExpr { diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index efa318b11726..8984a5ca3c9f 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -216,6 +216,11 @@ impl<'a, I: Iterator>> PrattParser fn query(&mut self, input: &Self::Input) -> Result { let affix = match &input.elem { + // https://learn.microsoft.com/en-us/sql/t-sql/language-elements/set-operators-except-and-intersect-transact-sql?view=sql-server-2017 + // If EXCEPT or INTERSECT is used together with other operators in an expression, it's evaluated in the context of the following precedence: + // 1. Expressions in parentheses + // 2. The INTERSECT operator + // 3. EXCEPT and UNION evaluated from left to right based on their position in the expression SetOperationElement::SetOperation { op, .. } => match op { SetOperator::Union | SetOperator::Except => { Affix::Infix(Precedence(10), Associativity::Left) diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index deecd6b6a1c7..a9dd1e1a9bf6 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -1101,7 +1101,17 @@ fn test_query() { r#"select * from t1 except select * from t2"#, r#"select * from t1 union select * from t2 union select * from t3"#, r#"select * from t1 union select * from t2 union all select * from t3"#, + r#"select * from ( + (SELECT f, g FROM union_fuzz_result1 + EXCEPT + SELECT f, g FROM union_fuzz_result2) + UNION ALL + (SELECT f, g FROM union_fuzz_result2 + EXCEPT + SELECT f, g FROM union_fuzz_result1) + )"#, r#"select * from t1 union select * from t2 intersect select * from t3"#, + r#"(select * from t1 union select * from t2) intersect select * from t3"#, r#"(select * from t1 union select * from t2) union select * from t3"#, r#"select * from t1 union (select * from t2 union select * from t3)"#, r#"SELECT * FROM ((SELECT *) EXCEPT (SELECT *)) foo"#, diff --git a/src/query/ast/tests/it/testdata/query.txt b/src/query/ast/tests/it/testdata/query.txt index c5bb30f5904b..d6d61ba217e1 100644 --- a/src/query/ast/tests/it/testdata/query.txt +++ b/src/query/ast/tests/it/testdata/query.txt @@ -4577,6 +4577,442 @@ Query { } +---------- Input ---------- +select * from ( + (SELECT f, g FROM union_fuzz_result1 + EXCEPT + SELECT f, g FROM union_fuzz_result2) + UNION ALL + (SELECT f, g FROM union_fuzz_result2 + EXCEPT + SELECT f, g FROM union_fuzz_result1) +) +---------- Output --------- +SELECT * FROM ((SELECT f, g FROM union_fuzz_result1 EXCEPT SELECT f, g FROM union_fuzz_result2) UNION ALL (SELECT f, g FROM union_fuzz_result2 EXCEPT SELECT f, g FROM union_fuzz_result1)) +---------- AST ------------ +Query { + span: Some( + 0..245, + ), + with: None, + body: Select( + SelectStmt { + span: Some( + 0..245, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 7..8, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Subquery { + span: Some( + 14..245, + ), + lateral: false, + subquery: Query { + span: Some( + 129..138, + ), + with: None, + body: SetOperation( + SetOperation { + span: Some( + 129..138, + ), + op: Union, + all: true, + left: SetOperation( + SetOperation { + span: Some( + 69..75, + ), + op: Except, + all: false, + left: Select( + SelectStmt { + span: Some( + 25..60, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + AliasedExpr { + expr: ColumnRef { + span: Some( + 32..33, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 32..33, + ), + name: "f", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + AliasedExpr { + expr: ColumnRef { + span: Some( + 35..36, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 35..36, + ), + name: "g", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + ], + from: [ + Table { + span: Some( + 42..60, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 42..60, + ), + name: "union_fuzz_result1", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + right: Select( + SelectStmt { + span: Some( + 84..119, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + AliasedExpr { + expr: ColumnRef { + span: Some( + 91..92, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 91..92, + ), + name: "f", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + AliasedExpr { + expr: ColumnRef { + span: Some( + 94..95, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 94..95, + ), + name: "g", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + ], + from: [ + Table { + span: Some( + 101..119, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 101..119, + ), + name: "union_fuzz_result2", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + }, + ), + right: SetOperation( + SetOperation { + span: Some( + 192..198, + ), + op: Except, + all: false, + left: Select( + SelectStmt { + span: Some( + 148..183, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + AliasedExpr { + expr: ColumnRef { + span: Some( + 155..156, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 155..156, + ), + name: "f", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + AliasedExpr { + expr: ColumnRef { + span: Some( + 158..159, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 158..159, + ), + name: "g", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + ], + from: [ + Table { + span: Some( + 165..183, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 165..183, + ), + name: "union_fuzz_result2", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + right: Select( + SelectStmt { + span: Some( + 207..242, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + AliasedExpr { + expr: ColumnRef { + span: Some( + 214..215, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 214..215, + ), + name: "f", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + AliasedExpr { + expr: ColumnRef { + span: Some( + 217..218, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 217..218, + ), + name: "g", + quote: None, + ident_type: None, + }, + ), + }, + }, + alias: None, + }, + ], + from: [ + Table { + span: Some( + 224..242, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 224..242, + ), + name: "union_fuzz_result1", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + }, + ), + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, + }, + alias: None, + pivot: None, + unpivot: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, +} + + ---------- Input ---------- select * from t1 union select * from t2 intersect select * from t3 ---------- Output --------- @@ -4762,6 +5198,191 @@ Query { } +---------- Input ---------- +(select * from t1 union select * from t2) intersect select * from t3 +---------- Output --------- +(SELECT * FROM t1 UNION SELECT * FROM t2) INTERSECT SELECT * FROM t3 +---------- AST ------------ +Query { + span: Some( + 42..51, + ), + with: None, + body: SetOperation( + SetOperation { + span: Some( + 42..51, + ), + op: Intersect, + all: false, + left: SetOperation( + SetOperation { + span: Some( + 18..23, + ), + op: Union, + all: false, + left: Select( + SelectStmt { + span: Some( + 1..17, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 8..9, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 15..17, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 15..17, + ), + name: "t1", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + right: Select( + SelectStmt { + span: Some( + 24..40, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 31..32, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 38..40, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 38..40, + ), + name: "t2", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + }, + ), + right: Select( + SelectStmt { + span: Some( + 52..68, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 59..60, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 66..68, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 66..68, + ), + name: "t3", + quote: None, + ident_type: None, + }, + alias: None, + temporal: None, + with_options: None, + pivot: None, + unpivot: None, + sample: None, + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, +} + + ---------- Input ---------- (select * from t1 union select * from t2) union select * from t3 ---------- Output --------- diff --git a/tests/sqllogictests/suites/query/set_operator.test b/tests/sqllogictests/suites/query/set_operator.test index fec3913e7138..e0f3ab4dd79f 100644 --- a/tests/sqllogictests/suites/query/set_operator.test +++ b/tests/sqllogictests/suites/query/set_operator.test @@ -21,3 +21,31 @@ query T select * from a except select * from b union all select * from b intersect select * from a; ---- NULL 3 + +statement ok +create or replace table a as select null, 3 , '343', 1.2, 3.4, true, 4+5, 5-2; + +statement ok +create or replace table a_r like a; + +statement ok +insert into a select * from a_r limit 100000; + +statement ok +create or replace table b as select * from a; + +query T +select * from a except select * from b; +---- + +query T +select * from a except (select * from a intersect select * from b); +---- + +query T +select * from a except select * from b union all select * from b except select * from a; +---- + +query T +select * from a except (select * from b intersect select * from a); +---- diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result index 3cda1d50b3ab..23751ef6c1fe 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result +++ b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.result @@ -1,7 +1,14 @@ -Eq -Eq -Eq -Eq -Eq -Eq -Eq +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh index d848c046a607..f100ab66889d 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh +++ b/tests/suites/0_stateless/19_fuzz/19_0001_fuzz_aggregate.sh @@ -41,7 +41,7 @@ select a % ${m} a, b, c, d, sum(e) e, sum(f) f, sum(g) g from agg_fuzz where b """ | $BENDSQL_CLIENT_OUTPUT_NULL echo """ - select if(count() == 0, 'Eq', 'NotEq') from ( + select count() + 1 from ( SELECT * FROM agg_fuzz_result1 EXCEPT SELECT * FROM agg_fuzz_result2 @@ -51,4 +51,16 @@ select a % ${m} a, b, c, d, sum(e) e, sum(f) f, sum(g) g from agg_fuzz where b SELECT * FROM agg_fuzz_result1 ); """ | $BENDSQL_CLIENT_CONNECT + + echo """ + select count() + 1 from ( + (SELECT * FROM agg_fuzz_result1 + EXCEPT + SELECT * FROM agg_fuzz_result2) + UNION ALL + (SELECT * FROM agg_fuzz_result2 + EXCEPT + SELECT * FROM agg_fuzz_result1) + ); + """ | $BENDSQL_CLIENT_CONNECT done diff --git a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result index 6ee20096dc35..338ecb0183d5 100644 --- a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result +++ b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.result @@ -1,21 +1,21 @@ -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq -Eq +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh index d0863f0072a4..027aee4ebdcd 100755 --- a/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh +++ b/tests/suites/0_stateless/19_fuzz/19_0003_fuzz_union.sh @@ -8,8 +8,8 @@ rows=1000 echo """ -create or replace table union_fuzz(a int, b string, c bool, d variant, e int, f Decimal(15, 2), g Decimal(39,2), h Array(String), i Array(Decimal(15, 2))); -create or replace table union_fuzz2(a int, b string, c bool, d variant, e int, f Decimal(15, 2), g Decimal(39,2), h Array(String), i Array(Decimal(15, 2))); +create or replace table union_fuzz(a int, b string, c bool, d variant, e int64, f Decimal(15, 2), g Decimal(39,2), h Array(String), i Array(Decimal(15, 2))); +create or replace table union_fuzz2(a int, b string, c bool, d variant, e int64, f Decimal(15, 2), g Decimal(39,2), h Array(String), i Array(Decimal(15, 2))); create or replace table union_fuzz_r like union_fuzz Engine = Random max_string_len = 5 max_array_len = 2; """ | $BENDSQL_CLIENT_OUTPUT_NULL @@ -39,14 +39,14 @@ for ((i=0; i<$length; i++)); do """ | $BENDSQL_CLIENT_OUTPUT_NULL echo """ - select if(count() == 0, 'Eq', 'NotEq') from ( - SELECT * FROM union_fuzz_result1 + select count() + 1 from ( + (SELECT $x, $y FROM union_fuzz_result1 EXCEPT - SELECT * FROM union_fuzz_result2 + SELECT $x, $y FROM union_fuzz_result2) UNION ALL - SELECT * FROM union_fuzz_result2 + (SELECT $x, $y FROM union_fuzz_result2 EXCEPT - SELECT * FROM union_fuzz_result1 + SELECT $x, $y FROM union_fuzz_result1) ); """ | $BENDSQL_CLIENT_CONNECT