Skip to content

Commit

Permalink
chore: remove the static filter for merge into (#14092)
Browse files Browse the repository at this point in the history
  • Loading branch information
dantengsky authored Dec 27, 2023
2 parents 51f3514 + 9b63dd3 commit a226452
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 41 deletions.
6 changes: 2 additions & 4 deletions src/query/service/src/interpreters/interpreter_merge_into.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,10 @@ impl MergeIntoInterpreter {
(input, false)
};

// let optimized_input =
// Self::build_static_filter(&input, meta_data, self.ctx.clone(), check_table).await?;
let mut builder = PhysicalPlanBuilder::new(meta_data.clone(), self.ctx.clone(), false);

// build source for MergeInto
let join_input = builder.build(input.as_ref(), *columns_set.clone()).await?;
let join_input = builder.build(&input, *columns_set.clone()).await?;


// find row_id column index
let join_output_schema = join_input.output_schema()?;
Expand Down
1 change: 0 additions & 1 deletion src/query/service/src/interpreters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ mod interpreter_index_refresh;
mod interpreter_insert;
mod interpreter_kill;
mod interpreter_merge_into;
mod interpreter_merge_into_static_filter;
mod interpreter_metrics;
mod interpreter_network_policies_show;
mod interpreter_network_policy_alter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,12 @@ impl PipelineBuilder {
// insert only
(output_lens, 0)
} else {
// (with row_id and without row_number/unmatched) or (without row_id and with row_number/unmatched)
// I. (with row_id and without row_number/unmatched) (need_match and !need_unmatch)
// II. (without row_id and with row_number/unmatched) (!need_match and need_unmatch)
// In fact, for II it should be (output_lens - 1, 1), but in this case
// output_lens = 1, so it would be (0, 1), and we just need to append a dummy item.
// We use (output_lens - 1, 0) instead of (output_lens - 1, 1) because both
// arrive at the same result (appending only one dummy item).
(output_lens - 1, 0)
};
table.cluster_gen_for_append_with_specified_len(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ statement ok
drop table if exists distributed_source_test;

statement ok
create table distributed_target_test(a int,b string);
create table distributed_target_test(a int,b string) cluster by(a,b);

## multi blocks
statement ok
Expand Down Expand Up @@ -42,7 +42,7 @@ select * from distributed_target_test order by a;
8 x

statement ok
create table distributed_source_test(a int,b string,is_databend_deleted bool);
create table distributed_source_test(a int,b string,is_databend_deleted bool) cluster by(a,b);

statement ok
insert into distributed_source_test values(1,'d',true),(2,'e',true),(3,'f',false),(4,'e',true),(5,'f',false);
Expand Down Expand Up @@ -82,13 +82,13 @@ statement ok
drop table if exists corner_target_table;

statement ok
create table corner_target_table(a int,b string,c string);
create table corner_target_table(a int,b string,c string) cluster by(a,b);

statement ok
drop table if exists corner_source_table;

statement ok
create table corner_source_table(a int,b string,c string);
create table corner_source_table(a int,b string,c string) cluster by(a,b);

## add block1
statement ok
Expand Down Expand Up @@ -164,7 +164,7 @@ statement ok
merge into distributed_test_order as t using (select id,34 as id1,238 as id2, id3, id4, id5, id6, id7,s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13,d1, d2, d3, d4, d5, d6, d7, d8, d9, d10,insert_time,insert_time1,insert_time2,insert_time3,i from distributed_random_store) as s on t.id = s.id and t.insert_time = s.insert_time when matched then update * when not matched then insert *;

statement ok
create table orders2(a int,b string,c string);
create table orders2(a int,b string,c string) cluster by(a,b);

statement ok
insert into orders2 values(1,'a1','b1'),(2,'a2','b2'),(3,'a3','b3');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ statement ok
set enable_experimental_merge_into = 1;

statement ok
create table merge_target_0(a int,b string);
create table merge_target_0(a int,b string) cluster by(a,b);

statement ok
create table merge_source_0(a int,b string);
create table merge_source_0(a int,b string) cluster by(a,b);

statement ok
insert into merge_target_0 values(1,'a1'),(2,'b1');
Expand Down Expand Up @@ -87,7 +87,7 @@ select * from merge_target_0 order by a,b;

### test copy into table unsupported
statement ok
create table copy_table_test0(a int,b string);
create table copy_table_test0(a int,b string) cluster by(a,b);

statement ok
create stage parquet_table0 FILE_FORMAT = (TYPE = PARQUET);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ statement ok
drop table if exists t2_separate;

statement ok
create table t1_separate(a int,b string, c string);
create table t1_separate(a int,b string, c string) cluster by(a,b);

statement ok
create table t2_separate(a int,b string, c string);
Expand Down Expand Up @@ -92,5 +92,63 @@ select * from t1_separate order by a,b,c;
8 a8 b8
9 a9 b9

## test insert-only cluster by
statement ok
truncate table t1_separate;

statement ok
truncate table t2_separate;

statement ok
insert into t2_separate values(8,'a8','b8'),(9,'a9','b9'),(1,'a5','b5'),(3,'a6','b6');

query T
merge into t1_separate as t1 using (select * from t2_separate) as t2 on t1.a = t2.a when not matched then insert *;
----
4

## without order by
query TTT
select * from t1_separate;
----
1 a5 b5
3 a6 b6
8 a8 b8
9 a9 b9

## test matched-only cluster by
query T
merge into t1_separate as t1 using (select * from t2_separate) as t2 on t1.a = t2.a when matched then update *;
----
4

query TTT
select * from t1_separate;
----
1 a5 b5
3 a6 b6
8 a8 b8
9 a9 b9

## test full operation cluster by
statement ok
insert into t2_separate values(5,'a5','b5'),(7,'a7','b7');

query TT
merge into t1_separate as t1 using (select * from t2_separate) as t2 on t1.a = t2.a when matched then update * when not matched then insert *;
----
2 4

## we will do compact
query TTT
select * from t1_separate;
----
1 a5 b5
3 a6 b6
5 a5 b5
7 a7 b7
8 a8 b8
9 a9 b9

statement ok
set enable_experimental_merge_into = 0;
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ statement ok
drop table if exists t2;

statement ok
create table t1(a int,b string, c string);
create table t1(a int,b string, c string) cluster by(a,b);

statement ok
create table t2(a int,b string, c string);
create table t2(a int,b string, c string) cluster by(a,b);

statement ok
insert into t1 values(1,'b1','c1'),(2,'b2','c2');
Expand Down Expand Up @@ -276,7 +276,7 @@ statement ok
drop table if exists target_table;

statement ok
create table target_table(a int,b string,c string);
create table target_table(a int,b string,c string) cluster by(a,b);

statement ok
insert into target_table values(1,'a_1','b_1'),(2,'a_2','b_2');
Expand All @@ -288,7 +288,7 @@ select * from target_table order by a,b,c;
2 a_2 b_2

statement ok
create table test_stage(a int,b string,c string);
create table test_stage(a int,b string,c string) cluster by(a,b);

statement ok
insert into test_stage values(1,'a1','b1'),(2,'a2','b2'),(3,'a3','b3');
Expand Down Expand Up @@ -468,10 +468,10 @@ select * from t1 order by a,b,c;
1 a1 b1

statement ok
CREATE TABLE employees (employee_id INT, employee_name VARCHAR(255),department VARCHAR(255));
CREATE TABLE employees (employee_id INT, employee_name VARCHAR(255),department VARCHAR(255)) cluster by(employee_id,employee_name);

statement ok
CREATE TABLE salaries (employee_id INT,salary DECIMAL(10, 2));
CREATE TABLE salaries (employee_id INT,salary DECIMAL(10, 2)) cluster by(employee_id,salary);

statement ok
INSERT INTO employees VALUES(1, 'Alice', 'HR'),(2, 'Bob', 'IT'),(3, 'Charlie', 'Finance'),(4, 'David', 'HR');
Expand All @@ -494,10 +494,10 @@ select * from salaries order by employee_id;

## null cast bug fix
statement ok
create table t1_target(a int not null);
create table t1_target(a int not null) cluster by(a);

statement ok
create table t2_source(a int not null);
create table t2_source(a int not null) cluster by(a);

statement ok
insert into t1_target values(1);
Expand Down Expand Up @@ -559,13 +559,13 @@ statement ok
drop table if exists source_test;

statement ok
create table target_test(a int,b string);
create table target_test(a int,b string) cluster by(a,b);

statement ok
insert into target_test values(1,'a'),(2,'b'),(3,'c');

statement ok
create table source_test(a int,b string,delete_flag bool);
create table source_test(a int,b string,delete_flag bool) cluster by(a,b);

statement ok
insert into source_test values(1,'d',true),(2,'e',true),(3,'f',false),(4,'e',true),(5,'f',false);
Expand Down Expand Up @@ -609,10 +609,10 @@ merge into test_order as t using (select id,34 as id1,238 as id2, id3, id4, id5,

## test update list #13297
statement ok
create table t11(a int,b string, c string);
create table t11(a int,b string, c string) cluster by(a,b);

statement ok
create table t12(a int,b string, c string);
create table t12(a int,b string, c string) cluster by(a,b);

statement ok
insert into t11 values(1,'b1','c1'),(2,'b2','c2');
Expand All @@ -628,7 +628,7 @@ merge into t11 using (select a, c from t12) as t12 on t11.a = t12.a when matched

## test issue #13287
statement ok
create table tt1 (a int, b int);
create table tt1 (a int, b int) cluster by(a,b);

statement error 1065
merge into tt1 using(select 10, 20) as tt2 on tt1.a = 1 when not matched and tt1.b = 2 then insert values (10, 20);
Expand All @@ -645,7 +645,7 @@ select count(*) from tt1;

## test issue #13367
statement ok
create table tt2(a bool, b variant, c map(string, string));
create table tt2(a bool, b variant, c map(string, string)) cluster by(a);

statement ok
insert into tt2 values (true, '10', {'k1':'v1'}), (false, '20', {'k2':'v2'})
Expand All @@ -669,10 +669,10 @@ statement ok
drop table if exists t2;

statement ok
create table t1(a int);
create table t1(a int) cluster by(a);

statement ok
create table t2(a int);
create table t2(a int) cluster by(a);

statement ok
insert into t1 values(1);
Expand All @@ -697,10 +697,10 @@ statement ok
drop table if exists t2;

statement ok
create table t1(b int);
create table t1(b int) cluster by(b);

statement ok
create table t2(a int);
create table t2(a int) cluster by(a);

statement ok
insert into t1 values(1);
Expand All @@ -719,10 +719,10 @@ statement ok
drop table if exists t2;

statement ok
create table t1(a int,b string,c bool);
create table t1(a int,b string,c bool) cluster by(a,b);

statement ok
create table t2(a int,b string,c bool);
create table t2(a int,b string,c bool) cluster by(a,b);

statement ok
insert into t1 values(1,'a1',true),(2,'a2',false),(3,'a3',true);
Expand Down Expand Up @@ -783,7 +783,7 @@ statement ok
drop table if exists tt1;

statement ok
create table tt1(a bool, b int);
create table tt1(a bool, b int) cluster by(a,b);

statement ok
insert into tt1 values (true, 1), (false, 2);
Expand All @@ -806,10 +806,10 @@ statement ok
drop table if exists t12;

statement ok
create table t12 (a int, b int);
create table t12 (a int, b int) cluster by(a,b);

statement ok
create table t11 (a int, b int);
create table t11 (a int, b int) cluster by(a,b);

statement ok
insert into t11 values (1, 10),(2, 20),(3, 30),(4, 40);
Expand Down Expand Up @@ -1006,7 +1006,7 @@ FROM orders;
64.16764110 6.416764110000 1.97683658 19.29134884

statement ok
create table tb_01 (id int,c1 varchar,c2 datetime(0),c3 json);
create table tb_01 (id int,c1 varchar,c2 datetime(0),c3 json) cluster by(c1,c2);

statement ok
create table tmp_01 like tb_01;
Expand All @@ -1026,10 +1026,10 @@ select id,c1,to_date(c2),c3 from tb_01;

## test #issue13932
statement ok
create table null_target(a int not null,b text);
create table null_target(a int not null,b text) cluster by(a,b);

statement ok
create table null_source(a int not null,b text);
create table null_source(a int not null,b text) cluster by(a,b);

statement ok
insert into null_target values(1,'a1');
Expand Down

0 comments on commit a226452

Please sign in to comment.