From cbb97f838d360a42f3f188d2f4eed2256a8ad2ab Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Tue, 29 Oct 2024 20:38:11 +0800 Subject: [PATCH 1/2] [fix](Nereids) offset do more than once when have shuffle after limit (#42576) (#42583) pick from master #42576 intro by #39316. it want to fix a problem intro by #36699. but forgot to remove all wrong code in #36699. after #39316, we should not set offset on exchange, when the exchange is on the top of a limit with offset. --- .../translator/PhysicalPlanTranslator.java | 3 --- .../sub_query_correlated.out | 9 ++++++++ .../data/nereids_syntax_p0/test_limit.out | 7 ------ .../test_csv_with_double_quotes.groovy | 2 +- .../nereids_syntax_p0/test_limit.groovy | 23 +++++++++++++++---- 5 files changed, 29 insertions(+), 15 deletions(-) delete mode 100644 regression-test/data/nereids_syntax_p0/test_limit.out diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 6f9229e06bd554..aee22eb7911936 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -293,9 +293,6 @@ public PlanFragment visitPhysicalDistribute(PhysicalDistribute d .collect(Collectors.toList()); keys.addAll(validOutputIds); validOutputIds = keys; - } else if (child instanceof PhysicalLimit && ((PhysicalLimit) child).getPhase().isGlobal()) { - // because sort already contains Offset, we don't need to handle PhysicalTopN - exchangeNode.setOffset(((PhysicalLimit) child).getOffset()); } if (inputFragment instanceof MultiCastPlanFragment) { // TODO: remove this logic when we split to multi-window in logical window to physical window conversion diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out index d57a673339b517..e7758d02a5f660 100644 --- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out +++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out @@ -187,6 +187,15 @@ -- !exist_corr_limit0 -- -- !exist_unCorrelated_limit1_offset1 -- +1 2 +1 3 +2 4 +2 5 +3 3 +3 4 +20 2 +22 3 +24 4 -- !exist_unCorrelated_limit0_offset1 -- diff --git a/regression-test/data/nereids_syntax_p0/test_limit.out b/regression-test/data/nereids_syntax_p0/test_limit.out deleted file mode 100644 index 5ef4497f2f1f85..00000000000000 --- a/regression-test/data/nereids_syntax_p0/test_limit.out +++ /dev/null @@ -1,7 +0,0 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !limit1 -- -2 7844 TURNER SALESMAN 7698 1981-09-08 1500.0 0.0 30 - --- !lmit2 -- -3 7934 MILLER CLERK 7782 1982-01-23 1300.0 0.0 10 - diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy index 1743d28d11719a..0b4e54165ff76b 100644 --- a/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy +++ b/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy @@ -44,7 +44,7 @@ suite("test_csv_with_double_quotes", "p0") { } sql "sync" - qt_sql "select * from ${tableName} order by k1, k2" + qt_sql "select * from ${tableName} order by k1, k2, v1, v2" sql """truncate table ${tableName}""" streamLoad { diff --git a/regression-test/suites/nereids_syntax_p0/test_limit.groovy b/regression-test/suites/nereids_syntax_p0/test_limit.groovy index aae261624421ed..f8ed3a43e6a322 100644 --- a/regression-test/suites/nereids_syntax_p0/test_limit.groovy +++ b/regression-test/suites/nereids_syntax_p0/test_limit.groovy @@ -33,6 +33,13 @@ suite("test_limit") { result([[1]]) } + test { + sql """ + select * from test1 t1 join (select * from test1 limit 1 offset 1) t2 + """ + result([[1,1],[1,1]]) + } + sql """ drop table if exists row_number_limit_tbl; """ @@ -55,16 +62,24 @@ suite("test_limit") { """ sql """ INSERT INTO row_number_limit_tbl VALUES (7788, 'SCOTT', 'ANALYST', 7566, '1987-04-19', 3000, 0, 20); """ sql """ INSERT INTO row_number_limit_tbl VALUES (7844, 'TURNER', 'SALESMAN', 7698, '1981-09-08', 1500, 0, 30); """ - qt_limit1 """ - select row_number() over(order by k6 desc) k6s, t.* from row_number_limit_tbl t order by k6s limit 1 offset 1; + + test { + sql """ + select row_number() over(order by k6 desc) k6s, t.* from row_number_limit_tbl t order by k6s limit 1 offset 1 """ + rowNum 1 + } sql """ truncate table row_number_limit_tbl; """ sql """ INSERT INTO row_number_limit_tbl VALUES (7788, 'SCOTT', 'ANALYST', 7566, '1987-04-19', 3000, 0, 20); """ sql """ INSERT INTO row_number_limit_tbl VALUES (7844, 'TURNER', 'SALESMAN', 7698, '1981-09-08', 1500, 0, 30); """ sql """ INSERT INTO row_number_limit_tbl VALUES (7934, 'MILLER', 'CLERK', 7782, '1982-01-23', 1300, 0, 10); """ - qt_lmit2 """ - select row_number() over(order by k6 desc) k6s, t.* from row_number_limit_tbl t limit 1 offset 2; + test { + sql """ + select row_number() over(order by k6 desc) k6s, t.* from row_number_limit_tbl t limit 1 offset 2 """ + rowNum 1 + } } + From 6d50c28bd2b7bbb5b7e082153e4f3916652a30b9 Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Wed, 30 Oct 2024 10:03:28 +0800 Subject: [PATCH 2/2] [fix](schema-change) fix the bug of alter column nullable when double writing (#41737) (#42351) pick master #41737 --- be/src/exec/tablet_info.cpp | 30 +++++--- .../test_alter_uniq_null.groovy | 68 +++++++++++++++++++ 2 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 1734a00ad6a7e3..32012b83b7ab92 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -27,6 +27,7 @@ #include #include +#include #include "olap/tablet_schema.h" #include "runtime/define_primitive_type.h" @@ -74,7 +75,8 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { for (auto& col : pschema.partial_update_input_columns()) { _partial_update_input_columns.insert(col); } - std::unordered_map, SlotDescriptor*> slots_map; + std::unordered_map slots_map; + _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc())); // When FE version is less than 2.0.3, But BE upgrade to 2.0.3, // the filed col_type in slot_desc is INVALID_TYPE default. @@ -86,8 +88,10 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { _tuple_desc->add_slot(slot_desc); string data_type; EnumToString(TPrimitiveType, to_thrift(slot_desc->col_type()), data_type); - slots_map.emplace(std::make_pair(to_lower(slot_desc->col_name()), - TabletColumn::get_field_type_by_string(data_type)), + std::string is_null_str = slot_desc->is_nullable() ? "true" : "false"; + std::string data_type_str = + std::to_string(int64_t(TabletColumn::get_field_type_by_string(data_type))); + slots_map.emplace(to_lower(slot_desc->col_name()) + "+" + data_type_str + is_null_str, slot_desc); } @@ -98,10 +102,12 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { for (auto& pcolumn_desc : p_index.columns_desc()) { if (!_is_partial_update || _partial_update_input_columns.count(pcolumn_desc.name()) > 0) { - auto it = slots_map.find(std::make_pair( - to_lower(pcolumn_desc.name()), - TabletColumn::get_field_type_by_string( + std::string is_null_str = pcolumn_desc.is_nullable() ? "true" : "false"; + std::string data_type_str = + std::to_string(int64_t(TabletColumn::get_field_type_by_string( has_invalid_type ? "INVALID_TYPE" : pcolumn_desc.type()))); + auto it = slots_map.find(to_lower(pcolumn_desc.name()) + "+" + data_type_str + + is_null_str); if (it == std::end(slots_map)) { return Status::InternalError("unknown index column, column={}, type={}", pcolumn_desc.name(), pcolumn_desc.type()); @@ -140,7 +146,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { for (auto& tcolumn : tschema.partial_update_input_columns) { _partial_update_input_columns.insert(tcolumn); } - std::unordered_map, SlotDescriptor*> slots_map; + std::unordered_map slots_map; _tuple_desc = _obj_pool.add(new TupleDescriptor(tschema.tuple_desc)); // When FE version is less than 2.0.3, But BE upgrade to 2.0.3, // the filed col_type in slot_desc is INVALID_TYPE default. @@ -150,7 +156,9 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { auto slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc)); if (slot_desc->col_type() == INVALID_TYPE) has_invalid_type = true; _tuple_desc->add_slot(slot_desc); - slots_map.emplace(std::make_pair(to_lower(slot_desc->col_name()), slot_desc->col_type()), + std::string is_null_str = slot_desc->is_nullable() ? "true" : "false"; + std::string data_type_str = std::to_string(int64_t(slot_desc->col_type())); + slots_map.emplace(to_lower(slot_desc->col_name()) + "+" + data_type_str + is_null_str, slot_desc); } @@ -162,10 +170,12 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { for (auto& tcolumn_desc : t_index.columns_desc) { if (!_is_partial_update || _partial_update_input_columns.count(tcolumn_desc.column_name) > 0) { + std::string is_null_str = tcolumn_desc.is_allow_null ? "true" : "false"; TPrimitiveType::type col_type = has_invalid_type ? TPrimitiveType::INVALID_TYPE : tcolumn_desc.column_type.type; - auto it = slots_map.find(std::make_pair(to_lower(tcolumn_desc.column_name), - thrift_to_type(col_type))); + std::string data_type_str = std::to_string(int64_t(thrift_to_type(col_type))); + auto it = slots_map.find(to_lower(tcolumn_desc.column_name) + "+" + data_type_str + + is_null_str); if (it == slots_map.end()) { return Status::InternalError("unknown index column, column={}, type={}", tcolumn_desc.column_name, diff --git a/regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy b/regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy new file mode 100644 index 00000000000000..47ebbe01983c7d --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. + +suite("test_alter_uniq_null") { + def tableName = "test_alter_uniq_null_tbl" + + def getJobState = { tableName1 -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName1}' ORDER BY createtime DESC LIMIT 1 """ + println jobStateResult + return jobStateResult[0][9] + } + + sql """ DROP TABLE IF EXISTS ${tableName} """ + + sql """ + CREATE TABLE ${tableName} ( + `k1` VARCHAR(30) NOT NULL, + `k2` VARCHAR(24) NOT NULL, + `v1` VARCHAR(6) NULL, + `v2` INT NOT NULL + ) ENGINE=OLAP + UNIQUE KEY(`k1`, `k2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k2`) BUCKETS 1 + PROPERTIES ( + "light_schema_change" = "true", + "enable_unique_key_merge_on_write" = "false", + "replication_num" = "1" + ); + """ + + sql """alter table ${tableName} modify column `v2` INT NULL""" + sleep(10) + max_try_num = 1000 + while (max_try_num--) { + String res = getJobState(tableName) + if (res == "FINISHED" || res == "CANCELLED") { + assertEquals("FINISHED", res) + break + } else { + int val = 100000 + max_try_num + sql """ insert into ${tableName} values ("${val}", "client", "3", 100), ("${val}", "client", "4", 200)""" + sleep(10) + if (max_try_num < 1) { + println "test timeout," + "state:" + res + assertEquals("FINISHED",res) + } + } + } +}