From f96ac18f0826f4255ca8b12b01c81d18b1ff03fd Mon Sep 17 00:00:00 2001 From: airborne12 Date: Thu, 24 Oct 2024 14:05:44 +0800 Subject: [PATCH 001/143] [Improvement](segment iterator) Optimize column row reservation to reduce overhead #42060 (#42372) cherry pick from #42060 --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 04ec5830d2885f..faad089e09ff72 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1987,6 +1987,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (UNLIKELY(!_lazy_inited)) { RETURN_IF_ERROR(_lazy_init()); _lazy_inited = true; + // If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows. + auto nrows_reserve_limit = + std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max)); if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) { _block_rowids.resize(_opts.block_row_max); } @@ -2011,7 +2014,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { storage_column_type->is_nullable(), _opts.io_ctx.reader_type)); _current_return_columns[cid]->set_rowset_segment_id( {_segment->rowset_id(), _segment->id()}); - _current_return_columns[cid]->reserve(_opts.block_row_max); + _current_return_columns[cid]->reserve(nrows_reserve_limit); } else if (i >= block->columns()) { // if i >= block->columns means the column and not the pred_column means `column i` is // a delete condition column. but the column is not effective in the segment. so we just @@ -2022,7 +2025,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // TODO: skip read the not effective delete column to speed up segment read. _current_return_columns[cid] = Schema::get_data_type_ptr(*column_desc)->create_column(); - _current_return_columns[cid]->reserve(_opts.block_row_max); + _current_return_columns[cid]->reserve(nrows_reserve_limit); } } @@ -2047,7 +2050,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (_can_opt_topn_reads()) { nrows_read_limit = std::min(static_cast(_opts.topn_limit), nrows_read_limit); } - + // If the row bitmap size is smaller than nrows_read_limit, there's no need to reserve that many column rows. + nrows_read_limit = std::min(_row_bitmap.cardinality(), uint64_t(nrows_read_limit)); DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", { if (nrows_read_limit != 1) { return Status::Error("topn opt 1 execute failed: {}", From 24f2c470c54c5cc5d36cb26c500dcc5891c241e7 Mon Sep 17 00:00:00 2001 From: amory Date: Thu, 24 Oct 2024 14:20:04 +0800 Subject: [PATCH 002/143] [fix](array-funcs)fix array agg func with decimal type (#40839) (#42029) ## Proposed changes Issue Number: close #xxx --- .../array/function_array_aggregation.cpp | 13 + .../ComputePrecisionForArrayItemAgg.java | 10 +- .../functions/scalar/ArraysOverlap.java | 3 +- .../scalar_function/Array.out | 816 ++++++++++++++++++ .../test_array_large_decimal.csv | 100 +++ .../suites/nereids_function_p0/load.groovy | 23 + .../scalar_function/Array.groovy | 32 + 7 files changed, 994 insertions(+), 3 deletions(-) create mode 100644 regression-test/data/nereids_function_p0/test_array_large_decimal.csv diff --git a/be/src/vec/functions/array/function_array_aggregation.cpp b/be/src/vec/functions/array/function_array_aggregation.cpp index d2edfe34fb63af..18367816bc89e8 100644 --- a/be/src/vec/functions/array/function_array_aggregation.cpp +++ b/be/src/vec/functions/array/function_array_aggregation.cpp @@ -146,6 +146,18 @@ struct ArrayAggregateImpl { using Function = AggregateFunction>; const DataTypeArray* data_type_array = static_cast(remove_nullable(arguments[0]).get()); + if constexpr (operation != AggregateOperation::MIN && + operation != AggregateOperation::MAX) { + // only array_min and array_max support decimal256 type + if (is_decimal(remove_nullable(data_type_array->get_nested_type()))) { + const auto decimal_type = remove_nullable(data_type_array->get_nested_type()); + if (check_decimal(*decimal_type)) { + throw doris::Exception( + ErrorCode::INVALID_ARGUMENT, "Unexpected type {} for aggregation {}", + data_type_array->get_nested_type()->get_name(), operation); + } + } + } auto function = Function::create(data_type_array->get_nested_type()); if (function) { return function->get_return_type(); @@ -175,6 +187,7 @@ struct ArrayAggregateImpl { execute_type(res, type, data, offsets) || execute_type(res, type, data, offsets) || execute_type(res, type, data, offsets) || + execute_type(res, type, data, offsets) || execute_type(res, type, data, offsets) || execute_type(res, type, data, offsets) || execute_type(res, type, data, offsets) || diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForArrayItemAgg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForArrayItemAgg.java index 50c9f1adfdb900..05efb92222bbfd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForArrayItemAgg.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForArrayItemAgg.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.DecimalV3Type; +import org.apache.doris.qe.ConnectContext; /** ComputePrecisionForSum */ public interface ComputePrecisionForArrayItemAgg extends ComputePrecision { @@ -29,8 +30,15 @@ default FunctionSignature computePrecision(FunctionSignature signature) { if (getArgumentType(0) instanceof ArrayType) { DataType itemType = ((ArrayType) getArgument(0).getDataType()).getItemType(); if (itemType instanceof DecimalV3Type) { + boolean enableDecimal256 = false; + ConnectContext connectContext = ConnectContext.get(); + if (connectContext != null) { + enableDecimal256 = connectContext.getSessionVariable().isEnableDecimal256(); + } DecimalV3Type returnType = DecimalV3Type.createDecimalV3Type( - DecimalV3Type.MAX_DECIMAL128_PRECISION, ((DecimalV3Type) itemType).getScale()); + enableDecimal256 ? DecimalV3Type.MAX_DECIMAL256_PRECISION + : DecimalV3Type.MAX_DECIMAL128_PRECISION, + ((DecimalV3Type) itemType).getScale()); if (signature.returnType instanceof ArrayType) { signature = signature.withReturnType(ArrayType.of(returnType)); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraysOverlap.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraysOverlap.java index 9e40d732644341..d3c200cb92ed84 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraysOverlap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraysOverlap.java @@ -26,7 +26,6 @@ import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BooleanType; import org.apache.doris.nereids.types.coercion.AnyDataType; -import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -41,7 +40,7 @@ public class ArraysOverlap extends ScalarFunction implements ExplicitlyCastableS public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BooleanType.INSTANCE) - .args(ArrayType.of(new AnyDataType(0)), ArrayType.of(new FollowToAnyDataType(0))) + .args(ArrayType.of(new AnyDataType(0)), ArrayType.of(new AnyDataType(0))) ); /** diff --git a/regression-test/data/nereids_function_p0/scalar_function/Array.out b/regression-test/data/nereids_function_p0/scalar_function/Array.out index 17e8db9a68d22e..b9421a43793cc6 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/Array.out +++ b/regression-test/data/nereids_function_p0/scalar_function/Array.out @@ -14454,3 +14454,819 @@ true -- !sql_array_map -- [1, 1, 1, 1] +-- !sql_array_min_decimal256 -- +1.12345678901234567890123456789012345600000000000000000000 +1234567890.09876543210987654321098765432109876500000000000000000000 +12345678901234567.43210987654321098765432109876543210900000000000000000000 +123456789012345678.23456789012345678901234567890123456700000000000000000000 +123456789012345678.23456789012345678901234567890123456700000000000000000000 +1234567890123456789.09876543210987654321098765432109876500000000000000000000 +1234567890123456789.09876543210987654321098765432109876500000000000000000000 +1234567890123456789.34567890123456789012345678901234567800000000000000000000 +1234567890123456789.43210987654321098765432109876543210900000000000000000000 +1234567890123456789.54321098765432109876543210987654321000000000000000000000 +1234567890123456789.54321098765432109876543210987654321000000000000000000000 +1234567890123456789.65432109876543210987654321098765432100000000000000000000 +1234567890123456789.65432109876543210987654321098765432100000000000000000000 +1234567890123456789.98765432109876543210987654321098765400000000000000000000 +1234567890123456789.98765432109876543210987654321098765400000000000000000000 +12345678901234567890.09876543210987654321098765432109876500000000000000000000 +12345678901234567890.09876543210987654321098765432109876500000000000000000000 +12345678901234567890.23456789012345678901234567890123456700000000000000000000 +12345678901234567890.54321098765432109876543210987654321000000000000000000000 +12345678901234567890.65432109876543210987654321098765432100000000000000000000 +12345678901234567890.65432109876543210987654321098765432100000000000000000000 +12345678901234567890.98765432109876543210987654321098765400000000000000000000 +2345678901234567.56789012345678901234567890123456789000000000000000000000 +234567890123456789.54321098765432109876543210987654321000000000000000000000 +234567890123456789.65432109876543210987654321098765432100000000000000000000 +2345678901234567890.09876543210987654321098765432109876500000000000000000000 +2345678901234567890.23456789012345678901234567890123456700000000000000000000 +2345678901234567890.54321098765432109876543210987654321000000000000000000000 +2345678901234567890.98765432109876543210987654321098765400000000000000000000 +23456789012345678901.65432109876543210987654321098765432100000000000000000000 +23456789012345678901.65432109876543210987654321098765432100000000000000000000 +23456789012345678901.87654321098765432109876543210987654321000000000000000000 +345678901234567890.23456789012345678901234567890123456700000000000000000000 +345678901234567890.34567890123456789012345678901234567800000000000000000000 +345678901234567890.65432109876543210987654321098765432100000000000000000000 +345678901234567890.98765432109876543210987654321098765400000000000000000000 +3456789012345678901.09876543210987654321098765432109876500000000000000000000 +3456789012345678901.23456789012345678901234567890123456700000000000000000000 +3456789012345678901.34567890123456789012345678901234567800000000000000000000 +3456789012345678901.87654321098765432109876543210987654321000000000000000000 +3456789012345678901.98765432109876543210987654321098765400000000000000000000 +34567890123456789012.09876543210987654321098765432109876500000000000000000000 +34567890123456789012.43210987654321098765432109876543210900000000000000000000 +4567890123456789.09876543210987654321098765432109876500000000000000000000 +4567890123456789012.12345678901234567890123456789012345600000000000000000000 +4567890123456789012.12345678901234567890123456789012345600000000000000000000 +4567890123456789012.23456789012345678901234567890123456700000000000000000000 +4567890123456789012.65432109876543210987654321098765432100000000000000000000 +4567890123456789012.98765432109876543210987654321098765400000000000000000000 +45678901234567890123.12345678901234567890123456789012345600000000000000000000 +567890123456789012.34567890123456789012345678901234567800000000000000000000 +567890123456789012.54321098765432109876543210987654321000000000000000000000 +5678901234567890123.23456789012345678901234567890123456700000000000000000000 +5678901234567890123.43210987654321098765432109876543210900000000000000000000 +5678901234567890123.65432109876543210987654321098765432100000000000000000000 +5678901234567890123.87654321098765432109876543210987654321000000000000000000 +5678901234567890123.98765432109876543210987654321098765400000000000000000000 +56789012345678901234.34567890123456789012345678901234567800000000000000000000 +56789012345678901234.43210987654321098765432109876543210900000000000000000000 +56789012345678901234.54321098765432109876543210987654321000000000000000000000 +67890123456789012.34567890123456789012345678901234567800000000000000000000 +67890123456789012.98765432109876543210987654321098765400000000000000000000 +678901234567890123.09876543210987654321098765432109876500000000000000000000 +678901234567890123.43210987654321098765432109876543210900000000000000000000 +678901234567890123.43210987654321098765432109876543210900000000000000000000 +678901234567890123.54321098765432109876543210987654321000000000000000000000 +6789012345678901234.12345678901234567890123456789012345600000000000000000000 +6789012345678901234.98765432109876543210987654321098765400000000000000000000 +67890123456789012345.12345678901234567890123456789012345600000000000000000000 +67890123456789012345.67890123456789012345678901234567890100000000000000000000 +789012345678901234.12345678901234567890123456789012345600000000000000000000 +789012345678901234.34567890123456789012345678901234567800000000000000000000 +789012345678901234.65432109876543210987654321098765432100000000000000000000 +789012345678901234.87654321098765432109876543210987654321000000000000000000 +7890123456789012345.09876543210987654321098765432109876500000000000000000000 +7890123456789012345.09876543210987654321098765432109876500000000000000000000 +7890123456789012345.09876543210987654321098765432109876500000000000000000000 +7890123456789012345.54321098765432109876543210987654321000000000000000000000 +7890123456789012345.98765432109876543210987654321098765400000000000000000000 +78901234567890123456.09876543210987654321098765432109876500000000000000000000 +78901234567890123456.23456789012345678901234567890123456700000000000000000000 +78901234567890123456.65432109876543210987654321098765432100000000000000000000 +89012345678901234.34567890123456789012345678901234567800000000000000000000 +89012345678901234.98765432109876543210987654321098765400000000000000000000 +890123456789012345.09876543210987654321098765432109876500000000000000000000 +890123456789012345.12345678901234567890123456789012345600000000000000000000 +890123456789012345.54321098765432109876543210987654321000000000000000000000 +8901234567890123456.23456789012345678901234567890123456700000000000000000000 +8901234567890123456.34567890123456789012345678901234567800000000000000000000 +8901234567890123456.43210987654321098765432109876543210900000000000000000000 +8901234567890123456.87654321098765432109876543210987654321000000000000000000 +8901234567890123456.98765432109876543210987654321098765400000000000000000000 +8901234567890123456.98765432109876543210987654321098765400000000000000000000 +8901234567890123456.98765432109876543210987654321098765400000000000000000000 +89012345678901234567.09876543210987654321098765432109876500000000000000000000 +89012345678901234567.43210987654321098765432109876543210900000000000000000000 +89012345678901234567.54321098765432109876543210987654321000000000000000000000 +89012345678901234567.87654321098765432109876543210987654321000000000000000000 +9876543210.54321098765432109876543210987654321000000000000000000000 +98765432109876543210.65432109876543210987654321098765432100000000000000000000 + +-- !sql_array_max_decimal256 -- +1.12345678901234567890123456789012345600000000000000000000 +1234567890.09876543210987654321098765432109876500000000000000000000 +12345678901234567.43210987654321098765432109876543210900000000000000000000 +123456789012345678.23456789012345678901234567890123456700000000000000000000 +123456789012345678.23456789012345678901234567890123456700000000000000000000 +1234567890123456789.09876543210987654321098765432109876500000000000000000000 +1234567890123456789.09876543210987654321098765432109876500000000000000000000 +1234567890123456789.34567890123456789012345678901234567800000000000000000000 +1234567890123456789.43210987654321098765432109876543210900000000000000000000 +1234567890123456789.54321098765432109876543210987654321000000000000000000000 +1234567890123456789.54321098765432109876543210987654321000000000000000000000 +1234567890123456789.65432109876543210987654321098765432100000000000000000000 +1234567890123456789.65432109876543210987654321098765432100000000000000000000 +1234567890123456789.98765432109876543210987654321098765400000000000000000000 +1234567890123456789.98765432109876543210987654321098765400000000000000000000 +12345678901234567890.09876543210987654321098765432109876500000000000000000000 +12345678901234567890.09876543210987654321098765432109876500000000000000000000 +12345678901234567890.23456789012345678901234567890123456700000000000000000000 +12345678901234567890.54321098765432109876543210987654321000000000000000000000 +12345678901234567890.65432109876543210987654321098765432100000000000000000000 +12345678901234567890.65432109876543210987654321098765432100000000000000000000 +12345678901234567890.98765432109876543210987654321098765400000000000000000000 +2345678901234567.56789012345678901234567890123456789000000000000000000000 +234567890123456789.54321098765432109876543210987654321000000000000000000000 +234567890123456789.65432109876543210987654321098765432100000000000000000000 +2345678901234567890.09876543210987654321098765432109876500000000000000000000 +2345678901234567890.23456789012345678901234567890123456700000000000000000000 +2345678901234567890.54321098765432109876543210987654321000000000000000000000 +2345678901234567890.98765432109876543210987654321098765400000000000000000000 +23456789012345678901.65432109876543210987654321098765432100000000000000000000 +23456789012345678901.65432109876543210987654321098765432100000000000000000000 +23456789012345678901.87654321098765432109876543210987654321000000000000000000 +345678901234567890.23456789012345678901234567890123456700000000000000000000 +345678901234567890.34567890123456789012345678901234567800000000000000000000 +345678901234567890.65432109876543210987654321098765432100000000000000000000 +345678901234567890.98765432109876543210987654321098765400000000000000000000 +3456789012345678901.09876543210987654321098765432109876500000000000000000000 +3456789012345678901.23456789012345678901234567890123456700000000000000000000 +3456789012345678901.34567890123456789012345678901234567800000000000000000000 +3456789012345678901.87654321098765432109876543210987654321000000000000000000 +3456789012345678901.98765432109876543210987654321098765400000000000000000000 +34567890123456789012.09876543210987654321098765432109876500000000000000000000 +34567890123456789012.43210987654321098765432109876543210900000000000000000000 +4567890123456789.09876543210987654321098765432109876500000000000000000000 +4567890123456789012.12345678901234567890123456789012345600000000000000000000 +4567890123456789012.12345678901234567890123456789012345600000000000000000000 +4567890123456789012.23456789012345678901234567890123456700000000000000000000 +4567890123456789012.65432109876543210987654321098765432100000000000000000000 +4567890123456789012.98765432109876543210987654321098765400000000000000000000 +45678901234567890123.12345678901234567890123456789012345600000000000000000000 +567890123456789012.34567890123456789012345678901234567800000000000000000000 +567890123456789012.54321098765432109876543210987654321000000000000000000000 +5678901234567890123.23456789012345678901234567890123456700000000000000000000 +5678901234567890123.43210987654321098765432109876543210900000000000000000000 +5678901234567890123.65432109876543210987654321098765432100000000000000000000 +5678901234567890123.87654321098765432109876543210987654321000000000000000000 +5678901234567890123.98765432109876543210987654321098765400000000000000000000 +56789012345678901234.34567890123456789012345678901234567800000000000000000000 +56789012345678901234.43210987654321098765432109876543210900000000000000000000 +56789012345678901234.54321098765432109876543210987654321000000000000000000000 +67890123456789012.34567890123456789012345678901234567800000000000000000000 +67890123456789012.98765432109876543210987654321098765400000000000000000000 +678901234567890123.09876543210987654321098765432109876500000000000000000000 +678901234567890123.43210987654321098765432109876543210900000000000000000000 +678901234567890123.43210987654321098765432109876543210900000000000000000000 +678901234567890123.54321098765432109876543210987654321000000000000000000000 +6789012345678901234.12345678901234567890123456789012345600000000000000000000 +6789012345678901234.98765432109876543210987654321098765400000000000000000000 +67890123456789012345.12345678901234567890123456789012345600000000000000000000 +67890123456789012345.67890123456789012345678901234567890100000000000000000000 +789012345678901234.12345678901234567890123456789012345600000000000000000000 +789012345678901234.34567890123456789012345678901234567800000000000000000000 +789012345678901234.65432109876543210987654321098765432100000000000000000000 +789012345678901234.87654321098765432109876543210987654321000000000000000000 +7890123456789012345.09876543210987654321098765432109876500000000000000000000 +7890123456789012345.09876543210987654321098765432109876500000000000000000000 +7890123456789012345.09876543210987654321098765432109876500000000000000000000 +7890123456789012345.54321098765432109876543210987654321000000000000000000000 +7890123456789012345.98765432109876543210987654321098765400000000000000000000 +78901234567890123456.09876543210987654321098765432109876500000000000000000000 +78901234567890123456.23456789012345678901234567890123456700000000000000000000 +78901234567890123456.65432109876543210987654321098765432100000000000000000000 +89012345678901234.34567890123456789012345678901234567800000000000000000000 +89012345678901234.98765432109876543210987654321098765400000000000000000000 +890123456789012345.09876543210987654321098765432109876500000000000000000000 +890123456789012345.12345678901234567890123456789012345600000000000000000000 +890123456789012345.54321098765432109876543210987654321000000000000000000000 +8901234567890123456.23456789012345678901234567890123456700000000000000000000 +8901234567890123456.34567890123456789012345678901234567800000000000000000000 +8901234567890123456.43210987654321098765432109876543210900000000000000000000 +8901234567890123456.87654321098765432109876543210987654321000000000000000000 +8901234567890123456.98765432109876543210987654321098765400000000000000000000 +8901234567890123456.98765432109876543210987654321098765400000000000000000000 +8901234567890123456.98765432109876543210987654321098765400000000000000000000 +89012345678901234567.09876543210987654321098765432109876500000000000000000000 +89012345678901234567.43210987654321098765432109876543210900000000000000000000 +89012345678901234567.54321098765432109876543210987654321000000000000000000000 +89012345678901234567.87654321098765432109876543210987654321000000000000000000 +9876543210.54321098765432109876543210987654321000000000000000000000 +98765432109876543210.65432109876543210987654321098765432100000000000000000000 + +-- !sql_array_overlaps_1 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false + +-- !sql_array_overlaps_2 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false +false + +-- !sql_array_overlaps_3 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !sql_array_overlaps_4 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !sql_array_overlaps_5 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !sql_array_overlaps_6 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + diff --git a/regression-test/data/nereids_function_p0/test_array_large_decimal.csv b/regression-test/data/nereids_function_p0/test_array_large_decimal.csv new file mode 100644 index 00000000000000..1c770088269571 --- /dev/null +++ b/regression-test/data/nereids_function_p0/test_array_large_decimal.csv @@ -0,0 +1,100 @@ +1;[1, null];[100];[1.123456789012345678901234567890123456, null] +2;[2, 3];[200, 300];[1234567890.098765432109876543210987654321098765, null] +3;[4, null];[400];[45678901234567890123.123456789012345678901234567890123456, null] +4;[5];[500];[9876543210.543210987654321098765432109876543210, null] +5;[6, 7];[600, 700];[12345678901234567890.234567890123456789012345678901234567, null] +6;[8];[800];[3456789012345678901.876543210987654321098765432109876543210, null] +7;[9, null];[900];[6789012345678901234.123456789012345678901234567890123456, null] +8;[10, 11];[1000, 1100];[89012345678901234567.543210987654321098765432109876543210, null] +9;[12];[1200];[123456789012345678.234567890123456789012345678901234567, null] +10;[13, null];[1300];[4567890123456789.098765432109876543210987654321098765, null] +11;[14, 15];[1400, 1500];[98765432109876543210.654321098765432109876543210987654321, null] +12;[16, null];[1600];[1234567890123456789.987654321098765432109876543210987654, null] +13;[17];[1700];[2345678901234567890.543210987654321098765432109876543210, null] +14;[18, 19];[1800, 1900];[34567890123456789012.432109876543210987654321098765432109, null] +15;[20];[2000];[67890123456789012345.678901234567890123456789012345678901, null] +16;[21, null];[2100];[8901234567890123456.345678901234567890123456789012345678, null] +17;[22, 23];[2200, 2300];[12345678901234567890.987654321098765432109876543210987654, null] +18;[24];[2400];[345678901234567890.234567890123456789012345678901234567, null] +19;[25, null];[2500];[4567890123456789012.654321098765432109876543210987654321, null] +20;[26, 27];[2600, 2700];[67890123456789012.345678901234567890123456789012345678, null] +21;[28, null];[2800];[789012345678901234.876543210987654321098765432109876543210, null] +22;[29, 30];[2900, 3000];[12345678901234567.432109876543210987654321098765432109, null] +23;[31];[3100];[2345678901234567.567890123456789012345678901234567890, null] +24;[32, null];[3200];[89012345678901234567.098765432109876543210987654321098765, null] +25;[33];[3300];[4567890123456789012.234567890123456789012345678901234567, null] +26;[34, 35];[3400, 3500];[6789012345678901234.987654321098765432109876543210987654, null] +27;[36];[3600];[7890123456789012345.543210987654321098765432109876543210, null] +28;[37, 38];[3700, 3800];[1234567890123456789.345678901234567890123456789012345678, null] +29;[39];[3900];[89012345678901234567.432109876543210987654321098765432109, null] +30;[40, 41];[4000, 4100];[234567890123456789.543210987654321098765432109876543210, null] +31;[42];[4200];[123456789012345678.234567890123456789012345678901234567, null] +32;[43, null];[4300];[5678901234567890123.987654321098765432109876543210987654, null] +33;[44];[4400];[3456789012345678901.345678901234567890123456789012345678, null] +34;[45, 46];[4500, 4600];[789012345678901234.123456789012345678901234567890123456, null] +35;[47];[4700];[89012345678901234567.876543210987654321098765432109876543210, null] +36;[48, null];[4800];[12345678901234567890.543210987654321098765432109876543210, null] +37;[49];[4900];[56789012345678901234.345678901234567890123456789012345678, null] +38;[50, 51];[5000, 5100];[678901234567890123.432109876543210987654321098765432109, null] +39;[52];[5200];[7890123456789012345.098765432109876543210987654321098765, null] +40;[53, null];[5300];[1234567890123456789.654321098765432109876543210987654321, null] +41;[54, 55];[5400, 5500];[3456789012345678901.987654321098765432109876543210987654, null] +42;[56];[5600];[2345678901234567890.234567890123456789012345678901234567, null] +43;[57, 58];[5700, 5800];[12345678901234567890.098765432109876543210987654321098765, null] +44;[59];[5900];[67890123456789012.987654321098765432109876543210987654, null] +45;[60, null];[6000];[8901234567890123456.876543210987654321098765432109876543210, null] +46;[61];[6100];[1234567890123456789.654321098765432109876543210987654321, null] +47;[62, 63];[6200, 6300];[5678901234567890123.234567890123456789012345678901234567, null] +48;[64];[6400];[23456789012345678901.654321098765432109876543210987654321, null] +49;[65, null];[6500];[345678901234567890.987654321098765432109876543210987654, null] +50;[66, 67];[6600, 6700];[5678901234567890123.432109876543210987654321098765432109, null] +51;[68];[6800];[789012345678901234.654321098765432109876543210987654321, null] +52;[69, 70];[6900, 7000];[890123456789012345.123456789012345678901234567890123456, null] +53;[71];[7100];[1234567890123456789.098765432109876543210987654321098765, null] +54;[72, null];[7200];[5678901234567890123.654321098765432109876543210987654321, null] +55;[73];[7300];[789012345678901234.345678901234567890123456789012345678, null] +56;[74, 75];[7400, 7500];[8901234567890123456.987654321098765432109876543210987654, null] +57;[76];[7600];[2345678901234567890.098765432109876543210987654321098765, null] +58;[77, 78];[7700, 7800];[12345678901234567890.654321098765432109876543210987654321, null] +59;[79];[7900];[89012345678901234.345678901234567890123456789012345678, null] +60;[80, null];[8000];[34567890123456789012.098765432109876543210987654321098765, null] +61;[81];[8100];[567890123456789012.543210987654321098765432109876543210, null] +62;[82, 83];[8200, 8300];[67890123456789012345.123456789012345678901234567890123456, null] +63;[84];[8400];[1234567890123456789.098765432109876543210987654321098765, null] +64;[85, null];[8500];[23456789012345678901.654321098765432109876543210987654321, null] +65;[86, 87];[8600, 8700];[345678901234567890.345678901234567890123456789012345678, null] +66;[88];[8800];[4567890123456789012.987654321098765432109876543210987654, null] +67;[89, null];[8900];[678901234567890123.432109876543210987654321098765432109, null] +68;[90, 91];[9000, 9100];[7890123456789012345.098765432109876543210987654321098765, null] +69;[92];[9200];[89012345678901234.987654321098765432109876543210987654, null] +70;[93, null];[9300];[234567890123456789.654321098765432109876543210987654321, null] +71;[94, 95];[9400, 9500];[12345678901234567890.098765432109876543210987654321098765, null] +72;[96];[9600];[4567890123456789012.123456789012345678901234567890123456, null] +73;[97, null];[9700];[56789012345678901234.543210987654321098765432109876543210, null] +74;[98, 99];[9800, 9900];[678901234567890123.098765432109876543210987654321098765, null] +75;[100];[10000];[78901234567890123456.234567890123456789012345678901234567, null] +76;[101, null];[10100];[8901234567890123456.987654321098765432109876543210987654, null] +77;[102, 103];[10200, 10300];[1234567890123456789.543210987654321098765432109876543210, null] +78;[104];[10400];[23456789012345678901.876543210987654321098765432109876543210, null] +79;[105, null];[10500];[56789012345678901234.432109876543210987654321098765432109, null] +80;[106, 107];[10600, 10700];[8901234567890123456.234567890123456789012345678901234567, null] +81;[108];[10800];[1234567890123456789.987654321098765432109876543210987654, null] +82;[109, null];[10900];[78901234567890123456.654321098765432109876543210987654321, null] +83;[110, 111];[11000, 11100];[4567890123456789012.123456789012345678901234567890123456, null] +84;[112];[11200];[678901234567890123.543210987654321098765432109876543210, null] +85;[113, null];[11300];[890123456789012345.098765432109876543210987654321098765, null] +86;[114, 115];[11400, 11500];[2345678901234567890.987654321098765432109876543210987654, null] +87;[116];[11600];[1234567890123456789.432109876543210987654321098765432109, null] +88;[117, null];[11700];[7890123456789012345.098765432109876543210987654321098765, null] +89;[118, 119];[11800, 11900];[345678901234567890.654321098765432109876543210987654321, null] +90;[120];[12000];[8901234567890123456.432109876543210987654321098765432109, null] +91;[121, null];[12100];[567890123456789012.345678901234567890123456789012345678, null] +92;[122, 123];[12200, 12300];[12345678901234567890.654321098765432109876543210987654321, null] +93;[124];[12400];[3456789012345678901.098765432109876543210987654321098765, null] +94;[125, null];[12500];[7890123456789012345.987654321098765432109876543210987654, null] +95;[126, 127];[12600, 12700];[890123456789012345.543210987654321098765432109876543210, null] +96;[128];[12800];[5678901234567890123.876543210987654321098765432109876543210, null] +97;[129, null];[12900];[3456789012345678901.234567890123456789012345678901234567, null] +98;[130, 131];[13000, 13100];[78901234567890123456.098765432109876543210987654321098765, null] +99;[132];[13200];[8901234567890123456.987654321098765432109876543210987654, null] +100;[133, null];[13300];[1234567890123456789.543210987654321098765432109876543210, null] diff --git a/regression-test/suites/nereids_function_p0/load.groovy b/regression-test/suites/nereids_function_p0/load.groovy index 104c2d30788f51..2e0fc1e953e3fd 100644 --- a/regression-test/suites/nereids_function_p0/load.groovy +++ b/regression-test/suites/nereids_function_p0/load.groovy @@ -264,4 +264,27 @@ suite("load") { sql """ insert into fn_test_bitmap_not_nullable select * from fn_test_bitmap where id is not null """ + + sql """ set enable_decimal256 = true """ + sql """ drop table if exists fn_test_array_with_large_decimal """ + sql """ + create table IF NOT EXISTS fn_test_array_with_large_decimal(id int, a array, b array, c array) properties('replication_num' = '1'); + """ + streamLoad { + table "fn_test_array_with_large_decimal" + db "regression_test_nereids_function_p0" + set 'column_separator', ';' + file "test_array_large_decimal.csv" + time 60000 + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals(100, json.NumberTotalRows) + assertEquals(100, json.NumberLoadedRows) + } + } } diff --git a/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy b/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy index dc97d10945d78e..c4d17f5a339bfd 100644 --- a/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy +++ b/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy @@ -1349,4 +1349,36 @@ suite("nereids_scalar_fn_Array") { exception("errCode = 2") } + // agg for array types add decimal256 cases array_min/array_max/array_product/array_avg/array_sum with decimal256 + sql """ set enable_decimal256=true; """ + order_qt_sql_array_min_decimal256 "select array_min(c) from fn_test_array_with_large_decimal order by id" + order_qt_sql_array_max_decimal256 "select array_max(c) from fn_test_array_with_large_decimal order by id" + test { + sql "select array_product(c) from fn_test_array_with_large_decimal order by id" + check{result, exception, startTime, endTime -> + assertTrue(exception != null) + logger.info(exception.message) + } + } + test { + sql "select array_avg(c) from fn_test_array_with_large_decimal order by id" + check{result, exception, startTime, endTime -> + assertTrue(exception != null) + logger.info(exception.message) + } + } + test { + sql "select array_sum(c) from fn_test_array_with_large_decimal order by id" + check{result, exception, startTime, endTime -> + assertTrue(exception != null) + logger.info(exception.message) + } + } + // array_overlap for type correctness + order_qt_sql_array_overlaps_1 """select arrays_overlap(a, b) from fn_test_array_with_large_decimal order by id""" + order_qt_sql_array_overlaps_2 """select arrays_overlap(b, a) from fn_test_array_with_large_decimal order by id""" + order_qt_sql_array_overlaps_3 """select arrays_overlap(a, c) from fn_test_array_with_large_decimal order by id""" + order_qt_sql_array_overlaps_4 """select arrays_overlap(c, a) from fn_test_array_with_large_decimal order by id""" + order_qt_sql_array_overlaps_5 """select arrays_overlap(b, c) from fn_test_array_with_large_decimal order by id""" + order_qt_sql_array_overlaps_6 """select arrays_overlap(c, b) from fn_test_array_with_large_decimal order by id""" } From 5da2fa527f7e7d081dd3b8aa71e0cbe69200e6ea Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:12:03 +0800 Subject: [PATCH 003/143] [chore](planner) change decimal literal toSql as plainString (#41809) (#42378) pick from master #41809 for example, 0 with decimal(38,4) will return 0.0000 --- .../java/org/apache/doris/analysis/DecimalLiteral.java | 2 +- .../trees/expressions/literal/DecimalLiteral.java | 10 ++++++++++ .../trees/expressions/literal/DecimalV3Literal.java | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java index c4bbf6e2cce436..f084658936d6fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java @@ -274,7 +274,7 @@ public String toSqlImpl() { @Override public String getStringValue() { - return value.toString(); + return value.toPlainString(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java index ea198d947aef65..4ffc92c634d709 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java @@ -110,4 +110,14 @@ public boolean equals(Object o) { DecimalLiteral literal = (DecimalLiteral) o; return Objects.equals(dataType, literal.dataType); } + + @Override + public String toSql() { + return value.toPlainString(); + } + + @Override + public String toString() { + return toSql(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java index 9d311fe06646a5..1ff2e50169ca34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java @@ -145,4 +145,14 @@ public boolean equals(Object o) { DecimalV3Literal literal = (DecimalV3Literal) o; return Objects.equals(dataType, literal.dataType); } + + @Override + public String toSql() { + return value.toPlainString(); + } + + @Override + public String toString() { + return toSql(); + } } From e879ffc8b582db3c651222d8d9b8b2e1faf93e66 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:28:04 +0800 Subject: [PATCH 004/143] [chore](Variant) forbid variant type as hash join key (#41673) (#42377) pick from master #41673 --- .../nereids/rules/analysis/CheckAfterRewrite.java | 13 +++++++++++++ .../suites/nereids_p0/join/test_join_on.groovy | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java index df8ec64fc2e1ff..562e84275df0cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalDeferMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSort; import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; @@ -179,6 +180,18 @@ private void checkMetricTypeIsUsedCorrectly(Plan plan) { throw new AnalysisException(Type.OnlyMetricTypeErrorMsg); } }); + } else if (plan instanceof LogicalJoin) { + LogicalJoin join = (LogicalJoin) plan; + for (Expression conjunct : join.getHashJoinConjuncts()) { + if (conjunct.anyMatch(e -> ((Expression) e).getDataType().isVariantType())) { + throw new AnalysisException("variant type could not in join equal conditions: " + conjunct.toSql()); + } + } + for (Expression conjunct : join.getMarkJoinConjuncts()) { + if (conjunct.anyMatch(e -> ((Expression) e).getDataType().isVariantType())) { + throw new AnalysisException("variant type could not in join equal conditions: " + conjunct.toSql()); + } + } } } diff --git a/regression-test/suites/nereids_p0/join/test_join_on.groovy b/regression-test/suites/nereids_p0/join/test_join_on.groovy index 752467d3028486..02b04479e98688 100644 --- a/regression-test/suites/nereids_p0/join/test_join_on.groovy +++ b/regression-test/suites/nereids_p0/join/test_join_on.groovy @@ -50,4 +50,9 @@ suite("test_join_on", "nereids_p0") { sql """ select * from join_on as j1 inner join join_on as j2 on j1.k3 = j2.k3; """ exception "data type BITMAP could not used in ComparisonPredicate" } + + test { + sql """select * from (select cast('' as variant) as a) t1 join (select cast('' as variant) as a) t2 on t1.a = t2.a""" + exception "variant type could not in join equal conditions" + } } From 725e70d3c09577b89cc5ec9e0499c924a862e96a Mon Sep 17 00:00:00 2001 From: Xin Liao Date: Thu, 24 Oct 2024 16:41:47 +0800 Subject: [PATCH 005/143] [fix](load) Fix potential data loss during disk migration #42296 (#42385) cherry pick from #42296 --- be/src/olap/txn_manager.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index 1dd2d52f33b8ac..54436668c85c03 100644 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -89,6 +89,15 @@ TxnManager::TxnManager(StorageEngine& engine, int32_t txn_map_shard_size, int32_ Status TxnManager::prepare_txn(TPartitionId partition_id, const Tablet& tablet, TTransactionId transaction_id, const PUniqueId& load_id, bool ingest) { + // check if the tablet has already been shutdown. If it has, it indicates that + // it is an old tablet, and data should not be imported into the old tablet. + // Otherwise, it may lead to data loss during migration. + if (tablet.tablet_state() == TABLET_SHUTDOWN) { + return Status::InternalError( + "The tablet's state is shutdown, tablet_id: {}. The tablet may have been dropped " + "or migrationed. Please check if the table has been dropped or try again.", + tablet.tablet_id()); + } return prepare_txn(partition_id, transaction_id, tablet.tablet_id(), tablet.tablet_uid(), load_id, ingest); } From 4d653dac85b86b47d7119bb683d7d728cd252609 Mon Sep 17 00:00:00 2001 From: TengJianPing <18241664+jacktengg@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:55:15 +0800 Subject: [PATCH 006/143] [fix](decimal256) support decimal256 for many functions (#42136) (#42356) BP #42136 --- be/src/runtime/runtime_state.h | 2 +- .../aggregate_functions/aggregate_function.h | 4 + ...gregate_function_approx_count_distinct.cpp | 3 +- .../aggregate_function_avg.cpp | 15 +- .../aggregate_function_bitmap.cpp | 9 +- .../aggregate_function_bitmap_agg.cpp | 3 +- .../aggregate_function_collect.cpp | 3 +- .../aggregate_function_corr.cpp | 3 +- .../aggregate_function_count.cpp | 9 +- .../aggregate_function_count_by_enum.cpp | 3 +- .../aggregate_function_covar.cpp | 6 +- .../aggregate_function_distinct.cpp | 5 +- .../aggregate_function_foreach.cpp | 7 +- ...gregate_function_group_array_intersect.cpp | 3 +- .../aggregate_function_group_concat.cpp | 3 +- .../aggregate_function_histogram.cpp | 3 +- .../aggregate_function_kurtosis.cpp | 3 +- .../aggregate_function_map.cpp | 3 +- .../aggregate_function_min_max.cpp | 3 +- .../aggregate_function_min_max.h | 3 +- .../aggregate_function_min_max_by.h | 3 +- .../aggregate_function_orthogonal_bitmap.cpp | 3 +- .../aggregate_function_percentile.cpp | 9 +- .../aggregate_function_product.h | 16 +- .../aggregate_function_quantile_state.cpp | 6 +- .../aggregate_function_quantile_state.h | 6 +- .../aggregate_function_reader_first_last.h | 39 +-- .../aggregate_function_sequence_match.cpp | 3 +- .../aggregate_function_simple_factory.h | 15 +- .../aggregate_function_skew.cpp | 3 +- .../aggregate_function_stddev.cpp | 12 +- .../aggregate_function_sum.cpp | 15 +- .../aggregate_function_sum.h | 1 - .../aggregate_function_topn.cpp | 9 +- .../aggregate_function_uniq.cpp | 8 +- ...aggregate_function_uniq_distribute_key.cpp | 3 +- .../aggregate_function_window.cpp | 6 +- .../aggregate_function_window_funnel.cpp | 3 +- be/src/vec/aggregate_functions/helpers.h | 15 +- be/src/vec/core/wide_integer.h | 5 + be/src/vec/core/wide_integer_impl.h | 34 +- be/src/vec/exec/scan/vfile_scanner.cpp | 5 +- be/src/vec/exprs/vcase_expr.cpp | 6 +- be/src/vec/exprs/vcast_expr.cpp | 6 +- be/src/vec/exprs/vectorized_agg_fn.cpp | 4 +- be/src/vec/exprs/vectorized_fn_call.cpp | 3 +- be/src/vec/exprs/vin_predicate.cpp | 6 +- be/src/vec/exprs/vmatch_predicate.cpp | 6 +- be/src/vec/exprs/vtopn_pred.h | 2 +- .../array/function_array_aggregation.cpp | 100 ++++-- .../array/function_array_cum_sum.cpp | 31 +- .../functions/comparison_equal_for_null.cpp | 11 +- be/src/vec/functions/function.h | 4 + be/src/vec/functions/function_coalesce.cpp | 21 +- be/src/vec/functions/function_ifnull.h | 4 +- be/src/vec/functions/nullif.cpp | 11 +- .../vec/functions/simple_function_factory.h | 11 +- .../decimalv3}/aggregate_decimal256.out | 8 + .../decimalv3/test_decimal256_array.out | 63 ++++ .../test_decimal256_multi_distinct.out | 33 ++ .../scalar_function/Array.out | 306 ++++++++++++++++++ .../decimalv3}/aggregate_decimal256.groovy | 4 +- .../decimalv3/test_decimal256_array.groovy | 118 +++++++ .../test_decimal256_multi_distinct.groovy | 73 +++++ .../scalar_function/Array.groovy | 25 +- 65 files changed, 953 insertions(+), 215 deletions(-) rename regression-test/data/{query_p0/aggregate => datatype_p0/decimalv3}/aggregate_decimal256.out (95%) create mode 100644 regression-test/data/datatype_p0/decimalv3/test_decimal256_array.out create mode 100644 regression-test/data/datatype_p0/decimalv3/test_decimal256_multi_distinct.out rename regression-test/suites/{query_p0/aggregate => datatype_p0/decimalv3}/aggregate_decimal256.groovy (95%) create mode 100644 regression-test/suites/datatype_p0/decimalv3/test_decimal256_array.groovy create mode 100644 regression-test/suites/datatype_p0/decimalv3/test_decimal256_multi_distinct.groovy diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index e7f2c18b09404a..b44aba5e7314de 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -174,7 +174,7 @@ class RuntimeState { _query_options.check_overflow_for_decimal; } - bool enable_decima256() const { + bool enable_decimal256() const { return _query_options.__isset.enable_decimal256 && _query_options.enable_decimal256; } diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index 05f1bd2a602c68..cd1f8922e1b459 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -38,6 +38,10 @@ class Arena; class IColumn; class IDataType; +struct AggregateFunctionAttr { + bool enable_decimal256 {false}; +}; + template class AggregateFunctionBitmapCount; template diff --git a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp index 10616be4258477..18662bf66cf38c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp @@ -31,7 +31,8 @@ namespace doris::vectorized { AggregateFunctionPtr create_aggregate_function_approx_count_distinct( - const std::string& name, const DataTypes& argument_types, const bool result_is_nullable) { + const std::string& name, const DataTypes& argument_types, const bool result_is_nullable, + const AggregateFunctionAttr& attr) { WhichDataType which(remove_nullable(argument_types[0])); #define DISPATCH(TYPE, COLUMN_TYPE) \ diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp index 0f3d0fd3bdad6b..6a6711f90f983e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp @@ -45,8 +45,17 @@ template using AggregateFuncAvgDecimal256 = typename AvgDecimal256::Function; void register_aggregate_function_avg(AggregateFunctionSimpleFactory& factory) { - factory.register_function_both("avg", creator_with_type::creator); - factory.register_function_both("avg_decimal256", - creator_with_type::creator); + AggregateFunctionCreator creator = [&](const std::string& name, const DataTypes& types, + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { + if (attr.enable_decimal256) { + return creator_with_type::creator(name, types, + result_is_nullable, attr); + } else { + return creator_with_type::creator(name, types, result_is_nullable, + attr); + } + }; + factory.register_function_both("avg", creator); } } // namespace doris::vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp index 0676fd5bc27090..e9c86d4b9556da 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp @@ -40,9 +40,9 @@ AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_type) { return nullptr; } -AggregateFunctionPtr create_aggregate_function_bitmap_union_count(const std::string& name, - const DataTypes& argument_types, - const bool result_is_nullable) { +AggregateFunctionPtr create_aggregate_function_bitmap_union_count( + const std::string& name, const DataTypes& argument_types, const bool result_is_nullable, + const AggregateFunctionAttr& attr) { const bool arg_is_nullable = argument_types[0]->is_nullable(); if (arg_is_nullable) { return std::make_shared>(argument_types); @@ -53,7 +53,8 @@ AggregateFunctionPtr create_aggregate_function_bitmap_union_count(const std::str AggregateFunctionPtr create_aggregate_function_bitmap_union_int(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { const bool arg_is_nullable = argument_types[0]->is_nullable(); if (arg_is_nullable) { return AggregateFunctionPtr( diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp index b8ae4c6530d575..0b95ddfd46f0d5 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp @@ -41,7 +41,8 @@ AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_types) AggregateFunctionPtr create_aggregate_function_bitmap_agg(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { const bool arg_is_nullable = argument_types[0]->is_nullable(); if (arg_is_nullable) { return AggregateFunctionPtr(create_with_int_data_type(argument_types)); diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp index 4fcf09b59b33c6..d726b7c6355318 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp @@ -96,7 +96,8 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n AggregateFunctionPtr create_aggregate_function_collect(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { if (argument_types.size() == 1) { if (name == "array_agg") { return create_aggregate_function_collect_impl( diff --git a/be/src/vec/aggregate_functions/aggregate_function_corr.cpp b/be/src/vec/aggregate_functions/aggregate_function_corr.cpp index a454afb45f22e0..cdaab6e086f4a5 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_corr.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_corr.cpp @@ -89,7 +89,8 @@ struct CorrMoment { AggregateFunctionPtr create_aggregate_corr_function(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { assert_binary(name, argument_types); return create_with_two_basic_numeric_types(argument_types[0], argument_types[1], argument_types, result_is_nullable); diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.cpp b/be/src/vec/aggregate_functions/aggregate_function_count.cpp index 8c54714b046da1..5cfe5af41982f6 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_count.cpp @@ -29,15 +29,16 @@ namespace doris::vectorized { AggregateFunctionPtr create_aggregate_function_count(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { assert_arity_at_most<1>(name, argument_types); return std::make_shared(argument_types); } -AggregateFunctionPtr create_aggregate_function_count_not_null_unary(const std::string& name, - const DataTypes& argument_types, - const bool result_is_nullable) { +AggregateFunctionPtr create_aggregate_function_count_not_null_unary( + const std::string& name, const DataTypes& argument_types, const bool result_is_nullable, + const AggregateFunctionAttr& attr) { assert_arity_at_most<1>(name, argument_types); return std::make_shared(argument_types); diff --git a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp index 1a0bf2518202f3..093b31d57db554 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp @@ -29,7 +29,8 @@ namespace doris::vectorized { AggregateFunctionPtr create_aggregate_function_count_by_enum(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { if (argument_types.size() < 1) { LOG(WARNING) << fmt::format("Illegal number {} of argument for aggregate function {}", argument_types.size(), name); diff --git a/be/src/vec/aggregate_functions/aggregate_function_covar.cpp b/be/src/vec/aggregate_functions/aggregate_function_covar.cpp index b02d6ae0e12572..71d09f61de4302 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_covar.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_covar.cpp @@ -53,14 +53,16 @@ AggregateFunctionPtr create_function_single_value(const String& name, AggregateFunctionPtr create_aggregate_function_covariance_samp(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { return create_function_single_value( name, argument_types, result_is_nullable, NOTNULLABLE); } AggregateFunctionPtr create_aggregate_function_covariance_pop(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { return create_function_single_value( name, argument_types, result_is_nullable, NOTNULLABLE); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp b/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp index 9bb2954207babb..fce58b38688b28 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp @@ -83,7 +83,8 @@ const std::string DISTINCT_FUNCTION_PREFIX = "multi_distinct_"; void register_aggregate_function_combinator_distinct(AggregateFunctionSimpleFactory& factory) { AggregateFunctionCreator creator = [&](const std::string& name, const DataTypes& types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { // 1. we should get not nullable types; DataTypes nested_types(types.size()); std::transform(types.begin(), types.end(), nested_types.begin(), @@ -92,7 +93,7 @@ void register_aggregate_function_combinator_distinct(AggregateFunctionSimpleFact auto transform_arguments = function_combinator->transform_arguments(nested_types); auto nested_function_name = name.substr(DISTINCT_FUNCTION_PREFIX.size()); auto nested_function = factory.get(nested_function_name, transform_arguments, false, - BeExecVersionManager::get_newest_version()); + BeExecVersionManager::get_newest_version(), attr); return function_combinator->transform_aggregate_function(nested_function, types, result_is_nullable); }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_foreach.cpp b/be/src/vec/aggregate_functions/aggregate_function_foreach.cpp index ab6d0142f6a8c0..c1cbcc89996caf 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_foreach.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_foreach.cpp @@ -34,8 +34,9 @@ namespace doris::vectorized { void register_aggregate_function_combinator_foreach(AggregateFunctionSimpleFactory& factory) { - AggregateFunctionCreator creator = [&](const std::string& name, const DataTypes& types, - const bool result_is_nullable) -> AggregateFunctionPtr { + AggregateFunctionCreator creator = + [&](const std::string& name, const DataTypes& types, const bool result_is_nullable, + const AggregateFunctionAttr& attr) -> AggregateFunctionPtr { const std::string& suffix = AggregateFunctionForEach::AGG_FOREACH_SUFFIX; DataTypes transform_arguments; for (const auto& t : types) { @@ -46,7 +47,7 @@ void register_aggregate_function_combinator_foreach(AggregateFunctionSimpleFacto auto nested_function_name = name.substr(0, name.size() - suffix.size()); auto nested_function = factory.get(nested_function_name, transform_arguments, result_is_nullable, - BeExecVersionManager::get_newest_version(), false); + BeExecVersionManager::get_newest_version(), attr); if (!nested_function) { throw Exception( ErrorCode::INTERNAL_ERROR, diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp index b3b9a8b9af47c6..24faf58b2e1ff9 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp @@ -70,7 +70,8 @@ inline AggregateFunctionPtr create_aggregate_function_group_array_intersect_impl } AggregateFunctionPtr create_aggregate_function_group_array_intersect( - const std::string& name, const DataTypes& argument_types, const bool result_is_nullable) { + const std::string& name, const DataTypes& argument_types, const bool result_is_nullable, + const AggregateFunctionAttr& attr) { assert_unary(name, argument_types); const DataTypePtr& argument_type = remove_nullable(argument_types[0]); diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_concat.cpp b/be/src/vec/aggregate_functions/aggregate_function_group_concat.cpp index 9661b9c89d5700..286795ea2ba70c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_concat.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_group_concat.cpp @@ -28,7 +28,8 @@ const std::string AggregateFunctionGroupConcatImplStr::separator = ","; AggregateFunctionPtr create_aggregate_function_group_concat(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { if (argument_types.size() == 1) { return creator_without_type::create< AggregateFunctionGroupConcat>( diff --git a/be/src/vec/aggregate_functions/aggregate_function_histogram.cpp b/be/src/vec/aggregate_functions/aggregate_function_histogram.cpp index 5b06af28399d71..fb2fa9c2513ec0 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_histogram.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_histogram.cpp @@ -47,7 +47,8 @@ AggregateFunctionPtr create_agg_function_histogram(const DataTypes& argument_typ AggregateFunctionPtr create_aggregate_function_histogram(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { WhichDataType type(remove_nullable(argument_types[0])); #define DISPATCH(TYPE) \ diff --git a/be/src/vec/aggregate_functions/aggregate_function_kurtosis.cpp b/be/src/vec/aggregate_functions/aggregate_function_kurtosis.cpp index 00ad1893eafcf6..a763721f3f4061 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_kurtosis.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_kurtosis.cpp @@ -45,7 +45,8 @@ AggregateFunctionPtr type_dispatch_for_aggregate_function_kurt(const DataTypes& AggregateFunctionPtr create_aggregate_function_kurt(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { if (argument_types.size() != 1) { LOG(WARNING) << "aggregate function " << name << " requires exactly 1 argument"; return nullptr; diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.cpp b/be/src/vec/aggregate_functions/aggregate_function_map.cpp index bcf3f2d66dfeaf..f289d885f48f52 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_map.cpp @@ -32,7 +32,8 @@ AggregateFunctionPtr create_agg_function_map_agg(const DataTypes& argument_types AggregateFunctionPtr create_aggregate_function_map_agg(const std::string& name, const DataTypes& argument_types, - const bool result_is_nullable) { + const bool result_is_nullable, + const AggregateFunctionAttr& attr) { WhichDataType type(remove_nullable(argument_types[0])); #define DISPATCH(TYPE) \ diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp index 8aa8850a314d84..c1a72fd52bdd76 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp @@ -30,7 +30,8 @@ namespace doris::vectorized { template