From b19d5d1d632f7f9b6635bc72170f0fc003dc2bc4 Mon Sep 17 00:00:00 2001 From: smellthemoon <64083300+smellthemoon@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:44:26 +0800 Subject: [PATCH] fix: mask with valid data when preCheckOverflow(#37221) (#37222) pr: #37221 issue: #37175 Signed-off-by: lixinguo Co-authored-by: lixinguo --- .../src/exec/expression/BinaryRangeExpr.cpp | 5 - .../src/exec/expression/BinaryRangeExpr.h | 1 - internal/core/src/exec/expression/Expr.h | 2 +- .../core/src/exec/expression/UnaryExpr.cpp | 36 ++---- internal/core/src/exec/expression/UnaryExpr.h | 1 - internal/core/unittest/test_expr.cpp | 117 +++++++++++++++++- 6 files changed, 125 insertions(+), 37 deletions(-) diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index 26467cd4646a3..f6684bda532c1 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -147,14 +147,9 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, ? active_count_ - overflow_check_pos_ : batch_size_; overflow_check_pos_ += batch_size; - if (cached_overflow_res_ != nullptr && - cached_overflow_res_->size() == batch_size) { - return cached_overflow_res_; - } auto valid_res = ProcessChunksForValid(is_index_mode_); auto res_vec = std::make_shared(TargetBitmap(batch_size), std::move(valid_res)); - cached_overflow_res_ = res_vec; return res_vec; }; diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.h b/internal/core/src/exec/expression/BinaryRangeExpr.h index 145a8955ffe88..8f23d32a5682d 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryRangeExpr.h @@ -235,7 +235,6 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { private: std::shared_ptr expr_; - ColumnVectorPtr cached_overflow_res_{nullptr}; int64_t overflow_check_pos_{0}; }; } //namespace exec diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 73a772f3e44cd..195e3777a9ebc 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -503,7 +503,7 @@ class SegmentExpr : public Expr { template TargetBitmap ProcessDataChunksForValid() { - TargetBitmap valid_result(batch_size_); + TargetBitmap valid_result(GetNextBatchSize()); valid_result.set(); int64_t processed_size = 0; for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) { diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index ad3cd8cb294d1..748a0e993f28f 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -754,57 +754,37 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() { ? active_count_ - overflow_check_pos_ : batch_size_; overflow_check_pos_ += batch_size; - if (cached_overflow_res_ != nullptr && - cached_overflow_res_->size() == batch_size) { - return cached_overflow_res_; - } + auto valid = ProcessChunksForValid(CanUseIndex()); + auto res_vec = std::make_shared( + TargetBitmap(batch_size), std::move(valid)); + TargetBitmapView res(res_vec->GetRawData(), batch_size); + TargetBitmapView valid_res(res_vec->GetValidRawData(), batch_size); switch (expr_->op_type_) { case proto::plan::GreaterThan: case proto::plan::GreaterEqual: { - auto valid_res = ProcessChunksForValid(CanUseIndex()); - auto res_vec = std::make_shared( - TargetBitmap(batch_size), std::move(valid_res)); - TargetBitmapView res(res_vec->GetRawData(), batch_size); - cached_overflow_res_ = res_vec; - if (milvus::query::lt_lb(val)) { res.set(); + res &= valid_res; return res_vec; } return res_vec; } case proto::plan::LessThan: case proto::plan::LessEqual: { - auto valid_res = ProcessChunksForValid(CanUseIndex()); - auto res_vec = std::make_shared( - TargetBitmap(batch_size), std::move(valid_res)); - TargetBitmapView res(res_vec->GetRawData(), batch_size); - cached_overflow_res_ = res_vec; - if (milvus::query::gt_ub(val)) { res.set(); + res &= valid_res; return res_vec; } return res_vec; } case proto::plan::Equal: { - auto valid_res = ProcessChunksForValid(CanUseIndex()); - auto res_vec = std::make_shared( - TargetBitmap(batch_size), std::move(valid_res)); - TargetBitmapView res(res_vec->GetRawData(), batch_size); - cached_overflow_res_ = res_vec; - res.reset(); return res_vec; } case proto::plan::NotEqual: { - auto valid_res = ProcessChunksForValid(CanUseIndex()); - auto res_vec = std::make_shared( - TargetBitmap(batch_size), std::move(valid_res)); - TargetBitmapView res(res_vec->GetRawData(), batch_size); - cached_overflow_res_ = res_vec; - res.set(); + res &= valid_res; return res_vec; } default: { diff --git a/internal/core/src/exec/expression/UnaryExpr.h b/internal/core/src/exec/expression/UnaryExpr.h index 71a8869ecd291..9dac7b0b6a1da 100644 --- a/internal/core/src/exec/expression/UnaryExpr.h +++ b/internal/core/src/exec/expression/UnaryExpr.h @@ -346,7 +346,6 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { private: std::shared_ptr expr_; - ColumnVectorPtr cached_overflow_res_{nullptr}; int64_t overflow_check_pos_{0}; }; } // namespace exec diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index 1b08bca0c58fc..8fbf60592bd28 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -560,6 +560,117 @@ TEST_P(ExprTest, TestRangeNullable) { } return v != 2000; }}, + {R"(binary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + lower_inclusive: false, + upper_inclusive: false, + lower_value: < + int64_val: 1000000 + > + upper_value: < + int64_val: 1000001 + > + >)", + [](int v, bool valid) { return false; }}, + {R"(binary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + lower_inclusive: false, + upper_inclusive: false, + lower_value: < + int64_val: -1000001 + > + upper_value: < + int64_val: -1000000 + > + >)", + [](int v, bool valid) { return false; }}, + {R"(unary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + op: GreaterEqual, + value: < + int64_val: 1000000 + > + >)", + [](int v, bool valid) { return false; }}, + {R"(unary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + op: GreaterEqual, + value: < + int64_val: -1000000 + > + >)", + [](int v, bool valid) { + if (!valid) { + return false; + } + return true; + }}, + {R"(unary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + op: LessEqual, + value: < + int64_val: 1000000 + > + >)", + [](int v, bool valid) { + if (!valid) { + return false; + } + return true; + }}, + {R"(unary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + op: LessThan, + value: < + int64_val: -1000000 + > + >)", + [](int v, bool valid) { return false; }}, + {R"(unary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + op: Equal, + value: < + int64_val: 1000000 + > + >)", + [](int v, bool valid) { return false; }}, + {R"(unary_range_expr: < + column_info: < + field_id: 103 + data_type: Int8 + > + op: NotEqual, + value: < + int64_val: 1000000 + > + >)", + [](int v, bool valid) { + if (!valid) { + return false; + } + return true; + }}, }; std::string raw_plan_tmp = R"(vector_anns: < @@ -582,6 +693,9 @@ TEST_P(ExprTest, TestRangeNullable) { auto nullable_fid = schema->AddDebugField("nullable", DataType::INT64, true); + auto nullable_fid_pre_check = + schema->AddDebugField("pre_check", DataType::INT8, true); + auto seg = CreateGrowingSegment(schema, empty_index_meta); int N = 1000; std::vector data_col; @@ -625,7 +739,8 @@ TEST_P(ExprTest, TestRangeNullable) { auto val = data_col[i]; auto valid_data = valid_data_col[i]; auto ref = ref_func(val, valid_data); - ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + ASSERT_EQ(ans, ref) + << clause << "@" << i << "!!" << val << "!!" << valid_data; } } }