From 63d7d0418d109196c755f9279e9d792093641bff Mon Sep 17 00:00:00 2001 From: chasingegg Date: Wed, 9 Oct 2024 10:28:35 +0800 Subject: [PATCH 1/5] Support post filter Signed-off-by: chasingegg --- internal/core/src/common/Chunk.cpp | 19 + internal/core/src/common/Chunk.h | 3 + internal/core/src/common/Consts.h | 1 + internal/core/src/common/QueryInfo.h | 1 + internal/core/src/exec/Driver.cpp | 12 +- internal/core/src/exec/QueryContext.h | 5 + .../src/exec/expression/AlwaysTrueExpr.cpp | 10 +- .../core/src/exec/expression/AlwaysTrueExpr.h | 11 +- .../expression/BinaryArithOpEvalRangeExpr.cpp | 999 ++++++---- .../expression/BinaryArithOpEvalRangeExpr.h | 297 +-- .../src/exec/expression/BinaryRangeExpr.cpp | 334 +++- .../src/exec/expression/BinaryRangeExpr.h | 123 +- .../core/src/exec/expression/CallExpr.cpp | 2 + internal/core/src/exec/expression/CallExpr.h | 6 +- .../core/src/exec/expression/ColumnExpr.cpp | 71 +- .../core/src/exec/expression/ColumnExpr.h | 27 +- .../core/src/exec/expression/CompareExpr.cpp | 183 +- .../core/src/exec/expression/CompareExpr.h | 205 +- .../core/src/exec/expression/ConjunctExpr.h | 14 +- internal/core/src/exec/expression/EvalCtx.h | 36 +- .../core/src/exec/expression/ExistsExpr.cpp | 46 +- .../core/src/exec/expression/ExistsExpr.h | 2 +- internal/core/src/exec/expression/Expr.h | 343 +++- .../src/exec/expression/JsonContainsExpr.cpp | 826 ++++---- .../src/exec/expression/JsonContainsExpr.h | 18 +- .../src/exec/expression/LogicalBinaryExpr.h | 12 +- .../src/exec/expression/LogicalUnaryExpr.h | 9 +- .../core/src/exec/expression/TermExpr.cpp | 303 ++- internal/core/src/exec/expression/TermExpr.h | 18 +- .../core/src/exec/expression/UnaryExpr.cpp | 419 +++-- internal/core/src/exec/expression/UnaryExpr.h | 111 +- .../core/src/exec/expression/ValueExpr.cpp | 10 +- internal/core/src/exec/expression/ValueExpr.h | 11 +- .../core/src/exec/operator/FilterBitsNode.cpp | 3 +- .../src/exec/operator/IterativeFilterNode.cpp | 273 +++ 
.../src/exec/operator/IterativeFilterNode.h | 83 + internal/core/src/exec/operator/Utils.h | 101 + .../src/exec/operator/VectorSearchNode.cpp | 1 + .../operator/groupby/SearchGroupByOperator.h | 43 - internal/core/src/mmap/ChunkedColumn.h | 14 + internal/core/src/mmap/Column.h | 19 + .../core/src/monitor/prometheus_client.cpp | 5 + internal/core/src/monitor/prometheus_client.h | 1 + internal/core/src/query/PlanProto.cpp | 56 +- internal/core/src/query/SearchBruteForce.cpp | 11 +- internal/core/src/query/SearchOnGrowing.cpp | 5 +- internal/core/src/query/SearchOnIndex.cpp | 2 +- internal/core/src/query/SearchOnSealed.cpp | 8 +- .../src/segcore/ChunkedSegmentSealedImpl.cpp | 26 +- .../src/segcore/ChunkedSegmentSealedImpl.h | 5 + .../core/src/segcore/SegmentGrowingImpl.cpp | 9 + .../core/src/segcore/SegmentGrowingImpl.h | 5 + internal/core/src/segcore/SegmentInterface.h | 27 + .../core/src/segcore/SegmentSealedImpl.cpp | 18 +- internal/core/src/segcore/SegmentSealedImpl.h | 5 + internal/core/unittest/CMakeLists.txt | 1 + .../core/unittest/test_always_true_expr.cpp | 19 + internal/core/unittest/test_array_expr.cpp | 209 ++ internal/core/unittest/test_expr.cpp | 1676 ++++++++++++++++- .../core/unittest/test_iterative_filter.cpp | 589 ++++++ internal/core/unittest/test_string_expr.cpp | 195 ++ .../core/unittest/test_utils/GenExprProto.h | 27 + 62 files changed, 6365 insertions(+), 1558 deletions(-) create mode 100644 internal/core/src/exec/operator/IterativeFilterNode.cpp create mode 100644 internal/core/src/exec/operator/IterativeFilterNode.h create mode 100644 internal/core/src/exec/operator/Utils.h create mode 100644 internal/core/unittest/test_iterative_filter.cpp diff --git a/internal/core/src/common/Chunk.cpp b/internal/core/src/common/Chunk.cpp index 6032c6b930d9c..037acabe591d4 100644 --- a/internal/core/src/common/Chunk.cpp +++ b/internal/core/src/common/Chunk.cpp @@ -27,6 +27,25 @@ StringChunk::StringViews() { return {ret, valid_}; } +std::pair, FixedVector> 
+StringChunk::ViewsByOffsets(const FixedVector& offsets) { + std::vector ret; + FixedVector valid_res; + size_t size = offsets.size(); + ret.reserve(size); + valid_res.reserve(size); + for (auto i = 0; i < size; ++i) { + uint32_t string_size; + char* pos = data_; + pos += offsets_[offsets[i]]; + string_size = *reinterpret_cast(pos); + pos += sizeof(uint32_t); + ret.emplace_back(std::string_view(pos, string_size)); + valid_res.emplace_back(isValid(offsets[i])); + } + return {ret, valid_res}; +} + void ArrayChunk::ConstructViews() { views_.reserve(row_nums_); diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h index 7cfaa7fad466e..dc974769beda5 100644 --- a/internal/core/src/common/Chunk.h +++ b/internal/core/src/common/Chunk.h @@ -170,6 +170,9 @@ class StringChunk : public Chunk { return result; } + std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets); + const char* ValueAt(int64_t idx) const override { return (*this)[idx].data(); diff --git a/internal/core/src/common/Consts.h b/internal/core/src/common/Consts.h index 1f9d51e447680..400a0eacf4b4b 100644 --- a/internal/core/src/common/Consts.h +++ b/internal/core/src/common/Consts.h @@ -47,6 +47,7 @@ const char KMEANS_CLUSTER[] = "KMEANS"; const char VEC_OPT_FIELDS[] = "opt_fields"; const char PAGE_RETAIN_ORDER[] = "page_retain_order"; const char TEXT_LOG_ROOT_PATH[] = "text_log"; +const char ITERATIVE_FILTER[] = "iterative_filter"; const char DEFAULT_PLANNODE_ID[] = "0"; const char DEAFULT_QUERY_ID[] = "0"; diff --git a/internal/core/src/common/QueryInfo.h b/internal/core/src/common/QueryInfo.h index 760409820ee47..66f4f17664cbd 100644 --- a/internal/core/src/common/QueryInfo.h +++ b/internal/core/src/common/QueryInfo.h @@ -35,6 +35,7 @@ struct SearchInfo { std::optional group_by_field_id_; tracer::TraceContext trace_ctx_; bool materialized_view_involved = false; + bool iterative_filter_execution = false; }; using SearchInfoPtr = std::shared_ptr; diff --git 
a/internal/core/src/exec/Driver.cpp b/internal/core/src/exec/Driver.cpp index fcdadb9580c28..39ef70d14dc38 100644 --- a/internal/core/src/exec/Driver.cpp +++ b/internal/core/src/exec/Driver.cpp @@ -23,6 +23,7 @@ #include "exec/operator/CallbackSink.h" #include "exec/operator/CountNode.h" #include "exec/operator/FilterBitsNode.h" +#include "exec/operator/IterativeFilterNode.h" #include "exec/operator/MvccNode.h" #include "exec/operator/Operator.h" #include "exec/operator/VectorSearchNode.h" @@ -52,11 +53,16 @@ DriverFactory::CreateDriver(std::unique_ptr ctx, for (size_t i = 0; i < plannodes_.size(); ++i) { auto id = operators.size(); auto plannode = plannodes_[i]; - if (auto filternode = + if (auto filterbitsnode = std::dynamic_pointer_cast( plannode)) { - operators.push_back( - std::make_unique(id, ctx.get(), filternode)); + operators.push_back(std::make_unique( + id, ctx.get(), filterbitsnode)); + } else if (auto filternode = + std::dynamic_pointer_cast( + plannode)) { + operators.push_back(std::make_unique( + id, ctx.get(), filternode)); } else if (auto mvccnode = std::dynamic_pointer_cast( plannode)) { diff --git a/internal/core/src/exec/QueryContext.h b/internal/core/src/exec/QueryContext.h index 4b49fe1a1482e..916eb73a3c9c2 100644 --- a/internal/core/src/exec/QueryContext.h +++ b/internal/core/src/exec/QueryContext.h @@ -230,6 +230,11 @@ class QueryContext : public Context { return search_info_; } + knowhere::MetricType + get_metric_type() { + return search_info_.metric_type_; + } + const query::PlaceholderGroup* get_placeholder_group() { return placeholder_group_; diff --git a/internal/core/src/exec/expression/AlwaysTrueExpr.cpp b/internal/core/src/exec/expression/AlwaysTrueExpr.cpp index 920fc86ee6a17..063515cc19ae1 100644 --- a/internal/core/src/exec/expression/AlwaysTrueExpr.cpp +++ b/internal/core/src/exec/expression/AlwaysTrueExpr.cpp @@ -21,9 +21,13 @@ namespace exec { void PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) { - int64_t 
real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + auto input = context.get_offset_input(); + has_offset_input_ = (input != nullptr); + int64_t real_batch_size = (has_offset_input_) + ? input->size() + : (current_pos_ + batch_size_ >= active_count_ + ? active_count_ - current_pos_ + : batch_size_); // always true no need to skip null if (real_batch_size == 0) { diff --git a/internal/core/src/exec/expression/AlwaysTrueExpr.h b/internal/core/src/exec/expression/AlwaysTrueExpr.h index ffb5750a311f8..5ef0dc10d493a 100644 --- a/internal/core/src/exec/expression/AlwaysTrueExpr.h +++ b/internal/core/src/exec/expression/AlwaysTrueExpr.h @@ -47,11 +47,14 @@ class PhyAlwaysTrueExpr : public Expr { void MoveCursor() override { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + if (!has_offset_input_) { + int64_t real_batch_size = + current_pos_ + batch_size_ >= active_count_ + ? 
active_count_ - current_pos_ + : batch_size_; - current_pos_ += real_batch_size; + current_pos_ += real_batch_size; + } } private: diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp index e5b24ac4121ce..9f886c10dafe6 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp @@ -21,48 +21,50 @@ namespace exec { void PhyBinaryArithOpEvalRangeExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT8: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT16: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT32: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT64: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::JSON: { auto value_type = expr_->value_.val_case(); switch (value_type) { case proto::plan::GenericValue::ValCase::kBoolVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kInt64Val: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } 
default: { @@ -79,12 +81,12 @@ PhyBinaryArithOpEvalRangeExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } default: { @@ -105,11 +107,13 @@ PhyBinaryArithOpEvalRangeExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson( + OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -128,58 +132,92 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { ? GetValueFromProto(expr_->right_operand_) : ValueType(); -#define BinaryArithRangeJSONCompare(cmp) \ - do { \ - for (size_t i = 0; i < size; ++i) { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = false; \ - valid_res[i] = false; \ - continue; \ - } \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = !x.error() && (cmp); \ - continue; \ - } \ - res[i] = false; \ - continue; \ - } \ - res[i] = (cmp); \ - } \ +#define BinaryArithRangeJSONCompare(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::random) { \ + offset = (offsets) ? 
offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = !x.error() && (cmp); \ + continue; \ + } \ + res[i] = false; \ + continue; \ + } \ + res[i] = (cmp); \ + } \ } while (false) -#define BinaryArithRangeJSONCompareNotEqual(cmp) \ - do { \ - for (size_t i = 0; i < size; ++i) { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = false; \ - valid_res[i] = false; \ - continue; \ - } \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = x.error() || (cmp); \ - continue; \ - } \ - res[i] = true; \ - continue; \ - } \ - res[i] = (cmp); \ - } \ +#define BinaryArithRangeJSONCompareNotEqual(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::random) { \ + offset = (offsets) ? offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = x.error() || (cmp); \ + continue; \ + } \ + res[i] = true; \ + continue; \ + } \ + res[i] = (cmp); \ + } \ } while (false) - auto execute_sub_batch = [op_type, arith_type](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val, - ValueType right_operand, - const std::string& pointer) { +#define BinaryArithRangeJONCompareArrayLength(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::random) { \ + offset = (offsets) ? 
offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + int array_length = 0; \ + auto doc = data[offset].doc(); \ + auto array = doc.at_pointer(pointer).get_array(); \ + if (!array.error()) { \ + array_length = array.count_elements(); \ + } \ + res[i] = (cmp); \ + } \ + } while (false) + + auto execute_sub_batch = + [ op_type, + arith_type ]( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val, + ValueType right_operand, + const std::string& pointer) { switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { @@ -210,20 +248,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length == val; - } + BinaryArithRangeJONCompareArrayLength(array_length == + val); break; } default: @@ -264,20 +290,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length != val; - } + BinaryArithRangeJONCompareArrayLength(array_length != + val); break; } default: @@ -318,20 +332,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case 
proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length > val; - } + BinaryArithRangeJONCompareArrayLength(array_length > + val); break; } default: @@ -372,20 +374,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length >= val; - } + BinaryArithRangeJONCompareArrayLength(array_length >= + val); break; } default: @@ -426,20 +416,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length < val; - } + BinaryArithRangeJONCompareArrayLength(array_length < + val); break; } default: @@ -480,20 +458,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = 
doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length <= val; - } + BinaryArithRangeJONCompareArrayLength(array_length <= + val); break; } default: @@ -512,13 +478,25 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { op_type); } }; - int64_t processed_size = ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - value, - right_operand, - pointer); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + value, + right_operand, + pointer); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + value, + right_operand, + pointer); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -529,11 +507,13 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray( + OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -555,31 +535,54 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { ? 
GetValueFromProto(expr_->right_operand_) : ValueType(); -#define BinaryArithRangeArrayCompare(cmp) \ - do { \ - for (size_t i = 0; i < size; ++i) { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = false; \ - valid_res[i] = false; \ - continue; \ - } \ - if (index >= data[i].length()) { \ - res[i] = false; \ - continue; \ - } \ - auto value = data[i].get_data(index); \ - res[i] = (cmp); \ - } \ +#define BinaryArithRangeArrayCompare(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::random) { \ + offset = (offsets) ? offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + if (index >= data[offset].length()) { \ + res[i] = false; \ + continue; \ + } \ + auto value = data[offset].get_data(index); \ + res[i] = (cmp); \ + } \ + } while (false) + +#define BinaryArithRangeArrayLengthCompate(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::random) { \ + offset = (offsets) ? 
offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = valid_res[i] = false; \ + continue; \ + } \ + res[i] = (cmp); \ + } \ } while (false) - auto execute_sub_batch = [op_type, arith_type](const ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val, - ValueType right_operand, - int index) { + auto execute_sub_batch = + [ op_type, + arith_type ]( + const ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val, + ValueType right_operand, + int index) { switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { @@ -611,13 +614,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() == val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() == val); break; } default: @@ -658,13 +656,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() != val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() != val); break; } default: @@ -705,13 +698,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() > val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() > val); break; } default: @@ -752,13 +740,8 @@ 
PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() >= val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() >= val); break; } default: @@ -799,13 +782,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() < val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() < val); break; } default: @@ -846,13 +824,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() <= val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() <= val); break; } default: @@ -872,14 +845,26 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { } }; - int64_t processed_size = - ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - value, - right_operand, - index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + value, + right_operand, + index); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + value, + right_operand, + index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -890,24 +875,26 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { template VectorPtr 
-PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImpl() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImpl(OffsetVector* input) { if (is_index_mode_ && IndexHasRawData()) { - return ExecRangeVisitorImplForIndex(); + return ExecRangeVisitorImplForIndex(input); } else { - return ExecRangeVisitorImplForData(); + return ExecRangeVisitorImplForData(input); } } template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex( + OffsetVector* input) { using Index = index::ScalarIndex; typedef std::conditional_t && !std::is_same_v, int64_t, T> HighPrecisionType; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -916,12 +903,15 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { GetValueFromProto(expr_->right_operand_); auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; - auto sub_batch_size = size_per_chunk_; + auto sub_batch_size = has_offset_input_ ? 
input->size() : size_per_chunk_; - auto execute_sub_batch = [op_type, arith_type, sub_batch_size]( - Index* index_ptr, - HighPrecisionType value, - HighPrecisionType right_operand) { + auto execute_sub_batch = + [ op_type, arith_type, + sub_batch_size ]( + Index * index_ptr, + HighPrecisionType value, + HighPrecisionType right_operand, + const int32_t* offsets = nullptr) { TargetBitmap res; switch (op_type) { case proto::plan::OpType::Equal: { @@ -929,46 +919,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -985,46 +995,66 @@ 
PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1041,46 +1071,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = 
std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1097,46 +1147,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case 
proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1153,46 +1223,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res 
= std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1209,46 +1299,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1268,25 +1378,39 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { } return res; }; - auto res = ProcessIndexChunks(execute_sub_batch, value, right_operand); - AssertInfo(res->size() == real_batch_size, - "internal error: expr processed rows {} not equal " - "expect batch size {}", - res->size(), - real_batch_size); - // return 
std::make_shared(std::move(res)); - return res; + if (has_offset_input_) { + auto res = ProcessIndexChunksByOffsets( + execute_sub_batch, input, value, right_operand); + + AssertInfo(res->size() == real_batch_size, + "internal error: expr processed rows {} not equal " + "expect batch size {}", + res->size(), + real_batch_size); + return res; + } else { + auto res = + ProcessIndexChunks(execute_sub_batch, value, right_operand); + AssertInfo(res->size() == real_batch_size, + "internal error: expr processed rows {} not equal " + "expect batch size {}", + res->size(), + real_batch_size); + return res; + } } template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData( + OffsetVector* input) { typedef std::conditional_t && !std::is_same_v, int64_t, T> HighPrecisionType; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -1302,55 +1426,64 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; - auto execute_sub_batch = [op_type, arith_type]( - const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - HighPrecisionType value, - HighPrecisionType right_operand) { + + auto execute_sub_batch = + [ op_type, + arith_type ]( + const T* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + HighPrecisionType value, + HighPrecisionType right_operand) { switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case 
proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1367,41 +1500,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, 
size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1418,41 +1556,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1469,41 +1612,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, 
right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1520,41 +1668,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1571,41 +1724,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + 
func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1628,18 +1786,33 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { // but to mask res with valid_data after the batch operation. if (valid_data != nullptr) { for (int i = 0; i < size; i++) { - if (!valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (!valid_data[offset]) { res[i] = valid_res[i] = false; } } } }; - int64_t processed_size = ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - value, - right_operand); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + value, + right_operand); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + value, + right_operand); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h index 5eef111438591..0ffb83a936cbf 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h @@ -88,7 +88,8 @@ struct ArithOpHelper { template + proto::plan::ArithOpType arith_op, + FilterType filter_type = FilterType::sequential> struct ArithOpElementFunc { typedef std::conditional_t && !std::is_same_v, @@ -100,145 +101,147 @@ struct ArithOpElementFunc { size_t size, HighPrecisonType val, HighPrecisonType right_operand, - TargetBitmapView res) { - /* + TargetBitmapView res, + const int32_t* offsets = nullptr) { // This is the original code, kept here for the documentation purposes - for (int i = 0; i < size; ++i) { - if constexpr (cmp_op == proto::plan::OpType::Equal) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / 
right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) == val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::NotEqual) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) != val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) > val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) >= val; - } else if constexpr (arith_op == - 
proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) >= val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::LessThan) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) < val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::LessEqual) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) <= val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - 
"unsupported arith type:{} for ArithOpElementFunc", - arith_op)); + // and also this code will be used for iterative filter since iterative filter does not execute as a batch manner + if constexpr (filter_type == FilterType::random) { + for (int i = 0; i < size; ++i) { + auto offset = (offsets) ? offsets[i] : i; + if constexpr (cmp_op == proto::plan::OpType::Equal) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) == val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == proto::plan::OpType::NotEqual) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) != val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == + proto::plan::OpType::GreaterThan) { + if constexpr (arith_op == 
proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) > val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == + proto::plan::OpType::GreaterEqual) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) >= val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == proto::plan::OpType::LessThan) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) < 
val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) < val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == proto::plan::OpType::LessEqual) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) <= val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } } } + return; } - */ + + // more efficient SIMD version if constexpr (!std::is_same_v::op), void>) { constexpr auto cmp_op_cvt = CmpOpHelper::op; @@ -266,7 +269,8 @@ struct ArithOpElementFunc { template + proto::plan::ArithOpType arith_op, + FilterType filter_type> struct ArithOpIndexFunc { typedef std::conditional_t && !std::is_same_v, @@ -278,10 +282,15 @@ struct ArithOpIndexFunc { operator()(Index* index, size_t size, HighPrecisonType val, - HighPrecisonType right_operand) { + HighPrecisonType right_operand, + const int32_t* offsets = nullptr) { TargetBitmap res(size); for (size_t i = 0; i < size; ++i) { - auto raw = index->Reverse_Lookup(i); + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + auto raw = index->Reverse_Lookup(offset); if (!raw.has_value()) { res[i] = false; continue; @@ -449,23 +458,23 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr { private: template VectorPtr - ExecRangeVisitorImpl(); + ExecRangeVisitorImpl(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForIndex(); + ExecRangeVisitorImplForIndex(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForData(); + ExecRangeVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForJson(); + ExecRangeVisitorImplForJson(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForArray(); + ExecRangeVisitorImplForArray(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index e22208a7dd6c3..a51e58ed8fc3a 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -24,33 +24,35 @@ namespace exec { void PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT8: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT16: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT32: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT64: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::DOUBLE: { - result = 
ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -58,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } else { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } break; } @@ -68,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { auto value_type = expr_->lower_val_.val_case(); switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kStringVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } default: { @@ -93,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } case proto::plan::GenericValue::ValCase::kStringVal: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } default: { @@ -124,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl() { - if (is_index_mode_) { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) { + if (is_index_mode_ && 
!has_offset_input_) { return ExecRangeVisitorImplForIndex(); } else { - return ExecRangeVisitorImplForData(); + return ExecRangeVisitorImplForData(input); } } @@ -137,17 +139,28 @@ ColumnVectorPtr PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, HighPrecisionType& val2, bool& lower_inclusive, - bool& upper_inclusive) { + bool& upper_inclusive, + OffsetVector* input) { lower_inclusive = expr_->lower_inclusive_; upper_inclusive = expr_->upper_inclusive_; val1 = GetValueFromProto(expr_->lower_val_); val2 = GetValueFromProto(expr_->upper_val_); - auto get_next_overflow_batch = [this]() -> ColumnVectorPtr { - int64_t batch_size = overflow_check_pos_ + batch_size_ >= active_count_ - ? active_count_ - overflow_check_pos_ - : batch_size_; - overflow_check_pos_ += batch_size; - auto valid_res = ProcessChunksForValid(is_index_mode_); + + auto get_next_overflow_batch = + [this](OffsetVector* input) -> ColumnVectorPtr { + int64_t batch_size; + if (input != nullptr) { + batch_size = input->size(); + } else { + batch_size = overflow_check_pos_ + batch_size_ >= active_count_ + ? active_count_ - overflow_check_pos_ + : batch_size_; + overflow_check_pos_ += batch_size; + } + auto valid_res = + (input != nullptr) + ? 
ProcessChunksForValidByOffsets(is_index_mode_, *input) + : ProcessChunksForValid(is_index_mode_); auto res_vec = std::make_shared(TargetBitmap(batch_size), std::move(valid_res)); return res_vec; @@ -155,7 +168,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, if constexpr (std::is_integral_v && !std::is_same_v) { if (milvus::query::gt_ub(val1)) { - return get_next_overflow_batch(); + return get_next_overflow_batch(input); } else if (milvus::query::lt_lb(val1)) { val1 = std::numeric_limits::min(); lower_inclusive = true; @@ -165,7 +178,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, val2 = std::numeric_limits::max(); upper_inclusive = true; } else if (milvus::query::lt_lb(val2)) { - return get_next_overflow_batch(); + return get_next_overflow_batch(input); } } return nullptr; @@ -216,7 +229,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { typedef std:: conditional_t, std::string, T> IndexInnerType; @@ -226,57 +239,67 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { int64_t, IndexInnerType> HighPrecisionType; - auto real_batch_size = GetNextBatchSize(); - if (real_batch_size == 0) { - return nullptr; - } HighPrecisionType val1; HighPrecisionType val2; bool lower_inclusive = false; bool upper_inclusive = false; - if (auto res = - PreCheckOverflow(val1, val2, lower_inclusive, upper_inclusive)) { + if (auto res = PreCheckOverflow( + val1, val2, lower_inclusive, upper_inclusive, input)) { return res; } + + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } auto res_vec = std::make_shared( TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - auto execute_sub_batch = [lower_inclusive, upper_inclusive]( - const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - HighPrecisionType val1, - HighPrecisionType val2) { + auto execute_sub_batch = + [ lower_inclusive, + upper_inclusive ]( + const T* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + HighPrecisionType val1, + HighPrecisionType val2) { if (lower_inclusive && upper_inclusive) { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } else if (lower_inclusive && !upper_inclusive) { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } else if (!lower_inclusive && upper_inclusive) { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } else { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } // there is a batch operation in BinaryRangeElementFunc, // so not divide data again for the reason that it may reduce performance if the null distribution is scattered // but to mask res with valid_data after the batch operation. if (valid_data != nullptr) { for (int i = 0; i < size; i++) { - if (!valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (!valid_data[offset]) { res[i] = valid_res[i] = false; } } } }; + auto skip_index_func = [val1, val2, lower_inclusive, upper_inclusive]( const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) { @@ -294,8 +317,19 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { field_id, chunk_id, val1, val2, false, false); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, skip_index_func, res, valid_res, val1, val2); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + skip_index_func, + input, + res, + valid_res, + val1, + val2); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, skip_index_func, res, valid_res, val1, val2); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -306,11 +340,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -326,30 +361,81 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { ValueType val2 = GetValueFromProto(expr_->upper_val_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer]( - const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val1, - ValueType val2) { + auto execute_sub_batch = + [ lower_inclusive, upper_inclusive, + pointer ]( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val1, + ValueType val2) { if (lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); } else if (lower_inclusive && !upper_inclusive) { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); + } else if (!lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); } else { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, 
valid_res, val1, val2); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + val1, + val2); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -360,11 +446,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -383,31 +470,90 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() { index = std::stoi(expr_->column_.nested_path_[0]); } - auto execute_sub_batch = [lower_inclusive, upper_inclusive]( - const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val1, - ValueType val2, - int index) { + auto execute_sub_batch = + [ lower_inclusive, + upper_inclusive ]( + const milvus::ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val1, + ValueType val2, + int index) { if (lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); } else if (lower_inclusive && !upper_inclusive) { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, 
res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); + } else if (!lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); + } else { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + val1, + val2, + index); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + val1, + val2, + index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.h b/internal/core/src/exec/expression/BinaryRangeExpr.h index 8f23d32a5682d..1babfc6fd044e 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryRangeExpr.h @@ -27,7 +27,10 @@ namespace milvus { namespace exec { -template +template struct BinaryRangeElementFunc { typedef std::conditional_t && !std::is_same_v, @@ -35,7 +38,28 @@ struct BinaryRangeElementFunc { T> HighPrecisionType; void - operator()(T val1, T val2, const T* src, size_t n, TargetBitmapView res) { + operator()(T val1, + T val2, + const T* src, + size_t n, + TargetBitmapView res, + const int32_t* offsets = nullptr) { + if constexpr (filter_type 
== FilterType::random) { + for (size_t i = 0; i < n; ++i) { + auto offset = (offsets) ? offsets[i] : i; + if constexpr (lower_inclusive && upper_inclusive) { + res[i] = val1 <= src[offset] && src[offset] <= val2; + } else if constexpr (lower_inclusive && !upper_inclusive) { + res[i] = val1 <= src[offset] && src[offset] < val2; + } else if constexpr (!lower_inclusive && upper_inclusive) { + res[i] = val1 < src[offset] && src[offset] <= val2; + } else { + res[i] = val1 < src[offset] && src[offset] < val2; + } + } + return; + } + if constexpr (lower_inclusive && upper_inclusive) { res.inplace_within_range_val( val1, val2, src, n); @@ -52,30 +76,33 @@ struct BinaryRangeElementFunc { } }; -#define BinaryRangeJSONCompare(cmp) \ - do { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = valid_res[i] = false; \ - break; \ - } \ - auto x = src[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = src[i].template at(pointer); \ - if (!x.error()) { \ - auto value = x.value(); \ - res[i] = (cmp); \ - break; \ - } \ - } \ - res[i] = false; \ - break; \ - } \ - auto value = x.value(); \ - res[i] = (cmp); \ +#define BinaryRangeJSONCompare(cmp) \ + do { \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = valid_res[i] = false; \ + break; \ + } \ + auto x = src[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = src[offset].template at(pointer); \ + if (!x.error()) { \ + auto value = x.value(); \ + res[i] = (cmp); \ + break; \ + } \ + } \ + res[i] = false; \ + break; \ + } \ + auto value = x.value(); \ + res[i] = (cmp); \ } while (false) -template +template struct BinaryRangeElementFuncForJson { using GetType = std::conditional_t, std::string_view, @@ -88,8 +115,13 @@ struct BinaryRangeElementFuncForJson { const bool* valid_data, size_t n, TargetBitmapView res, - TargetBitmapView valid_res) { + TargetBitmapView valid_res, + const int32_t* offsets = nullptr) { for 
(size_t i = 0; i < n; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } if constexpr (lower_inclusive && upper_inclusive) { BinaryRangeJSONCompare(val1 <= value && value <= val2); } else if constexpr (lower_inclusive && !upper_inclusive) { @@ -103,7 +135,10 @@ struct BinaryRangeElementFuncForJson { } }; -template +template struct BinaryRangeElementFuncForArray { using GetType = std::conditional_t, std::string_view, @@ -116,39 +151,44 @@ struct BinaryRangeElementFuncForArray { const bool* valid_data, size_t n, TargetBitmapView res, - TargetBitmapView valid_res) { + TargetBitmapView valid_res, + const int32_t* offsets = nullptr) { for (size_t i = 0; i < n; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + size_t offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } if constexpr (lower_inclusive && upper_inclusive) { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 <= value && value <= val2; } else if constexpr (lower_inclusive && !upper_inclusive) { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 <= value && value < val2; } else if constexpr (!lower_inclusive && upper_inclusive) { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 < value && value <= val2; } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); 
+ auto value = src[offset].get_data(index); res[i] = val1 < value && value < val2; } } @@ -211,11 +251,12 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { PreCheckOverflow(HighPrecisionType& val1, HighPrecisionType& val2, bool& lower_inclusive, - bool& upper_inclusive); + bool& upper_inclusive, + OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImpl(); + ExecRangeVisitorImpl(OffsetVector* input = nullptr); template VectorPtr @@ -223,15 +264,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { template VectorPtr - ExecRangeVisitorImplForData(); + ExecRangeVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForJson(); + ExecRangeVisitorImplForJson(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForArray(); + ExecRangeVisitorImplForArray(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/CallExpr.cpp b/internal/core/src/exec/expression/CallExpr.cpp index 0e6fb0fc5cd10..0ffcd170fcdc9 100644 --- a/internal/core/src/exec/expression/CallExpr.cpp +++ b/internal/core/src/exec/expression/CallExpr.cpp @@ -28,6 +28,8 @@ namespace exec { void PhyCallExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto offset_input = context.get_offset_input(); + SetHasOffsetInput(offset_input != nullptr); AssertInfo(inputs_.size() == expr_->inputs().size(), "logical call expr needs {} inputs, but {} inputs are provided", expr_->inputs().size(), diff --git a/internal/core/src/exec/expression/CallExpr.h b/internal/core/src/exec/expression/CallExpr.h index f074c7b423e77..c4a690cbc09e5 100644 --- a/internal/core/src/exec/expression/CallExpr.h +++ b/internal/core/src/exec/expression/CallExpr.h @@ -61,8 +61,10 @@ class PhyCallExpr : public Expr { void MoveCursor() override { - for (auto input : inputs_) { - input->MoveCursor(); + if (!has_offset_input_) { + for (auto input : inputs_) { + input->MoveCursor(); + } } } diff --git 
a/internal/core/src/exec/expression/ColumnExpr.cpp b/internal/core/src/exec/expression/ColumnExpr.cpp index ca83a91df0245..9861a95045907 100644 --- a/internal/core/src/exec/expression/ColumnExpr.cpp +++ b/internal/core/src/exec/expression/ColumnExpr.cpp @@ -30,30 +30,32 @@ PhyColumnExpr::GetNextBatchSize() { void PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput(input != nullptr); switch (this->expr_->type()) { case DataType::BOOL: - result = DoEval(); + result = DoEval(input); break; case DataType::INT8: - result = DoEval(); + result = DoEval(input); break; case DataType::INT16: - result = DoEval(); + result = DoEval(input); break; case DataType::INT32: - result = DoEval(); + result = DoEval(input); break; case DataType::INT64: - result = DoEval(); + result = DoEval(input); break; case DataType::FLOAT: - result = DoEval(); + result = DoEval(input); break; case DataType::DOUBLE: - result = DoEval(); + result = DoEval(input); break; case DataType::VARCHAR: { - result = DoEval(); + result = DoEval(input); break; } default: @@ -65,8 +67,59 @@ PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyColumnExpr::DoEval() { +PhyColumnExpr::DoEval(OffsetVector* input) { // similar to PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) + // take offsets as input + if (has_offset_input_) { + auto real_batch_size = input->size(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = std::make_shared( + expr_->GetColumn().data_type_, real_batch_size); + T* res_value = res_vec->RawAsValues(); + TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); + valid_res.set(); + + auto data_barrier = segment_chunk_reader_.segment_->num_chunk_data( + expr_->GetColumn().field_id_); + + int64_t processed_rows = 0; + const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); + for (auto i = 0; i < real_batch_size; ++i) { + auto offset = 
(*input)[i]; + auto [chunk_id, + chunk_offset] = [&]() -> std::pair { + if (segment_chunk_reader_.segment_->type() == + SegmentType::Growing) { + return {offset / size_per_chunk, offset % size_per_chunk}; + } else if (segment_chunk_reader_.segment_->is_chunked() && + data_barrier > 0) { + return segment_chunk_reader_.segment_->get_chunk_by_offset( + expr_->GetColumn().field_id_, offset); + } else { + return {0, offset}; + } + }(); + auto chunk_data = segment_chunk_reader_.GetChunkDataAccessor( + expr_->GetColumn().data_type_, + expr_->GetColumn().field_id_, + chunk_id, + data_barrier); + auto chunk_data_by_offset = chunk_data(chunk_offset); + if (!chunk_data_by_offset.has_value()) { + valid_res[processed_rows] = false; + } else { + res_value[processed_rows] = + boost::get(chunk_data_by_offset.value()); + } + processed_rows++; + } + return res_vec; + } + + // normal path if (segment_chunk_reader_.segment_->is_chunked()) { auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { diff --git a/internal/core/src/exec/expression/ColumnExpr.h b/internal/core/src/exec/expression/ColumnExpr.h index 4b8bdfd93662b..5564de7da8771 100644 --- a/internal/core/src/exec/expression/ColumnExpr.h +++ b/internal/core/src/exec/expression/ColumnExpr.h @@ -67,16 +67,21 @@ class PhyColumnExpr : public Expr { void MoveCursor() override { - if (segment_chunk_reader_.segment_->is_chunked()) { - segment_chunk_reader_.MoveCursorForMultipleChunk( - current_chunk_id_, - current_chunk_pos_, - expr_->GetColumn().field_id_, - num_chunk_, - batch_size_); - } else { - segment_chunk_reader_.MoveCursorForSingleChunk( - current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_); + if (!has_offset_input_) { + if (segment_chunk_reader_.segment_->is_chunked()) { + segment_chunk_reader_.MoveCursorForMultipleChunk( + current_chunk_id_, + current_chunk_pos_, + expr_->GetColumn().field_id_, + num_chunk_, + batch_size_); + } else { + segment_chunk_reader_.MoveCursorForSingleChunk( + 
current_chunk_id_, + current_chunk_pos_, + num_chunk_, + batch_size_); + } } } @@ -107,7 +112,7 @@ class PhyColumnExpr : public Expr { template VectorPtr - DoEval(); + DoEval(OffsetVector* input = nullptr); private: bool is_indexed_; diff --git a/internal/core/src/exec/expression/CompareExpr.cpp b/internal/core/src/exec/expression/CompareExpr.cpp index 7044f5917f11f..cb7994fbea0a9 100644 --- a/internal/core/src/exec/expression/CompareExpr.cpp +++ b/internal/core/src/exec/expression/CompareExpr.cpp @@ -38,7 +38,77 @@ PhyCompareFilterExpr::GetNextBatchSize() { template VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { +PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op, + OffsetVector* input) { + // take offsets as input + if (has_offset_input_) { + auto real_batch_size = input->size(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = std::make_shared( + TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); + TargetBitmapView res(res_vec->GetRawData(), real_batch_size); + TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); + valid_res.set(); + + auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data( + expr_->left_field_id_); + auto right_data_barrier = + segment_chunk_reader_.segment_->num_chunk_data( + expr_->right_field_id_); + + int64_t processed_rows = 0; + const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); + for (auto i = 0; i < real_batch_size; ++i) { + auto offset = (*input)[i]; + auto get_chunk_id_and_offset = + [&](const FieldId field, + const int64_t data_barrier) -> std::pair { + if (segment_chunk_reader_.segment_->type() == + SegmentType::Growing) { + return {offset / size_per_chunk, offset % size_per_chunk}; + } else if (segment_chunk_reader_.segment_->is_chunked() && + data_barrier > 0) { + return segment_chunk_reader_.segment_->get_chunk_by_offset( + field, offset); + } else { + return {0, offset}; + } + }; + + auto [left_chunk_id, 
left_chunk_offset] = + get_chunk_id_and_offset(left_field_, left_data_barrier); + auto [right_chunk_id, right_chunk_offset] = + get_chunk_id_and_offset(right_field_, right_data_barrier); + auto left = segment_chunk_reader_.GetChunkDataAccessor( + expr_->left_data_type_, + expr_->left_field_id_, + left_chunk_id, + left_data_barrier); + auto right = segment_chunk_reader_.GetChunkDataAccessor( + expr_->right_data_type_, + expr_->right_field_id_, + right_chunk_id, + right_data_barrier); + auto left_opt = left(left_chunk_offset); + auto right_opt = right(right_chunk_offset); + if (!left_opt.has_value() || !right_opt.has_value()) { + res[processed_rows] = false; + valid_res[processed_rows] = false; + } else { + res[processed_rows] = boost::apply_visitor( + milvus::query::Relational{}, + left_opt.value(), + right_opt.value()); + } + processed_rows++; + } + return res_vec; + } + + // normal path if (segment_chunk_reader_.segment_->is_chunked()) { auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { @@ -140,39 +210,42 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { void PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); // For segment both fields has no index, can use SIMD to speed up. // Avoiding too much call stack that blocks SIMD. 
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) { - result = ExecCompareExprDispatcherForBothDataSegment(); + result = ExecCompareExprDispatcherForBothDataSegment(input); return; } - result = ExecCompareExprDispatcherForHybridSegment(); + result = ExecCompareExprDispatcherForHybridSegment(input); } VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() { +PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment( + OffsetVector* input) { switch (expr_->op_type_) { case OpType::Equal: { - return ExecCompareExprDispatcher(std::equal_to<>{}); + return ExecCompareExprDispatcher(std::equal_to<>{}, input); } case OpType::NotEqual: { - return ExecCompareExprDispatcher(std::not_equal_to<>{}); + return ExecCompareExprDispatcher(std::not_equal_to<>{}, input); } case OpType::GreaterEqual: { - return ExecCompareExprDispatcher(std::greater_equal<>{}); + return ExecCompareExprDispatcher(std::greater_equal<>{}, input); } case OpType::GreaterThan: { - return ExecCompareExprDispatcher(std::greater<>{}); + return ExecCompareExprDispatcher(std::greater<>{}, input); } case OpType::LessEqual: { - return ExecCompareExprDispatcher(std::less_equal<>{}); + return ExecCompareExprDispatcher(std::less_equal<>{}, input); } case OpType::LessThan: { - return ExecCompareExprDispatcher(std::less<>{}); + return ExecCompareExprDispatcher(std::less<>{}, input); } case OpType::PrefixMatch: { return ExecCompareExprDispatcher( - milvus::query::MatchOp{}); + milvus::query::MatchOp{}, input); } // case OpType::PostfixMatch: { // } @@ -183,22 +256,23 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() { } VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() { +PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment( + OffsetVector* input) { switch (expr_->left_data_type_) { case DataType::BOOL: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT8: - return 
ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT16: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT32: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT64: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::FLOAT: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::DOUBLE: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); default: PanicInfo( DataTypeInvalid, @@ -209,22 +283,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() { template VectorPtr -PhyCompareFilterExpr::ExecCompareLeftType() { +PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) { switch (expr_->right_data_type_) { case DataType::BOOL: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT8: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT16: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT32: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT64: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::FLOAT: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::DOUBLE: - return ExecCompareRightType(); + return ExecCompareRightType(input); default: PanicInfo( DataTypeInvalid, @@ -235,8 +309,9 @@ PhyCompareFilterExpr::ExecCompareLeftType() { template VectorPtr -PhyCompareFilterExpr::ExecCompareRightType() { - auto real_batch_size = GetNextBatchSize(); +PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -248,39 +323,47 @@ PhyCompareFilterExpr::ExecCompareRightType() { valid_res.set(); auto expr_type = expr_->op_type_; - auto execute_sub_batch = [expr_type](const T* left, - const U* right, - const int size, - TargetBitmapView res) { + auto execute_sub_batch = [expr_type]( + const T* left, + const U* right, + const int32_t* offsets, + const int size, + TargetBitmapView res) { switch (expr_type) { case proto::plan::GreaterThan: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::GreaterEqual: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::LessThan: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::LessEqual: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::Equal: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc func; + func(left, right, size, res, offsets); break; } case proto::plan::NotEqual: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } default: @@ -290,8 +373,14 @@ PhyCompareFilterExpr::ExecCompareRightType() { expr_type)); } }; - int64_t processed_size = - ProcessBothDataChunks(execute_sub_batch, res, valid_res); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessBothDataByOffsets( + execute_sub_batch, input, res, valid_res); + } else { + processed_size = ProcessBothDataChunks( + execute_sub_batch, input, res, valid_res); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed 
rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/CompareExpr.h b/internal/core/src/exec/expression/CompareExpr.h index 25138e45ead19..7ab132b2338d0 100644 --- a/internal/core/src/exec/expression/CompareExpr.h +++ b/internal/core/src/exec/expression/CompareExpr.h @@ -30,36 +30,44 @@ namespace milvus { namespace exec { -template +template struct CompareElementFunc { void operator()(const T* left, const U* right, size_t size, - TargetBitmapView res) { - /* + TargetBitmapView res, + const int32_t* offsets = nullptr) { // This is the original code, kept here for the documentation purposes - for (int i = 0; i < size; ++i) { - if constexpr (op == proto::plan::OpType::Equal) { - res[i] = left[i] == right[i]; - } else if constexpr (op == proto::plan::OpType::NotEqual) { - res[i] = left[i] != right[i]; - } else if constexpr (op == proto::plan::OpType::GreaterThan) { - res[i] = left[i] > right[i]; - } else if constexpr (op == proto::plan::OpType::LessThan) { - res[i] = left[i] < right[i]; - } else if constexpr (op == proto::plan::OpType::GreaterEqual) { - res[i] = left[i] >= right[i]; - } else if constexpr (op == proto::plan::OpType::LessEqual) { - res[i] = left[i] <= right[i]; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format("unsupported op_type:{} for CompareElementFunc", - op)); + // also, used for iterative filter + if constexpr (filter_type == FilterType::random) { + for (int i = 0; i < size; ++i) { + auto offset = (offsets != nullptr) ? 
offsets[i] : i; + if constexpr (op == proto::plan::OpType::Equal) { + res[i] = left[offset] == right[offset]; + } else if constexpr (op == proto::plan::OpType::NotEqual) { + res[i] = left[offset] != right[offset]; + } else if constexpr (op == proto::plan::OpType::GreaterThan) { + res[i] = left[offset] > right[offset]; + } else if constexpr (op == proto::plan::OpType::LessThan) { + res[i] = left[offset] < right[offset]; + } else if constexpr (op == proto::plan::OpType::GreaterEqual) { + res[i] = left[offset] >= right[offset]; + } else if constexpr (op == proto::plan::OpType::LessEqual) { + res[i] = left[offset] <= right[offset]; + } else { + PanicInfo( + OpTypeInvalid, + fmt::format( + "unsupported op_type:{} for CompareElementFunc", + op)); + } } + return; } - */ if constexpr (op == proto::plan::OpType::Equal) { res.inplace_compare_column( @@ -138,22 +146,27 @@ class PhyCompareFilterExpr : public Expr { void MoveCursor() override { - if (segment_chunk_reader_.segment_->is_chunked()) { - segment_chunk_reader_.MoveCursorForMultipleChunk( - left_current_chunk_id_, - left_current_chunk_pos_, - left_field_, - left_num_chunk_, - batch_size_); - segment_chunk_reader_.MoveCursorForMultipleChunk( - right_current_chunk_id_, - right_current_chunk_pos_, - right_field_, - right_num_chunk_, - batch_size_); - } else { - segment_chunk_reader_.MoveCursorForSingleChunk( - current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_); + if (!has_offset_input_) { + if (segment_chunk_reader_.segment_->is_chunked()) { + segment_chunk_reader_.MoveCursorForMultipleChunk( + left_current_chunk_id_, + left_current_chunk_pos_, + left_field_, + left_num_chunk_, + batch_size_); + segment_chunk_reader_.MoveCursorForMultipleChunk( + right_current_chunk_id_, + right_current_chunk_pos_, + right_field_, + right_num_chunk_, + batch_size_); + } else { + segment_chunk_reader_.MoveCursorForSingleChunk( + current_chunk_id_, + current_chunk_pos_, + num_chunk_, + batch_size_); + } } } @@ -188,6 +201,7 @@ 
class PhyCompareFilterExpr : public Expr { template int64_t ProcessBothDataChunks(FUNC func, + OffsetVector* input, TargetBitmapView res, TargetBitmapView valid_res, ValTypes... values) { @@ -203,6 +217,97 @@ class PhyCompareFilterExpr : public Expr { } } + template + int64_t + ProcessBothDataByOffsets(FUNC func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... values) { + int64_t size = input->size(); + int64_t processed_size = 0; + const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); + if (segment_chunk_reader_.segment_->is_chunked() || + segment_chunk_reader_.segment_->type() == SegmentType::Growing) { + for (auto i = 0; i < size; ++i) { + auto offset = (*input)[i]; + auto get_chunk_id_and_offset = + [&](const FieldId field) -> std::pair { + if (segment_chunk_reader_.segment_->type() == + SegmentType::Growing) { + auto size_per_chunk = + segment_chunk_reader_.SizePerChunk(); + return {offset / size_per_chunk, + offset % size_per_chunk}; + } else { + return segment_chunk_reader_.segment_ + ->get_chunk_by_offset(field, offset); + } + }; + + auto [left_chunk_id, left_chunk_offset] = + get_chunk_id_and_offset(left_field_); + auto [right_chunk_id, right_chunk_offset] = + get_chunk_id_and_offset(right_field_); + + auto left_chunk = segment_chunk_reader_.segment_->chunk_data( + left_field_, left_chunk_id); + + auto right_chunk = + segment_chunk_reader_.segment_->chunk_data( + right_field_, right_chunk_id); + const T* left_data = left_chunk.data() + left_chunk_offset; + const U* right_data = right_chunk.data() + right_chunk_offset; + func.template operator()( + left_data, + right_data, + nullptr, + 1, + res + processed_size, + values...); + const bool* left_valid_data = left_chunk.valid_data(); + const bool* right_valid_data = right_chunk.valid_data(); + // mask with valid_data + if (left_valid_data && !left_valid_data[left_chunk_offset]) { + res[processed_size] = false; + valid_res[processed_size] = false; + 
continue; + } + if (right_valid_data && !right_valid_data[right_chunk_offset]) { + res[processed_size] = false; + valid_res[processed_size] = false; + } + processed_size++; + } + return processed_size; + } else { + auto left_chunk = + segment_chunk_reader_.segment_->chunk_data(left_field_, 0); + auto right_chunk = + segment_chunk_reader_.segment_->chunk_data(right_field_, 0); + const T* left_data = left_chunk.data(); + const U* right_data = right_chunk.data(); + func.template operator()( + left_data, right_data, input->data(), size, res, values...); + const bool* left_valid_data = left_chunk.valid_data(); + const bool* right_valid_data = right_chunk.valid_data(); + // mask with valid_data + for (int i = 0; i < size; ++i) { + if (left_valid_data && !left_valid_data[(*input)[i]]) { + res[i] = false; + valid_res[i] = false; + continue; + } + if (right_valid_data && !right_valid_data[(*input)[i]]) { + res[i] = false; + valid_res[i] = false; + } + } + processed_size += size; + return processed_size; + } + } + template int64_t ProcessBothDataChunksForSingleChunk(FUNC func, @@ -239,7 +344,12 @@ class PhyCompareFilterExpr : public Expr { const T* left_data = left_chunk.data() + data_pos; const U* right_data = right_chunk.data() + data_pos; - func(left_data, right_data, size, res + processed_size, values...); + func(left_data, + right_data, + nullptr, + size, + res + processed_size, + values...); const bool* left_valid_data = left_chunk.valid_data(); const bool* right_valid_data = right_chunk.valid_data(); // mask with valid_data @@ -307,7 +417,12 @@ class PhyCompareFilterExpr : public Expr { const T* left_data = left_chunk.data() + data_pos; const U* right_data = right_chunk.data() + data_pos; - func(left_data, right_data, size, res + processed_size, values...); + func(left_data, + right_data, + nullptr, + size, + res + processed_size, + values...); const bool* left_valid_data = left_chunk.valid_data(); const bool* right_valid_data = right_chunk.valid_data(); // mask with 
valid_data @@ -336,21 +451,21 @@ class PhyCompareFilterExpr : public Expr { template VectorPtr - ExecCompareExprDispatcher(OpType op); + ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr); VectorPtr - ExecCompareExprDispatcherForHybridSegment(); + ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr); VectorPtr - ExecCompareExprDispatcherForBothDataSegment(); + ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr); template VectorPtr - ExecCompareLeftType(); + ExecCompareLeftType(OffsetVector* input = nullptr); template VectorPtr - ExecCompareRightType(); + ExecCompareRightType(OffsetVector* input = nullptr); private: const FieldId left_field_; diff --git a/internal/core/src/exec/expression/ConjunctExpr.h b/internal/core/src/exec/expression/ConjunctExpr.h index de239bcb75516..a9de859bd4ef6 100644 --- a/internal/core/src/exec/expression/ConjunctExpr.h +++ b/internal/core/src/exec/expression/ConjunctExpr.h @@ -84,9 +84,21 @@ class PhyConjunctFilterExpr : public Expr { void MoveCursor() override { + if (!has_offset_input_) { + for (auto& input : inputs_) { + input->MoveCursor(); + } + } + } + + bool + SupportOffsetInput() override { for (auto& input : inputs_) { - input->MoveCursor(); + if (!(input->SupportOffsetInput())) { + return false; + } } + return true; } private: diff --git a/internal/core/src/exec/expression/EvalCtx.h b/internal/core/src/exec/expression/EvalCtx.h index c7cac949694ac..244dbff6cf284 100644 --- a/internal/core/src/exec/expression/EvalCtx.h +++ b/internal/core/src/exec/expression/EvalCtx.h @@ -28,17 +28,26 @@ namespace milvus { namespace exec { class ExprSet; + +using OffsetVector = FixedVector; class EvalCtx { public: - EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set, RowVector* row) - : exec_ctx_(exec_ctx), expr_set_(expr_set), row_(row) { + EvalCtx(ExecContext* exec_ctx, + ExprSet* expr_set, + OffsetVector* offset_input) + : exec_ctx_(exec_ctx), + expr_set_(expr_set), + 
offset_input_(offset_input) { assert(exec_ctx_ != nullptr); assert(expr_set_ != nullptr); - // assert(row_ != nullptr); + } + + explicit EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set) + : exec_ctx_(exec_ctx), expr_set_(expr_set), offset_input_(nullptr) { } explicit EvalCtx(ExecContext* exec_ctx) - : exec_ctx_(exec_ctx), expr_set_(nullptr), row_(nullptr) { + : exec_ctx_(exec_ctx), expr_set_(nullptr), offset_input_(nullptr) { } ExecContext* @@ -51,11 +60,22 @@ class EvalCtx { return exec_ctx_->get_query_config(); } + inline OffsetVector* + get_offset_input() { + return offset_input_; + } + + inline void + set_offset_input(OffsetVector* offset_input) { + offset_input_ = offset_input; + } + private: - ExecContext* exec_ctx_; - ExprSet* expr_set_; - RowVector* row_; - bool input_no_nulls_; + ExecContext* exec_ctx_ = nullptr; + ExprSet* expr_set_ = nullptr; + // we may accept offsets array as input and do expr filtering on these data + OffsetVector* offset_input_ = nullptr; + bool input_no_nulls_ = false; }; } // namespace exec diff --git a/internal/core/src/exec/expression/ExistsExpr.cpp b/internal/core/src/exec/expression/ExistsExpr.cpp index c73b4e007dc38..a4163e46aa0f7 100644 --- a/internal/core/src/exec/expression/ExistsExpr.cpp +++ b/internal/core/src/exec/expression/ExistsExpr.cpp @@ -22,13 +22,15 @@ namespace exec { void PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::JSON: { if (is_index_mode_) { PanicInfo(ExprInvalid, "exists expr for json index mode not supported"); } - result = EvalJsonExistsForDataSegment(); + result = EvalJsonExistsForDataSegment(input); break; } default: @@ -39,8 +41,9 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { } VectorPtr -PhyExistsFilterExpr::EvalJsonExistsForDataSegment() { - auto real_batch_size = GetNextBatchSize(); 
+PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -51,23 +54,40 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment() { valid_res.set(); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - auto execute_sub_batch = [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer) { + auto execute_sub_batch = + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer) { for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = data[i].exist(pointer); + res[i] = data[offset].exist(pointer); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/ExistsExpr.h b/internal/core/src/exec/expression/ExistsExpr.h index 2b24108531575..dc00f883c7400 100644 --- a/internal/core/src/exec/expression/ExistsExpr.h +++ b/internal/core/src/exec/expression/ExistsExpr.h @@ -57,7 +57,7 @@ class PhyExistsFilterExpr : public SegmentExpr { private: 
VectorPtr - EvalJsonExistsForDataSegment(); + EvalJsonExistsForDataSegment(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 7c49e6793debb..8314609ddce4f 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -31,6 +31,8 @@ namespace milvus { namespace exec { +enum class FilterType { sequential = 0, random = 1 }; + class Expr { public: Expr(DataType type, @@ -73,12 +75,26 @@ class Expr { MoveCursor() { } + void + SetHasOffsetInput(bool has_offset_input) { + has_offset_input_ = has_offset_input; + } + + virtual bool + SupportOffsetInput() { + return true; + } + protected: DataType type_; const std::vector> inputs_; std::string name_; // NOTE: unused std::shared_ptr vector_func_; + + // whether we have offset input and do expr filtering on these data + // default is false which means we will do expr filtering on the total segment data + bool has_offset_input_ = false; }; using ExprPtr = std::shared_ptr; @@ -204,13 +220,16 @@ class SegmentExpr : public Expr { void MoveCursor() override { - if (is_index_mode_) { - MoveCursorForIndex(); - if (segment_->HasFieldData(field_id_)) { + // when we specify input, do not maintain states + if (!has_offset_input_) { + if (is_index_mode_) { + MoveCursorForIndex(); + if (segment_->HasFieldData(field_id_)) { + MoveCursorForData(); + } + } else { MoveCursorForData(); } - } else { - MoveCursorForData(); } } @@ -275,6 +294,7 @@ class SegmentExpr : public Expr { // use valid_data to see if raw data is null func(views_info.first.data(), views_info.second.data(), + nullptr, need_size, res, valid_res, @@ -286,6 +306,253 @@ class SegmentExpr : public Expr { return need_size; } + // accept offsets array and process on the scalar data by offsets + // stateless! 
Just check and set bitset as result, does not need to move cursor + // used for processing raw data expr for sealed segments. + // now only used for std::string_view && json + // TODO: support more types + template + int64_t + ProcessDataByOffsetsForSealedSeg( + FUNC func, + std::function skip_func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... values) { + // For non_chunked sealed segment, only single chunk + Assert(num_data_chunk_ == 1); + + auto& skip_index = segment_->GetSkipIndex(); + auto [data_vec, valid_data] = + segment_->get_views_by_offsets(field_id_, 0, *input); + if (!skip_func || !skip_func(skip_index, field_id_, 0)) { + func(data_vec.data(), + valid_data.data(), + nullptr, + input->size(), + res, + valid_res, + values...); + } else { + ApplyValidData(valid_data.data(), res, valid_res, input->size()); + } + return input->size(); + } + + template + VectorPtr + ProcessIndexChunksByOffsets(FUNC func, + OffsetVector* input, + ValTypes... values) { + AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1"); + typedef std:: + conditional_t, std::string, T> + IndexInnerType; + using Index = index::ScalarIndex; + TargetBitmap valid_res(input->size()); + + const Index& index = + segment_->chunk_scalar_index(field_id_, 0); + auto* index_ptr = const_cast(&index); + auto valid_result = index_ptr->IsNotNull(); + for (auto i = 0; i < input->size(); ++i) { + valid_res[i] = valid_result[(*input)[i]]; + } + auto result = std::move(func.template operator()( + index_ptr, values..., input->data())); + return std::make_shared(std::move(result), + std::move(valid_res)); + } + + // when we have scalar index and index contains raw data, could go with index chunk by offsets + template + int64_t + ProcessIndexLookupByOffsets( + FUNC func, + std::function skip_func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... 
values) { + AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1"); + auto& skip_index = segment_->GetSkipIndex(); + + typedef std:: + conditional_t, std::string, T> + IndexInnerType; + using Index = index::ScalarIndex; + int64_t processed_size = 0; + const Index& index = + segment_->chunk_scalar_index(field_id_, 0); + auto* index_ptr = const_cast(&index); + auto valid_result = index_ptr->IsNotNull(); + auto batch_size = input->size(); + + if (!skip_func || !skip_func(skip_index, field_id_, 0)) { + for (auto i = 0; i < batch_size; ++i) { + auto offset = (*input)[i]; + auto raw = index_ptr->Reverse_Lookup(offset); + if (!raw.has_value()) { + res[i] = false; + continue; + } + T raw_data = raw.value(); + bool valid_data = valid_result[offset]; + func.template operator()(&raw_data, + &valid_data, + nullptr, + 1, + res + i, + valid_res + i, + values...); + } + } else { + for (auto i = 0; i < batch_size; ++i) { + auto offset = (*input)[i]; + res[i] = valid_res[i] = valid_result[offset]; + } + } + + return batch_size; + } + + // accept offsets array and process on the scalar data by offsets + // stateless! Just check and set bitset as result, does not need to move cursor + template + int64_t + ProcessDataByOffsets( + FUNC func, + std::function skip_func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... 
values) { + int64_t processed_size = 0; + + // index reverse lookup + if (is_index_mode_ && num_data_chunk_ == 0) { + return ProcessIndexLookupByOffsets( + func, skip_func, input, res, valid_res, values...); + } + + auto& skip_index = segment_->GetSkipIndex(); + + // raw data scan + // sealed segment + if (segment_->type() == SegmentType::Sealed) { + if (segment_->is_chunked()) { + if constexpr (std::is_same_v || + std::is_same_v) { + for (size_t i = 0; i < input->size(); ++i) { + int64_t offset = (*input)[i]; + auto [chunk_id, chunk_offset] = + segment_->get_chunk_by_offset(field_id_, offset); + auto [data_vec, valid_data] = + segment_->get_views_by_offsets( + field_id_, chunk_id, {int32_t(chunk_offset)}); + if (!skip_func || + !skip_func(skip_index, field_id_, chunk_id)) { + func.template operator()( + data_vec.data(), + valid_data.data(), + nullptr, + 1, + res + processed_size, + valid_res + processed_size, + values...); + } else { + res[processed_size] = valid_res[processed_size] = + (valid_data[0]); + } + processed_size++; + } + return input->size(); + } + for (size_t i = 0; i < input->size(); ++i) { + int64_t offset = (*input)[i]; + auto [chunk_id, chunk_offset] = + segment_->get_chunk_by_offset(field_id_, offset); + auto chunk = segment_->chunk_data(field_id_, chunk_id); + const T* data = chunk.data() + chunk_offset; + const bool* valid_data = chunk.valid_data(); + if (valid_data != nullptr) { + valid_data += chunk_offset; + } + if (!skip_func || + !skip_func(skip_index, field_id_, chunk_id)) { + func.template operator()( + data, + valid_data, + nullptr, + 1, + res + processed_size, + valid_res + processed_size, + values...); + } else { + ApplyValidData(valid_data, + res + processed_size, + valid_res + processed_size, + 1); + } + processed_size++; + } + return input->size(); + } else { + if constexpr (std::is_same_v || + std::is_same_v) { + return ProcessDataByOffsetsForSealedSeg( + func, skip_func, input, res, valid_res, values...); + } + auto chunk = 
segment_->chunk_data(field_id_, 0); + const T* data = chunk.data(); + const bool* valid_data = chunk.valid_data(); + if (!skip_func || !skip_func(skip_index, field_id_, 0)) { + func.template operator()(data, + valid_data, + input->data(), + input->size(), + res, + valid_res, + values...); + } else { + ApplyValidData(valid_data, res, valid_res, input->size()); + } + return input->size(); + } + } else { + // growing segment + for (size_t i = 0; i < input->size(); ++i) { + int64_t offset = (*input)[i]; + auto chunk_id = offset / size_per_chunk_; + auto chunk_offset = offset % size_per_chunk_; + auto chunk = segment_->chunk_data(field_id_, chunk_id); + const T* data = chunk.data() + chunk_offset; + const bool* valid_data = chunk.valid_data(); + if (valid_data != nullptr) { + valid_data += chunk_offset; + } + if (!skip_func || !skip_func(skip_index, field_id_, chunk_id)) { + func.template operator()( + data, + valid_data, + nullptr, + 1, + res + processed_size, + valid_res + processed_size, + values...); + } else { + ApplyValidData(valid_data, + res + processed_size, + valid_res + processed_size, + 1); + } + processed_size++; + } + } + return input->size(); + } + template int64_t ProcessDataChunksForSingleChunk( @@ -328,6 +595,7 @@ class SegmentExpr : public Expr { const T* data = chunk.data() + data_pos; func(data, valid_data, + nullptr, size, res + processed_size, valid_res + processed_size, @@ -384,12 +652,12 @@ class SegmentExpr : public Expr { if (segment_->type() == SegmentType::Sealed) { // first is the raw data, second is valid_data // use valid_data to see if raw data is null - auto fetched_data = segment_->get_batch_views( - field_id_, i, data_pos, size); - auto data_vec = fetched_data.first; - auto valid_data = fetched_data.second; + auto [data_vec, valid_data] = + segment_->get_batch_views( + field_id_, i, data_pos, size); func(data_vec.data(), valid_data.data(), + nullptr, size, res + processed_size, valid_res + processed_size, @@ -406,6 +674,7 @@ class 
SegmentExpr : public Expr { } func(data, valid_data, + nullptr, size, res + processed_size, valid_res + processed_size, @@ -451,13 +720,14 @@ class SegmentExpr : public Expr { FUNC func, std::function skip_func, TargetBitmapView res, + TargetBitmapView valid_res, ValTypes... values) { if (segment_->is_chunked()) { return ProcessDataChunksForMultipleChunk( - func, skip_func, res, values...); + func, skip_func, res, valid_res, values...); } else { return ProcessDataChunksForSingleChunk( - func, skip_func, res, values...); + func, skip_func, res, valid_res, values...); } } @@ -538,6 +808,51 @@ class SegmentExpr : public Expr { } } + template + TargetBitmap + ProcessChunksForValidByOffsets(bool use_index, const OffsetVector& input) { + typedef std:: + conditional_t, std::string, T> + IndexInnerType; + using Index = index::ScalarIndex; + auto batch_size = input.size(); + TargetBitmap valid_result(batch_size); + valid_result.set(); + + if (use_index) { + const Index& index = + segment_->chunk_scalar_index(field_id_, 0); + auto* index_ptr = const_cast(&index); + const auto& res = index_ptr->IsNotNull(); + for (auto i = 0; i < batch_size; ++i) { + valid_result[i] = res[input[i]]; + } + } else { + for (auto i = 0; i < batch_size; ++i) { + auto offset = input[i]; + auto [chunk_id, + chunk_offset] = [&]() -> std::pair { + if (segment_->type() == SegmentType::Growing) { + return {offset / size_per_chunk_, + offset % size_per_chunk_}; + } else if (segment_->is_chunked()) { + return segment_->get_chunk_by_offset(field_id_, offset); + } else { + return {0, offset}; + } + }(); + auto chunk = segment_->chunk_data(field_id_, chunk_id); + const bool* valid_data = chunk.valid_data(); + if (valid_data != nullptr) { + valid_result[i] = valid_data[chunk_offset]; + } else { + break; + } + } + } + return valid_result; + } + template TargetBitmap ProcessDataChunksForValid() { @@ -569,9 +884,9 @@ class SegmentExpr : public Expr { return valid_result; } valid_data += data_pos; - for (int i = 
0; i < size; i++) { - if (!valid_data[i]) { - valid_result[i + data_pos] = false; + for (int j = 0; j < size; j++) { + if (!valid_data[j]) { + valid_result[j + processed_size] = false; } } processed_size += size; diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index b21714b4c8b6b..3318a4822865f 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -23,22 +23,24 @@ namespace exec { void PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::ARRAY: { - if (is_index_mode_) { + if (is_index_mode_ && !has_offset_input_) { result = EvalArrayContainsForIndexSegment(); } else { - result = EvalJsonContainsForDataSegment(); + result = EvalJsonContainsForDataSegment(input); } break; } case DataType::JSON: { - if (is_index_mode_) { + if (is_index_mode_ && !has_offset_input_) { PanicInfo( ExprInvalid, "exists expr for json or array index mode not supported"); } - result = EvalJsonContainsForDataSegment(); + result = EvalJsonContainsForDataSegment(input); break; } default: @@ -49,7 +51,7 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { } VectorPtr -PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { +PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) { auto data_type = expr_->column_.data_type_; switch (expr_->op_) { case proto::plan::JSONContainsExpr_JSONOp_Contains: @@ -58,16 +60,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecArrayContains(); + return ExecArrayContains(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecArrayContains(); + return 
ExecArrayContains(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecArrayContains(); + return ExecArrayContains(input); } case proto::plan::GenericValue::kStringVal: { - return ExecArrayContains(); + return ExecArrayContains(input); } default: PanicInfo( @@ -79,19 +81,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kStringVal: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kArrayVal: { - return ExecJsonContainsArray(); + return ExecJsonContainsArray(input); } default: PanicInfo(DataTypeInvalid, @@ -99,7 +101,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { val_type); } } else { - return ExecJsonContainsWithDiffType(); + return ExecJsonContainsWithDiffType(input); } } } @@ -108,16 +110,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } case proto::plan::GenericValue::kStringVal: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } default: PanicInfo( @@ -129,19 +131,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { 
case proto::plan::GenericValue::kBoolVal: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kStringVal: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kArrayVal: { - return ExecJsonContainsAllArray(); + return ExecJsonContainsAllArray(input); } default: PanicInfo(DataTypeInvalid, @@ -149,7 +151,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { val_type); } } else { - return ExecJsonContainsAllWithDiffType(); + return ExecJsonContainsAllWithDiffType(input); } } } @@ -162,12 +164,13 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { template VectorPtr -PhyJsonContainsFilterExpr::ExecArrayContains() { +PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -184,12 +187,15 @@ PhyJsonContainsFilterExpr::ExecArrayContains() { for (auto const& element : expr_->vals_) { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::unordered_set& elements) { + auto execute_sub_batch = + []( + const milvus::ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::unordered_set& elements) { auto executor = [&](size_t i) { const auto& array = data[i]; for (int j = 0; j < array.length(); ++j) { @@ -200,16 +206,31 @@ PhyJsonContainsFilterExpr::ExecArrayContains() { return false; }; for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + elements); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -220,12 +241,13 @@ PhyJsonContainsFilterExpr::ExecArrayContains() { template VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContains() { +PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -241,13 +263,16 @@ PhyJsonContainsFilterExpr::ExecJsonContains() { for (auto const& element : expr_->vals_) { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::unordered_set& elements) { + auto execute_sub_batch = + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::unordered_set& elements) { auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -266,16 +291,35 @@ PhyJsonContainsFilterExpr::ExecJsonContains() { return false; }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -285,8 +329,9 @@ PhyJsonContainsFilterExpr::ExecJsonContains() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsArray() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -303,50 +348,71 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() { elements.emplace_back(GetValueFromProto(element)); } auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements) { - auto executor = [&](size_t i) -> bool { - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; - } - for (auto&& it : array) { - auto val = it.get_array(); - if (val.error()) { - continue; - } - std::vector< - simdjson::simdjson_result> - json_array; - json_array.reserve(val.count_elements()); - for (auto&& e : val) { - json_array.emplace_back(e); - } - for (auto const& element : elements) { - if (CompareTwoJsonArray(json_array, element)) { - return true; - } - } - } + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements) { + auto executor = [&](size_t i) -> bool { + auto doc = data[i].doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { return false; - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; + } + for (auto&& it : array) { + auto val = it.get_array(); + if (val.error()) { continue; } - res[i] = executor(i); + std::vector< + simdjson::simdjson_result> + json_array; + json_array.reserve(val.count_elements()); + for (auto&& e : val) { + json_array.emplace_back(e); + } + for (auto const& element : elements) { + if (CompareTwoJsonArray(json_array, element)) { + return true; + } + } } + return false; }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + 
offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -357,14 +423,15 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() { template VectorPtr -PhyJsonContainsFilterExpr::ExecArrayContainsAll() { +PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; AssertInfo(expr_->column_.nested_path_.size() == 0, "[ExecArrayContainsAll]nested path must be null"); - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -380,12 +447,15 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::unordered_set& elements) { + auto execute_sub_batch = + []( + const milvus::ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::unordered_set& elements) { auto executor = [&](size_t i) { std::unordered_set tmp_elements(elements); // Note: array can only be iterated once @@ -398,16 +468,31 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() { return tmp_elements.size() == 0; }; for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + elements); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -418,12 +503,13 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() { template VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsAll() { +PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -440,13 +526,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::unordered_set& elements) { + auto execute_sub_batch = + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::unordered_set& elements) { auto executor = [&](const size_t i) -> bool { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -468,16 +557,35 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() { return tmp_elements.size() == 0; }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -487,8 +595,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType( + OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -509,110 +619,126 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() { } auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements, - const std::unordered_set elements_index) { - auto executor = [&](size_t i) -> bool { - const auto& json = data[i]; - auto doc = json.doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; - } - std::unordered_set tmp_elements_index(elements_index); - for (auto&& it : array) { - int i = -1; - for (auto& element : elements) { - i++; - switch (element.val_case()) { - case proto::plan::GenericValue::kBoolVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.bool_val()) { - tmp_elements_index.erase(i); - } - break; + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements, + const std::unordered_set elements_index) { + auto executor = [&](size_t i) -> bool { + const auto& json = data[i]; + auto doc = json.doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { + return false; + } + std::unordered_set tmp_elements_index(elements_index); + for (auto&& it : array) { + int i = -1; + for (auto& element : elements) { + i++; + switch (element.val_case()) { + case proto::plan::GenericValue::kBoolVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.bool_val()) { + tmp_elements_index.erase(i); } - case proto::plan::GenericValue::kInt64Val: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.int64_val()) { - 
tmp_elements_index.erase(i); - } - break; + break; + } + case proto::plan::GenericValue::kInt64Val: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kFloatVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.float_val()) { - tmp_elements_index.erase(i); - } - break; + if (val.value() == element.int64_val()) { + tmp_elements_index.erase(i); } - case proto::plan::GenericValue::kStringVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.string_val()) { - tmp_elements_index.erase(i); - } - break; + break; + } + case proto::plan::GenericValue::kFloatVal: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kArrayVal: { - auto val = it.get_array(); - if (val.error()) { - continue; - } - if (CompareTwoJsonArray(val, - element.array_val())) { - tmp_elements_index.erase(i); - } - break; + if (val.value() == element.float_val()) { + tmp_elements_index.erase(i); } - default: - PanicInfo( - DataTypeInvalid, - fmt::format("unsupported data type {}", - element.val_case())); + break; } - if (tmp_elements_index.size() == 0) { - return true; + case proto::plan::GenericValue::kStringVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.string_val()) { + tmp_elements_index.erase(i); + } + break; } + case proto::plan::GenericValue::kArrayVal: { + auto val = it.get_array(); + if (val.error()) { + continue; + } + if (CompareTwoJsonArray(val, element.array_val())) { + tmp_elements_index.erase(i); + } + break; + } + default: + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", + element.val_case())); } if (tmp_elements_index.size() == 0) { return true; } } - return tmp_elements_index.size() == 0; - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = 
false; - continue; + if (tmp_elements_index.size() == 0) { + return true; } - res[i] = executor(i); } + return tmp_elements_index.size() == 0; }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - pointer, - elements, - elements_index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements, + elements_index); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements, + elements_index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -622,8 +748,9 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -641,54 +768,75 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() { elements.emplace_back(GetValueFromProto(element)); } auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements) { - auto executor = [&](const size_t i) { - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements) { + auto executor = [&](const size_t i) { + auto doc = data[i].doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { + return false; + } + std::unordered_set exist_elements_index; + for (auto&& it : array) { + auto val = it.get_array(); + if (val.error()) { + continue; } - std::unordered_set exist_elements_index; - for (auto&& it : array) { - auto val = it.get_array(); - if (val.error()) { - continue; - } - std::vector< - simdjson::simdjson_result> - json_array; - json_array.reserve(val.count_elements()); - for (auto&& e : val) { - json_array.emplace_back(e); - } - for (int index = 0; index < elements.size(); ++index) { - if (CompareTwoJsonArray(json_array, elements[index])) { - exist_elements_index.insert(index); - } - } - if (exist_elements_index.size() == elements.size()) { - return true; + std::vector< + simdjson::simdjson_result> + json_array; + json_array.reserve(val.count_elements()); + for (auto&& e : val) { + json_array.emplace_back(e); + } + for (int index = 0; index < elements.size(); ++index) { + if (CompareTwoJsonArray(json_array, elements[index])) { + exist_elements_index.insert(index); } } - return exist_elements_index.size() == 
elements.size(); - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; + if (exist_elements_index.size() == elements.size()) { + return true; } - res[i] = executor(i); } + return exist_elements_index.size() == elements.size(); }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -698,8 +846,9 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -721,96 +870,115 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() { } auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements) { - auto executor = [&](const size_t i) { - auto& json = data[i]; - auto doc = json.doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; - } - // Note: array can only be iterated once - for (auto&& it : array) { - for (auto const& element : elements) { - switch (element.val_case()) { - case proto::plan::GenericValue::kBoolVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.bool_val()) { - return true; - } - break; + []( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements) { + auto executor = [&](const size_t i) { + auto& json = data[i]; + auto doc = json.doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { + return false; + } + // Note: array can only be iterated once + for (auto&& it : array) { + for (auto const& element : elements) { + switch (element.val_case()) { + case proto::plan::GenericValue::kBoolVal: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kInt64Val: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.int64_val()) { - return true; - } - break; + if (val.value() == element.bool_val()) { + return true; } - case proto::plan::GenericValue::kFloatVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.float_val()) { - return true; - } - break; + 
break; + } + case proto::plan::GenericValue::kInt64Val: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kStringVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.string_val()) { - return true; - } - break; + if (val.value() == element.int64_val()) { + return true; } - case proto::plan::GenericValue::kArrayVal: { - auto val = it.get_array(); - if (val.error()) { - continue; - } - if (CompareTwoJsonArray(val, - element.array_val())) { - return true; - } - break; + break; + } + case proto::plan::GenericValue::kFloatVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.float_val()) { + return true; + } + break; + } + case proto::plan::GenericValue::kStringVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.string_val()) { + return true; } - default: - PanicInfo( - DataTypeInvalid, - fmt::format("unsupported data type {}", - element.val_case())); + break; } + case proto::plan::GenericValue::kArrayVal: { + auto val = it.get_array(); + if (val.error()) { + continue; + } + if (CompareTwoJsonArray(val, element.array_val())) { + return true; + } + break; + } + default: + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", + element.val_case())); } } - return false; - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = executor(i); } + return false; }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/JsonContainsExpr.h b/internal/core/src/exec/expression/JsonContainsExpr.h index a0cfdfdea0841..a0c8848cba188 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.h +++ b/internal/core/src/exec/expression/JsonContainsExpr.h @@ -50,35 +50,35 @@ class PhyJsonContainsFilterExpr : public SegmentExpr { private: VectorPtr - EvalJsonContainsForDataSegment(); + EvalJsonContainsForDataSegment(OffsetVector* input = nullptr); template VectorPtr - ExecJsonContains(); + ExecJsonContains(OffsetVector* input = nullptr); template VectorPtr - ExecArrayContains(); + ExecArrayContains(OffsetVector* input = nullptr); template VectorPtr - ExecJsonContainsAll(); + ExecJsonContainsAll(OffsetVector* input = nullptr); template VectorPtr - ExecArrayContainsAll(); + ExecArrayContainsAll(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsArray(); + ExecJsonContainsArray(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsAllArray(); + ExecJsonContainsAllArray(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsAllWithDiffType(); + ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsWithDiffType(); + ExecJsonContainsWithDiffType(OffsetVector* input = nullptr); 
VectorPtr EvalArrayContainsForIndexSegment(); diff --git a/internal/core/src/exec/expression/LogicalBinaryExpr.h b/internal/core/src/exec/expression/LogicalBinaryExpr.h index 43680772fbbf1..4db60df939388 100644 --- a/internal/core/src/exec/expression/LogicalBinaryExpr.h +++ b/internal/core/src/exec/expression/LogicalBinaryExpr.h @@ -75,8 +75,16 @@ class PhyLogicalBinaryExpr : public Expr { void MoveCursor() override { - inputs_[0]->MoveCursor(); - inputs_[1]->MoveCursor(); + if (!has_offset_input_) { + inputs_[0]->MoveCursor(); + inputs_[1]->MoveCursor(); + } + } + + bool + SupportOffsetInput() override { + return inputs_[0]->SupportOffsetInput() && + inputs_[1]->SupportOffsetInput(); } private: diff --git a/internal/core/src/exec/expression/LogicalUnaryExpr.h b/internal/core/src/exec/expression/LogicalUnaryExpr.h index da5a0e0c97213..712774567010d 100644 --- a/internal/core/src/exec/expression/LogicalUnaryExpr.h +++ b/internal/core/src/exec/expression/LogicalUnaryExpr.h @@ -41,7 +41,14 @@ class PhyLogicalUnaryExpr : public Expr { void MoveCursor() override { - inputs_[0]->MoveCursor(); + if (!has_offset_input_) { + inputs_[0]->MoveCursor(); + } + } + + bool + SupportOffsetInput() override { + return inputs_[0]->SupportOffsetInput(); } private: diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp index 68d36d0583238..a25b06c206026 100644 --- a/internal/core/src/exec/expression/TermExpr.cpp +++ b/internal/core/src/exec/expression/TermExpr.cpp @@ -24,37 +24,39 @@ namespace exec { void PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { - if (is_pk_field_) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); + if (is_pk_field_ && !has_offset_input_) { result = ExecPkTermImpl(); return; } switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT8: { - result = 
ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT16: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT32: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT64: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -62,30 +64,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); } else { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); } break; } case DataType::JSON: { if (expr_->vals_.size() == 0) { - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; } auto type = expr_->vals_[0].val_case(); switch (type) { case proto::plan::GenericValue::ValCase::kBoolVal: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; case proto::plan::GenericValue::ValCase::kStringVal: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; default: PanicInfo(DataTypeInvalid, "unknown data type: {}", type); @@ -95,26 +97,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { case DataType::ARRAY: { if (expr_->vals_.size() == 0) { SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = 
ExecVisitorImplTemplateArray(input); break; } auto type = expr_->vals_[0].val_case(); switch (type) { case proto::plan::GenericValue::ValCase::kBoolVal: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; case proto::plan::GenericValue::ValCase::kStringVal: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; default: PanicInfo(DataTypeInvalid, "unknown data type: {}", type); @@ -230,31 +232,32 @@ PhyTermFilterExpr::ExecPkTermImpl() { template VectorPtr -PhyTermFilterExpr::ExecVisitorImplTemplateJson() { +PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) { if (expr_->is_in_field_) { - return ExecTermJsonVariableInField(); + return ExecTermJsonVariableInField(input); } else { - return ExecTermJsonFieldInVariable(); + return ExecTermJsonFieldInVariable(input); } } template VectorPtr -PhyTermFilterExpr::ExecVisitorImplTemplateArray() { +PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) { if (expr_->is_in_field_) { - return ExecTermArrayVariableInField(); + return ExecTermArrayVariableInField(input); } else { - return ExecTermArrayFieldInVariable(); + return ExecTermArrayFieldInVariable(input); } } template VectorPtr -PhyTermFilterExpr::ExecTermArrayVariableInField() { +PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -269,15 +272,18 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() { "element length in json array must be one"); ValueType target_val = GetValueFromProto(expr_->vals_[0]); - auto execute_sub_batch = [](const ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const ValueType& target_val) { - auto executor = [&](size_t idx) { - for (int i = 0; i < data[idx].length(); i++) { - auto val = data[idx].template get_data(i); + auto execute_sub_batch = + []( + const ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const ValueType& target_val) { + auto executor = [&](size_t offset) { + for (int i = 0; i < data[offset].length(); i++) { + auto val = data[offset].template get_data(i); if (val == target_val) { return true; } @@ -285,16 +291,31 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() { return false; }; for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + target_val); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -305,12 +326,13 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() { template VectorPtr -PhyTermFilterExpr::ExecTermArrayFieldInVariable() { +PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -336,29 +358,52 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() { return res_vec; } - auto execute_sub_batch = [](const ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - int index, - const std::unordered_set& term_set) { + auto execute_sub_batch = + []( + const ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + int index, + const std::unordered_set& term_set) { for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - if (term_set.empty() || index >= data[i].length()) { + if (term_set.empty() || index >= data[offset].length()) { res[i] = false; continue; } - auto value = data[i].get_data(index); + auto value = data[offset].get_data(index); res[i] = term_set.find(ValueType(value)) != term_set.end(); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, index, term_set); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + index, + term_set); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + index, + term_set); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -369,11 +414,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() { template VectorPtr -PhyTermFilterExpr::ExecTermJsonVariableInField() { +PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -389,13 +435,16 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() { ValueType val = GetValueFromProto(expr_->vals_[0]); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - auto execute_sub_batch = [](const Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string pointer, - const ValueType& target_val) { + auto execute_sub_batch = + []( + const Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string pointer, + const ValueType& target_val) { auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -413,15 +462,30 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() { return false; }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + val); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -432,11 +496,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() { template VectorPtr -PhyTermFilterExpr::ExecTermJsonFieldInVariable() { +PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -459,13 +524,16 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { return res_vec; } - auto execute_sub_batch = [](const Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string pointer, - const std::unordered_set& terms) { + auto execute_sub_batch = + []( + const Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string pointer, + const std::unordered_set& terms) { auto executor = [&](size_t i) { auto x = data[i].template at(pointer); if (x.error()) { @@ -485,7 +553,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { return terms.find(ValueType(x.value())) != terms.end(); }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -493,11 +565,26 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, term_set); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + term_set); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + term_set); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -508,17 +595,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { template VectorPtr -PhyTermFilterExpr::ExecVisitorImpl() { - if (is_index_mode_) { - return ExecVisitorImplForIndex(); +PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) { + if (is_index_mode_ && !has_offset_input_) { + return ExecVisitorImplForIndex(input); } else { - return ExecVisitorImplForData(); + return ExecVisitorImplForData(input); } } template VectorPtr -PhyTermFilterExpr::ExecVisitorImplForIndex() { +PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) { typedef std:: conditional_t, std::string, T> IndexInnerType; @@ -553,7 +640,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() { template <> VectorPtr -PhyTermFilterExpr::ExecVisitorImplForIndex() { +PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) { using Index = index::ScalarIndex; auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { @@ -575,8 +662,9 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() { template VectorPtr -PhyTermFilterExpr::ExecVisitorImplForData() { - auto real_batch_size = GetNextBatchSize(); 
+PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -597,23 +685,40 @@ PhyTermFilterExpr::ExecVisitorImplForData() { } } std::unordered_set vals_set(vals.begin(), vals.end()); - auto execute_sub_batch = [](const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::unordered_set& vals) { + auto execute_sub_batch = + []( + const T* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::unordered_set& vals) { TermElementFuncSet func; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = func(vals, data[i]); + res[i] = func(vals, data[offset]); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + vals_set); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/TermExpr.h b/internal/core/src/exec/expression/TermExpr.h index a816c6c9c6153..19f03b131b9c3 100644 --- a/internal/core/src/exec/expression/TermExpr.h +++ b/internal/core/src/exec/expression/TermExpr.h @@ -83,39 +83,39 @@ class PhyTermFilterExpr : public SegmentExpr { template VectorPtr - ExecVisitorImpl(); + 
ExecVisitorImpl(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplForIndex(); + ExecVisitorImplForIndex(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplForData(); + ExecVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplTemplateJson(); + ExecVisitorImplTemplateJson(OffsetVector* input = nullptr); template VectorPtr - ExecTermJsonVariableInField(); + ExecTermJsonVariableInField(OffsetVector* input = nullptr); template VectorPtr - ExecTermJsonFieldInVariable(); + ExecTermJsonFieldInVariable(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplTemplateArray(); + ExecVisitorImplTemplateArray(OffsetVector* input = nullptr); template VectorPtr - ExecTermArrayVariableInField(); + ExecTermArrayVariableInField(OffsetVector* input = nullptr); template VectorPtr - ExecTermArrayFieldInVariable(); + ExecTermArrayFieldInVariable(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index a6d27393e7e44..dfcbfe7b12058 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -121,7 +121,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex< case DataType::FLOAT: case DataType::DOUBLE: { // not accurate on floating point number, rollback to bruteforce. 
- return ExecRangeVisitorImplArray(); + return ExecRangeVisitorImplArray( + nullptr); } case DataType::VARCHAR: { if (segment_->type() == SegmentType::Growing) { @@ -146,33 +147,35 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex< void PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT8: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT16: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT32: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT64: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -180,9 +183,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } else { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } break; } @@ -190,19 +193,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { auto val_type = expr_->val_.val_case(); switch (val_type) { case proto::plan::GenericValue::ValCase::kBoolVal: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case 
proto::plan::GenericValue::ValCase::kFloatVal: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kStringVal: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kArrayVal: - result = ExecRangeVisitorImplJson(); + result = + ExecRangeVisitorImplJson(input); break; default: PanicInfo( @@ -215,27 +219,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (val_type) { case proto::plan::GenericValue::ValCase::kBoolVal: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kStringVal: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kArrayVal: - if (CanUseIndexForArray()) { + if (!has_offset_input_ && + CanUseIndexForArray()) { result = ExecRangeVisitorImplArrayForIndex< proto::plan::Array>(); } else { - result = - ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray( + input); } break; default: @@ -253,11 +258,12 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -273,51 +279,120 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() { if (expr_->column_.nested_path_.size() > 0) { index = std::stoi(expr_->column_.nested_path_[0]); } - auto execute_sub_batch = [op_type](const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val, - int index) { + auto execute_sub_batch = [op_type]( + const milvus::ArrayView* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val, + int index) { switch (op_type) { case proto::plan::GreaterThan: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::GreaterEqual: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::LessThan: { - UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::LessEqual: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::Equal: { - UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::NotEqual: { - UnaryElementFuncForArray func; - 
func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::PrefixMatch: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::Match: { @@ -332,8 +407,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() { op_type)); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + val, + index); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -454,12 +541,13 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -473,46 +561,53 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { auto op_type = expr_->op_type_; auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); -#define UnaryRangeJSONCompare(cmp) \ - do { \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = !x.error() && (cmp); \ - break; \ - } \ - res[i] = false; \ - break; \ - } \ - res[i] = (cmp); \ +#define UnaryRangeJSONCompare(cmp) \ + do { \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = !x.error() && (cmp); \ + break; \ + } \ + res[i] = false; \ + break; \ + } \ + res[i] = (cmp); \ } while (false) -#define UnaryRangeJSONCompareNotEqual(cmp) \ - do { \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = x.error() || (cmp); \ - break; \ - } \ - res[i] = true; \ - break; \ - } \ - res[i] = (cmp); \ +#define UnaryRangeJSONCompareNotEqual(cmp) \ + do { \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = x.error() || (cmp); \ + break; \ + } \ + res[i] = true; \ + break; \ + } \ + res[i] = (cmp); \ } while (false) - auto execute_sub_batch = [op_type, pointer](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ExprValueType val) { + auto execute_sub_batch = + [ op_type, pointer ]( + const milvus::Json* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ExprValueType val) { switch (op_type) { case proto::plan::GreaterThan: { for (size_t i = 
0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -526,7 +621,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::GreaterEqual: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -540,7 +639,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::LessThan: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -554,7 +657,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::LessEqual: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -568,7 +675,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::Equal: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -588,7 +699,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::NotEqual: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -608,7 +723,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::PrefixMatch: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -626,7 +745,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { auto regex_pattern = translator(val); RegexMatcher matcher(regex_pattern); for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -646,8 +769,15 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { op_type)); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets( + execute_sub_batch, std::nullptr_t{}, input, res, valid_res, val); + + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -658,15 +788,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) { if (expr_->op_type_ == proto::plan::OpType::TextMatch) { + if (has_offset_input_) { + PanicInfo( + OpTypeInvalid, + fmt::format("text match does not support iterative filter")); + } return ExecTextMatch(); } - if (CanUseIndex()) { + if (CanUseIndex() && !has_offset_input_) { return ExecRangeVisitorImplForIndex(); } else { - return ExecRangeVisitorImplForData(); + return ExecRangeVisitorImplForData(input); } } @@ -749,17 +884,24 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { template ColumnVectorPtr -PhyUnaryRangeFilterExpr::PreCheckOverflow() { +PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) { if constexpr (std::is_integral_v && !std::is_same_v) { int64_t val = GetValueFromProto(expr_->val_); if (milvus::query::out_of_range(val)) { - int64_t batch_size = - overflow_check_pos_ + batch_size_ >= active_count_ - ? 
active_count_ - overflow_check_pos_ - : batch_size_; - overflow_check_pos_ += batch_size; - auto valid = ProcessChunksForValid(CanUseIndex()); + int64_t batch_size; + if (input != nullptr) { + batch_size = input->size(); + } else { + batch_size = overflow_check_pos_ + batch_size_ >= active_count_ + ? active_count_ - overflow_check_pos_ + : batch_size_; + overflow_check_pos_ += batch_size; + } + auto valid = (input != nullptr) + ? ProcessChunksForValidByOffsets( + CanUseIndex(), *input) + : ProcessChunksForValid(CanUseIndex()); auto res_vec = std::make_shared( TargetBitmap(batch_size), std::move(valid)); TargetBitmapView res(res_vec->GetRawData(), batch_size); @@ -805,18 +947,20 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { typedef std:: conditional_t, std::string, T> IndexInnerType; - if (auto res = PreCheckOverflow()) { + if (auto res = PreCheckOverflow(input)) { return res; } - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } + IndexInnerType val = GetValueFromProto(expr_->val_); auto res_vec = std::make_shared( TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); @@ -824,51 +968,56 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); auto expr_type = expr_->op_type_; - auto execute_sub_batch = [expr_type](const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - IndexInnerType val) { + + auto execute_sub_batch = [expr_type]( + const T* data, + const bool* valid_data, + const int32_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + IndexInnerType val) { switch (expr_type) { case proto::plan::GreaterThan: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::GreaterEqual: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc + func; + func(data, size, val, res, offsets); break; } case proto::plan::LessThan: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::LessEqual: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::Equal: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::NotEqual: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::PrefixMatch: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::Match: { - 
UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } default: @@ -882,20 +1031,32 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() { // but to mask res with valid_data after the batch operation. if (valid_data != nullptr) { for (int i = 0; i < size; i++) { - if (!valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? offsets[i] : i; + } + if (!valid_data[offset]) { res[i] = valid_res[i] = false; } } } }; + auto skip_index_func = [expr_type, val](const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) { return skip_index.CanSkipUnaryRange( field_id, chunk_id, expr_type, val); }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, skip_index_func, res, valid_res, val); + + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets( + execute_sub_batch, skip_index_func, input, res, valid_res, val); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, skip_index_func, res, valid_res, val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}, related params[active_count:{}, " diff --git a/internal/core/src/exec/expression/UnaryExpr.h b/internal/core/src/exec/expression/UnaryExpr.h index f9a0407a63245..f47c2f299d474 100644 --- a/internal/core/src/exec/expression/UnaryExpr.h +++ b/internal/core/src/exec/expression/UnaryExpr.h @@ -33,7 +33,7 @@ namespace milvus { namespace exec { -template +template struct UnaryElementFuncForMatch { typedef std:: conditional_t, std::string, T> @@ -43,58 +43,68 @@ struct UnaryElementFuncForMatch { operator()(const T* src, size_t size, IndexInnerType val, - TargetBitmapView res) { + TargetBitmapView res, + int64_t* offsets = nullptr) { PatternMatchTranslator translator; auto regex_pattern = translator(val); RegexMatcher matcher(regex_pattern); for (int i = 0; i < 
size; ++i) { - res[i] = matcher(src[i]); + if constexpr (filter_type == FilterType::random) { + res[i] = matcher(src[offsets ? offsets[i] : i]); + } else { + res[i] = matcher(src[i]); + } } } }; -template +template struct UnaryElementFunc { typedef std:: conditional_t, std::string, T> IndexInnerType; + void operator()(const T* src, size_t size, IndexInnerType val, - TargetBitmapView res) { + TargetBitmapView res, + const int32_t* offsets = nullptr) { if constexpr (op == proto::plan::OpType::Match) { - UnaryElementFuncForMatch func; + UnaryElementFuncForMatch func; func(src, size, val, res); return; } - /* // This is the original code, which is kept for the documentation purposes - for (int i = 0; i < size; ++i) { - if constexpr (op == proto::plan::OpType::Equal) { - res[i] = src[i] == val; - } else if constexpr (op == proto::plan::OpType::NotEqual) { - res[i] = src[i] != val; - } else if constexpr (op == proto::plan::OpType::GreaterThan) { - res[i] = src[i] > val; - } else if constexpr (op == proto::plan::OpType::LessThan) { - res[i] = src[i] < val; - } else if constexpr (op == proto::plan::OpType::GreaterEqual) { - res[i] = src[i] >= val; - } else if constexpr (op == proto::plan::OpType::LessEqual) { - res[i] = src[i] <= val; - } else if constexpr (op == proto::plan::OpType::PrefixMatch) { - res[i] = milvus::query::Match( - src[i], val, proto::plan::OpType::PrefixMatch); - } else { - PanicInfo( - OpTypeInvalid, - fmt::format("unsupported op_type:{} for UnaryElementFunc", - op)); + // also, for iterative filter + if constexpr (filter_type == FilterType::random) { + for (int i = 0; i < size; ++i) { + auto offset = (offsets != nullptr) ? 
offsets[i] : i; + if constexpr (op == proto::plan::OpType::Equal) { + res[i] = src[offset] == val; + } else if constexpr (op == proto::plan::OpType::NotEqual) { + res[i] = src[offset] != val; + } else if constexpr (op == proto::plan::OpType::GreaterThan) { + res[i] = src[offset] > val; + } else if constexpr (op == proto::plan::OpType::LessThan) { + res[i] = src[offset] < val; + } else if constexpr (op == proto::plan::OpType::GreaterEqual) { + res[i] = src[offset] >= val; + } else if constexpr (op == proto::plan::OpType::LessEqual) { + res[i] = src[offset] <= val; + } else if constexpr (op == proto::plan::OpType::PrefixMatch) { + res[i] = milvus::query::Match( + src[offset], val, proto::plan::OpType::PrefixMatch); + } else { + PanicInfo( + OpTypeInvalid, + fmt::format( + "unsupported op_type:{} for UnaryElementFunc", op)); + } } + return; } - */ if constexpr (op == proto::plan::OpType::PrefixMatch) { for (int i = 0; i < size; ++i) { @@ -141,7 +151,7 @@ struct UnaryElementFunc { } \ } while (false) -template +template struct UnaryElementFuncForArray { using GetType = std::conditional_t, std::string_view, @@ -153,32 +163,39 @@ struct UnaryElementFuncForArray { ValueType val, int index, TargetBitmapView res, - TargetBitmapView valid_res) { + TargetBitmapView valid_res, + const int32_t* offsets = nullptr) { for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::random) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } if constexpr (op == proto::plan::OpType::Equal) { if constexpr (std::is_same_v) { - res[i] = src[i].is_same_array(val); + res[i] = src[offset].is_same_array(val); } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto array_data = src[i].template get_data(index); + auto array_data = + src[offset].template get_data(index); res[i] = array_data == val; } } else if constexpr (op == proto::plan::OpType::NotEqual) { if constexpr (std::is_same_v) { - res[i] = !src[i].is_same_array(val); + res[i] = !src[offset].is_same_array(val); } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto array_data = src[i].template get_data(index); + auto array_data = + src[offset].template get_data(index); res[i] = array_data != val; } } else if constexpr (op == proto::plan::OpType::GreaterThan) { @@ -313,10 +330,18 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { void Eval(EvalCtx& context, VectorPtr& result) override; + bool + SupportOffsetInput() override { + if (expr_->op_type_ == proto::plan::OpType::TextMatch) { + return false; + } + return true; + } + private: template VectorPtr - ExecRangeVisitorImpl(); + ExecRangeVisitorImpl(OffsetVector* input = nullptr); template VectorPtr @@ -324,15 +349,15 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { template VectorPtr - ExecRangeVisitorImplForData(); + ExecRangeVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplJson(); + ExecRangeVisitorImplJson(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplArray(); + ExecRangeVisitorImplArray(OffsetVector* input = nullptr); template VectorPtr @@ -345,7 +370,7 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { // Check overflow and cache result for performace template ColumnVectorPtr 
- PreCheckOverflow(); + PreCheckOverflow(OffsetVector* input = nullptr); template bool diff --git a/internal/core/src/exec/expression/ValueExpr.cpp b/internal/core/src/exec/expression/ValueExpr.cpp index 80330f7f15798..4ebec42903e48 100644 --- a/internal/core/src/exec/expression/ValueExpr.cpp +++ b/internal/core/src/exec/expression/ValueExpr.cpp @@ -22,9 +22,13 @@ namespace exec { void PhyValueExpr::Eval(EvalCtx& context, VectorPtr& result) { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); + int64_t real_batch_size = has_offset_input_ + ? input->size() + : (current_pos_ + batch_size_ >= active_count_ + ? active_count_ - current_pos_ + : batch_size_); if (real_batch_size == 0) { result = nullptr; diff --git a/internal/core/src/exec/expression/ValueExpr.h b/internal/core/src/exec/expression/ValueExpr.h index 044f46ac391e3..b2ccace22397c 100644 --- a/internal/core/src/exec/expression/ValueExpr.h +++ b/internal/core/src/exec/expression/ValueExpr.h @@ -49,11 +49,14 @@ class PhyValueExpr : public Expr { void MoveCursor() override { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + if (!has_offset_input_) { + int64_t real_batch_size = + current_pos_ + batch_size_ >= active_count_ + ? 
active_count_ - current_pos_ + : batch_size_; - current_pos_ += real_batch_size; + current_pos_ += real_batch_size; + } } private: diff --git a/internal/core/src/exec/operator/FilterBitsNode.cpp b/internal/core/src/exec/operator/FilterBitsNode.cpp index 72148e347c69e..14119cf525494 100644 --- a/internal/core/src/exec/operator/FilterBitsNode.cpp +++ b/internal/core/src/exec/operator/FilterBitsNode.cpp @@ -64,8 +64,7 @@ PhyFilterBitsNode::GetOutput() { std::chrono::high_resolution_clock::time_point scalar_start = std::chrono::high_resolution_clock::now(); - EvalCtx eval_ctx( - operator_context_->get_exec_context(), exprs_.get(), input_.get()); + EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get()); TargetBitmap bitset; TargetBitmap valid_bitset; diff --git a/internal/core/src/exec/operator/IterativeFilterNode.cpp b/internal/core/src/exec/operator/IterativeFilterNode.cpp new file mode 100644 index 0000000000000..27edeab3a4101 --- /dev/null +++ b/internal/core/src/exec/operator/IterativeFilterNode.cpp @@ -0,0 +1,273 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "IterativeFilterNode.h"
+
+namespace milvus {
+namespace exec {
+// Builds the expression set for the filter predicate and records whether
+// every expression reports SupportOffsetInput(). When one does not,
+// GetOutput() first evaluates the predicate over all active rows.
+PhyIterativeFilterNode::PhyIterativeFilterNode(
+    int32_t operator_id,
+    DriverContext* driverctx,
+    const std::shared_ptr<const plan::FilterNode>& filter)
+    : Operator(driverctx,
+               filter->output_type(),
+               operator_id,
+               filter->id(),
+               "PhyIterativeFilterNode") {
+    ExecContext* exec_context = operator_context_->get_exec_context();
+    query_context_ = exec_context->get_query_context();
+    std::vector<expr::TypedExprPtr> filters;
+    filters.emplace_back(filter->filter());
+    exprs_ = std::make_unique<ExprSet>(filters, exec_context);
+    const auto& exprs = exprs_->exprs();
+    for (const auto& expr : exprs) {
+        is_native_supported_ =
+            (is_native_supported_ && (expr->SupportOffsetInput()));
+    }
+    need_process_rows_ = query_context_->get_active_count();
+    num_processed_rows_ = 0;
+}
+
+// Takes ownership of the upstream row vector; GetOutput() hands it back.
+void
+PhyIterativeFilterNode::AddInput(RowVectorPtr& input) {
+    input_ = std::move(input);
+}
+
+bool
+PhyIterativeFilterNode::IsFinished() {
+    return is_finished_;
+}
+
+// Binary search over distances[lo, hi), which must already be ordered
+// best-first (descending when large_is_better, ascending otherwise).
+// Returns the absolute index at which `dist` has to be inserted to keep that
+// order; entries equal to `dist` stay in front of the returned index.
+template <bool large_is_better>
+inline size_t
+find_binsert_position(const std::vector<float>& distances,
+                      size_t lo,
+                      size_t hi,
+                      float dist) {
+    while (lo < hi) {
+        size_t mid = lo + ((hi - lo) >> 1);
+        if constexpr (large_is_better) {
+            if (distances[mid] < dist) {
+                hi = mid;
+            } else {
+                lo = mid + 1;
+            }
+        } else {
+            if (distances[mid] > dist) {
+                hi = mid;
+            } else {
+                lo = mid + 1;
+            }
+        }
+    }
+    return lo;
+}
+
+// Inserts candidate `i` (offsets[i], distances[i]) into the result window of
+// query `nq_index`, keeping the window ordered best-first, and increments
+// `topk`, the number of filled slots of that window.
+//
+// The window starts at nq_index * unity_topk inside
+// search_result.distances_ / seg_offsets_. The caller must guarantee
+// topk < unity_topk so that the shifted tail stays inside the window.
+inline void
+insert_helper(milvus::SearchResult& search_result,
+              int& topk,
+              const bool large_is_better,
+              const FixedVector<float>& distances,
+              const OffsetVector& offsets,
+              const int64_t nq_index,
+              const int64_t unity_topk,
+              const int i) {
+    // find_binsert_position returns an absolute index, so the bounds check
+    // and the shift length must be computed from the absolute end of the
+    // filled part as well. Using the per-query `topk` here would never shift
+    // anything for nq_index > 0 and would overwrite an existing entry.
+    const auto window_begin = static_cast<size_t>(nq_index * unity_topk);
+    const auto filled_end = window_begin + static_cast<size_t>(topk);
+    const size_t pos =
+        large_is_better
+            ? find_binsert_position<true>(search_result.distances_,
+                                          window_begin,
+                                          filled_end,
+                                          distances[i])
+            : find_binsert_position<false>(search_result.distances_,
+                                           window_begin,
+                                           filled_end,
+                                           distances[i]);
+    if (filled_end > pos) {
+        // make room by moving [pos, filled_end) one slot to the right
+        const size_t shifted = filled_end - pos;
+        std::memmove(&search_result.distances_[pos + 1],
+                     &search_result.distances_[pos],
+                     shifted * sizeof(search_result.distances_[0]));
+        std::memmove(&search_result.seg_offsets_[pos + 1],
+                     &search_result.seg_offsets_[pos],
+                     shifted * sizeof(search_result.seg_offsets_[0]));
+    }
+    search_result.seg_offsets_[pos] = offsets[i];
+    search_result.distances_[pos] = distances[i];
+    ++topk;
+}
+
+// Drains the vector iterators stored in the query's SearchResult and keeps,
+// for each query, up to unity_topK_ candidates that pass the filter, ordered
+// best-first in seg_offsets_ / distances_. The updated SearchResult is
+// written back to the query context and the input row vector is returned.
+//
+// Returns nullptr when the node is already finished, more input is still
+// expected, or no input was added.
+RowVectorPtr
+PhyIterativeFilterNode::GetOutput() {
+    if (is_finished_ || !no_more_input_) {
+        return nullptr;
+    }
+
+    // NOTE(review): DeferLambda presumably runs the lambda at scope exit, so
+    // the node is marked finished on every return path below -- confirm.
+    DeferLambda([&]() { is_finished_ = true; });
+
+    if (input_ == nullptr) {
+        return nullptr;
+    }
+
+    std::chrono::high_resolution_clock::time_point scalar_start =
+        std::chrono::high_resolution_clock::now();
+
+    milvus::SearchResult search_result = query_context_->get_search_result();
+    int64_t nq = search_result.total_nq_;
+    int64_t unity_topk = search_result.unity_topK_;
+    knowhere::MetricType metric_type = query_context_->get_metric_type();
+    bool large_is_better = PositivelyRelated(metric_type);
+    TargetBitmap bitset;
+    // get bitset of whole segment first
+    if (!is_native_supported_) {
+        EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get());
+
+        TargetBitmap valid_bitset;
+        while (num_processed_rows_ < need_process_rows_) {
+            exprs_->Eval(0, 1, true, eval_ctx, results_);
+
+            AssertInfo(
+                results_.size() == 1 && results_[0] != nullptr,
+                "PhyIterativeFilterNode result size should be size one and not "
+                "be nullptr");
+
+            if (auto col_vec =
+                    std::dynamic_pointer_cast<ColumnVector>(results_[0])) {
+                if (col_vec->IsBitmap()) {
+                    auto col_vec_size = col_vec->size();
+                    TargetBitmapView view(col_vec->GetRawData(), col_vec_size);
+                    bitset.append(view);
+                    TargetBitmapView valid_view(col_vec->GetValidRawData(),
+                                                col_vec_size);
+                    valid_bitset.append(valid_view);
+                    num_processed_rows_ += col_vec_size;
+                } else {
+                    PanicInfo(ExprInvalid,
+                              "PhyIterativeFilterNode result should be bitmap");
+                }
+            } else {
+                PanicInfo(
+                    ExprInvalid,
+                    "PhyIterativeFilterNode result should be ColumnVector");
+            }
+        }
+        Assert(bitset.size() == need_process_rows_);
+        Assert(valid_bitset.size() == need_process_rows_);
+    }
+    if (search_result.vector_iterators_.has_value()) {
+        AssertInfo(search_result.vector_iterators_.value().size() ==
+                       search_result.total_nq_,
+                   "Vector Iterators' count must be equal to total_nq_, Check "
+                   "your code");
+        int nq_index = 0;
+
+        // comparison, not assignment: `nq = size()` would overwrite nq and
+        // fail the assertion for an empty iterator list
+        AssertInfo(nq == search_result.vector_iterators_.value().size(),
+                   "nq and iterator not equal size");
+        search_result.seg_offsets_.resize(nq * unity_topk, INVALID_SEG_OFFSET);
+        search_result.distances_.resize(nq * unity_topk);
+        for (auto& iterator : search_result.vector_iterators_.value()) {
+            EvalCtx eval_ctx(operator_context_->get_exec_context(),
+                             exprs_.get());
+            int topk = 0;
+            while (iterator->HasNext() && topk < unity_topk) {
+                OffsetVector offsets;
+                FixedVector<float> distances;
+                // remain unfilled size as iterator batch size
+                int64_t batch_size = unity_topk - topk;
+                offsets.reserve(batch_size);
+                distances.reserve(batch_size);
+                while (iterator->HasNext()) {
+                    auto offset_dis_pair = iterator->Next();
+                    AssertInfo(
+                        offset_dis_pair.has_value(),
+                        "Wrong state! iterator cannot return valid result "
+                        "whereas it still "
+                        "tells hasNext, terminate operation");
+                    auto offset = offset_dis_pair.value().first;
+                    auto dis = offset_dis_pair.value().second;
+                    offsets.emplace_back(offset);
+                    distances.emplace_back(dis);
+                    if (offsets.size() == batch_size) {
+                        break;
+                    }
+                }
+
+                // Inserts, in iterator order, every fetched candidate that
+                // `passes` accepts, stopping once this query's window is full.
+                const auto insert_passing = [&](const auto& passes) {
+                    const int fetched = static_cast<int>(offsets.size());
+                    for (int i = 0; i < fetched && topk < unity_topk; ++i) {
+                        if (passes(i)) {
+                            insert_helper(search_result,
+                                          topk,
+                                          large_is_better,
+                                          distances,
+                                          offsets,
+                                          nq_index,
+                                          unity_topk,
+                                          i);
+                        }
+                    }
+                };
+
+                if (is_native_supported_) {
+                    // evaluate the predicate on the fetched offsets only;
+                    // result bit i belongs to offsets[i]
+                    eval_ctx.set_offset_input(&offsets);
+                    std::vector<VectorPtr> results;
+                    exprs_->Eval(0, 1, true, eval_ctx, results);
+                    AssertInfo(
+                        results.size() == 1 && results[0] != nullptr,
+                        "PhyIterativeFilterNode result size should be size "
+                        "one and not "
+                        "be nullptr");
+
+                    auto col_vec =
+                        std::dynamic_pointer_cast<ColumnVector>(results[0]);
+                    // same requirement as on the whole-segment path above
+                    AssertInfo(
+                        col_vec != nullptr,
+                        "PhyIterativeFilterNode result should be ColumnVector");
+                    auto col_vec_size = col_vec->size();
+                    TargetBitmapView bitsetview(col_vec->GetRawData(),
+                                                col_vec_size);
+                    Assert(bitsetview.size() <= batch_size);
+                    Assert(bitsetview.size() == offsets.size());
+                    insert_passing([&](int i) { return bitsetview[i] > 0; });
+                } else {
+                    // look the candidate up in the precomputed segment bitset
+                    insert_passing(
+                        [&](int i) { return bitset[offsets[i]] > 0; });
+                }
+                if (topk == unity_topk) {
+                    break;
+                }
+            }
+            nq_index++;
+        }
+    }
+    query_context_->set_search_result(std::move(search_result));
+    std::chrono::high_resolution_clock::time_point scalar_end =
+        std::chrono::high_resolution_clock::now();
+    double scalar_cost =
+        std::chrono::duration<double, std::micro>(scalar_end - scalar_start)
+            .count();
+    monitor::internal_core_search_latency_iterative_filter.Observe(scalar_cost /
+                                                                   1000);
+
+    return input_;
+}
+
+}  // namespace exec
+}  // namespace milvus
diff --git a/internal/core/src/exec/operator/IterativeFilterNode.h b/internal/core/src/exec/operator/IterativeFilterNode.h
new file mode 100644
index
0000000000000..07404d974b7ca
--- /dev/null
+++ b/internal/core/src/exec/operator/IterativeFilterNode.h
@@ -0,0 +1,83 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "exec/Driver.h"
+#include "exec/expression/Expr.h"
+#include "exec/operator/Operator.h"
+#include "exec/QueryContext.h"
+
+// FilterBitsNode evaluates the predicate over the whole segment and returns a
+// bitset telling which offsets are filtered out; IterativeFilterNode instead
+// evaluates it on the offsets produced by vector iterators and keeps the hits.
+namespace milvus {
+namespace exec {
+class PhyIterativeFilterNode : public Operator {
+ public:
+    PhyIterativeFilterNode(
+        int32_t operator_id,
+        DriverContext* ctx,
+        const std::shared_ptr<const plan::FilterNode>& filter);
+
+    bool
+    IsFilter() override {
+        return true;
+    }
+
+    bool
+    NeedInput() const override {
+        return !is_finished_;
+    }
+
+    void
+    AddInput(RowVectorPtr& input) override;
+
+    RowVectorPtr
+    GetOutput() override;
+
+    bool
+    IsFinished() override;
+
+    void
+    Close() override {
+        Operator::Close();
+        exprs_->Clear();
+    }
+
+    BlockingReason
+    IsBlocked(ContinueFuture* /* unused */) override {
+        return BlockingReason::kNotBlocked;
+    }
+
+    std::string
+    ToString() const override {
+        return "PhyIterativeFilterNode";
+    }
+
+ private:
+    std::unique_ptr<ExprSet> exprs_;
+    QueryContext* query_context_{nullptr};
+    int64_t num_processed_rows_{0};   // rows done by whole-segment evaluation
+    int64_t need_process_rows_{0};    // rows whole-segment evaluation must do
+    bool is_finished_{false};
+    bool is_native_supported_{true};  // every expr accepts an offset list
+};
+}  // namespace exec
+}  // namespace milvus
diff --git a/internal/core/src/exec/operator/Utils.h b/internal/core/src/exec/operator/Utils.h
new file mode 100644
index 0000000000000..19249587bf2de
--- /dev/null
+++ b/internal/core/src/exec/operator/Utils.h
@@ -0,0 +1,101 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common/QueryInfo.h"
+#include "knowhere/index/index_node.h"
+#include "segcore/SegmentInterface.h"
+#include "segcore/SegmentGrowingImpl.h"
+#include "segcore/SegmentSealedImpl.h"
+#include "segcore/ConcurrentVector.h"
+#include "common/Span.h"
+#include "query/Utils.h"
+#include "common/EasyAssert.h"
+
+namespace milvus {
+namespace exec {
+
+// Returns true when the search has to be driven through vector iterators:
+// either a group-by field is set or iterative filter execution is enabled.
+// `inline` (not `static`) so that including this header from several
+// translation units does not create one private copy per unit.
+inline bool
+UseVectorIterator(const SearchInfo& search_info) {
+    return search_info.group_by_field_id_.has_value() ||
+           search_info.iterative_filter_execution;
+}
+
+// Asks `index` for vector iterators and stores them in `search_result` when
+// UseVectorIterator(search_info) holds.
+//
+// Returns true if the iterator path was taken (iterators assembled, total_nq_
+// and unity_topK_ set); returns false without touching `search_result` when
+// iterators are not needed. Panics with ErrorCode::Unsupported when the index
+// does not return iterators or a std::runtime_error is caught on the way.
+inline bool
+PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
+                                int nq,
+                                const DatasetPtr dataset,
+                                SearchResult& search_result,
+                                const BitsetView& bitset,
+                                const index::VectorIndex& index) {
+    // when we use group by, we will use vector iterator to continuously get results and group on them
+    // when we use iterative filtered search, we will use vector iterator to continuously get results and check scalar attr on them
+    // until we get valid topk results
+    if (!UseVectorIterator(search_info)) {
+        return false;
+    }
+    // only used to label the log and error messages below
+    const std::string operator_type =
+        search_info.group_by_field_id_.has_value() ? "group_by"
+                                                   : "iterative filter";
+    try {
+        auto search_conf = index.PrepareSearchParams(search_info);
+        knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
+            iterators_val =
+                index.VectorIterators(dataset, search_conf, bitset);
+        if (iterators_val.has_value()) {
+            search_result.AssembleChunkVectorIterators(
+                nq, 1, {0}, iterators_val.value());
+        } else {
+            LOG_ERROR(
+                "Returned knowhere iterator has non-ready iterators "
+                "inside, terminate {} operation:{}",
+                operator_type,
+                knowhere::Status2String(iterators_val.error()));
+            PanicInfo(
+                ErrorCode::Unsupported,
+                fmt::format(
+                    "Returned knowhere iterator has non-ready iterators "
+                    "inside, terminate {} operation",
+                    operator_type));
+        }
+        search_result.total_nq_ = dataset->GetRows();
+        search_result.unity_topK_ = search_info.topk_;
+    } catch (const std::runtime_error& e) {
+        LOG_ERROR(
+            "Caught error:{} when trying to initialize ann iterators for "
+            "{}: "
+            "operation will be terminated",
+            e.what(),
+            operator_type);
+        // the index type is passed as an argument instead of being spliced
+        // into the format string, so braces in it cannot break formatting
+        PanicInfo(ErrorCode::Unsupported,
+                  fmt::format("Failed to {}, current index:{} doesn't support",
+                              operator_type,
+                              index.GetIndexType()));
+    }
+    return true;
+}
+}  // namespace exec
+}  // namespace milvus
\ No newline at end of file
diff --git a/internal/core/src/exec/operator/VectorSearchNode.cpp b/internal/core/src/exec/operator/VectorSearchNode.cpp
index df7a550f6cd86..92d7d06764cfc 100644
--- a/internal/core/src/exec/operator/VectorSearchNode.cpp
+++ b/internal/core/src/exec/operator/VectorSearchNode.cpp
@@ -86,6 +86,7 @@ PhyVectorSearchNode::GetOutput() {
                             query_timestamp_,
                             final_view,
                             search_result);
+    search_result.total_data_cnt_ = final_view.size();
     query_context_->set_search_result(std::move(search_result));
 
     std::chrono::high_resolution_clock::time_point vector_end =
diff --git a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h
index 838e80354422b..6de135fb9cf7c 100644
--- a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h
+++ b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h
@@ -125,49 +125,6 @@ GetDataGetter(const segcore::SegmentInternalInterface& segment,
     }
 }
 
-static bool
-PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
-                                int nq,
-                                const DatasetPtr dataset,
-                                SearchResult& search_result,
-                                const BitsetView& bitset,
-                                const index::VectorIndex& index) {
-    if (search_info.group_by_field_id_.has_value()) {
-        try {
-            auto search_conf = index.PrepareSearchParams(search_info);
-            knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
-                iterators_val =
-                    index.VectorIterators(dataset, search_conf, bitset);
-            if
(iterators_val.has_value()) { - search_result.AssembleChunkVectorIterators( - nq, 1, {0}, iterators_val.value()); - } else { - LOG_ERROR( - "Returned knowhere iterator has non-ready iterators " - "inside, terminate group_by operation:{}", - knowhere::Status2String(iterators_val.error())); - PanicInfo(ErrorCode::Unsupported, - "Returned knowhere iterator has non-ready iterators " - "inside, terminate group_by operation"); - } - search_result.total_nq_ = dataset->GetRows(); - search_result.unity_topK_ = search_info.topk_; - } catch (const std::runtime_error& e) { - LOG_ERROR( - "Caught error:{} when trying to initialize ann iterators for " - "group_by: " - "group_by operation will be terminated", - e.what()); - PanicInfo( - ErrorCode::Unsupported, - "Failed to groupBy, current index:" + index.GetIndexType() + - " doesn't support search_group_by"); - } - return true; - } - return false; -} - void SearchGroupBy(const std::vector>& iterators, const SearchInfo& searchInfo, diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h index 082d8e10e1aa1..e2924147428f7 100644 --- a/internal/core/src/mmap/ChunkedColumn.h +++ b/internal/core/src/mmap/ChunkedColumn.h @@ -155,6 +155,13 @@ class ChunkedColumnBase : public ColumnBase { "StringViews only supported for VariableColumn"); } + virtual std::pair, FixedVector> + ViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const { + PanicInfo(ErrorCode::Unsupported, + "viewsbyoffsets only supported for VariableColumn"); + } + std::pair GetChunkIDByOffset(int64_t offset) const { AssertInfo(offset < num_rows_, @@ -333,6 +340,13 @@ class ChunkedVariableColumn : public ChunkedColumnBase { return chunks_[chunk_id]; } + std::pair, FixedVector> + ViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->ViewsByOffsets(offsets); + } + BufferView GetBatchBuffer(int64_t chunk_id, int64_t start_offset, diff --git 
a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 097ec1d3e2cc2..59f48ef608b80 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -323,6 +323,12 @@ class SingleChunkColumnBase : public ColumnBase { "StringViews only supported for VariableColumn"); } + virtual std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets) const { + PanicInfo(ErrorCode::Unsupported, + "viewsbyoffsets only supported for VariableColumn"); + } + virtual void AppendBatch(const FieldDataPtr data) { size_t required_size = data_size_ + data->DataSize(); @@ -698,6 +704,19 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase { return std::make_pair(res, valid_data_); } + std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets) const { + std::vector res; + FixedVector valid; + res.reserve(offsets.size()); + valid.reserve(offsets.size()); + for (size_t i = 0; i < offsets.size(); ++i) { + res.emplace_back(RawAt(offsets[i])); + valid.emplace_back(IsValid(offsets[i])); + } + return {res, valid}; + } + [[nodiscard]] std::vector Views() const { std::vector res; diff --git a/internal/core/src/monitor/prometheus_client.cpp b/internal/core/src/monitor/prometheus_client.cpp index 77bcbb5de3949..6ca081ed8be6b 100644 --- a/internal/core/src/monitor/prometheus_client.cpp +++ b/internal/core/src/monitor/prometheus_client.cpp @@ -177,6 +177,8 @@ std::map vectorLatencyLabels{ {"type", "vector_latency"}}; std::map groupbyLatencyLabels{ {"type", "groupby_latency"}}; +std::map iterativeFilterLatencyLabels{ + {"type", "iterative_filter_latency"}}; std::map scalarProportionLabels{ {"type", "scalar_proportion"}}; DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency, @@ -190,6 +192,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector, DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby, internal_core_search_latency, groupbyLatencyLabels) 
+DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_iterative_filter, + internal_core_search_latency, + iterativeFilterLatencyLabels) DEFINE_PROMETHEUS_HISTOGRAM_WITH_BUCKETS( internal_core_search_latency_scalar_proportion, internal_core_search_latency, diff --git a/internal/core/src/monitor/prometheus_client.h b/internal/core/src/monitor/prometheus_client.h index 3a1692a21fe32..ed8e21cef5661 100644 --- a/internal/core/src/monitor/prometheus_client.h +++ b/internal/core/src/monitor/prometheus_client.h @@ -136,6 +136,7 @@ DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby); +DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_iterative_filter); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar_proportion); } // namespace milvus::monitor diff --git a/internal/core/src/query/PlanProto.cpp b/internal/core/src/query/PlanProto.cpp index 3d7e325ce116c..2427d7287f3a2 100644 --- a/internal/core/src/query/PlanProto.cpp +++ b/internal/core/src/query/PlanProto.cpp @@ -53,6 +53,12 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { nlohmann::json::parse(query_info_proto.search_params()); search_info.materialized_view_involved = query_info_proto.materialized_view_involved(); + // currently, iterative filter does not support range search + if (search_info.search_params_.contains(ITERATIVE_FILTER) && + !search_info.search_params_.contains(RADIUS)) { + search_info.iterative_filter_execution = + search_info.search_params_[ITERATIVE_FILTER]; + } if (query_info_proto.bm25_avgdl() > 0) { search_info.search_params_[knowhere::meta::BM25_AVGDL] = @@ -94,7 +100,24 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { milvus::plan::PlanNodePtr plannode; std::vector sources; - if 
(anns_proto.has_predicates()) { + + // mvcc node -> vector search node -> iterative filter node + auto iterative_filter_plan = [&]() { + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId()); + sources = std::vector{plannode}; + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; + + auto expr = ParseExprs(anns_proto.predicates()); + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), expr, sources); + sources = std::vector{plannode}; + }; + + // pre filter node -> mvcc node -> vector search node + auto pre_filter_plan = [&]() { plannode = std::move(expr_parser()); if (plan_node->search_info_.materialized_view_involved) { const auto expr_info = plannode->GatherInfo(); @@ -113,15 +136,32 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { materialized_view_search_info; } sources = std::vector{plannode}; - } + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; - plannode = std::make_shared( - milvus::plan::GetNextPlanNodeId(), sources); - sources = std::vector{plannode}; + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; + }; - plannode = std::make_shared( - milvus::plan::GetNextPlanNodeId(), sources); - sources = std::vector{plannode}; + if (anns_proto.has_predicates()) { + // currently limit iterative filter scope to search only + if (plan_node->search_info_.iterative_filter_execution && + plan_node->search_info_.group_by_field_id_ == std::nullopt) { + iterative_filter_plan(); + } else { + pre_filter_plan(); + } + } else { + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; + + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; + } if (plan_node->search_info_.group_by_field_id_ != std::nullopt) { plannode = 
std::make_shared( diff --git a/internal/core/src/query/SearchBruteForce.cpp b/internal/core/src/query/SearchBruteForce.cpp index 9df66690b8396..7b4e0d46982f6 100644 --- a/internal/core/src/query/SearchBruteForce.cpp +++ b/internal/core/src/query/SearchBruteForce.cpp @@ -272,12 +272,11 @@ BruteForceSearchIterators(const dataset::SearchDataset& query_ds, "equal to nq:{} for single chunk", iterators_val.value().size(), nq); - SubSearchResult subSearchResult(query_ds.num_queries, - query_ds.topk, - query_ds.metric_type, - query_ds.round_decimal, - iterators_val.value()); - return std::move(subSearchResult); + return SubSearchResult(query_ds.num_queries, + query_ds.topk, + query_ds.metric_type, + query_ds.round_decimal, + iterators_val.value()); } else { LOG_ERROR( "Failed to get valid knowhere brute-force-iterators from chunk, " diff --git a/internal/core/src/query/SearchOnGrowing.cpp b/internal/core/src/query/SearchOnGrowing.cpp index 2add5f5a1fde8..c968b48ac52f2 100644 --- a/internal/core/src/query/SearchOnGrowing.cpp +++ b/internal/core/src/query/SearchOnGrowing.cpp @@ -20,6 +20,7 @@ #include "log/Log.h" #include "query/SearchBruteForce.h" #include "query/SearchOnIndex.h" +#include "exec/operator/Utils.h" namespace milvus::query { @@ -138,7 +139,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment, auto sub_data = query::dataset::RawDataset{ element_begin, dim, size_per_chunk, chunk_data}; - if (info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(info)) { auto sub_qr = BruteForceSearchIterators(search_dataset, sub_data, info, @@ -156,7 +157,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment, final_qr.merge(sub_qr); } } - if (info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(info)) { std::vector chunk_rows(max_chunk, 0); for (int i = 1; i < max_chunk; ++i) { chunk_rows[i] = i * vec_size_per_chunk; diff --git a/internal/core/src/query/SearchOnIndex.cpp 
b/internal/core/src/query/SearchOnIndex.cpp index 0204f791ce217..8556340a168c0 100644 --- a/internal/core/src/query/SearchOnIndex.cpp +++ b/internal/core/src/query/SearchOnIndex.cpp @@ -10,7 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include "SearchOnIndex.h" -#include "exec/operator/groupby/SearchGroupByOperator.h" +#include "exec/operator/Utils.h" namespace milvus::query { void diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index 59146b2447a0b..5eb0eee1c587d 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -21,7 +21,7 @@ #include "query/SearchBruteForce.h" #include "query/SearchOnSealed.h" #include "query/helper.h" -#include "exec/operator/groupby/SearchGroupByOperator.h" +#include "exec/operator/Utils.h" namespace milvus::query { @@ -119,7 +119,7 @@ SearchOnSealed(const Schema& schema, auto data_id = offset; auto raw_dataset = query::dataset::RawDataset{offset, dim, chunk_size, vec_data}; - if (search_info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(search_info)) { auto sub_qr = BruteForceSearchIterators(query_dataset, raw_dataset, search_info, @@ -139,7 +139,7 @@ SearchOnSealed(const Schema& schema, offset += chunk_size; } - if (search_info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(search_info)) { result.AssembleChunkVectorIterators(num_queries, num_chunk, column->GetNumRowsUntilChunk(), @@ -180,7 +180,7 @@ SearchOnSealed(const Schema& schema, auto data_type = field.get_data_type(); CheckBruteForceSearchParam(field, search_info); auto raw_dataset = query::dataset::RawDataset{0, dim, row_count, vec_data}; - if (search_info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(search_info)) { auto sub_qr = BruteForceSearchIterators(query_dataset, raw_dataset, search_info, diff --git 
a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index ce45f09333132..0e7b589e3b8b2 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -723,7 +723,11 @@ ChunkedSegmentSealedImpl::num_chunk_index(FieldId field_id) const { int64_t ChunkedSegmentSealedImpl::num_chunk_data(FieldId field_id) const { - return fields_.at(field_id)->num_chunks(); + return get_bit(field_data_ready_bitset_, field_id) + ? fields_.find(field_id) != fields_.end() + ? fields_.at(field_id)->num_chunks() + : 1 + : 0; } int64_t @@ -732,7 +736,7 @@ ChunkedSegmentSealedImpl::num_chunk(FieldId field_id) const { ? fields_.find(field_id) != fields_.end() ? fields_.at(field_id)->num_chunks() : 1 - : 0; + : 1; } int64_t @@ -800,7 +804,6 @@ ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id, std::shared_lock lck(mutex_); AssertInfo(get_bit(field_data_ready_bitset_, field_id), "Can't get bitset element at " + std::to_string(field_id.get())); - auto& field_meta = schema_->operator[](field_id); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; return field_data->Span(chunk_id); @@ -818,7 +821,6 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id, std::shared_lock lck(mutex_); AssertInfo(get_bit(field_data_ready_bitset_, field_id), "Can't get bitset element at " + std::to_string(field_id.get())); - auto& field_meta = schema_->operator[](field_id); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; return field_data->StringViews(chunk_id); @@ -827,6 +829,22 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id, "chunk_view_impl only used for variable column field "); } +std::pair, FixedVector> +ChunkedSegmentSealedImpl::chunk_view_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + std::shared_lock lck(mutex_); + 
AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->ViewsByOffsets(chunk_id, offsets); + } + PanicInfo(ErrorCode::UnexpectedError, + "chunk_view_by_offsets only used for variable column field "); +} + const index::IndexBase* ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const { diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h index 60314f019ec26..2c8861ae38eff 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -206,6 +206,11 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index b67191ddbb35b..bdea0fbb5e2d5 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -409,6 +409,15 @@ SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { "chunk view impl not implement for growing segment"); } +std::pair, FixedVector> +SegmentGrowingImpl::chunk_view_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + PanicInfo(ErrorCode::NotImplemented, + "chunk view by offsets not implemented for growing segment"); +} + int64_t SegmentGrowingImpl::num_chunk(FieldId field_id) const { auto size = get_insert_record().ack_responder_.GetAck(); diff --git 
a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 4d28fd8115e6e..1ccb863064677 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -344,6 +344,11 @@ class SegmentGrowingImpl : public SegmentGrowing { std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index f835153bc8e9c..5d4fc10ce6d2f 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -200,6 +200,28 @@ class SegmentInternalInterface : public SegmentInterface { return std::make_pair(res, chunk_info.second); } + template + std::pair, FixedVector> + get_views_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + if (this->type() == SegmentType::Growing) { + PanicInfo(ErrorCode::Unsupported, + "get chunk views not supported for growing segment"); + } + auto chunk_view = chunk_view_by_offsets(field_id, chunk_id, offsets); + if constexpr (std::is_same_v) { + return chunk_view; + } else { + std::vector res; + res.reserve(chunk_view.first.size()); + for (const auto& view : chunk_view.first) { + res.emplace_back(view); + } + return {res, chunk_view.second}; + } + } + template const index::ScalarIndex& chunk_scalar_index(FieldId field_id, int64_t chunk_id) const { @@ -414,6 +436,11 @@ class SegmentInternalInterface : public SegmentInterface { int64_t start_offset, int64_t length) const = 0; + virtual std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const = 0; + // internal API: return chunk_index in span, 
support scalar index only virtual const index::IndexBase* chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index bfd847df1f753..decc168e88494 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -770,7 +770,6 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const { std::shared_lock lck(mutex_); AssertInfo(get_bit(field_data_ready_bitset_, field_id), "Can't get bitset element at " + std::to_string(field_id.get())); - auto& field_meta = schema_->operator[](field_id); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; return field_data->Span(); @@ -787,7 +786,6 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { std::shared_lock lck(mutex_); AssertInfo(get_bit(field_data_ready_bitset_, field_id), "Can't get bitset element at " + std::to_string(field_id.get())); - auto& field_meta = schema_->operator[](field_id); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; return field_data->StringViews(); @@ -796,6 +794,22 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { "chunk_view_impl only used for variable column field "); } +std::pair, FixedVector> +SegmentSealedImpl::chunk_view_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->ViewsByOffsets(offsets); + } + PanicInfo(ErrorCode::UnexpectedError, + "chunk_view_by_offsets only used for variable column field "); +} + const index::IndexBase* 
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const { AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(), diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index cc16f5568a831..0266916dd5f7a 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -212,6 +212,11 @@ class SegmentSealedImpl : public SegmentSealed { std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 56123c7ef06e6..72cf5fe64966e 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -51,6 +51,7 @@ set(MILVUS_TEST_FILES test_function.cpp test_futures.cpp test_group_by.cpp + test_iterative_filter.cpp test_growing.cpp test_growing_index.cpp test_hybrid_index.cpp diff --git a/internal/core/unittest/test_always_true_expr.cpp b/internal/core/unittest/test_always_true_expr.cpp index 2d54525e8a306..3e395122fc449 100644 --- a/internal/core/unittest/test_always_true_expr.cpp +++ b/internal/core/unittest/test_always_true_expr.cpp @@ -67,10 +67,29 @@ TEST_P(ExprAlwaysTrueTest, AlwaysTrue) { final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + 
BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = age_col[i]; ASSERT_EQ(ans, true) << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], true) << "@" << i << "!!" << val; + } } } diff --git a/internal/core/unittest/test_array_expr.cpp b/internal/core/unittest/test_array_expr.cpp index 14b42521891ee..4133045a3b200 100644 --- a/internal/core/unittest/test_array_expr.cpp +++ b/internal/core/unittest/test_array_expr.cpp @@ -27,6 +27,7 @@ #include "segcore/SegmentGrowingImpl.h" #include "simdjson/padded_string.h" #include "test_utils/DataGen.h" +#include "test_utils/GenExprProto.h" using namespace milvus; using namespace milvus::query; @@ -611,11 +612,31 @@ TEST(Expr, TestArrayRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols[array_type][i]); auto ref = ref_func(array); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -728,6 +749,23 @@ TEST(Expr, TestArrayEqual) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + 
offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(long_array_col[i]); @@ -737,6 +775,9 @@ TEST(Expr, TestArrayEqual) { } auto ref = ref_func(array_values); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -927,6 +968,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["bool"][i]); @@ -935,6 +989,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)) << "@" << i; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)) << "@" << i; + } } } @@ -982,6 +1039,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView 
view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["double"][i]); @@ -990,6 +1060,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1027,6 +1100,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["float"][i]); @@ -1035,6 +1121,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1082,6 +1171,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["int"][i]); @@ -1090,6 +1192,9 @@ 
TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1128,6 +1233,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["long"][i]); @@ -1136,6 +1254,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1181,6 +1302,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["string"][i]); @@ -1189,6 +1323,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -2127,11 +2264,31 @@ TEST(Expr, TestArrayBinaryArith) { 
MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols[array_type][i]); auto ref = ref_func(array); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -2217,10 +2374,26 @@ TEST(Expr, TestArrayStringMatch) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["string"][i]); ASSERT_EQ(ans, testcase.check_func(array)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], testcase.check_func(array)); + } } } } @@ -2420,10 +2593,30 @@ TEST(Expr, TestArrayInTerm) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } 
+ auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols[array_type][i]); ASSERT_EQ(ans, ref_func(array)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref_func(array)); + } } } } @@ -2510,10 +2703,26 @@ TEST(Expr, TestTermInArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["long"][i]); ASSERT_EQ(ans, testcase.check_func(array)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], testcase.check_func(array)); + } } } } diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index 25c9a9b0b32c4..e161d2ae08794 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -31,6 +31,7 @@ #include "simdjson/padded_string.h" #include "segcore/segment_c.h" #include "test_utils/DataGen.h" +#include "test_utils/GenExprProto.h" #include "index/IndexFactory.h" #include "exec/expression/Expr.h" #include "exec/Task.h" @@ -369,7 +370,22 @@ TEST_P(ExprTest, TestRange) { seg_promote, N * num_iters, MAX_TIMESTAMP); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; 
+ for (auto i = 0; i < num_iters; ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(final.size(), N * num_iters); + EXPECT_EQ(view.size(), num_iters); for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -377,6 +393,10 @@ TEST_P(ExprTest, TestRange) { auto val = age_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + + if (i < num_iters) { + ASSERT_EQ(view[i], ref) << clause << "@" << i << "!!" << val; + } } } } @@ -731,7 +751,24 @@ TEST_P(ExprTest, TestRangeNullable) { seg_promote, N * num_iters, MAX_TIMESTAMP); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(final.size(), N * num_iters); + EXPECT_EQ(view.size(), int(N * num_iters / 2)); for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -741,6 +778,10 @@ TEST_P(ExprTest, TestRangeNullable) { auto ref = ref_func(val, valid_data); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val << "!!" << valid_data; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val << "!!" 
<< valid_data; + } } } } @@ -817,6 +858,22 @@ TEST_P(ExprTest, TestBinaryRangeJSON) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -828,6 +885,11 @@ TEST_P(ExprTest, TestBinaryRangeJSON) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at(pointer) @@ -836,6 +898,11 @@ TEST_P(ExprTest, TestBinaryRangeJSON) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } } } @@ -920,6 +987,22 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * 
num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -931,6 +1014,11 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at(pointer) @@ -939,6 +1027,11 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } } } @@ -993,12 +1086,28 @@ TEST_P(ExprTest, TestExistsJson) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist(pointer); auto ref = check(val); ASSERT_EQ(ans, ref); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref); + } } } } @@ -1059,12 +1168,28 @@ TEST_P(ExprTest, TestExistsJsonNullable) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); 
EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist(pointer); auto ref = check(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref); + } } } } @@ -1197,6 +1322,22 @@ TEST_P(ExprTest, TestUnaryRangeJson) { plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (testcase.nested_path[0] == "int") { @@ -1206,6 +1347,9 @@ TEST_P(ExprTest, TestUnaryRangeJson) { .value(); auto ref = f(val); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1213,6 +1357,9 @@ TEST_P(ExprTest, TestUnaryRangeJson) { .value(); auto ref = f(val); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1260,10 +1407,29 @@ TEST_P(ExprTest, TestUnaryRangeJson) { plan, seg_promote, N * 
num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto ref = check(op); ASSERT_EQ(ans, ref) << "@" << i << "op" << op; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "op" << op; + } } } } @@ -1401,6 +1567,22 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) { plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (testcase.nested_path[0] == "int") { @@ -1410,6 +1592,9 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) { .value(); auto ref = f(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1417,6 +1602,9 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) { .value(); auto ref = f(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1537,6 +1725,19 @@ 
TEST_P(ExprTest, TestTermJson) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1544,6 +1745,9 @@ TEST_P(ExprTest, TestTermJson) { .value(); auto ref = check(val); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1616,6 +1820,20 @@ TEST_P(ExprTest, TestTermJsonNullable) { final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1623,6 +1841,9 @@ TEST_P(ExprTest, TestTermJsonNullable) { .value(); auto ref = check(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1713,12 +1934,29 @@ TEST_P(ExprTest, TestTerm) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // 
specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = age_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) << clause << "@" << i << "!!" << val; + } } } } @@ -1846,12 +2084,29 @@ TEST_P(ExprTest, TestTermNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = nullable_col[i]; auto ref = ref_func(val, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) << clause << "@" << i << "!!" 
<< val; + } } } } @@ -1955,11 +2210,32 @@ TEST_P(ExprTest, TestCall) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, ref_func(address_col[i])) << "@" << i << "!!" << address_col[i]; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref_func(address_col[i])) + << "@" << i << "!!" << address_col[i]; + } } } @@ -2103,6 +2379,23 @@ TEST_P(ExprTest, TestCompare) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -2111,6 +2404,11 @@ TEST_P(ExprTest, TestCompare) { auto ref = ref_func(val1, val2); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2235,6 +2533,23 @@ TEST_P(ExprTest, TestCompareNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -2243,6 +2558,11 @@ TEST_P(ExprTest, TestCompareNullable) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2367,6 +2687,23 @@ TEST_P(ExprTest, TestCompareNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -2375,6 +2712,11 @@ TEST_P(ExprTest, TestCompareNullable2) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2464,6 +2806,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndex) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = age32_col[i]; @@ -2471,6 +2830,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndex) { auto ref = ref_func(val1, val2); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2600,6 +2964,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = nullable_col[i]; @@ -2607,6 +2988,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2736,6 +3122,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val2 = nullable_col[i]; @@ -2743,6 +3146,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable2) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2807,8 +3215,25 @@ TEST_P(ExprTest, test_term_pk_with_sorted) { plan = std::make_shared(DEFAULT_PLANNODE_ID, expr); final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], false); + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], false); + } } } @@ -3888,9 +4313,26 @@ TEST(Expr, TestExprNOT) { auto start = std::chrono::steady_clock::now(); final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; i++) { if (!valid_data[i]) { EXPECT_EQ(final[i], false); + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], false); + } } } }; @@ -4058,8 +4500,25 @@ TEST_P(ExprTest, test_term_pk) { plan = std::make_shared(DEFAULT_PLANNODE_ID, expr); final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = 
milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], false); + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], false); + } } } @@ -4179,8 +4638,25 @@ TEST_P(ExprTest, TestConjuctExpr) { std::make_shared(DEFAULT_PLANNODE_ID, expr); BitsetType final; final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], pair.first < i && i < pair.second) << i; + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], pair.first < i && i < pair.second) + << i; + } } } } @@ -4250,8 +4726,25 @@ TEST_P(ExprTest, TestConjuctExprNullable) { std::make_shared(DEFAULT_PLANNODE_ID, expr); BitsetType final; final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], pair.first < i && i < pair.second) << i; + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], pair.first < i && i < pair.second) + << i; + } } } } @@ -4975,6 +5468,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMaris) { MAX_TIMESTAMP); 
EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = str1_col[i]; @@ -4982,6 +5492,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMaris) { auto ref = ref_func(val1, val2); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -5106,6 +5621,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = str1_col[i]; @@ -5113,6 +5645,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -5237,6 +5774,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = nullable_col[i]; @@ -5244,6 +5798,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable2) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -5949,6 +6508,23 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { @@ -5956,26 +6532,50 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRange) { auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val << std::endl; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val << std::endl; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -6888,6 +7488,23 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { @@ -6895,26 +7512,50 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeNullable) { auto ref = ref_func(val, age8_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val << std::endl; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val << std::endl; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val, age16_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val, age32_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val, age64_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val, age_float_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val, age_double_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -7726,11 +8367,32 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto ref = ref_func(milvus::Json(simdjson::padded_string(json_col[i]))); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << json_col[i]; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< json_col[i]; + } } } } @@ -8650,12 +9312,33 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto ref = ref_func(milvus::Json(simdjson::padded_string(json_col[i])), valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << json_col[i]; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << json_col[i]; + } } } } @@ -8729,6 +9412,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8738,6 +9431,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { auto ref = check(val); ASSERT_EQ(ans, ref) << testcase.value << " " << val << " " << testcase.op; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << val << " " << 
testcase.op; + } } } @@ -8772,6 +9469,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8784,6 +9491,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { } auto ref = check(array_length); ASSERT_EQ(ans, ref) << testcase.value << " " << array_length; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << array_length; + } } } } @@ -8862,6 +9573,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8871,6 +9592,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { auto ref = check(val, valid_data[i]); ASSERT_EQ(ans, ref) << testcase.value << " " << val << " " << testcase.op; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << val << " " << 
testcase.op; + } } } @@ -8908,6 +9633,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8920,6 +9655,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { } auto ref = check(array_length, valid_data[i]); ASSERT_EQ(ans, ref) << testcase.value << " " << array_length; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << array_length; + } } } } @@ -9410,32 +10149,73 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeWithScalarSortIndex) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { auto val = age8_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10114,32 +10894,70 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeWithScalarSortIndexNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N, 10)); + for (int i = 0; i < N; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { auto val = age8_col[i]; auto ref = ref_func(val, i8_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val, i16_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val, i32_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val, i64_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val, float_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val, double_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10314,6 +11132,20 @@ TEST_P(ExprTest, TestUnaryRangeWithJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10322,24 +11154,40 @@ TEST_P(ExprTest, TestUnaryRangeWithJSON) { .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10544,6 +11392,20 @@ TEST_P(ExprTest, TestUnaryRangeWithJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10552,24 +11414,40 @@ TEST_P(ExprTest, TestUnaryRangeWithJSONNullable) { .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" 
<< val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10722,6 +11600,23 @@ TEST_P(ExprTest, TestTermWithJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10730,24 +11625,40 @@ TEST_P(ExprTest, TestTermWithJSON) { .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10922,6 +11833,23 @@ TEST_P(ExprTest, TestTermWithJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10930,24 +11858,40 @@ TEST_P(ExprTest, TestTermWithJSONNullable) { .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -11074,6 +12018,23 @@ TEST_P(ExprTest, TestExistsWithJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -11081,26 +12042,46 @@ TEST_P(ExprTest, TestExistsWithJSON) { .exist("/bool"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/int"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/double"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/string"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::VARCHAR) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/varchar"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -11265,6 +12246,23 @@ TEST_P(ExprTest, TestExistsWithJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -11272,26 +12270,46 @@ TEST_P(ExprTest, TestExistsWithJSONNullable) { .exist("/bool"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/int"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/double"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/string"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" 
<< val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::VARCHAR) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/varchar"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -11359,13 +12377,21 @@ TEST_P(ExprTest, TestTermInFieldJson) { auto start = std::chrono::steady_clock::now(); final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); - // std::cout << "cost" - // << std::chrono::duration_cast( - // std::chrono::steady_clock::now() - start) - // .count() - // << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11375,6 +12401,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -11415,6 +12444,19 @@ TEST_P(ExprTest, TestTermInFieldJson) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { 
+ offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11424,6 +12466,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -11464,6 +12509,19 @@ TEST_P(ExprTest, TestTermInFieldJson) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11473,6 +12531,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -11513,6 +12574,19 @@ TEST_P(ExprTest, TestTermInFieldJson) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + 
BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11522,6 +12596,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -11591,6 +12668,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { // << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11600,6 +12690,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -11643,6 +12736,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 
2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11652,6 +12758,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -11695,6 +12804,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11704,6 +12826,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -11748,6 +12873,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; 
auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11757,6 +12895,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } } @@ -11964,6 +13105,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11973,6 +13127,9 @@ TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12014,6 +13171,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12023,6 +13193,9 @@ TEST_P(ExprTest, 
TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12064,6 +13237,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12073,6 +13259,9 @@ TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12114,6 +13303,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12123,6 +13325,9 @@ TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } 
} @@ -12193,6 +13398,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12202,6 +13420,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12246,6 +13467,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12255,6 +13489,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12299,6 +13536,19 @@ TEST_P(ExprTest, 
TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12308,6 +13558,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12353,6 +13606,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12362,6 +13628,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } } @@ -12431,6 +13700,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * 
num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12440,6 +13722,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12489,6 +13774,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12498,6 +13796,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12546,6 +13847,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for 
(auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12555,6 +13869,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12601,6 +13918,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12610,6 +13940,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -12684,6 +14017,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + 
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12693,6 +14039,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12744,6 +14093,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12753,6 +14115,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12804,6 +14169,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + 
BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12813,6 +14191,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12863,6 +14244,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12872,6 +14266,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } } @@ -12970,10 +14367,26 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + 
EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, i)); + } } } @@ -13004,10 +14417,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13057,9 +14483,22 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } @@ -13087,10 +14526,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + 
offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13142,10 +14594,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13173,10 +14638,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } } @@ -13279,10 +14757,23 @@ 
TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i, valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i, valid_data[i])); + } } } @@ -13316,10 +14807,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i, valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i, valid_data[i])); + } } } @@ -13374,9 +14878,22 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + 
BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check(valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(valid_data[i])); + } } } @@ -13407,10 +14924,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i, valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i, valid_data[i])); + } } } @@ -13462,10 +14992,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13493,10 +15036,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify 
some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } } @@ -13597,9 +15153,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } @@ -13625,9 +15194,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < 
std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } } @@ -13708,9 +15290,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check(valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(valid_data[i])); + } } } @@ -13736,9 +15331,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } } @@ -13817,9 +15425,22 @@ TEST_P(ExprTest, TestJsonContainsDiffType) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, 
&offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } @@ -13844,9 +15465,22 @@ TEST_P(ExprTest, TestJsonContainsDiffType) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } } @@ -13927,12 +15561,28 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], false); + } } else { ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } } @@ -13958,12 +15608,28 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeNullable) { << 
std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], false); + } } else { ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } } diff --git a/internal/core/unittest/test_iterative_filter.cpp b/internal/core/unittest/test_iterative_filter.cpp new file mode 100644 index 0000000000000..17f4265a25ab0 --- /dev/null +++ b/internal/core/unittest/test_iterative_filter.cpp @@ -0,0 +1,589 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations under the License + +#include +#include "common/Schema.h" +#include "query/Plan.h" +#include "segcore/SegmentSealedImpl.h" +#include "segcore/reduce_c.h" +#include "segcore/plan_c.h" +#include "segcore/segment_c.h" +#include "test_utils/DataGen.h" +#include "test_utils/c_api_test_utils.h" + +using namespace milvus; +using namespace milvus::query; +using namespace milvus::segcore; +using namespace milvus::storage; +using namespace milvus::tracer; + +/** + * this UT is to cover Iterative filtering execution logic (knowhere iterator next() -> scalar filtering) + * so we will not cover all expr type here, just some examples + */ + +void +prepareSegmentFieldData(const std::unique_ptr& segment, + size_t row_count, + GeneratedData& data_set) { + auto field_data = + std::make_shared>(DataType::INT64, false); + field_data->FillFieldData(data_set.row_ids_.data(), row_count); + auto field_data_info = + FieldDataInfo{RowFieldID.get(), + row_count, + std::vector{field_data}}; + segment->LoadFieldData(RowFieldID, field_data_info); + + field_data = + std::make_shared>(DataType::INT64, false); + field_data->FillFieldData(data_set.timestamps_.data(), row_count); + field_data_info = + FieldDataInfo{TimestampFieldID.get(), + row_count, + std::vector{field_data}}; + segment->LoadFieldData(TimestampFieldID, field_data_info); +} + +void +CheckFilterSearchResult(const SearchResult& search_result_by_iterative_filter, + const SearchResult& search_result_by_pre_filter, + int topK, + int nq) { + ASSERT_EQ(search_result_by_pre_filter.seg_offsets_.size(), topK * nq); + ASSERT_EQ(search_result_by_pre_filter.distances_.size(), topK * nq); + ASSERT_EQ(search_result_by_iterative_filter.seg_offsets_.size(), topK * nq); + ASSERT_EQ(search_result_by_iterative_filter.distances_.size(), topK * nq); + + for (int i = 0; i < topK * nq; ++i) { + std::cout << search_result_by_pre_filter.seg_offsets_[i] << " " + << 
search_result_by_pre_filter.distances_[i] << " " + << search_result_by_iterative_filter.seg_offsets_[i] << " " + << search_result_by_iterative_filter.distances_[i] + << std::endl; + ASSERT_EQ(search_result_by_pre_filter.seg_offsets_[i], + search_result_by_iterative_filter.seg_offsets_[i]); + } +} + +TEST(IterativeFilter, SealedIndex) { + using namespace milvus; + using namespace milvus::query; + using namespace milvus::segcore; + + //0. prepare schema + int dim = 64; + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2); + auto int8_fid = schema->AddDebugField("int8", DataType::INT8); + auto int16_fid = schema->AddDebugField("int16", DataType::INT16); + auto int32_fid = schema->AddDebugField("int32", DataType::INT32); + auto int64_fid = schema->AddDebugField("int64", DataType::INT64); + auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR); + auto bool_fid = schema->AddDebugField("bool", DataType::BOOL); + schema->set_primary_field_id(str_fid); + auto segment = CreateSealedSegment(schema); + size_t N = 50; + + //2. load raw data + auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false); + auto fields = schema->get_fields(); + for (auto field_data : raw_data.raw_->fields_data()) { + int64_t field_id = field_data.field_id(); + + auto info = FieldDataInfo(field_data.field_id(), N); + auto field_meta = fields.at(FieldId(field_id)); + info.channel->push( + CreateFieldDataFromDataArray(N, &field_data, field_meta)); + info.channel->close(); + + segment->LoadFieldData(FieldId(field_id), info); + } + prepareSegmentFieldData(segment, N, raw_data); + + //3. 
load index + auto vector_data = raw_data.get_col(vec_fid); + auto indexing = GenVecIndexing( + N, dim, vector_data.data(), knowhere::IndexEnum::INDEX_HNSW); + LoadIndexInfo load_index_info; + load_index_info.field_id = vec_fid.get(); + load_index_info.index = std::move(indexing); + load_index_info.index_params["metric_type"] = knowhere::metric::L2; + segment->LoadIndex(load_index_info); + int topK = 10; + int group_size = 3; + + // int8 binaryRange + { + const char* raw_plan = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"iterative_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment->Search(plan2.get(), 
ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } + + // int16 Termexpr + { + const char* raw_plan = R"(vector_anns: < + field_id: 100 + predicates: < + term_expr: < + column_info: < + field_id: 102 + data_type: Int16 + > + values: values: + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"iterative_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 100 + predicates: < + term_expr: < + column_info: < + field_id: 102 + data_type: Int16 + > + values: values: + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} + +TEST(IterativeFilter, SealedData) { + using namespace milvus; + using namespace milvus::query; + using namespace milvus::segcore; + + //0. 
prepare schema + int dim = 64; + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2); + auto int8_fid = schema->AddDebugField("int8", DataType::INT8); + auto int16_fid = schema->AddDebugField("int16", DataType::INT16); + auto int32_fid = schema->AddDebugField("int32", DataType::INT32); + auto int64_fid = schema->AddDebugField("int64", DataType::INT64); + auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR); + auto bool_fid = schema->AddDebugField("bool", DataType::BOOL); + schema->set_primary_field_id(str_fid); + auto segment = CreateSealedSegment(schema); + size_t N = 100; + + //2. load raw data + auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false); + auto fields = schema->get_fields(); + for (auto field_data : raw_data.raw_->fields_data()) { + int64_t field_id = field_data.field_id(); + + auto info = FieldDataInfo(field_data.field_id(), N); + auto field_meta = fields.at(FieldId(field_id)); + info.channel->push( + CreateFieldDataFromDataArray(N, &field_data, field_meta)); + info.channel->close(); + + segment->LoadFieldData(FieldId(field_id), info); + } + prepareSegmentFieldData(segment, N, raw_data); + + int topK = 10; + // int8 binaryRange + { + const char* raw_plan = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"iterative_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + 
auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} + +TEST(IterativeFilter, GrowingRawData) { + int dim = 128; + uint64_t seed = 512; + auto schema = std::make_shared(); + auto metric_type = knowhere::metric::L2; + auto int64_field_id = schema->AddDebugField("int64", DataType::INT64); + auto int32_field_id = schema->AddDebugField("int32", DataType::INT32); + auto vec_field_id = schema->AddDebugField( + "embeddings", DataType::VECTOR_FLOAT, 128, metric_type); + schema->set_primary_field_id(int64_field_id); + + auto config = SegcoreConfig::default_config(); + config.set_chunk_rows(8); + config.set_enable_interim_segment_index( + false); //no growing index, test brute force + auto segment_growing = CreateGrowingSegment(schema, nullptr, 1, config); + auto segment_growing_impl = + dynamic_cast(segment_growing.get()); + + int64_t rows_per_batch = 30; + int n_batch = 1; + for (int i = 0; i < n_batch; i++) { + auto data_set = + DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false); + auto offset = segment_growing_impl->PreInsert(rows_per_batch); + segment_growing_impl->Insert(offset, + rows_per_batch, + 
data_set.row_ids_.data(), + data_set.timestamps_.data(), + data_set.raw_); + } + + auto topK = 10; + // int8 binaryRange + { + const char* raw_plan = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"iterative_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} + +TEST(IterativeFilter, GrowingIndex) { + int dim = 128; + uint64_t seed = 512; + auto schema = std::make_shared(); + auto metric_type = knowhere::metric::L2; + auto int64_field_id = 
schema->AddDebugField("int64", DataType::INT64); + auto int32_field_id = schema->AddDebugField("int32", DataType::INT32); + auto vec_field_id = schema->AddDebugField( + "embeddings", DataType::VECTOR_FLOAT, 128, metric_type); + schema->set_primary_field_id(int64_field_id); + + std::map index_params = { + {"index_type", "IVF_FLAT"}, + {"metric_type", metric_type}, + {"nlist", "4"}}; + std::map type_params = {{"dim", "128"}}; + FieldIndexMeta fieldIndexMeta( + vec_field_id, std::move(index_params), std::move(type_params)); + std::map fieldMap = { + {vec_field_id, fieldIndexMeta}}; + IndexMetaPtr metaPtr = + std::make_shared(10000, std::move(fieldMap)); + + auto config = SegcoreConfig::default_config(); + config.set_chunk_rows(16); + config.set_enable_interim_segment_index(true); // test growing inter index + config.set_nlist(4); + config.set_nlist(4); + auto segment_growing = CreateGrowingSegment(schema, metaPtr, 1, config); + auto segment_growing_impl = + dynamic_cast(segment_growing.get()); + + //1. 
prepare raw data in growing segment + int64_t rows_per_batch = 100; + int n_batch = 1; + for (int i = 0; i < n_batch; i++) { + auto data_set = + DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false); + auto offset = segment_growing_impl->PreInsert(rows_per_batch); + segment_growing_impl->Insert(offset, + rows_per_batch, + data_set.row_ids_.data(), + data_set.timestamps_.data(), + data_set.raw_); + } + + auto topK = 10; + { + const char* raw_plan = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"nprobe\": 4, \"iterative_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"nprobe\": 4}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment_growing_impl->Search(plan2.get(), 
ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} \ No newline at end of file diff --git a/internal/core/unittest/test_string_expr.cpp b/internal/core/unittest/test_string_expr.cpp index a8c690e19d629..98e3fcb539ee9 100644 --- a/internal/core/unittest/test_string_expr.cpp +++ b/internal/core/unittest/test_string_expr.cpp @@ -290,12 +290,32 @@ TEST(StringExpr, Term) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = str_col[i]; auto ref = std::find(term.begin(), term.end(), val) != term.end(); ASSERT_EQ(ans, ref) << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" 
<< val; + } } } } @@ -363,6 +383,23 @@ TEST(StringExpr, TermNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { @@ -372,6 +409,9 @@ TEST(StringExpr, TermNullable) { auto val = str_col[i]; auto ref = std::find(term.begin(), term.end(), val) != term.end(); ASSERT_EQ(ans, ref) << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" << val; + } } } } @@ -481,6 +521,23 @@ TEST(StringExpr, Compare) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -488,6 +545,10 @@ TEST(StringExpr, Compare) { auto another_val = another_str_col[i]; auto ref = ref_func(val, another_val); ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << i << "!!" 
<< val; + } } } } @@ -609,6 +670,23 @@ TEST(StringExpr, CompareNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { @@ -619,6 +697,10 @@ TEST(StringExpr, CompareNullable) { auto another_val = another_str_col[i]; auto ref = ref_func(val, another_val); ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << i << "!!" 
<< val; + } } } } @@ -741,16 +823,40 @@ TEST(StringExpr, CompareNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], false); + } continue; } auto val = str_col[i]; auto another_val = another_str_col[i]; auto ref = ref_func(val, another_val); ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << i << "!!" << val; + } } } } @@ -840,6 +946,23 @@ TEST(StringExpr, UnaryRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -847,6 +970,10 @@ TEST(StringExpr, UnaryRange) { auto ref = ref_func(val); ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" 
<< val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << value << "@" << i << "!!" << val; + } } } } @@ -947,6 +1074,23 @@ TEST(StringExpr, UnaryRangeNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { @@ -957,6 +1101,10 @@ TEST(StringExpr, UnaryRangeNullable) { auto ref = ref_func(val); ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << value << "@" << i << "!!" 
<< val; + } } } } @@ -1064,6 +1212,23 @@ TEST(StringExpr, BinaryRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -1072,6 +1237,11 @@ TEST(StringExpr, BinaryRange) { ASSERT_EQ(ans, ref) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb + << "@" << ub << "@" << i << "!!" 
<< val; + } } } } @@ -1191,10 +1361,30 @@ TEST(StringExpr, BinaryRangeNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], false); + } continue; } auto val = str_col[i]; @@ -1202,6 +1392,11 @@ TEST(StringExpr, BinaryRangeNullable) { ASSERT_EQ(ans, ref) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb + << "@" << ub << "@" << i << "!!" 
<< val; + } } } } diff --git a/internal/core/unittest/test_utils/GenExprProto.h b/internal/core/unittest/test_utils/GenExprProto.h index a1744d3c5e268..372e07e356f24 100644 --- a/internal/core/unittest/test_utils/GenExprProto.h +++ b/internal/core/unittest/test_utils/GenExprProto.h @@ -16,6 +16,7 @@ #include "common/Consts.h" #include "expr/ITypeExpr.h" +#include "exec/expression/Expr.h" #include "pb/plan.pb.h" #include "plan/PlanNode.h" @@ -104,4 +105,30 @@ CreateSearchPlanByExpr(std::shared_ptr expr) { return plannode; } +inline ColumnVectorPtr +gen_filter_res(milvus::plan::PlanNode* plan_node, + const milvus::segcore::SegmentInternalInterface* segment, + uint64_t active_count, + uint64_t timestamp, + FixedVector* offsets = nullptr) { + auto filter_node = dynamic_cast(plan_node); + assert(filter_node != nullptr); + std::vector filters; + filters.emplace_back(filter_node->filter()); + auto query_context = std::make_shared( + DEAFULT_QUERY_ID, segment, active_count, timestamp); + + std::unique_ptr exec_context = + std::make_unique(query_context.get()); + auto exprs_ = + std::make_unique(filters, exec_context.get()); + std::vector results_; + milvus::exec::EvalCtx eval_ctx(exec_context.get(), exprs_.get()); + eval_ctx.set_offset_input(offsets); + exprs_->Eval(0, 1, true, eval_ctx, results_); + + auto col_vec = std::dynamic_pointer_cast(results_[0]); + return col_vec; +} + } // namespace milvus::test From 987b61becd8673ce4fd2c66895076700dc462ee8 Mon Sep 17 00:00:00 2001 From: chasingegg Date: Thu, 5 Dec 2024 09:21:18 +0800 Subject: [PATCH 2/5] update Signed-off-by: chasingegg --- internal/core/src/common/Chunk.cpp | 9 +++------ internal/core/src/common/Chunk.h | 5 ++++- internal/core/src/common/Consts.h | 1 + internal/core/src/query/PlanProto.cpp | 5 ++--- internal/distributed/proxy/httpserver/handler_v2.go | 1 + internal/distributed/proxy/httpserver/request_v2.go | 1 + internal/proto/plan.proto | 1 + internal/proxy/search_util.go | 6 ++++++ pkg/common/common.go | 1 
+ 9 files changed, 20 insertions(+), 10 deletions(-) diff --git a/internal/core/src/common/Chunk.cpp b/internal/core/src/common/Chunk.cpp index 037acabe591d4..4b5c17ac1df6e 100644 --- a/internal/core/src/common/Chunk.cpp +++ b/internal/core/src/common/Chunk.cpp @@ -21,6 +21,7 @@ namespace milvus { std::pair, FixedVector> StringChunk::StringViews() { std::vector ret; + ret.reserve(row_nums_); for (int i = 0; i < row_nums_; i++) { ret.emplace_back(data_ + offsets_[i], offsets_[i + 1] - offsets_[i]); } @@ -35,12 +36,8 @@ StringChunk::ViewsByOffsets(const FixedVector& offsets) { ret.reserve(size); valid_res.reserve(size); for (auto i = 0; i < size; ++i) { - uint32_t string_size; - char* pos = data_; - pos += offsets_[offsets[i]]; - string_size = *reinterpret_cast(pos); - pos += sizeof(uint32_t); - ret.emplace_back(std::string_view(pos, string_size)); + ret.emplace_back(data_ + offsets_[offsets[i]], + offsets_[offsets[i] + 1] - offsets_[offsets[i]]); valid_res.emplace_back(isValid(offsets[i])); } return {ret, valid_res}; diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h index dc974769beda5..7a1f56fec3c33 100644 --- a/internal/core/src/common/Chunk.h +++ b/internal/core/src/common/Chunk.h @@ -73,7 +73,10 @@ class Chunk { virtual bool isValid(int offset) { - return valid_[offset]; + if (nullable_) { + return valid_[offset]; + } + return true; }; protected: diff --git a/internal/core/src/common/Consts.h b/internal/core/src/common/Consts.h index 400a0eacf4b4b..c8e10347db8f4 100644 --- a/internal/core/src/common/Consts.h +++ b/internal/core/src/common/Consts.h @@ -48,6 +48,7 @@ const char VEC_OPT_FIELDS[] = "opt_fields"; const char PAGE_RETAIN_ORDER[] = "page_retain_order"; const char TEXT_LOG_ROOT_PATH[] = "text_log"; const char ITERATIVE_FILTER[] = "iterative_filter"; +const char HINTS[] = "hints"; const char DEFAULT_PLANNODE_ID[] = "0"; const char DEAFULT_QUERY_ID[] = "0"; diff --git a/internal/core/src/query/PlanProto.cpp 
b/internal/core/src/query/PlanProto.cpp index 2427d7287f3a2..ee218fbfbeefa 100644 --- a/internal/core/src/query/PlanProto.cpp +++ b/internal/core/src/query/PlanProto.cpp @@ -54,10 +54,9 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { search_info.materialized_view_involved = query_info_proto.materialized_view_involved(); // currently, iterative filter does not support range search - if (search_info.search_params_.contains(ITERATIVE_FILTER) && - !search_info.search_params_.contains(RADIUS)) { + if (!search_info.search_params_.contains(RADIUS)) { search_info.iterative_filter_execution = - search_info.search_params_[ITERATIVE_FILTER]; + (query_info_proto.hints() == ITERATIVE_FILTER); } if (query_info_proto.bm25_avgdl() > 0) { diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index 681fff529fc0b..b835921629f30 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ b/internal/distributed/proxy/httpserver/handler_v2.go @@ -968,6 +968,7 @@ func generateSearchParams(reqSearchParams searchParams) []*commonpb.KeyValuePair bs, _ := json.Marshal(reqSearchParams.Params) searchParams = append(searchParams, &commonpb.KeyValuePair{Key: Params, Value: string(bs)}) searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.IgnoreGrowing, Value: strconv.FormatBool(reqSearchParams.IgnoreGrowing)}) + searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.HintsKey, Value: reqSearchParams.Hints}) // need to exposure ParamRoundDecimal in req? 
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: ParamRoundDecimal, Value: "-1"}) return searchParams diff --git a/internal/distributed/proxy/httpserver/request_v2.go b/internal/distributed/proxy/httpserver/request_v2.go index 41dae907426dd..137f50e747d0f 100644 --- a/internal/distributed/proxy/httpserver/request_v2.go +++ b/internal/distributed/proxy/httpserver/request_v2.go @@ -175,6 +175,7 @@ type searchParams struct { MetricType string `json:"metricType"` Params map[string]interface{} `json:"params"` IgnoreGrowing bool `json:"ignoreGrowing"` + Hints string `json:"hints"` } type SearchReqV2 struct { diff --git a/internal/proto/plan.proto b/internal/proto/plan.proto index 3fa2cf3b8b63c..abf32317c6be2 100644 --- a/internal/proto/plan.proto +++ b/internal/proto/plan.proto @@ -66,6 +66,7 @@ message QueryInfo { bool strict_group_size = 9; double bm25_avgdl = 10; int64 query_field_id =11; + string hints = 12; } message ColumnInfo { diff --git a/internal/proxy/search_util.go b/internal/proxy/search_util.go index cea3dc49c63a6..ed2f37c313ea1 100644 --- a/internal/proxy/search_util.go +++ b/internal/proxy/search_util.go @@ -153,6 +153,11 @@ func parseSearchInfo(searchParamsPair []*commonpb.KeyValuePair, schema *schemapb roundDecimalStr = "-1" } + hints, err := funcutil.GetAttrByKeyFromRepeatedKV(common.HintsKey, searchParamsPair) + if err != nil { + hints = "" + } + roundDecimal, err := strconv.ParseInt(roundDecimalStr, 0, 64) if err != nil { return &SearchInfo{planInfo: nil, offset: 0, isIterator: false, parseError: fmt.Errorf("%s [%s] is invalid, should be -1 or an integer in range [0, 6]", RoundDecimalKey, roundDecimalStr)} @@ -200,6 +205,7 @@ func parseSearchInfo(searchParamsPair []*commonpb.KeyValuePair, schema *schemapb GroupByFieldId: groupByFieldId, GroupSize: groupSize, StrictGroupSize: strictGroupSize, + Hints: hints, }, offset: offset, isIterator: isIterator, diff --git a/pkg/common/common.go b/pkg/common/common.go index 
86219ed5aeb2a..d32ab48c6fe26 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -138,6 +138,7 @@ const ( BitmapCardinalityLimitKey = "bitmap_cardinality_limit" IgnoreGrowing = "ignore_growing" ConsistencyLevel = "consistency_level" + HintsKey = "hints" ) // Doc-in-doc-out From b30afcce28bbb89ac6343457aed6e70edc9e07b5 Mon Sep 17 00:00:00 2001 From: chasingegg Date: Thu, 5 Dec 2024 11:58:33 +0800 Subject: [PATCH 3/5] update Signed-off-by: chasingegg --- internal/core/unittest/test_iterative_filter.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/internal/core/unittest/test_iterative_filter.cpp b/internal/core/unittest/test_iterative_filter.cpp index 17f4265a25ab0..d8df0b3a29d22 100644 --- a/internal/core/unittest/test_iterative_filter.cpp +++ b/internal/core/unittest/test_iterative_filter.cpp @@ -145,7 +145,8 @@ TEST(IterativeFilter, SealedIndex) { query_info: < topk: 10 metric_type: "L2" - search_params: "{\"ef\": 50, \"iterative_filter\": true}" + hints: "iterative_filter" + search_params: "{\"ef\": 50}" > placeholder_tag: "$0">)"; proto::plan::PlanNode plan_node; @@ -210,7 +211,8 @@ TEST(IterativeFilter, SealedIndex) { query_info: < topk: 10 metric_type: "L2" - search_params: "{\"ef\": 50, \"iterative_filter\": true}" + hints: "iterative_filter" + search_params: "{\"ef\": 50}" > placeholder_tag: "$0">)"; proto::plan::PlanNode plan_node; @@ -313,7 +315,8 @@ TEST(IterativeFilter, SealedData) { query_info: < topk: 10 metric_type: "L2" - search_params: "{\"ef\": 50, \"iterative_filter\": true}" + hints: "iterative_filter" + search_params: "{\"ef\": 50}" > placeholder_tag: "$0">)"; proto::plan::PlanNode plan_node; @@ -419,7 +422,8 @@ TEST(IterativeFilter, GrowingRawData) { query_info: < topk: 10 metric_type: "L2" - search_params: "{\"ef\": 50, \"iterative_filter\": true}" + hints: "iterative_filter" + search_params: "{\"ef\": 50}" > placeholder_tag: "$0">)"; proto::plan::PlanNode plan_node; @@ -538,7 +542,8 @@ 
TEST(IterativeFilter, GrowingIndex) { query_info: < topk: 10 metric_type: "L2" - search_params: "{\"nprobe\": 4, \"iterative_filter\": true}" + hints: "iterative_filter" + search_params: "{\"nprobe\": 4}" > placeholder_tag: "$0">)"; proto::plan::PlanNode plan_node; From 90340c3ca9231abce3365a8251795505eaa4a84e Mon Sep 17 00:00:00 2001 From: chasingegg Date: Thu, 5 Dec 2024 12:11:05 +0800 Subject: [PATCH 4/5] update Signed-off-by: chasingegg --- internal/core/src/exec/expression/UnaryExpr.cpp | 14 ++++++++++++-- internal/core/src/exec/expression/UnaryExpr.h | 5 +++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index dfcbfe7b12058..0b446f6df7f54 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -396,8 +396,18 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) { break; } case proto::plan::Match: { - UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } default: diff --git a/internal/core/src/exec/expression/UnaryExpr.h b/internal/core/src/exec/expression/UnaryExpr.h index f47c2f299d474..ee8593a1fd251 100644 --- a/internal/core/src/exec/expression/UnaryExpr.h +++ b/internal/core/src/exec/expression/UnaryExpr.h @@ -212,14 +212,15 @@ struct UnaryElementFuncForArray { if constexpr (std::is_same_v) { res[i] = false; } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } PatternMatchTranslator translator; auto regex_pattern = translator(val); RegexMatcher matcher(regex_pattern); - auto array_data = src[i].template get_data(index); + auto array_data = + src[offset].template get_data(index); res[i] = matcher(array_data); } } else { From 
6881210849dab0a89338d887e94fd4e3f71575ab Mon Sep 17 00:00:00 2001 From: chasingegg Date: Thu, 5 Dec 2024 12:31:29 +0800 Subject: [PATCH 5/5] update Signed-off-by: chasingegg --- internal/core/src/query/PlanProto.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/core/src/query/PlanProto.cpp b/internal/core/src/query/PlanProto.cpp index ee218fbfbeefa..72597258b06c3 100644 --- a/internal/core/src/query/PlanProto.cpp +++ b/internal/core/src/query/PlanProto.cpp @@ -57,6 +57,11 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { if (!search_info.search_params_.contains(RADIUS)) { search_info.iterative_filter_execution = (query_info_proto.hints() == ITERATIVE_FILTER); + if (!search_info.iterative_filter_execution && + search_info.search_params_.contains(HINTS)) { + search_info.iterative_filter_execution = + (search_info.search_params_[HINTS] == ITERATIVE_FILTER); + } } if (query_info_proto.bm25_avgdl() > 0) {