diff --git a/internal/core/src/common/Chunk.cpp b/internal/core/src/common/Chunk.cpp index 6032c6b930d9c..1e6839e47847e 100644 --- a/internal/core/src/common/Chunk.cpp +++ b/internal/core/src/common/Chunk.cpp @@ -27,6 +27,25 @@ StringChunk::StringViews() { return {ret, valid_}; } +std::pair, FixedVector> +StringChunk::ViewsByOffsets(const FixedVector& offsets) { + std::vector ret; + FixedVector valid_res; + size_t size = offsets.size(); + ret.reserve(size); + valid_res.reserve(size); + for (auto i = 0; i < size; ++i) { + uint32_t string_size; + char* pos = data_; + pos += offsets_[offsets[i]]; + string_size = *reinterpret_cast(pos); + pos += sizeof(uint32_t); + ret.emplace_back(std::string_view(pos, string_size)); + valid_res.emplace_back(isValid(offsets[i])); + } + return {ret, valid_res}; +} + void ArrayChunk::ConstructViews() { views_.reserve(row_nums_); diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h index 7cfaa7fad466e..303b6ab3bec16 100644 --- a/internal/core/src/common/Chunk.h +++ b/internal/core/src/common/Chunk.h @@ -170,6 +170,9 @@ class StringChunk : public Chunk { return result; } + std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets); + const char* ValueAt(int64_t idx) const override { return (*this)[idx].data(); diff --git a/internal/core/src/common/Consts.h b/internal/core/src/common/Consts.h index 1f9d51e447680..26bf344e8516d 100644 --- a/internal/core/src/common/Consts.h +++ b/internal/core/src/common/Consts.h @@ -47,6 +47,7 @@ const char KMEANS_CLUSTER[] = "KMEANS"; const char VEC_OPT_FIELDS[] = "opt_fields"; const char PAGE_RETAIN_ORDER[] = "page_retain_order"; const char TEXT_LOG_ROOT_PATH[] = "text_log"; +const char POST_FILTER[] = "post_filter"; const char DEFAULT_PLANNODE_ID[] = "0"; const char DEAFULT_QUERY_ID[] = "0"; diff --git a/internal/core/src/common/QueryInfo.h b/internal/core/src/common/QueryInfo.h index 440194d33c9f7..6fe9b21608fdd 100644 --- a/internal/core/src/common/QueryInfo.h +++ b/internal/core/src/common/QueryInfo.h @@ -35,6 +35,7 @@ struct SearchInfo { std::optional group_by_field_id_; tracer::TraceContext trace_ctx_; bool materialized_view_involved = false; + bool post_filter_execution = false; }; using SearchInfoPtr = std::shared_ptr; diff --git a/internal/core/src/exec/Driver.cpp b/internal/core/src/exec/Driver.cpp index fcdadb9580c28..35c939a31fef5 100644 --- a/internal/core/src/exec/Driver.cpp +++ b/internal/core/src/exec/Driver.cpp @@ -23,6 +23,7 @@ #include "exec/operator/CallbackSink.h" #include "exec/operator/CountNode.h" #include "exec/operator/FilterBitsNode.h" +#include "exec/operator/FilterNode.h" #include "exec/operator/MvccNode.h" #include "exec/operator/Operator.h" #include "exec/operator/VectorSearchNode.h" @@ -52,11 +53,16 @@ DriverFactory::CreateDriver(std::unique_ptr ctx, for (size_t i = 0; i < plannodes_.size(); ++i) { auto id = operators.size(); auto plannode = plannodes_[i]; - if (auto filternode = + if (auto filterbitsnode = std::dynamic_pointer_cast( plannode)) { + operators.push_back(std::make_unique( + id, ctx.get(), filterbitsnode)); + } else if (auto filternode = + std::dynamic_pointer_cast( + plannode)) { operators.push_back( - std::make_unique(id, ctx.get(), filternode)); + std::make_unique(id, ctx.get(), filternode)); } else if (auto mvccnode = std::dynamic_pointer_cast( plannode)) { diff --git a/internal/core/src/exec/QueryContext.h b/internal/core/src/exec/QueryContext.h index 4b49fe1a1482e..916eb73a3c9c2 100644 --- a/internal/core/src/exec/QueryContext.h +++ 
b/internal/core/src/exec/QueryContext.h @@ -230,6 +230,11 @@ class QueryContext : public Context { return search_info_; } + knowhere::MetricType + get_metric_type() { + return search_info_.metric_type_; + } + const query::PlaceholderGroup* get_placeholder_group() { return placeholder_group_; diff --git a/internal/core/src/exec/expression/AlwaysTrueExpr.cpp b/internal/core/src/exec/expression/AlwaysTrueExpr.cpp index 920fc86ee6a17..063515cc19ae1 100644 --- a/internal/core/src/exec/expression/AlwaysTrueExpr.cpp +++ b/internal/core/src/exec/expression/AlwaysTrueExpr.cpp @@ -21,9 +21,13 @@ namespace exec { void PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + auto input = context.get_offset_input(); + has_offset_input_ = (input != nullptr); + int64_t real_batch_size = (has_offset_input_) + ? input->size() + : (current_pos_ + batch_size_ >= active_count_ + ? active_count_ - current_pos_ + : batch_size_); // always true no need to skip null if (real_batch_size == 0) { diff --git a/internal/core/src/exec/expression/AlwaysTrueExpr.h b/internal/core/src/exec/expression/AlwaysTrueExpr.h index ffb5750a311f8..5ef0dc10d493a 100644 --- a/internal/core/src/exec/expression/AlwaysTrueExpr.h +++ b/internal/core/src/exec/expression/AlwaysTrueExpr.h @@ -47,11 +47,14 @@ class PhyAlwaysTrueExpr : public Expr { void MoveCursor() override { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + if (!has_offset_input_) { + int64_t real_batch_size = + current_pos_ + batch_size_ >= active_count_ + ? active_count_ - current_pos_ + : batch_size_; - current_pos_ += real_batch_size; + current_pos_ += real_batch_size; + } } private: diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp index e5b24ac4121ce..014c614f68d1c 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp @@ -21,48 +21,50 @@ namespace exec { void PhyBinaryArithOpEvalRangeExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT8: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT16: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT32: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT64: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::JSON: { auto value_type = expr_->value_.val_case(); switch (value_type) { case proto::plan::GenericValue::ValCase::kBoolVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kInt64Val: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case 
proto::plan::GenericValue::ValCase::kFloatVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } default: { @@ -79,12 +81,12 @@ PhyBinaryArithOpEvalRangeExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } default: { @@ -105,11 +107,13 @@ PhyBinaryArithOpEvalRangeExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson( + OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -128,58 +132,91 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { ? GetValueFromProto(expr_->right_operand_) : ValueType(); -#define BinaryArithRangeJSONCompare(cmp) \ - do { \ - for (size_t i = 0; i < size; ++i) { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = false; \ - valid_res[i] = false; \ - continue; \ - } \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = !x.error() && (cmp); \ - continue; \ - } \ - res[i] = false; \ - continue; \ - } \ - res[i] = (cmp); \ - } \ +#define BinaryArithRangeJSONCompare(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::post) { \ + offset = (offsets) ? offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = !x.error() && (cmp); \ + continue; \ + } \ + res[i] = false; \ + continue; \ + } \ + res[i] = (cmp); \ + } \ } while (false) -#define BinaryArithRangeJSONCompareNotEqual(cmp) \ - do { \ - for (size_t i = 0; i < size; ++i) { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = false; \ - valid_res[i] = false; \ - continue; \ - } \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = x.error() || (cmp); \ - continue; \ - } \ - res[i] = true; \ - continue; \ - } \ - res[i] = (cmp); \ - } \ +#define BinaryArithRangeJSONCompareNotEqual(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::post) { \ + offset = (offsets) ? 
offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = x.error() || (cmp); \ + continue; \ + } \ + res[i] = true; \ + continue; \ + } \ + res[i] = (cmp); \ + } \ } while (false) - auto execute_sub_batch = [op_type, arith_type](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val, - ValueType right_operand, - const std::string& pointer) { +#define BinaryArithRangeJONCompareArrayLength(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::post) { \ + offset = (offsets) ? offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + int array_length = 0; \ + auto doc = data[offset].doc(); \ + auto array = doc.at_pointer(pointer).get_array(); \ + if (!array.error()) { \ + array_length = array.count_elements(); \ + } \ + res[i] = (cmp); \ + } \ + } while (false) + + auto execute_sub_batch = + [ op_type, arith_type ]( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val, + ValueType right_operand, + const std::string& pointer) { switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { @@ -210,20 +247,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length == val; - } + BinaryArithRangeJONCompareArrayLength(array_length == + val); break; } default: @@ -264,20 +289,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length != val; - } + BinaryArithRangeJONCompareArrayLength(array_length != + val); break; } default: @@ -318,20 +331,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length > val; - } + BinaryArithRangeJONCompareArrayLength(array_length > + val); break; } default: @@ -372,20 +373,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && 
!valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length >= val; - } + BinaryArithRangeJONCompareArrayLength(array_length >= + val); break; } default: @@ -426,20 +415,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length < val; - } + BinaryArithRangeJONCompareArrayLength(array_length < + val); break; } default: @@ -480,20 +457,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = false; - valid_res[i] = false; - continue; - } - int array_length = 0; - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (!array.error()) { - array_length = array.count_elements(); - } - res[i] = array_length <= val; - } + BinaryArithRangeJONCompareArrayLength(array_length <= + val); break; } default: @@ -512,13 +477,25 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { op_type); } }; - int64_t processed_size = ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - value, - right_operand, - pointer); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + value, + right_operand, + pointer); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + value, + right_operand, + pointer); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -529,11 +506,13 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() { template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray( + OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -555,31 +534,53 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { ? GetValueFromProto(expr_->right_operand_) : ValueType(); -#define BinaryArithRangeArrayCompare(cmp) \ - do { \ - for (size_t i = 0; i < size; ++i) { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = false; \ - valid_res[i] = false; \ - continue; \ - } \ - if (index >= data[i].length()) { \ - res[i] = false; \ - continue; \ - } \ - auto value = data[i].get_data(index); \ - res[i] = (cmp); \ - } \ +#define BinaryArithRangeArrayCompare(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::post) { \ + offset = (offsets) ? 
offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = false; \ + valid_res[i] = false; \ + continue; \ + } \ + if (index >= data[offset].length()) { \ + res[i] = false; \ + continue; \ + } \ + auto value = data[offset].get_data(index); \ + res[i] = (cmp); \ + } \ + } while (false) + +#define BinaryArithRangeArrayLengthCompate(cmp) \ + do { \ + for (size_t i = 0; i < size; ++i) { \ + auto offset = i; \ + if constexpr (filter_type == FilterType::post) { \ + offset = (offsets) ? offsets[i] : i; \ + } \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = valid_res[i] = false; \ + continue; \ + } \ + res[i] = (cmp); \ + } \ } while (false) - auto execute_sub_batch = [op_type, arith_type](const ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val, - ValueType right_operand, - int index) { + auto execute_sub_batch = + [ op_type, arith_type ]( + const ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val, + ValueType right_operand, + int index) { switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { @@ -611,13 +612,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() == val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() == val); break; } default: @@ -658,13 +654,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() != val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() != val); break; } default: @@ -705,13 +696,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() > val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() > val); break; } default: @@ -752,13 +738,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() >= val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() >= val); break; } default: @@ -799,13 +780,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = data[i].length() < val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() < val); break; } default: @@ -846,13 +822,8 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { break; } case proto::plan::ArithOpType::ArrayLength: { - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; 
- } - res[i] = data[i].length() <= val; - } + BinaryArithRangeArrayLengthCompate( + data[offset].length() <= val); break; } default: @@ -872,14 +843,26 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { } }; - int64_t processed_size = - ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - value, - right_operand, - index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + value, + right_operand, + index); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + value, + right_operand, + index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -890,24 +873,26 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() { template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImpl() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImpl(OffsetVector* input) { if (is_index_mode_ && IndexHasRawData()) { - return ExecRangeVisitorImplForIndex(); + return ExecRangeVisitorImplForIndex(input); } else { - return ExecRangeVisitorImplForData(); + return ExecRangeVisitorImplForData(input); } } template VectorPtr -PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex( + OffsetVector* input) { using Index = index::ScalarIndex; typedef std::conditional_t && !std::is_same_v, int64_t, T> HighPrecisionType; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -916,12 +901,15 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { GetValueFromProto(expr_->right_operand_); auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; - auto sub_batch_size = size_per_chunk_; + auto sub_batch_size = has_offset_input_ ? 
input->size() : size_per_chunk_; - auto execute_sub_batch = [op_type, arith_type, sub_batch_size]( - Index* index_ptr, - HighPrecisionType value, - HighPrecisionType right_operand) { + auto execute_sub_batch = + [ op_type, arith_type, + sub_batch_size ]( + Index * index_ptr, + HighPrecisionType value, + HighPrecisionType right_operand, + const int64_t* offsets = nullptr) { TargetBitmap res; switch (op_type) { case proto::plan::OpType::Equal: { @@ -929,46 +917,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -985,46 +993,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1041,46 +1069,66 @@ 
PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1097,46 +1145,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1153,46 +1221,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, 
sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1209,46 +1297,66 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { case proto::plan::ArithOpType::Add: { ArithOpIndexFunc + proto::plan::ArithOpType::Add, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Sub: { ArithOpIndexFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mul: { ArithOpIndexFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Div: { ArithOpIndexFunc + proto::plan::ArithOpType::Div, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } case proto::plan::ArithOpType::Mod: { ArithOpIndexFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - res = std::move(func( - index_ptr, sub_batch_size, value, right_operand)); + res = std::move(func(index_ptr, + sub_batch_size, + value, + right_operand, + offsets)); break; } default: @@ -1268,25 +1376,39 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() { } return res; }; - auto res = ProcessIndexChunks(execute_sub_batch, value, right_operand); - AssertInfo(res->size() == real_batch_size, - "internal error: expr processed rows {} not equal " - "expect batch size {}", - res->size(), - real_batch_size); - // return std::make_shared(std::move(res)); - return res; + if (has_offset_input_) { + auto res = ProcessIndexChunksByOffsets( + execute_sub_batch, input, value, right_operand); + + AssertInfo(res->size() == real_batch_size, + "internal error: expr processed rows {} not equal " + "expect batch size {}", + res->size(), + real_batch_size); + return res; + } else { + auto res = + ProcessIndexChunks(execute_sub_batch, value, right_operand); + AssertInfo(res->size() == real_batch_size, + "internal error: expr processed rows {} not equal " + "expect batch size {}", + res->size(), + real_batch_size); + return res; + } } template VectorPtr 
-PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { +PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData( + OffsetVector* input) { typedef std::conditional_t && !std::is_same_v, int64_t, T> HighPrecisionType; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -1302,55 +1424,63 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { auto op_type = expr_->op_type_; auto arith_type = expr_->arith_op_type_; - auto execute_sub_batch = [op_type, arith_type]( - const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - HighPrecisionType value, - HighPrecisionType right_operand) { + + auto execute_sub_batch = + [ op_type, arith_type ]( + const T* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + HighPrecisionType value, + HighPrecisionType right_operand) { switch (op_type) { case proto::plan::OpType::Equal: { switch (arith_type) { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1367,41 +1497,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1418,41 +1553,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case 
proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1469,41 +1609,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1520,41 +1665,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1571,41 
+1721,46 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { case proto::plan::ArithOpType::Add: { ArithOpElementFunc + proto::plan::ArithOpType::Add, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Sub: { ArithOpElementFunc + proto::plan::ArithOpType::Sub, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mul: { ArithOpElementFunc + proto::plan::ArithOpType::Mul, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Div: { ArithOpElementFunc + proto::plan::ArithOpType::Div, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } case proto::plan::ArithOpType::Mod: { ArithOpElementFunc + proto::plan::ArithOpType::Mod, + filter_type> func; - func(data, size, value, right_operand, res); + func(data, size, value, right_operand, res, offsets); break; } default: @@ -1628,18 +1783,33 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() { // but to mask res with valid_data after the batch operation. if (valid_data != nullptr) { for (int i = 0; i < size; i++) { - if (!valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (!valid_data[offset]) { res[i] = valid_res[i] = false; } } } }; - int64_t processed_size = ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - value, - right_operand); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + value, + right_operand); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + value, + right_operand); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h index 5eef111438591..49255e066e34d 100644 --- a/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.h @@ -88,7 +88,8 @@ struct ArithOpHelper { template + proto::plan::ArithOpType arith_op, + FilterType filter_type = FilterType::pre> struct ArithOpElementFunc { typedef std::conditional_t && !std::is_same_v, @@ -100,145 +101,147 @@ struct ArithOpElementFunc { size_t size, HighPrecisonType val, HighPrecisonType right_operand, - TargetBitmapView res) { - /* + TargetBitmapView res, + const int64_t* offsets = nullptr) { // This is the original code, kept here for the documentation purposes - for (int i = 0; i < size; ++i) { - if constexpr (cmp_op == proto::plan::OpType::Equal) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) == val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) == val; - } else if 
constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) == val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::NotEqual) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) != val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) != val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) > val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) > val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) >= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) >= val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::LessThan) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) < val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) < val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); - } - } else if constexpr (cmp_op == proto::plan::OpType::LessEqual) { - if constexpr (arith_op == proto::plan::ArithOpType::Add) { - res[i] = (src[i] + right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Sub) { - res[i] = (src[i] - right_operand) <= 
val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mul) { - res[i] = (src[i] * right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Div) { - res[i] = (src[i] / right_operand) <= val; - } else if constexpr (arith_op == - proto::plan::ArithOpType::Mod) { - res[i] = (fmod(src[i], right_operand)) <= val; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format( - "unsupported arith type:{} for ArithOpElementFunc", - arith_op)); + // and also this code will be used for post filter since post filter does not execute as a batch manner + if constexpr (filter_type == FilterType::post) { + for (int i = 0; i < size; ++i) { + auto offset = (offsets) ? offsets[i] : i; + if constexpr (cmp_op == proto::plan::OpType::Equal) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) == val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) == val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == proto::plan::OpType::NotEqual) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) != val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) != val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == + proto::plan::OpType::GreaterThan) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) > val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) > val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == + proto::plan::OpType::GreaterEqual) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = 
(src[offset] / right_operand) >= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) >= val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == proto::plan::OpType::LessThan) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) < val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) < val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } + } else if constexpr (cmp_op == proto::plan::OpType::LessEqual) { + if constexpr (arith_op == proto::plan::ArithOpType::Add) { + res[i] = (src[offset] + right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Sub) { + res[i] = (src[offset] - right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mul) { + res[i] = (src[offset] * right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Div) { + res[i] = (src[offset] / right_operand) <= val; + } else if constexpr (arith_op == + proto::plan::ArithOpType::Mod) { + res[i] = (fmod(src[offset], right_operand)) <= val; + } else { + PanicInfo(OpTypeInvalid, + fmt::format("unsupported arith type:{} for " + "ArithOpElementFunc", + arith_op)); + } } } + return; } - */ + + // more efficient SIMD version if constexpr (!std::is_same_v::op), void>) { constexpr auto cmp_op_cvt = CmpOpHelper::op; @@ -266,7 +269,8 @@ struct ArithOpElementFunc { template + proto::plan::ArithOpType arith_op, + FilterType filter_type> struct ArithOpIndexFunc { typedef std::conditional_t && !std::is_same_v, @@ -278,10 +282,15 @@ struct ArithOpIndexFunc { operator()(Index* index, size_t size, HighPrecisonType val, - HighPrecisonType right_operand) { + HighPrecisonType right_operand, + const int64_t* offsets = nullptr) { TargetBitmap res(size); for (size_t i = 0; i < size; ++i) { - auto raw = index->Reverse_Lookup(i); + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + auto raw = index->Reverse_Lookup(offset); if (!raw.has_value()) { res[i] = false; continue; @@ -449,23 +458,23 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr { private: template VectorPtr - ExecRangeVisitorImpl(); + ExecRangeVisitorImpl(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForIndex(); + ExecRangeVisitorImplForIndex(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForData(); + ExecRangeVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForJson(); + ExecRangeVisitorImplForJson(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForArray(); + ExecRangeVisitorImplForArray(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index e22208a7dd6c3..c2d67bd0f270f 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -24,33 +24,35 @@ namespace exec { void PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT8: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT16: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT32: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT64: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -58,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } else { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } break; } @@ -68,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { auto value_type = expr_->lower_val_.val_case(); switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } case proto::plan::GenericValue::ValCase::kStringVal: { - result = ExecRangeVisitorImplForJson(); + result = ExecRangeVisitorImplForJson(input); break; } default: { @@ -93,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } case 
proto::plan::GenericValue::ValCase::kStringVal: { SetNotUseIndex(); - result = ExecRangeVisitorImplForArray(); + result = ExecRangeVisitorImplForArray(input); break; } default: { @@ -124,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl() { - if (is_index_mode_) { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) { + if (is_index_mode_ && !has_offset_input_) { return ExecRangeVisitorImplForIndex(); } else { - return ExecRangeVisitorImplForData(); + return ExecRangeVisitorImplForData(input); } } @@ -137,17 +139,28 @@ ColumnVectorPtr PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, HighPrecisionType& val2, bool& lower_inclusive, - bool& upper_inclusive) { + bool& upper_inclusive, + OffsetVector* input) { lower_inclusive = expr_->lower_inclusive_; upper_inclusive = expr_->upper_inclusive_; val1 = GetValueFromProto(expr_->lower_val_); val2 = GetValueFromProto(expr_->upper_val_); - auto get_next_overflow_batch = [this]() -> ColumnVectorPtr { - int64_t batch_size = overflow_check_pos_ + batch_size_ >= active_count_ - ? active_count_ - overflow_check_pos_ - : batch_size_; - overflow_check_pos_ += batch_size; - auto valid_res = ProcessChunksForValid(is_index_mode_); + + auto get_next_overflow_batch = + [this](OffsetVector* input) -> ColumnVectorPtr { + int64_t batch_size; + if (input != nullptr) { + batch_size = input->size(); + } else { + batch_size = overflow_check_pos_ + batch_size_ >= active_count_ + ? active_count_ - overflow_check_pos_ + : batch_size_; + overflow_check_pos_ += batch_size; + } + auto valid_res = + (input != nullptr) + ? ProcessChunksForValidByOffsets(is_index_mode_, *input) + : ProcessChunksForValid(is_index_mode_); auto res_vec = std::make_shared(TargetBitmap(batch_size), std::move(valid_res)); return res_vec; @@ -155,7 +168,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, if constexpr (std::is_integral_v && !std::is_same_v) { if (milvus::query::gt_ub(val1)) { - return get_next_overflow_batch(); + return get_next_overflow_batch(input); } else if (milvus::query::lt_lb(val1)) { val1 = std::numeric_limits::min(); lower_inclusive = true; @@ -165,7 +178,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, val2 = std::numeric_limits::max(); upper_inclusive = true; } else if (milvus::query::lt_lb(val2)) { - return get_next_overflow_batch(); + return get_next_overflow_batch(input); } } return nullptr; @@ -216,7 +229,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { typedef std:: conditional_t, std::string, T> IndexInnerType; @@ -226,57 +239,67 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { int64_t, IndexInnerType> HighPrecisionType; - auto real_batch_size = GetNextBatchSize(); - if (real_batch_size == 0) { - return nullptr; - } HighPrecisionType val1; HighPrecisionType val2; bool lower_inclusive = false; bool upper_inclusive = false; - if (auto res = - PreCheckOverflow(val1, val2, lower_inclusive, upper_inclusive)) { + if (auto res = PreCheckOverflow( + val1, val2, lower_inclusive, upper_inclusive, input)) { return res; } + + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } auto res_vec = std::make_shared( TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); TargetBitmapView res(res_vec->GetRawData(), real_batch_size); TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); - auto execute_sub_batch = [lower_inclusive, upper_inclusive]( - const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - HighPrecisionType val1, - HighPrecisionType val2) { + auto execute_sub_batch = + [ lower_inclusive, + upper_inclusive ]( + const T* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + HighPrecisionType val1, + HighPrecisionType val2) { if (lower_inclusive && upper_inclusive) { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } else if (lower_inclusive && !upper_inclusive) { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } else if (!lower_inclusive && upper_inclusive) { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } else { - BinaryRangeElementFunc func; - func(val1, val2, data, size, res); + BinaryRangeElementFunc func; + func(val1, val2, data, size, res, offsets); } // there is a batch operation in BinaryRangeElementFunc, // so not divide data again for the reason that it may reduce performance if the null distribution is scattered // but to mask res with valid_data after the batch operation. if (valid_data != nullptr) { for (int i = 0; i < size; i++) { - if (!valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (!valid_data[offset]) { res[i] = valid_res[i] = false; } } } }; + auto skip_index_func = [val1, val2, lower_inclusive, upper_inclusive]( const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) { @@ -294,8 +317,19 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { field_id, chunk_id, val1, val2, false, false); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, skip_index_func, res, valid_res, val1, val2); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + skip_index_func, + input, + res, + valid_res, + val1, + val2); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, skip_index_func, res, valid_res, val1, val2); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -306,11 +340,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -326,30 +361,81 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { ValueType val2 = GetValueFromProto(expr_->upper_val_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer]( - const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val1, - ValueType val2) { + auto execute_sub_batch = + [ lower_inclusive, upper_inclusive, + pointer ]( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val1, + ValueType val2) { if (lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); } else if (lower_inclusive && !upper_inclusive) { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); + } else if (!lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); } else { - BinaryRangeElementFuncForJson func; - func(val1, val2, pointer, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForJson + func; + func(val1, + val2, + pointer, + data, + valid_data, + size, + res, + valid_res, + offsets); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + val1, + val2); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -360,11 +446,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { template VectorPtr -PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() { +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -383,31 +470,90 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() { index = std::stoi(expr_->column_.nested_path_[0]); } - auto execute_sub_batch = [lower_inclusive, upper_inclusive]( - const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val1, - ValueType val2, - int index) { + auto execute_sub_batch = + [ lower_inclusive, + upper_inclusive ]( + const milvus::ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val1, + ValueType val2, + int index) { if (lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); } else if (lower_inclusive && !upper_inclusive) { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); + } else if (!lower_inclusive && upper_inclusive) { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); + } else { - BinaryRangeElementFuncForArray func; - func(val1, val2, index, data, valid_data, size, res, valid_res); + BinaryRangeElementFuncForArray + func; + func(val1, + val2, + index, + data, + valid_data, + size, + res, + valid_res, + offsets); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + val1, + val2, + index); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + val1, + val2, + index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.h b/internal/core/src/exec/expression/BinaryRangeExpr.h index 8f23d32a5682d..05bd2f54682b1 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryRangeExpr.h @@ -27,7 +27,10 @@ namespace milvus { namespace exec { -template +template struct BinaryRangeElementFunc { typedef std::conditional_t && !std::is_same_v, @@ -35,7 +38,28 @@ struct BinaryRangeElementFunc { T> HighPrecisionType; void - operator()(T val1, T val2, const T* src, size_t n, TargetBitmapView res) { + operator()(T val1, + T val2, + const T* src, + size_t n, + TargetBitmapView res, + const int64_t* offsets = nullptr) { + if constexpr (filter_type == FilterType::post) { + for (size_t i = 0; i < n; ++i) { + auto offset = (offsets) ? 
offsets[i] : i; + if constexpr (lower_inclusive && upper_inclusive) { + res[i] = val1 <= src[offset] && src[offset] <= val2; + } else if constexpr (lower_inclusive && !upper_inclusive) { + res[i] = val1 <= src[offset] && src[offset] < val2; + } else if constexpr (!lower_inclusive && upper_inclusive) { + res[i] = val1 < src[offset] && src[offset] <= val2; + } else { + res[i] = val1 < src[offset] && src[offset] < val2; + } + } + return; + } + if constexpr (lower_inclusive && upper_inclusive) { res.inplace_within_range_val( val1, val2, src, n); @@ -52,30 +76,33 @@ struct BinaryRangeElementFunc { } }; -#define BinaryRangeJSONCompare(cmp) \ - do { \ - if (valid_data != nullptr && !valid_data[i]) { \ - res[i] = valid_res[i] = false; \ - break; \ - } \ - auto x = src[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = src[i].template at(pointer); \ - if (!x.error()) { \ - auto value = x.value(); \ - res[i] = (cmp); \ - break; \ - } \ - } \ - res[i] = false; \ - break; \ - } \ - auto value = x.value(); \ - res[i] = (cmp); \ +#define BinaryRangeJSONCompare(cmp) \ + do { \ + if (valid_data != nullptr && !valid_data[offset]) { \ + res[i] = valid_res[i] = false; \ + break; \ + } \ + auto x = src[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = src[offset].template at(pointer); \ + if (!x.error()) { \ + auto value = x.value(); \ + res[i] = (cmp); \ + break; \ + } \ + } \ + res[i] = false; \ + break; \ + } \ + auto value = x.value(); \ + res[i] = (cmp); \ } while (false) -template +template struct BinaryRangeElementFuncForJson { using GetType = std::conditional_t, std::string_view, @@ -88,8 +115,13 @@ struct BinaryRangeElementFuncForJson { const bool* valid_data, size_t n, TargetBitmapView res, - TargetBitmapView valid_res) { + TargetBitmapView valid_res, + const int64_t* offsets = nullptr) { for (size_t i = 0; i < n; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } if constexpr (lower_inclusive && upper_inclusive) { BinaryRangeJSONCompare(val1 <= value && value <= val2); } else if constexpr (lower_inclusive && !upper_inclusive) { @@ -103,7 +135,10 @@ struct BinaryRangeElementFuncForJson { } }; -template +template struct BinaryRangeElementFuncForArray { using GetType = std::conditional_t, std::string_view, @@ -116,39 +151,44 @@ struct BinaryRangeElementFuncForArray { const bool* valid_data, size_t n, TargetBitmapView res, - TargetBitmapView valid_res) { + TargetBitmapView valid_res, + const int64_t* offsets = nullptr) { for (size_t i = 0; i < n; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + size_t offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
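The BinaryRangeElementFunc family above keeps the SIMD inplace_within_range_val path for the pre-filter case and falls back to a scalar per-offset loop when an offsets list is supplied. A simplified standalone sketch of that post-filter loop, with an assumed name (range_filter_by_offsets) and std::vector<bool> standing in for TargetBitmapView:

#include <cstdint>
#include <vector>

// Scalar range check over a list of candidate offsets; inclusivity is decided
// at compile time, matching how the real functors are instantiated per bound.
template <typename T, bool lower_inclusive, bool upper_inclusive>
void
range_filter_by_offsets(T val1,
                        T val2,
                        const T* src,
                        const std::vector<int64_t>& offsets,
                        std::vector<bool>& res) {
    res.assign(offsets.size(), false);
    for (size_t i = 0; i < offsets.size(); ++i) {
        const T& v = src[offsets[i]];
        bool lower_ok = lower_inclusive ? (val1 <= v) : (val1 < v);
        bool upper_ok = upper_inclusive ? (v <= val2) : (v < val2);
        res[i] = lower_ok && upper_ok;
    }
}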
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } if constexpr (lower_inclusive && upper_inclusive) { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 <= value && value <= val2; } else if constexpr (lower_inclusive && !upper_inclusive) { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 <= value && value < val2; } else if constexpr (!lower_inclusive && upper_inclusive) { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 < value && value <= val2; } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto value = src[i].get_data(index); + auto value = src[offset].get_data(index); res[i] = val1 < value && value < val2; } } @@ -211,11 +251,12 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { PreCheckOverflow(HighPrecisionType& val1, HighPrecisionType& val2, bool& lower_inclusive, - bool& upper_inclusive); + bool& upper_inclusive, + OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImpl(); + ExecRangeVisitorImpl(OffsetVector* input = nullptr); template VectorPtr @@ -223,15 +264,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { template VectorPtr - ExecRangeVisitorImplForData(); + ExecRangeVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForJson(); + ExecRangeVisitorImplForJson(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplForArray(); + ExecRangeVisitorImplForArray(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/ColumnExpr.cpp b/internal/core/src/exec/expression/ColumnExpr.cpp index ca83a91df0245..5bcef2cec04ff 100644 --- a/internal/core/src/exec/expression/ColumnExpr.cpp +++ b/internal/core/src/exec/expression/ColumnExpr.cpp @@ -30,30 +30,32 @@ PhyColumnExpr::GetNextBatchSize() { void PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput(input != nullptr); switch (this->expr_->type()) { case DataType::BOOL: - result = DoEval(); + result = DoEval(input); break; case DataType::INT8: - result = DoEval(); + result = DoEval(input); break; case DataType::INT16: - result = DoEval(); + result = DoEval(input); break; case DataType::INT32: - result = DoEval(); + result = DoEval(input); break; case DataType::INT64: - result = DoEval(); + result = DoEval(input); break; case DataType::FLOAT: - result = DoEval(); + result = DoEval(input); break; case DataType::DOUBLE: - result = DoEval(); + result = DoEval(input); break; case DataType::VARCHAR: { - result = DoEval(); + result = DoEval(input); break; } default: @@ -65,8 +67,58 @@ PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyColumnExpr::DoEval() { +PhyColumnExpr::DoEval(OffsetVector* input) { // similar to PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) + // take offsets as input + if (has_offset_input_) { + auto real_batch_size = input->size(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = 
std::make_shared( + expr_->GetColumn().data_type_, real_batch_size); + T* res_value = res_vec->RawAsValues(); + TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); + valid_res.set(); + + auto data_barrier = segment_chunk_reader_.segment_->num_chunk_data( + expr_->GetColumn().field_id_); + + int64_t processed_rows = 0; + const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); + for (auto i = 0; i < real_batch_size; ++i) { + auto offset = (*input)[i]; + auto [chunk_id, + chunk_offset] = [&]() -> std::pair { + if (segment_chunk_reader_.segment_->type() == + SegmentType::Growing) { + return {offset / size_per_chunk, offset % size_per_chunk}; + } else if (segment_chunk_reader_.segment_->is_chunked()) { + return segment_chunk_reader_.segment_->get_chunk_by_offset( + expr_->GetColumn().field_id_, offset); + } else { + return {0, offset}; + } + }(); + auto chunk_data = segment_chunk_reader_.GetChunkDataAccessor( + expr_->GetColumn().data_type_, + expr_->GetColumn().field_id_, + chunk_id, + data_barrier); + auto chunk_data_by_offset = chunk_data(chunk_offset); + if (!chunk_data_by_offset.has_value()) { + valid_res[processed_rows] = false; + } else { + res_value[processed_rows] = + boost::get(chunk_data_by_offset.value()); + } + processed_rows++; + } + return res_vec; + } + + // normal path if (segment_chunk_reader_.segment_->is_chunked()) { auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { diff --git a/internal/core/src/exec/expression/ColumnExpr.h b/internal/core/src/exec/expression/ColumnExpr.h index 4b8bdfd93662b..a8a543c986c08 100644 --- a/internal/core/src/exec/expression/ColumnExpr.h +++ b/internal/core/src/exec/expression/ColumnExpr.h @@ -107,7 +107,7 @@ class PhyColumnExpr : public Expr { template VectorPtr - DoEval(); + DoEval(OffsetVector* input = nullptr); private: bool is_indexed_; diff --git a/internal/core/src/exec/expression/CompareExpr.cpp b/internal/core/src/exec/expression/CompareExpr.cpp index 7044f5917f11f..883ac10a537df 100644 --- a/internal/core/src/exec/expression/CompareExpr.cpp +++ b/internal/core/src/exec/expression/CompareExpr.cpp @@ -38,7 +38,70 @@ PhyCompareFilterExpr::GetNextBatchSize() { template VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { +PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op, + OffsetVector* input) { + // take offsets as input + if (has_offset_input_) { + auto real_batch_size = input->size(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = std::make_shared( + TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); + TargetBitmapView res(res_vec->GetRawData(), real_batch_size); + TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); + valid_res.set(); + + auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data( + expr_->left_field_id_); + auto right_data_barrier = + segment_chunk_reader_.segment_->num_chunk_data( + expr_->right_field_id_); + + int64_t processed_rows = 0; + const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); + for (auto i = 0; i < real_batch_size; ++i) { + auto offset = (*input)[i]; + auto [chunk_id, + chunk_offset] = [&]() -> std::pair { + if (segment_chunk_reader_.segment_->type() == + SegmentType::Growing) { + return {offset / size_per_chunk, offset % size_per_chunk}; + } else if (segment_chunk_reader_.segment_->is_chunked()) { + return segment_chunk_reader_.segment_->get_chunk_by_offset( + left_field_, offset); + } else { + return {0, offset}; + } + }(); + auto left = 
segment_chunk_reader_.GetChunkDataAccessor( + expr_->left_data_type_, + expr_->left_field_id_, + chunk_id, + left_data_barrier); + auto right = segment_chunk_reader_.GetChunkDataAccessor( + expr_->right_data_type_, + expr_->right_field_id_, + chunk_id, + right_data_barrier); + auto left_opt = left(chunk_offset); + auto right_opt = right(chunk_offset); + if (!left_opt.has_value() || !right_opt.has_value()) { + res[processed_rows] = false; + valid_res[processed_rows] = false; + } else { + res[processed_rows] = boost::apply_visitor( + milvus::query::Relational{}, + left_opt.value(), + right_opt.value()); + } + processed_rows++; + } + return res_vec; + } + + // normal path if (segment_chunk_reader_.segment_->is_chunked()) { auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { @@ -140,39 +203,42 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { void PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); // For segment both fields has no index, can use SIMD to speed up. // Avoiding too much call stack that blocks SIMD. if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) { - result = ExecCompareExprDispatcherForBothDataSegment(); + result = ExecCompareExprDispatcherForBothDataSegment(input); return; } - result = ExecCompareExprDispatcherForHybridSegment(); + result = ExecCompareExprDispatcherForHybridSegment(input); } VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() { +PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment( + OffsetVector* input) { switch (expr_->op_type_) { case OpType::Equal: { - return ExecCompareExprDispatcher(std::equal_to<>{}); + return ExecCompareExprDispatcher(std::equal_to<>{}, input); } case OpType::NotEqual: { - return ExecCompareExprDispatcher(std::not_equal_to<>{}); + return ExecCompareExprDispatcher(std::not_equal_to<>{}, input); } case OpType::GreaterEqual: { - return ExecCompareExprDispatcher(std::greater_equal<>{}); + return ExecCompareExprDispatcher(std::greater_equal<>{}, input); } case OpType::GreaterThan: { - return ExecCompareExprDispatcher(std::greater<>{}); + return ExecCompareExprDispatcher(std::greater<>{}, input); } case OpType::LessEqual: { - return ExecCompareExprDispatcher(std::less_equal<>{}); + return ExecCompareExprDispatcher(std::less_equal<>{}, input); } case OpType::LessThan: { - return ExecCompareExprDispatcher(std::less<>{}); + return ExecCompareExprDispatcher(std::less<>{}, input); } case OpType::PrefixMatch: { return ExecCompareExprDispatcher( - milvus::query::MatchOp{}); + milvus::query::MatchOp{}, input); } // case OpType::PostfixMatch: { // } @@ -183,22 +249,23 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() { } VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() { +PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment( + OffsetVector* input) { switch (expr_->left_data_type_) { case DataType::BOOL: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT8: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT16: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT32: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::INT64: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); case DataType::FLOAT: - return ExecCompareLeftType(); + return 
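Both the ColumnExpr and CompareExpr offset paths above resolve a global row offset into a (chunk_id, chunk_offset) pair the same way: growing segments use fixed-size chunks, chunked sealed segments ask the segment for the mapping, and non-chunked sealed segments treat everything as chunk 0. A small model of that decision, with assumed names (SegKind, locate_row) instead of the real segment interface:

#include <cstdint>
#include <utility>

enum class SegKind { Growing, ChunkedSealed, SingleChunkSealed };

// lookup stands in for segment->get_chunk_by_offset(field, offset)
std::pair<int64_t, int64_t>
locate_row(SegKind kind,
           int64_t offset,
           int64_t size_per_chunk,
           std::pair<int64_t, int64_t> (*lookup)(int64_t)) {
    switch (kind) {
        case SegKind::Growing:
            return {offset / size_per_chunk, offset % size_per_chunk};
        case SegKind::ChunkedSealed:
            return lookup(offset);
        default:
            // non-chunked sealed segment: a single chunk holds all rows
            return {0, offset};
    }
}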
ExecCompareLeftType(input); case DataType::DOUBLE: - return ExecCompareLeftType(); + return ExecCompareLeftType(input); default: PanicInfo( DataTypeInvalid, @@ -209,22 +276,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() { template VectorPtr -PhyCompareFilterExpr::ExecCompareLeftType() { +PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) { switch (expr_->right_data_type_) { case DataType::BOOL: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT8: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT16: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT32: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::INT64: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::FLOAT: - return ExecCompareRightType(); + return ExecCompareRightType(input); case DataType::DOUBLE: - return ExecCompareRightType(); + return ExecCompareRightType(input); default: PanicInfo( DataTypeInvalid, @@ -235,8 +302,9 @@ PhyCompareFilterExpr::ExecCompareLeftType() { template VectorPtr -PhyCompareFilterExpr::ExecCompareRightType() { - auto real_batch_size = GetNextBatchSize(); +PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -248,39 +316,47 @@ PhyCompareFilterExpr::ExecCompareRightType() { valid_res.set(); auto expr_type = expr_->op_type_; - auto execute_sub_batch = [expr_type](const T* left, - const U* right, - const int size, - TargetBitmapView res) { + auto execute_sub_batch = [expr_type]( + const T* left, + const U* right, + const int64_t* offsets, + const int size, + TargetBitmapView res) { switch (expr_type) { case proto::plan::GreaterThan: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::GreaterEqual: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::LessThan: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::LessEqual: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } case proto::plan::Equal: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc func; + func(left, right, size, res, offsets); break; } case proto::plan::NotEqual: { - CompareElementFunc func; - func(left, right, size, res); + CompareElementFunc + func; + func(left, right, size, res, offsets); break; } default: @@ -290,8 +366,14 @@ PhyCompareFilterExpr::ExecCompareRightType() { expr_type)); } }; - int64_t processed_size = - ProcessBothDataChunks(execute_sub_batch, res, valid_res); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessBothDataByOffsets( + execute_sub_batch, input, res, valid_res); + } else { + processed_size = ProcessBothDataChunks( + execute_sub_batch, input, res, valid_res); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/CompareExpr.h b/internal/core/src/exec/expression/CompareExpr.h 
index b8815357441a3..c67aec0d89dfd 100644 --- a/internal/core/src/exec/expression/CompareExpr.h +++ b/internal/core/src/exec/expression/CompareExpr.h @@ -30,36 +30,44 @@ namespace milvus { namespace exec { -template +template struct CompareElementFunc { void operator()(const T* left, const U* right, size_t size, - TargetBitmapView res) { - /* + TargetBitmapView res, + const int64_t* offsets = nullptr) { // This is the original code, kept here for the documentation purposes - for (int i = 0; i < size; ++i) { - if constexpr (op == proto::plan::OpType::Equal) { - res[i] = left[i] == right[i]; - } else if constexpr (op == proto::plan::OpType::NotEqual) { - res[i] = left[i] != right[i]; - } else if constexpr (op == proto::plan::OpType::GreaterThan) { - res[i] = left[i] > right[i]; - } else if constexpr (op == proto::plan::OpType::LessThan) { - res[i] = left[i] < right[i]; - } else if constexpr (op == proto::plan::OpType::GreaterEqual) { - res[i] = left[i] >= right[i]; - } else if constexpr (op == proto::plan::OpType::LessEqual) { - res[i] = left[i] <= right[i]; - } else { - PanicInfo( - OpTypeInvalid, - fmt::format("unsupported op_type:{} for CompareElementFunc", - op)); + // also, used for post filter + if constexpr (filter_type == FilterType::post) { + for (int i = 0; i < size; ++i) { + auto offset = (offsets != nullptr) ? offsets[i] : i; + if constexpr (op == proto::plan::OpType::Equal) { + res[i] = left[offset] == right[offset]; + } else if constexpr (op == proto::plan::OpType::NotEqual) { + res[i] = left[offset] != right[offset]; + } else if constexpr (op == proto::plan::OpType::GreaterThan) { + res[i] = left[offset] > right[offset]; + } else if constexpr (op == proto::plan::OpType::LessThan) { + res[i] = left[offset] < right[offset]; + } else if constexpr (op == proto::plan::OpType::GreaterEqual) { + res[i] = left[offset] >= right[offset]; + } else if constexpr (op == proto::plan::OpType::LessEqual) { + res[i] = left[offset] <= right[offset]; + } else { + PanicInfo( + OpTypeInvalid, + fmt::format( + "unsupported op_type:{} for CompareElementFunc", + op)); + } } + return; } - */ if constexpr (op == proto::plan::OpType::Equal) { res.inplace_compare_column( @@ -138,22 +146,27 @@ class PhyCompareFilterExpr : public Expr { void MoveCursor() override { - if (segment_chunk_reader_.segment_->is_chunked()) { - segment_chunk_reader_.MoveCursorForMultipleChunk( - left_current_chunk_id_, - left_current_chunk_pos_, - left_field_, - left_num_chunk_, - batch_size_); - segment_chunk_reader_.MoveCursorForMultipleChunk( - right_current_chunk_id_, - right_current_chunk_pos_, - right_field_, - right_num_chunk_, - batch_size_); - } else { - segment_chunk_reader_.MoveCursorForSingleChunk( - current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_); + if (!has_offset_input_) { + if (segment_chunk_reader_.segment_->is_chunked()) { + segment_chunk_reader_.MoveCursorForMultipleChunk( + left_current_chunk_id_, + left_current_chunk_pos_, + left_field_, + left_num_chunk_, + batch_size_); + segment_chunk_reader_.MoveCursorForMultipleChunk( + right_current_chunk_id_, + right_current_chunk_pos_, + right_field_, + right_num_chunk_, + batch_size_); + } else { + segment_chunk_reader_.MoveCursorForSingleChunk( + current_chunk_id_, + current_chunk_pos_, + num_chunk_, + batch_size_); + } } } @@ -188,6 +201,7 @@ class PhyCompareFilterExpr : public Expr { template int64_t ProcessBothDataChunks(FUNC func, + OffsetVector* input, TargetBitmapView res, TargetBitmapView valid_res, ValTypes... 
values) { @@ -203,6 +217,90 @@ class PhyCompareFilterExpr : public Expr { } } + template + int64_t + ProcessBothDataByOffsets(FUNC func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... values) { + int64_t size = input->size(); + int64_t processed_size = 0; + const auto size_per_chunk = segment_chunk_reader_.SizePerChunk(); + if (segment_chunk_reader_.segment_->is_chunked() || + segment_chunk_reader_.segment_->type() == SegmentType::Growing) { + for (auto i = 0; i < size; ++i) { + auto offset = (*input)[i]; + auto [chunk_id, + chunk_offset] = [&]() -> std::pair { + if (segment_chunk_reader_.segment_->type() == + SegmentType::Growing) { + auto size_per_chunk = + segment_chunk_reader_.SizePerChunk(); + return {offset / size_per_chunk, + offset % size_per_chunk}; + } else { + return segment_chunk_reader_.segment_ + ->get_chunk_by_offset(right_field_, offset); + } + }(); + auto left_chunk = segment_chunk_reader_.segment_->chunk_data( + left_field_, chunk_id); + + auto right_chunk = + segment_chunk_reader_.segment_->chunk_data(right_field_, + chunk_id); + const T* left_data = left_chunk.data() + chunk_offset; + const U* right_data = right_chunk.data() + chunk_offset; + func.template operator()(left_data, + right_data, + nullptr, + 1, + res + processed_size, + values...); + const bool* left_valid_data = left_chunk.valid_data(); + const bool* right_valid_data = right_chunk.valid_data(); + // mask with valid_data + if (left_valid_data && !left_valid_data[chunk_offset]) { + res[processed_size] = false; + valid_res[processed_size] = false; + continue; + } + if (right_valid_data && !right_valid_data[chunk_offset]) { + res[processed_size] = false; + valid_res[processed_size] = false; + } + processed_size++; + } + return processed_size; + } else { + auto left_chunk = + segment_chunk_reader_.segment_->chunk_data(left_field_, 0); + auto right_chunk = + segment_chunk_reader_.segment_->chunk_data(right_field_, 0); + const T* left_data = left_chunk.data(); + const U* right_data = right_chunk.data(); + func.template operator()( + left_data, right_data, input->data(), size, res, values...); + const bool* left_valid_data = left_chunk.valid_data(); + const bool* right_valid_data = right_chunk.valid_data(); + // mask with valid_data + for (int i = 0; i < size; ++i) { + if (left_valid_data && !left_valid_data[(*input)[i]]) { + res[i] = false; + valid_res[i] = false; + continue; + } + if (right_valid_data && !right_valid_data[(*input)[i]]) { + res[i] = false; + valid_res[i] = false; + } + } + processed_size += size; + return processed_size; + } + } + template int64_t ProcessBothDataChunksForSingleChunk(FUNC func, @@ -239,7 +337,12 @@ class PhyCompareFilterExpr : public Expr { const T* left_data = left_chunk.data() + data_pos; const U* right_data = right_chunk.data() + data_pos; - func(left_data, right_data, size, res + processed_size, values...); + func(left_data, + right_data, + nullptr, + size, + res + processed_size, + values...); const bool* left_valid_data = left_chunk.valid_data(); const bool* right_valid_data = right_chunk.valid_data(); // mask with valid_data @@ -307,7 +410,12 @@ class PhyCompareFilterExpr : public Expr { const T* left_data = left_chunk.data() + data_pos; const U* right_data = right_chunk.data() + data_pos; - func(left_data, right_data, size, res + processed_size, values...); + func(left_data, + right_data, + nullptr, + size, + res + processed_size, + values...); const bool* left_valid_data = left_chunk.valid_data(); const bool* right_valid_data = 
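ProcessBothDataByOffsets above evaluates a two-column predicate only at the requested offsets and masks the result with both columns' validity data. A simplified sketch of that flow over flat arrays (hypothetical name compare_by_offsets, with Equal as the sample operator):

#include <cstdint>
#include <vector>

// Compare left/right at each candidate offset; clear both the result bit and
// the validity bit whenever either side is null.
std::vector<bool>
compare_by_offsets(const int64_t* left,
                   const int64_t* right,
                   const bool* left_valid,   // may be nullptr => all valid
                   const bool* right_valid,  // may be nullptr => all valid
                   const std::vector<int64_t>& offsets,
                   std::vector<bool>& valid_res) {
    std::vector<bool> res(offsets.size(), false);
    valid_res.assign(offsets.size(), true);
    for (size_t i = 0; i < offsets.size(); ++i) {
        auto off = offsets[i];
        if ((left_valid && !left_valid[off]) ||
            (right_valid && !right_valid[off])) {
            res[i] = false;
            valid_res[i] = false;
            continue;
        }
        res[i] = left[off] == right[off];
    }
    return res;
}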
right_chunk.valid_data(); // mask with valid_data @@ -336,21 +444,21 @@ class PhyCompareFilterExpr : public Expr { template VectorPtr - ExecCompareExprDispatcher(OpType op); + ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr); VectorPtr - ExecCompareExprDispatcherForHybridSegment(); + ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr); VectorPtr - ExecCompareExprDispatcherForBothDataSegment(); + ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr); template VectorPtr - ExecCompareLeftType(); + ExecCompareLeftType(OffsetVector* input = nullptr); template VectorPtr - ExecCompareRightType(); + ExecCompareRightType(OffsetVector* input = nullptr); private: const FieldId left_field_; diff --git a/internal/core/src/exec/expression/EvalCtx.h b/internal/core/src/exec/expression/EvalCtx.h index c7cac949694ac..82c5b7a18ff51 100644 --- a/internal/core/src/exec/expression/EvalCtx.h +++ b/internal/core/src/exec/expression/EvalCtx.h @@ -28,17 +28,26 @@ namespace milvus { namespace exec { class ExprSet; + +using OffsetVector = FixedVector; class EvalCtx { public: - EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set, RowVector* row) - : exec_ctx_(exec_ctx), expr_set_(expr_set), row_(row) { + EvalCtx(ExecContext* exec_ctx, + ExprSet* expr_set, + OffsetVector* offset_input) + : exec_ctx_(exec_ctx), + expr_set_(expr_set), + offset_input_(offset_input) { assert(exec_ctx_ != nullptr); assert(expr_set_ != nullptr); - // assert(row_ != nullptr); + } + + explicit EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set) + : exec_ctx_(exec_ctx), expr_set_(expr_set), offset_input_(nullptr) { } explicit EvalCtx(ExecContext* exec_ctx) - : exec_ctx_(exec_ctx), expr_set_(nullptr), row_(nullptr) { + : exec_ctx_(exec_ctx), expr_set_(nullptr), offset_input_(nullptr) { } ExecContext* @@ -51,11 +60,22 @@ class EvalCtx { return exec_ctx_->get_query_config(); } + inline OffsetVector* + get_offset_input() { + return offset_input_; + } + + inline void + set_offset_input(OffsetVector* offset_input) { + offset_input_ = offset_input; + } + private: - ExecContext* exec_ctx_; - ExprSet* expr_set_; - RowVector* row_; - bool input_no_nulls_; + ExecContext* exec_ctx_ = nullptr; + ExprSet* expr_set_ = nullptr; + // we may accept offsets(int64 array) as input and do expr filtering on these data + OffsetVector* offset_input_ = nullptr; + bool input_no_nulls_ = false; }; } // namespace exec diff --git a/internal/core/src/exec/expression/ExistsExpr.cpp b/internal/core/src/exec/expression/ExistsExpr.cpp index c73b4e007dc38..ff83b8f8a202d 100644 --- a/internal/core/src/exec/expression/ExistsExpr.cpp +++ b/internal/core/src/exec/expression/ExistsExpr.cpp @@ -22,13 +22,15 @@ namespace exec { void PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::JSON: { if (is_index_mode_) { PanicInfo(ExprInvalid, "exists expr for json index mode not supported"); } - result = EvalJsonExistsForDataSegment(); + result = EvalJsonExistsForDataSegment(input); break; } default: @@ -39,8 +41,9 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { } VectorPtr -PhyExistsFilterExpr::EvalJsonExistsForDataSegment() { - auto real_batch_size = GetNextBatchSize(); +PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -51,23 +54,39 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment() { valid_res.set(); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - auto execute_sub_batch = [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer) { + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer) { for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = data[i].exist(pointer); + res[i] = data[offset].exist(pointer); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/ExistsExpr.h b/internal/core/src/exec/expression/ExistsExpr.h index 2b24108531575..dc00f883c7400 100644 --- a/internal/core/src/exec/expression/ExistsExpr.h +++ b/internal/core/src/exec/expression/ExistsExpr.h @@ -57,7 +57,7 @@ class PhyExistsFilterExpr : public SegmentExpr { private: VectorPtr - EvalJsonExistsForDataSegment(); + EvalJsonExistsForDataSegment(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 195e3777a9ebc..b1f19a4be927c 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -31,6 +31,13 @@ namespace milvus { namespace exec { +enum class FilterType { + pre = + 0, // pre filter takes whole segment as input and perform scalar filtering at whole data + post = + 1 // post filter takes offsets as input and perform scalar filtering at these offsets +}; + class Expr { public: Expr(DataType type, @@ -73,12 +80,21 @@ class Expr { MoveCursor() { } + void + SetHasOffsetInput(bool has_offset_input) { + has_offset_input_ = has_offset_input; + } + protected: DataType type_; const std::vector> inputs_; std::string name_; // NOTE: unused std::shared_ptr vector_func_; + + // whether we have offset input and do expr filtering on these data + // default is false which means we will do expr filtering on the total segment data + bool has_offset_input_ = false; }; using ExprPtr = std::shared_ptr; @@ -204,13 +220,16 @@ class SegmentExpr : public Expr { void MoveCursor() override { - if (is_index_mode_) { - MoveCursorForIndex(); - if (segment_->HasFieldData(field_id_)) { + // when we specify input, do not maintain states + if (!has_offset_input_) { + if (is_index_mode_) { + MoveCursorForIndex(); + if (segment_->HasFieldData(field_id_)) { + MoveCursorForData(); + } + } else { MoveCursorForData(); } - } else { - MoveCursorForData(); } } @@ -261,6 +280,7 @@ class SegmentExpr : public 
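The FilterType enum and has_offset_input_ flag above separate the two execution modes: pre-filter scans the segment batch by batch and advances a cursor, while post-filter receives the candidate offsets (for example, hits returned by a vector search), touches no cursor state, and returns a bitmap whose i-th bit refers to offsets[i]. A toy model of the two call shapes, with illustrative names only:

#include <algorithm>
#include <cstdint>
#include <vector>

struct ToyExpr {
    const int64_t* column = nullptr;
    int64_t active_count = 0;
    int64_t cursor = 0;          // only meaningful in pre-filter mode
    int64_t batch_size = 4096;

    // pre filter: bit i of the result refers to segment row (cursor + i)
    std::vector<bool> eval_next_batch() {
        int64_t n = std::min(batch_size, active_count - cursor);
        std::vector<bool> res(n);
        for (int64_t i = 0; i < n; ++i) res[i] = column[cursor + i] > 0;
        cursor += n;             // cursor advances; repeated calls cover the segment
        return res;
    }

    // post filter: bit i of the result refers to row offsets[i]; no state is touched
    std::vector<bool> eval_offsets(const std::vector<int64_t>& offsets) const {
        std::vector<bool> res(offsets.size());
        for (size_t i = 0; i < offsets.size(); ++i) res[i] = column[offsets[i]] > 0;
        return res;
    }
};

This is also why MoveCursor becomes a no-op whenever an offset input is present: repeated pre-filter calls must cover the whole segment, but a post-filter call is complete after a single pass over its offsets.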
Expr { // use valid_data to see if raw data is null func(views_info.first.data(), views_info.second.data(), + nullptr, need_size, res, valid_res, @@ -270,6 +290,229 @@ class SegmentExpr : public Expr { return need_size; } + // accept offsets array and process on the scalar data by offsets + // stateless! Just check and set bitset as result, does not need to move cursor + // used for processing raw data expr for sealed segments. + // now only used for std::string_view && json + // TODO: support more types + template + int64_t + ProcessDataByOffsetsForSealedSeg( + FUNC func, + std::function skip_func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... values) { + // For non_chunked sealed segment, only single chunk + Assert(num_data_chunk_ == 1); + + auto& skip_index = segment_->GetSkipIndex(); + if (!skip_func || !skip_func(skip_index, field_id_, 0)) { + auto [data_vec, valid_data] = + segment_->get_views_by_offsets(field_id_, 0, *input); + func(data_vec.data(), + valid_data.data(), + nullptr, + input->size(), + res, + valid_res, + values...); + } + return input->size(); + } + + template + VectorPtr + ProcessIndexChunksByOffsets(FUNC func, + OffsetVector* input, + ValTypes... values) { + AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1"); + typedef std:: + conditional_t, std::string, T> + IndexInnerType; + using Index = index::ScalarIndex; + TargetBitmap valid_res(input->size()); + + const Index& index = + segment_->chunk_scalar_index(field_id_, 0); + auto* index_ptr = const_cast(&index); + auto valid_result = index_ptr->IsNotNull(); + for (auto i = 0; i < input->size(); ++i) { + valid_res[i] = valid_result[(*input)[i]]; + } + auto result = std::move(func.template operator()( + index_ptr, values..., input->data())); + return std::make_shared(std::move(result), + std::move(valid_res)); + } + + // when we have scalar index and index contains raw data, could go with index chunk by offsets + template + int64_t + ProcessIndexLookupByOffsets( + FUNC func, + std::function skip_func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... values) { + AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1"); + auto& skip_index = segment_->GetSkipIndex(); + + typedef std:: + conditional_t, std::string, T> + IndexInnerType; + using Index = index::ScalarIndex; + int64_t processed_size = 0; + const Index& index = + segment_->chunk_scalar_index(field_id_, 0); + auto* index_ptr = const_cast(&index); + auto valid_result = index_ptr->IsNotNull(); + auto batch_size = input->size(); + + if (!skip_func || !skip_func(skip_index, field_id_, 0)) { + for (auto i = 0; i < batch_size; ++i) { + auto offset = (*input)[i]; + auto raw = index_ptr->Reverse_Lookup(offset); + if (!raw.has_value()) { + res[i] = false; + continue; + } + T raw_data = raw.value(); + bool valid_data = valid_result[offset]; + func.template operator()(&raw_data, + &valid_data, + nullptr, + 1, + res + i, + valid_res + i, + values...); + } + } + + return batch_size; + } + + // accept offsets array and process on the scalar data by offsets + // stateless! Just check and set bitset as result, does not need to move cursor + template + int64_t + ProcessDataByOffsets( + FUNC func, + std::function skip_func, + OffsetVector* input, + TargetBitmapView res, + TargetBitmapView valid_res, + ValTypes... 
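ProcessIndexLookupByOffsets above handles the case where only a scalar index is loaded: each candidate offset is materialized back into a raw value via a Reverse_Lookup-style call before the predicate runs, and offsets whose lookup fails are simply marked false. A toy version under an assumed index interface (ToyScalarIndex, reverse_lookup):

#include <cstdint>
#include <optional>
#include <vector>

struct ToyScalarIndex {
    std::vector<std::optional<int64_t>> raw;  // raw[row] -> value, if present
    std::optional<int64_t> reverse_lookup(int64_t row) const { return raw.at(row); }
};

// Evaluate "value >= threshold" at each candidate offset through the index.
std::vector<bool>
index_lookup_filter(const ToyScalarIndex& index,
                    const std::vector<int64_t>& offsets,
                    int64_t threshold) {
    std::vector<bool> res(offsets.size(), false);
    for (size_t i = 0; i < offsets.size(); ++i) {
        auto raw = index.reverse_lookup(offsets[i]);
        res[i] = raw.has_value() && raw.value() >= threshold;
    }
    return res;
}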
values) { + int64_t processed_size = 0; + + if (is_index_mode_ && num_data_chunk_ == 0) { + return ProcessIndexLookupByOffsets( + func, skip_func, input, res, valid_res, values...); + } + + auto& skip_index = segment_->GetSkipIndex(); + + // sealed segment + if (segment_->type() == SegmentType::Sealed) { + if (segment_->is_chunked()) { + if constexpr (std::is_same_v || + std::is_same_v) { + for (size_t i = 0; i < input->size(); ++i) { + int64_t offset = (*input)[i]; + auto [chunk_id, chunk_offset] = + segment_->get_chunk_by_offset(field_id_, offset); + if (!skip_func || + !skip_func(skip_index, field_id_, chunk_id)) { + auto [data_vec, valid_data] = + segment_->get_views_by_offsets( + field_id_, chunk_id, {chunk_offset}); + func.template operator()( + data_vec.data(), + valid_data.data(), + nullptr, + 1, + res + processed_size, + valid_res + processed_size, + values...); + processed_size++; + } + } + return input->size(); + } + for (size_t i = 0; i < input->size(); ++i) { + int64_t offset = (*input)[i]; + auto [chunk_id, chunk_offset] = + segment_->get_chunk_by_offset(field_id_, offset); + if (!skip_func || + !skip_func(skip_index, field_id_, chunk_id)) { + auto chunk = + segment_->chunk_data(field_id_, chunk_id); + const T* data = chunk.data() + chunk_offset; + const bool* valid_data = chunk.valid_data(); + if (valid_data != nullptr) { + valid_data += chunk_offset; + } + func.template operator()( + data, + valid_data, + nullptr, + 1, + res + processed_size, + valid_res + processed_size, + values...); + processed_size++; + } + } + } else { + if constexpr (std::is_same_v || + std::is_same_v) { + return ProcessDataByOffsetsForSealedSeg( + func, skip_func, input, res, valid_res, values...); + } + if (!skip_func || !skip_func(skip_index, field_id_, 0)) { + auto chunk = segment_->chunk_data(field_id_, 0); + const T* data = chunk.data(); + const bool* valid_data = chunk.valid_data(); + func.template operator()(data, + valid_data, + input->data(), + input->size(), + res, + valid_res, + values...); + return input->size(); + } + } + } else { + // growing segment + for (size_t i = 0; i < input->size(); ++i) { + int64_t offset = (*input)[i]; + auto chunk_id = offset / size_per_chunk_; + auto chunk_offset = offset % size_per_chunk_; + if (!skip_func || !skip_func(skip_index, field_id_, chunk_id)) { + auto chunk = segment_->chunk_data(field_id_, chunk_id); + const T* data = chunk.data() + chunk_offset; + const bool* valid_data = chunk.valid_data(); + if (valid_data != nullptr) { + valid_data += chunk_offset; + } + func.template operator()( + data, + valid_data, + nullptr, + 1, + res + processed_size, + valid_res + processed_size, + values...); + processed_size++; + } + } + } + return input->size(); + } + template int64_t ProcessDataChunksForSingleChunk( @@ -312,6 +555,7 @@ class SegmentExpr : public Expr { } func(data, valid_data, + nullptr, size, res + processed_size, valid_res + processed_size, @@ -363,16 +607,12 @@ class SegmentExpr : public Expr { if (segment_->type() == SegmentType::Sealed) { // first is the raw data, second is valid_data // use valid_data to see if raw data is null - auto data_vec = segment_ - ->get_batch_views( - field_id_, i, data_pos, size) - .first; - auto valid_data = segment_ - ->get_batch_views( - field_id_, i, data_pos, size) - .second; + auto [data_vec, valid_data] = + segment_->get_batch_views( + field_id_, i, data_pos, size); func(data_vec.data(), valid_data.data(), + nullptr, size, res + processed_size, valid_res + processed_size, @@ -389,6 +629,7 @@ class SegmentExpr 
: public Expr { } func(data, valid_data, + nullptr, size, res + processed_size, valid_res + processed_size, @@ -413,13 +654,14 @@ class SegmentExpr : public Expr { FUNC func, std::function skip_func, TargetBitmapView res, + TargetBitmapView valid_res, ValTypes... values) { if (segment_->is_chunked()) { return ProcessDataChunksForMultipleChunk( - func, skip_func, res, values...); + func, skip_func, res, valid_res, values...); } else { return ProcessDataChunksForSingleChunk( - func, skip_func, res, values...); + func, skip_func, res, valid_res, values...); } } @@ -500,6 +742,51 @@ class SegmentExpr : public Expr { } } + template + TargetBitmap + ProcessChunksForValidByOffsets(bool use_index, const OffsetVector& input) { + typedef std:: + conditional_t, std::string, T> + IndexInnerType; + using Index = index::ScalarIndex; + auto batch_size = input.size(); + TargetBitmap valid_result(batch_size); + valid_result.set(); + + if (use_index) { + const Index& index = + segment_->chunk_scalar_index(field_id_, 0); + auto* index_ptr = const_cast(&index); + const auto& res = index_ptr->IsNotNull(); + for (auto i = 0; i < batch_size; ++i) { + valid_result[i] = res[input[i]]; + } + } else { + for (auto i = 0; i < batch_size; ++i) { + auto offset = input[i]; + auto [chunk_id, + chunk_offset] = [&]() -> std::pair { + if (segment_->type() == SegmentType::Growing) { + return {offset / size_per_chunk_, + offset % size_per_chunk_}; + } else if (segment_->is_chunked()) { + return segment_->get_chunk_by_offset(field_id_, offset); + } else { + return {0, offset}; + } + }(); + auto chunk = segment_->chunk_data(field_id_, chunk_id); + const bool* valid_data = chunk.valid_data(); + if (valid_data != nullptr) { + valid_result[i] = valid_data[chunk_offset]; + } else { + break; + } + } + } + return valid_result; + } + template TargetBitmap ProcessDataChunksForValid() { @@ -531,9 +818,9 @@ class SegmentExpr : public Expr { return valid_result; } valid_data += data_pos; - for (int i = 0; i < size; i++) { - if (!valid_data[i]) { - valid_result[i + data_pos] = false; + for (int j = 0; j < size; j++) { + if (!valid_data[j]) { + valid_result[j + processed_size] = false; } } processed_size += size; diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index b21714b4c8b6b..d2dfe9a4e191e 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -23,22 +23,24 @@ namespace exec { void PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::ARRAY: { - if (is_index_mode_) { + if (is_index_mode_ && !has_offset_input_) { result = EvalArrayContainsForIndexSegment(); } else { - result = EvalJsonContainsForDataSegment(); + result = EvalJsonContainsForDataSegment(input); } break; } case DataType::JSON: { - if (is_index_mode_) { + if (is_index_mode_ && !has_offset_input_) { PanicInfo( ExprInvalid, "exists expr for json or array index mode not supported"); } - result = EvalJsonContainsForDataSegment(); + result = EvalJsonContainsForDataSegment(input); break; } default: @@ -49,7 +51,7 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { } VectorPtr -PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { +PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment(OffsetVector* input) { auto data_type = 
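ProcessChunksForValidByOffsets above gathers per-row validity for the candidate offsets either from the index-side IsNotNull bitmap or from the chunk's valid_data array. A simplified single-chunk sketch of that gather, with an assumed name (valid_by_offsets):

#include <cstdint>
#include <vector>

// Collect validity bits for the candidate offsets: prefer an index "not null"
// bitmap when present, otherwise consult the chunk's valid_data array; when
// neither exists every row is treated as valid.
std::vector<bool>
valid_by_offsets(const std::vector<bool>* index_not_null,  // nullptr if no index
                 const bool* chunk_valid_data,             // nullptr => no nulls
                 const std::vector<int64_t>& offsets) {
    std::vector<bool> valid(offsets.size(), true);
    for (size_t i = 0; i < offsets.size(); ++i) {
        if (index_not_null != nullptr) {
            valid[i] = (*index_not_null)[offsets[i]];
        } else if (chunk_valid_data != nullptr) {
            valid[i] = chunk_valid_data[offsets[i]];
        }
    }
    return valid;
}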
expr_->column_.data_type_; switch (expr_->op_) { case proto::plan::JSONContainsExpr_JSONOp_Contains: @@ -58,16 +60,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecArrayContains(); + return ExecArrayContains(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecArrayContains(); + return ExecArrayContains(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecArrayContains(); + return ExecArrayContains(input); } case proto::plan::GenericValue::kStringVal: { - return ExecArrayContains(); + return ExecArrayContains(input); } default: PanicInfo( @@ -79,19 +81,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kStringVal: { - return ExecJsonContains(); + return ExecJsonContains(input); } case proto::plan::GenericValue::kArrayVal: { - return ExecJsonContainsArray(); + return ExecJsonContainsArray(input); } default: PanicInfo(DataTypeInvalid, @@ -99,7 +101,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { val_type); } } else { - return ExecJsonContainsWithDiffType(); + return ExecJsonContainsWithDiffType(input); } } } @@ -108,16 +110,16 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } case proto::plan::GenericValue::kStringVal: { - return ExecArrayContainsAll(); + return ExecArrayContainsAll(input); } default: PanicInfo( @@ -129,19 +131,19 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { auto val_type = expr_->vals_[0].val_case(); switch (val_type) { case proto::plan::GenericValue::kBoolVal: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kInt64Val: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kFloatVal: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kStringVal: { - return ExecJsonContainsAll(); + return ExecJsonContainsAll(input); } case proto::plan::GenericValue::kArrayVal: { - return ExecJsonContainsAllArray(); + return ExecJsonContainsAllArray(input); } default: PanicInfo(DataTypeInvalid, @@ -149,7 +151,7 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { val_type); } } else { - return ExecJsonContainsAllWithDiffType(); + return ExecJsonContainsAllWithDiffType(input); } } } @@ -162,12 +164,13 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { template VectorPtr -PhyJsonContainsFilterExpr::ExecArrayContains() { +PhyJsonContainsFilterExpr::ExecArrayContains(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, 
ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -184,12 +187,14 @@ PhyJsonContainsFilterExpr::ExecArrayContains() { for (auto const& element : expr_->vals_) { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::unordered_set& elements) { + auto execute_sub_batch = []( + const milvus::ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::unordered_set& elements) { auto executor = [&](size_t i) { const auto& array = data[i]; for (int j = 0; j < array.length(); ++j) { @@ -200,16 +205,31 @@ PhyJsonContainsFilterExpr::ExecArrayContains() { return false; }; for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + elements); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -220,12 +240,13 @@ PhyJsonContainsFilterExpr::ExecArrayContains() { template VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContains() { +PhyJsonContainsFilterExpr::ExecJsonContains(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -241,13 +262,15 @@ PhyJsonContainsFilterExpr::ExecJsonContains() { for (auto const& element : expr_->vals_) { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::unordered_set& elements) { + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::unordered_set& elements) { auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -266,16 +289,35 @@ PhyJsonContainsFilterExpr::ExecJsonContains() { return false; }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -285,8 +327,9 @@ PhyJsonContainsFilterExpr::ExecJsonContains() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsArray() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsArray(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -302,51 +345,71 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() { for (auto const& element : expr_->vals_) { elements.emplace_back(GetValueFromProto(element)); } - auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements) { - auto executor = [&](size_t i) -> bool { - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; - } - for (auto&& it : array) { - auto val = it.get_array(); - if (val.error()) { - continue; - } - std::vector< - simdjson::simdjson_result> - json_array; - json_array.reserve(val.count_elements()); - for (auto&& e : val) { - json_array.emplace_back(e); - } - for (auto const& element : elements) { - if (CompareTwoJsonArray(json_array, element)) { - return true; - } - } - } + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements) { + auto executor = [&](size_t i) -> bool { + auto doc = data[i].doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { return false; - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; + } + for (auto&& it : array) { + auto val = it.get_array(); + if (val.error()) { continue; } - res[i] = executor(i); + std::vector< + simdjson::simdjson_result> + json_array; + json_array.reserve(val.count_elements()); + for (auto&& e : val) { + json_array.emplace_back(e); + } + for (auto const& element : elements) { + if (CompareTwoJsonArray(json_array, element)) { + return true; + } + } } + return false; }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -357,14 +420,15 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() { template VectorPtr -PhyJsonContainsFilterExpr::ExecArrayContainsAll() { +PhyJsonContainsFilterExpr::ExecArrayContainsAll(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; AssertInfo(expr_->column_.nested_path_.size() == 0, "[ExecArrayContainsAll]nested path must be null"); - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -380,12 +444,14 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::unordered_set& elements) { + auto execute_sub_batch = []( + const milvus::ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::unordered_set& elements) { auto executor = [&](size_t i) { std::unordered_set tmp_elements(elements); // Note: array can only be iterated once @@ -398,16 +464,31 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() { return tmp_elements.size() == 0; }; for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + elements); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -418,12 +499,13 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() { template VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsAll() { +PhyJsonContainsFilterExpr::ExecJsonContainsAll(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -440,13 +522,15 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() { elements.insert(GetValueFromProto(element)); } - auto execute_sub_batch = [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::unordered_set& elements) { + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::unordered_set& elements) { auto executor = [&](const size_t i) -> bool { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -468,16 +552,35 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() { return tmp_elements.size() == 0; }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -487,8 +590,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType( + OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -508,111 +613,126 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() { i++; } - auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements, - const std::unordered_set elements_index) { - auto executor = [&](size_t i) -> bool { - const auto& json = data[i]; - auto doc = json.doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; - } - std::unordered_set tmp_elements_index(elements_index); - for (auto&& it : array) { - int i = -1; - for (auto& element : elements) { - i++; - switch (element.val_case()) { - case proto::plan::GenericValue::kBoolVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.bool_val()) { - tmp_elements_index.erase(i); - } - break; + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements, + const std::unordered_set elements_index) { + auto executor = [&](size_t i) -> bool { + const auto& json = data[i]; + auto doc = json.doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { + return false; + } + std::unordered_set tmp_elements_index(elements_index); + for (auto&& it : array) { + int i = -1; + for (auto& element : elements) { + i++; + switch (element.val_case()) { + case proto::plan::GenericValue::kBoolVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.bool_val()) { + tmp_elements_index.erase(i); + } + break; + } + case proto::plan::GenericValue::kInt64Val: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kInt64Val: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.int64_val()) { - tmp_elements_index.erase(i); - } - break; + if (val.value() == element.int64_val()) { + tmp_elements_index.erase(i); } - case proto::plan::GenericValue::kFloatVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.float_val()) { - tmp_elements_index.erase(i); - } - break; + break; + } + case proto::plan::GenericValue::kFloatVal: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kStringVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.string_val()) { - tmp_elements_index.erase(i); - } - break; + if (val.value() == element.float_val()) { + tmp_elements_index.erase(i); } - case proto::plan::GenericValue::kArrayVal: { - auto val = it.get_array(); - if (val.error()) { - continue; - } - if (CompareTwoJsonArray(val, - element.array_val())) { - tmp_elements_index.erase(i); - } - break; + break; + } + case proto::plan::GenericValue::kStringVal: { + auto val = it.template get(); + if (val.error()) { + continue; } - default: - PanicInfo( - DataTypeInvalid, - fmt::format("unsupported data type {}", - element.val_case())); + if (val.value() == element.string_val()) { + tmp_elements_index.erase(i); + } + break; } - if (tmp_elements_index.size() == 0) { - return true; + case proto::plan::GenericValue::kArrayVal: { + auto val = it.get_array(); + if 
(val.error()) { + continue; + } + if (CompareTwoJsonArray(val, element.array_val())) { + tmp_elements_index.erase(i); + } + break; } + default: + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", + element.val_case())); } if (tmp_elements_index.size() == 0) { return true; } } - return tmp_elements_index.size() == 0; - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; + if (tmp_elements_index.size() == 0) { + return true; } - res[i] = executor(i); } + return tmp_elements_index.size() == 0; }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks(execute_sub_batch, - std::nullptr_t{}, - res, - valid_res, - pointer, - elements, - elements_index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements, + elements_index); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements, + elements_index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -622,8 +742,9 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -640,55 +761,75 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() { for (auto const& element : expr_->vals_) { elements.emplace_back(GetValueFromProto(element)); } - auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements) { - auto executor = [&](const size_t i) { - auto doc = data[i].doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements) { + auto executor = [&](const size_t i) { + auto doc = data[i].doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { + return false; + } + std::unordered_set exist_elements_index; + for (auto&& it : array) { + auto val = it.get_array(); + if (val.error()) { + continue; } - std::unordered_set exist_elements_index; - for (auto&& it : array) { - auto val = it.get_array(); - if (val.error()) { - continue; - } - std::vector< - simdjson::simdjson_result> - json_array; - json_array.reserve(val.count_elements()); - for (auto&& e : val) { - json_array.emplace_back(e); - } - for (int index = 0; index < elements.size(); ++index) { - if (CompareTwoJsonArray(json_array, elements[index])) { - exist_elements_index.insert(index); - } - } - if (exist_elements_index.size() == elements.size()) { - return true; + std::vector< + simdjson::simdjson_result> + json_array; + json_array.reserve(val.count_elements()); + for (auto&& e : val) { + json_array.emplace_back(e); + } + for (int index = 0; index < elements.size(); ++index) { + if (CompareTwoJsonArray(json_array, elements[index])) { + exist_elements_index.insert(index); } } - return exist_elements_index.size() == elements.size(); - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; + if (exist_elements_index.size() == elements.size()) { + return true; } - res[i] = executor(i); } + return exist_elements_index.size() == elements.size(); }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -698,8 +839,9 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() { } VectorPtr -PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() { - auto real_batch_size = GetNextBatchSize(); +PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -720,97 +862,115 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() { i++; } - auto execute_sub_batch = - [](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string& pointer, - const std::vector& elements) { - auto executor = [&](const size_t i) { - auto& json = data[i]; - auto doc = json.doc(); - auto array = doc.at_pointer(pointer).get_array(); - if (array.error()) { - return false; - } - // Note: array can only be iterated once - for (auto&& it : array) { - for (auto const& element : elements) { - switch (element.val_case()) { - case proto::plan::GenericValue::kBoolVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.bool_val()) { - return true; - } - break; + auto execute_sub_batch = []( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string& pointer, + const std::vector& elements) { + auto executor = [&](const size_t i) { + auto& json = data[i]; + auto doc = json.doc(); + auto array = doc.at_pointer(pointer).get_array(); + if (array.error()) { + return false; + } + // Note: array can only be iterated once + for (auto&& it : array) { + for (auto const& element : elements) { + switch (element.val_case()) { + case proto::plan::GenericValue::kBoolVal: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kInt64Val: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.int64_val()) { - return true; - } - break; + if (val.value() == element.bool_val()) { + return true; } - case proto::plan::GenericValue::kFloatVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.float_val()) { - return true; - } - break; + break; + } + case proto::plan::GenericValue::kInt64Val: { + auto val = it.template get(); + if (val.error()) { + continue; } - case proto::plan::GenericValue::kStringVal: { - auto val = it.template get(); - if (val.error()) { - continue; - } - if (val.value() == element.string_val()) { - return true; - } - break; + if (val.value() == element.int64_val()) { + return true; } - case proto::plan::GenericValue::kArrayVal: { - auto val = it.get_array(); - if (val.error()) { - 
continue; - } - if (CompareTwoJsonArray(val, - element.array_val())) { - return true; - } - break; + break; + } + case proto::plan::GenericValue::kFloatVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.float_val()) { + return true; + } + break; + } + case proto::plan::GenericValue::kStringVal: { + auto val = it.template get(); + if (val.error()) { + continue; + } + if (val.value() == element.string_val()) { + return true; } - default: - PanicInfo( - DataTypeInvalid, - fmt::format("unsupported data type {}", - element.val_case())); + break; } + case proto::plan::GenericValue::kArrayVal: { + auto val = it.get_array(); + if (val.error()) { + continue; + } + if (CompareTwoJsonArray(val, element.array_val())) { + return true; + } + break; + } + default: + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", + element.val_case())); } } - return false; - }; - for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { - res[i] = valid_res[i] = false; - continue; - } - res[i] = executor(i); } + return false; }; + for (size_t i = 0; i < size; ++i) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { + res[i] = valid_res[i] = false; + continue; + } + res[i] = executor(offset); + } + }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + elements); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + elements); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/JsonContainsExpr.h b/internal/core/src/exec/expression/JsonContainsExpr.h index a0cfdfdea0841..a0c8848cba188 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.h +++ b/internal/core/src/exec/expression/JsonContainsExpr.h @@ -50,35 +50,35 @@ class PhyJsonContainsFilterExpr : public SegmentExpr { private: VectorPtr - EvalJsonContainsForDataSegment(); + EvalJsonContainsForDataSegment(OffsetVector* input = nullptr); template VectorPtr - ExecJsonContains(); + ExecJsonContains(OffsetVector* input = nullptr); template VectorPtr - ExecArrayContains(); + ExecArrayContains(OffsetVector* input = nullptr); template VectorPtr - ExecJsonContainsAll(); + ExecJsonContainsAll(OffsetVector* input = nullptr); template VectorPtr - ExecArrayContainsAll(); + ExecArrayContainsAll(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsArray(); + ExecJsonContainsArray(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsAllArray(); + ExecJsonContainsAllArray(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsAllWithDiffType(); + ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr); VectorPtr - ExecJsonContainsWithDiffType(); + ExecJsonContainsWithDiffType(OffsetVector* input = nullptr); VectorPtr EvalArrayContainsForIndexSegment(); diff --git a/internal/core/src/exec/expression/LogicalBinaryExpr.h b/internal/core/src/exec/expression/LogicalBinaryExpr.h index 43680772fbbf1..e5eb63763183e 100644 --- a/internal/core/src/exec/expression/LogicalBinaryExpr.h +++ 
b/internal/core/src/exec/expression/LogicalBinaryExpr.h @@ -75,8 +75,10 @@ class PhyLogicalBinaryExpr : public Expr { void MoveCursor() override { - inputs_[0]->MoveCursor(); - inputs_[1]->MoveCursor(); + if (!has_offset_input_) { + inputs_[0]->MoveCursor(); + inputs_[1]->MoveCursor(); + } } private: diff --git a/internal/core/src/exec/expression/LogicalUnaryExpr.h b/internal/core/src/exec/expression/LogicalUnaryExpr.h index da5a0e0c97213..5fdfab90d1fff 100644 --- a/internal/core/src/exec/expression/LogicalUnaryExpr.h +++ b/internal/core/src/exec/expression/LogicalUnaryExpr.h @@ -41,7 +41,9 @@ class PhyLogicalUnaryExpr : public Expr { void MoveCursor() override { - inputs_[0]->MoveCursor(); + if (!has_offset_input_) { + inputs_[0]->MoveCursor(); + } } private: diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp index 4cf6555cb9d85..406648922895d 100644 --- a/internal/core/src/exec/expression/TermExpr.cpp +++ b/internal/core/src/exec/expression/TermExpr.cpp @@ -23,37 +23,39 @@ namespace exec { void PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { - if (is_pk_field_) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); + if (is_pk_field_ && !has_offset_input_) { result = ExecPkTermImpl(); return; } switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT8: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT16: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT32: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT64: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -61,30 +63,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); } else { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); } break; } case DataType::JSON: { if (expr_->vals_.size() == 0) { - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; } auto type = expr_->vals_[0].val_case(); switch (type) { case proto::plan::GenericValue::ValCase::kBoolVal: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; case proto::plan::GenericValue::ValCase::kStringVal: - result = ExecVisitorImplTemplateJson(); + result = ExecVisitorImplTemplateJson(input); break; default: PanicInfo(DataTypeInvalid, "unknown data type: {}", type); @@ -94,26 +96,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { case DataType::ARRAY: { if (expr_->vals_.size() == 0) { SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; } auto type = expr_->vals_[0].val_case(); switch 
(type) { case proto::plan::GenericValue::ValCase::kBoolVal: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; case proto::plan::GenericValue::ValCase::kStringVal: SetNotUseIndex(); - result = ExecVisitorImplTemplateArray(); + result = ExecVisitorImplTemplateArray(input); break; default: PanicInfo(DataTypeInvalid, "unknown data type: {}", type); @@ -217,31 +219,32 @@ PhyTermFilterExpr::ExecPkTermImpl() { template VectorPtr -PhyTermFilterExpr::ExecVisitorImplTemplateJson() { +PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) { if (expr_->is_in_field_) { - return ExecTermJsonVariableInField(); + return ExecTermJsonVariableInField(input); } else { - return ExecTermJsonFieldInVariable(); + return ExecTermJsonFieldInVariable(input); } } template VectorPtr -PhyTermFilterExpr::ExecVisitorImplTemplateArray() { +PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) { if (expr_->is_in_field_) { - return ExecTermArrayVariableInField(); + return ExecTermArrayVariableInField(input); } else { - return ExecTermArrayFieldInVariable(); + return ExecTermArrayFieldInVariable(input); } } template VectorPtr -PhyTermFilterExpr::ExecTermArrayVariableInField() { +PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -256,15 +259,17 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() { "element length in json array must be one"); ValueType target_val = GetValueFromProto(expr_->vals_[0]); - auto execute_sub_batch = [](const ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const ValueType& target_val) { - auto executor = [&](size_t i) { - for (int i = 0; i < data[i].length(); i++) { - auto val = data[i].template get_data(i); + auto execute_sub_batch = []( + const ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const ValueType& target_val) { + auto executor = [&](size_t offset) { + for (int i = 0; i < data[offset].length(); i++) { + auto val = data[offset].template get_data(i); if (val == target_val) { return true; } @@ -272,16 +277,31 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() { return false; }; for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + target_val); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -292,12 +312,13 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() { template VectorPtr -PhyTermFilterExpr::ExecTermArrayFieldInVariable() { +PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -323,29 +344,51 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() { return res_vec; } - auto execute_sub_batch = [](const ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - int index, - const std::unordered_set& term_set) { + auto execute_sub_batch = []( + const ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + int index, + const std::unordered_set& term_set) { for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - if (term_set.empty() || index >= data[i].length()) { + if (term_set.empty() || index >= data[offset].length()) { res[i] = false; continue; } - auto value = data[i].get_data(index); + auto value = data[offset].get_data(index); res[i] = term_set.find(ValueType(value)) != term_set.end(); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, index, term_set); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + index, + term_set); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + index, + term_set); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -356,11 +399,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() { template VectorPtr -PhyTermFilterExpr::ExecTermJsonVariableInField() { +PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -376,13 +420,15 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() { ValueType val = GetValueFromProto(expr_->vals_[0]); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); - auto execute_sub_batch = [](const Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string pointer, - const ValueType& target_val) { + auto execute_sub_batch = []( + const Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string pointer, + const ValueType& target_val) { auto executor = [&](size_t i) { auto doc = data[i].doc(); auto array = doc.at_pointer(pointer).get_array(); @@ -400,15 +446,30 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() { return false; }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + val); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -419,11 +480,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() { template VectorPtr -PhyTermFilterExpr::ExecTermJsonFieldInVariable() { +PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -446,13 +508,15 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { return res_vec; } - auto execute_sub_batch = [](const Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::string pointer, - const std::unordered_set& terms) { + auto execute_sub_batch = []( + const Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::string pointer, + const std::unordered_set& terms) { auto executor = [&](size_t i) { auto x = data[i].template at(pointer); if (x.error()) { @@ -472,7 +536,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { return terms.find(ValueType(x.value())) != terms.end(); }; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -480,11 +548,26 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { res[i] = false; continue; } - res[i] = executor(i); + res[i] = executor(offset); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, term_set); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + pointer, + term_set); + } else { + processed_size = ProcessDataChunks(execute_sub_batch, + std::nullptr_t{}, + res, + valid_res, + pointer, + term_set); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -495,17 +578,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() { template VectorPtr -PhyTermFilterExpr::ExecVisitorImpl() { - if (is_index_mode_) { - return ExecVisitorImplForIndex(); +PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) { + if (is_index_mode_ && !has_offset_input_) { + return ExecVisitorImplForIndex(input); } else { - return ExecVisitorImplForData(); + return ExecVisitorImplForData(input); } } template VectorPtr -PhyTermFilterExpr::ExecVisitorImplForIndex() { +PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) { typedef std:: conditional_t, std::string, T> IndexInnerType; @@ -540,7 +623,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() { template <> VectorPtr -PhyTermFilterExpr::ExecVisitorImplForIndex() { +PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) { using Index = index::ScalarIndex; auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { @@ -562,8 +645,9 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() { template VectorPtr -PhyTermFilterExpr::ExecVisitorImplForData() { - auto real_batch_size = GetNextBatchSize(); +PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) { + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -584,23 +668,39 @@ PhyTermFilterExpr::ExecVisitorImplForData() { } } std::unordered_set vals_set(vals.begin(), vals.end()); - auto execute_sub_batch = [](const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - const std::unordered_set& vals) { + auto execute_sub_batch = []( + const T* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + const std::unordered_set& vals) { TermElementFuncSet func; for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } - res[i] = func(vals, data[i]); + res[i] = func(vals, data[offset]); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + vals_set); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", diff --git a/internal/core/src/exec/expression/TermExpr.h b/internal/core/src/exec/expression/TermExpr.h index a816c6c9c6153..19f03b131b9c3 100644 --- a/internal/core/src/exec/expression/TermExpr.h +++ b/internal/core/src/exec/expression/TermExpr.h @@ -83,39 +83,39 @@ class PhyTermFilterExpr : public SegmentExpr { template VectorPtr - ExecVisitorImpl(); + ExecVisitorImpl(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplForIndex(); + ExecVisitorImplForIndex(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplForData(); + ExecVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplTemplateJson(); + ExecVisitorImplTemplateJson(OffsetVector* input = nullptr); template VectorPtr - ExecTermJsonVariableInField(); + ExecTermJsonVariableInField(OffsetVector* input = nullptr); template VectorPtr - ExecTermJsonFieldInVariable(); + ExecTermJsonFieldInVariable(OffsetVector* input = nullptr); template VectorPtr - ExecVisitorImplTemplateArray(); + ExecVisitorImplTemplateArray(OffsetVector* input = nullptr); template VectorPtr - ExecTermArrayVariableInField(); + ExecTermArrayVariableInField(OffsetVector* input = nullptr); template VectorPtr - ExecTermArrayFieldInVariable(); + ExecTermArrayFieldInVariable(OffsetVector* input = nullptr); private: std::shared_ptr expr_; diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index 748a0e993f28f..5f0daa7176abc 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -87,7 +87,7 @@ PhyUnaryRangeFilterExpr::CanUseIndexForArray() { template VectorPtr PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex() { - return ExecRangeVisitorImplArray(); + return ExecRangeVisitorImplArray(nullptr); } template <> @@ -121,7 +121,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex< case DataType::FLOAT: case DataType::DOUBLE: { // not accurate on floating point number, rollback to bruteforce. 
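// --- Editorial sketch (not Milvus code): the sub-batch pattern this diff applies to
// every expression. When an expression runs as a post-filter it receives an `offsets`
// array of row ids selected by the vector search, and each batch slot `i` is redirected
// to segment row `offsets[i]`; otherwise rows are visited sequentially. FilterType,
// the std::vector<bool> "bitmaps", and the function name below are simplified
// stand-ins for the real TargetBitmapView/OffsetVector machinery, chosen only to keep
// the example self-contained.
#include <cstdint>
#include <vector>

enum class FilterType { sequential, post };

template <FilterType filter_type, typename T, typename Pred>
void execute_sub_batch_sketch(const T* data,
                              const bool* valid_data,   // may be nullptr (no null column)
                              const int64_t* offsets,   // row ids when post-filtering
                              int size,
                              std::vector<bool>& res,
                              std::vector<bool>& valid_res,
                              Pred pred) {
    for (int i = 0; i < size; ++i) {
        int64_t offset = i;
        if constexpr (filter_type == FilterType::post) {
            offset = offsets ? offsets[i] : i;  // map batch slot -> segment row
        }
        if (valid_data != nullptr && !valid_data[offset]) {
            res[i] = false;        // null rows never match
            valid_res[i] = false;
            continue;
        }
        res[i] = pred(data[offset]);
    }
}
// A caller in post-filter mode would instantiate this as
// execute_sub_batch_sketch<FilterType::post>(...) and pass the offsets produced
// by the preceding vector-search stage.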
- return ExecRangeVisitorImplArray(); + return ExecRangeVisitorImplArray( + nullptr); } case DataType::VARCHAR: { if (segment_->type() == SegmentType::Growing) { @@ -140,39 +141,41 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex< } } default: - return ExecRangeVisitorImplArray(); + return ExecRangeVisitorImplArray(nullptr); } } void PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT8: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT16: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT32: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::INT64: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -180,9 +183,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } else { - result = ExecRangeVisitorImpl(); + result = ExecRangeVisitorImpl(input); } break; } @@ -190,19 +193,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { auto val_type = expr_->val_.val_case(); switch (val_type) { case proto::plan::GenericValue::ValCase::kBoolVal: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kStringVal: - result = ExecRangeVisitorImplJson(); + result = ExecRangeVisitorImplJson(input); break; case proto::plan::GenericValue::ValCase::kArrayVal: - result = ExecRangeVisitorImplJson(); + result = + ExecRangeVisitorImplJson(input); break; default: PanicInfo( @@ -215,27 +219,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (val_type) { case proto::plan::GenericValue::ValCase::kBoolVal: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kInt64Val: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kFloatVal: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kStringVal: SetNotUseIndex(); - result = ExecRangeVisitorImplArray(); + result = ExecRangeVisitorImplArray(input); break; case proto::plan::GenericValue::ValCase::kArrayVal: - if (CanUseIndexForArray()) { + if (!has_offset_input_ && + CanUseIndexForArray()) { result = ExecRangeVisitorImplArrayForIndex< proto::plan::Array>(); } else { - result = - ExecRangeVisitorImplArray(); 
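// --- Editorial sketch (assumptions, not the actual SegmentExpr API) of the
// evaluation-entry pattern repeated for each Exec* method in this diff: the batch
// size is either the number of pre-selected offsets (post-filter mode) or the next
// sequential batch, and processing is dispatched to an offset-based walk or the usual
// chunk scan accordingly. The callbacks and eval_batch_sketch name are hypothetical
// stand-ins for the member templates ProcessDataByOffsets/ProcessDataChunks; both
// callbacks are expected to return the number of rows they processed.
#include <cstdint>
#include <vector>

template <typename ProcessByOffsets, typename ProcessChunks>
std::vector<bool>
eval_batch_sketch(const std::vector<int64_t>* offset_input,  // nullptr when pre-filtering
                  int64_t next_batch_size,
                  ProcessByOffsets process_by_offsets,
                  ProcessChunks process_chunks) {
    const bool has_offset_input = (offset_input != nullptr);
    const int64_t real_batch_size =
        has_offset_input ? static_cast<int64_t>(offset_input->size())
                         : next_batch_size;
    std::vector<bool> res(static_cast<size_t>(real_batch_size), false);
    const int64_t processed = has_offset_input
                                  ? process_by_offsets(*offset_input, res)
                                  : process_chunks(res);
    // The real code asserts processed == real_batch_size before returning the result.
    (void)processed;
    return res;
}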
+ result = ExecRangeVisitorImplArray( + input); } break; default: @@ -253,11 +258,12 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -273,51 +279,120 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() { if (expr_->column_.nested_path_.size() > 0) { index = std::stoi(expr_->column_.nested_path_[0]); } - auto execute_sub_batch = [op_type](const milvus::ArrayView* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ValueType val, - int index) { + auto + execute_sub_batch = [op_type]( + const milvus::ArrayView* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ValueType val, + int index) { switch (op_type) { case proto::plan::GreaterThan: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::GreaterEqual: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::LessThan: { - UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::LessEqual: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::Equal: { - UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::NotEqual: { - UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + UnaryElementFuncForArray + func; + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } case proto::plan::PrefixMatch: { - UnaryElementFuncForArray + UnaryElementFuncForArray func; - func(data, valid_data, size, val, index, res, valid_res); + func(data, + valid_data, + size, + val, + index, + res, + valid_res, + offsets); break; } default: @@ -327,8 +402,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() { op_type)); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index); + int64_t processed_size; + if (has_offset_input_) { + processed_size = + ProcessDataByOffsets(execute_sub_batch, + std::nullptr_t{}, + input, + res, + valid_res, + val, + index); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -449,12 +536,13 
@@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) { using GetType = std::conditional_t, std::string_view, ExprValueType>; - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } @@ -468,46 +556,53 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { auto op_type = expr_->op_type_; auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); -#define UnaryRangeJSONCompare(cmp) \ - do { \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = !x.error() && (cmp); \ - break; \ - } \ - res[i] = false; \ - break; \ - } \ - res[i] = (cmp); \ +#define UnaryRangeJSONCompare(cmp) \ + do { \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = !x.error() && (cmp); \ + break; \ + } \ + res[i] = false; \ + break; \ + } \ + res[i] = (cmp); \ } while (false) -#define UnaryRangeJSONCompareNotEqual(cmp) \ - do { \ - auto x = data[i].template at(pointer); \ - if (x.error()) { \ - if constexpr (std::is_same_v) { \ - auto x = data[i].template at(pointer); \ - res[i] = x.error() || (cmp); \ - break; \ - } \ - res[i] = true; \ - break; \ - } \ - res[i] = (cmp); \ +#define UnaryRangeJSONCompareNotEqual(cmp) \ + do { \ + auto x = data[offset].template at(pointer); \ + if (x.error()) { \ + if constexpr (std::is_same_v) { \ + auto x = data[offset].template at(pointer); \ + res[i] = x.error() || (cmp); \ + break; \ + } \ + res[i] = true; \ + break; \ + } \ + res[i] = (cmp); \ } while (false) - auto execute_sub_batch = [op_type, pointer](const milvus::Json* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - ExprValueType val) { + auto execute_sub_batch = + [ op_type, pointer ]( + const milvus::Json* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + ExprValueType val) { switch (op_type) { case proto::plan::GreaterThan: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -521,7 +616,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::GreaterEqual: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -535,7 +634,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::LessThan: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -549,7 +652,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::LessEqual: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -563,7 +670,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::Equal: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -583,7 +694,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::NotEqual: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -603,7 +718,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { } case proto::plan::PrefixMatch: { for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -621,7 +740,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { auto regex_pattern = translator(val); RegexMatcher matcher(regex_pattern); for (size_t i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } @@ -641,8 +764,15 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { op_type)); } }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, std::nullptr_t{}, res, valid_res, val); + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets( + execute_sub_batch, std::nullptr_t{}, input, res, valid_res, val); + + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, std::nullptr_t{}, res, valid_res, val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}", @@ -653,15 +783,16 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl() { - if (expr_->op_type_ == proto::plan::OpType::TextMatch) { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) { + if (expr_->op_type_ == proto::plan::OpType::TextMatch && + !has_offset_input_) { return ExecTextMatch(); } - if (CanUseIndex()) { + if (CanUseIndex() && !has_offset_input_) { return ExecRangeVisitorImplForIndex(); } else { - return ExecRangeVisitorImplForData(); + return ExecRangeVisitorImplForData(input); } } @@ -744,17 +875,24 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { template ColumnVectorPtr -PhyUnaryRangeFilterExpr::PreCheckOverflow() { +PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) { if constexpr (std::is_integral_v && !std::is_same_v) { int64_t val = GetValueFromProto(expr_->val_); if (milvus::query::out_of_range(val)) { - int64_t batch_size = - overflow_check_pos_ + batch_size_ >= active_count_ - ? active_count_ - overflow_check_pos_ - : batch_size_; - overflow_check_pos_ += batch_size; - auto valid = ProcessChunksForValid(CanUseIndex()); + int64_t batch_size; + if (input != nullptr) { + batch_size = input->size(); + } else { + batch_size = overflow_check_pos_ + batch_size_ >= active_count_ + ? active_count_ - overflow_check_pos_ + : batch_size_; + overflow_check_pos_ += batch_size; + } + auto valid = (input != nullptr) + ? ProcessChunksForValidByOffsets( + CanUseIndex(), *input) + : ProcessChunksForValid(CanUseIndex()); auto res_vec = std::make_shared( TargetBitmap(batch_size), std::move(valid)); TargetBitmapView res(res_vec->GetRawData(), batch_size); @@ -800,18 +938,20 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() { template VectorPtr -PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() { +PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) { typedef std:: conditional_t, std::string, T> IndexInnerType; - if (auto res = PreCheckOverflow()) { + if (auto res = PreCheckOverflow(input)) { return res; } - auto real_batch_size = GetNextBatchSize(); + auto real_batch_size = + has_offset_input_ ? 
input->size() : GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } + IndexInnerType val = GetValueFromProto(expr_->val_); auto res_vec = std::make_shared( TargetBitmap(real_batch_size), TargetBitmap(real_batch_size)); @@ -819,51 +959,56 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() { TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size); valid_res.set(); auto expr_type = expr_->op_type_; - auto execute_sub_batch = [expr_type](const T* data, - const bool* valid_data, - const int size, - TargetBitmapView res, - TargetBitmapView valid_res, - IndexInnerType val) { + + auto execute_sub_batch = [expr_type]( + const T* data, + const bool* valid_data, + const int64_t* offsets, + const int size, + TargetBitmapView res, + TargetBitmapView valid_res, + IndexInnerType val) { switch (expr_type) { case proto::plan::GreaterThan: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::GreaterEqual: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc + func; + func(data, size, val, res, offsets); break; } case proto::plan::LessThan: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::LessEqual: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::Equal: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::NotEqual: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::PrefixMatch: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } case proto::plan::Match: { - UnaryElementFunc func; - func(data, size, val, res); + UnaryElementFunc func; + func(data, size, val, res, offsets); break; } default: @@ -877,20 +1022,32 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() { // but to mask res with valid_data after the batch operation. if (valid_data != nullptr) { for (int i = 0; i < size; i++) { - if (!valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? 
offsets[i] : i; + } + if (!valid_data[offset]) { res[i] = valid_res[i] = false; } } } }; + auto skip_index_func = [expr_type, val](const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) { return skip_index.CanSkipUnaryRange( field_id, chunk_id, expr_type, val); }; - int64_t processed_size = ProcessDataChunks( - execute_sub_batch, skip_index_func, res, valid_res, val); + + int64_t processed_size; + if (has_offset_input_) { + processed_size = ProcessDataByOffsets( + execute_sub_batch, skip_index_func, input, res, valid_res, val); + } else { + processed_size = ProcessDataChunks( + execute_sub_batch, skip_index_func, res, valid_res, val); + } AssertInfo(processed_size == real_batch_size, "internal error: expr processed rows {} not equal " "expect batch size {}, related params[active_count:{}, " diff --git a/internal/core/src/exec/expression/UnaryExpr.h b/internal/core/src/exec/expression/UnaryExpr.h index 9dac7b0b6a1da..998a46f40f38a 100644 --- a/internal/core/src/exec/expression/UnaryExpr.h +++ b/internal/core/src/exec/expression/UnaryExpr.h @@ -33,7 +33,7 @@ namespace milvus { namespace exec { -template +template struct UnaryElementFuncForMatch { typedef std:: conditional_t, std::string, T> @@ -43,58 +43,68 @@ struct UnaryElementFuncForMatch { operator()(const T* src, size_t size, IndexInnerType val, - TargetBitmapView res) { + TargetBitmapView res, + int64_t* offsets = nullptr) { PatternMatchTranslator translator; auto regex_pattern = translator(val); RegexMatcher matcher(regex_pattern); for (int i = 0; i < size; ++i) { - res[i] = matcher(src[i]); + if constexpr (filter_type == FilterType::post) { + res[i] = matcher(src[offsets ? offsets[i] : i]); + } else { + res[i] = matcher(src[i]); + } } } }; -template +template struct UnaryElementFunc { typedef std:: conditional_t, std::string, T> IndexInnerType; + void operator()(const T* src, size_t size, IndexInnerType val, - TargetBitmapView res) { + TargetBitmapView res, + const int64_t* offsets = nullptr) { if constexpr (op == proto::plan::OpType::Match) { - UnaryElementFuncForMatch func; + UnaryElementFuncForMatch func; func(src, size, val, res); return; } - /* // This is the original code, which is kept for the documentation purposes - for (int i = 0; i < size; ++i) { - if constexpr (op == proto::plan::OpType::Equal) { - res[i] = src[i] == val; - } else if constexpr (op == proto::plan::OpType::NotEqual) { - res[i] = src[i] != val; - } else if constexpr (op == proto::plan::OpType::GreaterThan) { - res[i] = src[i] > val; - } else if constexpr (op == proto::plan::OpType::LessThan) { - res[i] = src[i] < val; - } else if constexpr (op == proto::plan::OpType::GreaterEqual) { - res[i] = src[i] >= val; - } else if constexpr (op == proto::plan::OpType::LessEqual) { - res[i] = src[i] <= val; - } else if constexpr (op == proto::plan::OpType::PrefixMatch) { - res[i] = milvus::query::Match( - src[i], val, proto::plan::OpType::PrefixMatch); - } else { - PanicInfo( - OpTypeInvalid, - fmt::format("unsupported op_type:{} for UnaryElementFunc", - op)); + // also, for post filter + if constexpr (filter_type == FilterType::post) { + for (int i = 0; i < size; ++i) { + auto offset = (offsets != nullptr) ? 
offsets[i] : i; + if constexpr (op == proto::plan::OpType::Equal) { + res[i] = src[offset] == val; + } else if constexpr (op == proto::plan::OpType::NotEqual) { + res[i] = src[offset] != val; + } else if constexpr (op == proto::plan::OpType::GreaterThan) { + res[i] = src[offset] > val; + } else if constexpr (op == proto::plan::OpType::LessThan) { + res[i] = src[offset] < val; + } else if constexpr (op == proto::plan::OpType::GreaterEqual) { + res[i] = src[offset] >= val; + } else if constexpr (op == proto::plan::OpType::LessEqual) { + res[i] = src[offset] <= val; + } else if constexpr (op == proto::plan::OpType::PrefixMatch) { + res[i] = milvus::query::Match( + src[offset], val, proto::plan::OpType::PrefixMatch); + } else { + PanicInfo( + OpTypeInvalid, + fmt::format( + "unsupported op_type:{} for UnaryElementFunc", op)); + } } + return; } - */ if constexpr (op == proto::plan::OpType::PrefixMatch) { for (int i = 0; i < size; ++i) { @@ -141,7 +151,7 @@ struct UnaryElementFunc { } \ } while (false) -template +template struct UnaryElementFuncForArray { using GetType = std::conditional_t, std::string_view, @@ -153,32 +163,39 @@ struct UnaryElementFuncForArray { ValueType val, int index, TargetBitmapView res, - TargetBitmapView valid_res) { + TargetBitmapView valid_res, + const int64_t* offsets = nullptr) { for (int i = 0; i < size; ++i) { - if (valid_data != nullptr && !valid_data[i]) { + auto offset = i; + if constexpr (filter_type == FilterType::post) { + offset = (offsets) ? offsets[i] : i; + } + if (valid_data != nullptr && !valid_data[offset]) { res[i] = valid_res[i] = false; continue; } if constexpr (op == proto::plan::OpType::Equal) { if constexpr (std::is_same_v) { - res[i] = src[i].is_same_array(val); + res[i] = src[offset].is_same_array(val); } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto array_data = src[i].template get_data(index); + auto array_data = + src[offset].template get_data(index); res[i] = array_data == val; } } else if constexpr (op == proto::plan::OpType::NotEqual) { if constexpr (std::is_same_v) { - res[i] = !src[i].is_same_array(val); + res[i] = !src[offset].is_same_array(val); } else { - if (index >= src[i].length()) { + if (index >= src[offset].length()) { res[i] = false; continue; } - auto array_data = src[i].template get_data(index); + auto array_data = + src[offset].template get_data(index); res[i] = array_data != val; } } else if constexpr (op == proto::plan::OpType::GreaterThan) { @@ -302,7 +319,7 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { private: template VectorPtr - ExecRangeVisitorImpl(); + ExecRangeVisitorImpl(OffsetVector* input = nullptr); template VectorPtr @@ -310,15 +327,15 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { template VectorPtr - ExecRangeVisitorImplForData(); + ExecRangeVisitorImplForData(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplJson(); + ExecRangeVisitorImplJson(OffsetVector* input = nullptr); template VectorPtr - ExecRangeVisitorImplArray(); + ExecRangeVisitorImplArray(OffsetVector* input = nullptr); template VectorPtr @@ -331,7 +348,7 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr { // Check overflow and cache result for performace template ColumnVectorPtr - PreCheckOverflow(); + PreCheckOverflow(OffsetVector* input = nullptr); template bool diff --git a/internal/core/src/exec/expression/ValueExpr.cpp b/internal/core/src/exec/expression/ValueExpr.cpp index 80330f7f15798..4ebec42903e48 100644 --- 
a/internal/core/src/exec/expression/ValueExpr.cpp +++ b/internal/core/src/exec/expression/ValueExpr.cpp @@ -22,9 +22,13 @@ namespace exec { void PhyValueExpr::Eval(EvalCtx& context, VectorPtr& result) { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + auto input = context.get_offset_input(); + SetHasOffsetInput((input != nullptr)); + int64_t real_batch_size = has_offset_input_ + ? input->size() + : (current_pos_ + batch_size_ >= active_count_ + ? active_count_ - current_pos_ + : batch_size_); if (real_batch_size == 0) { result = nullptr; diff --git a/internal/core/src/exec/expression/ValueExpr.h b/internal/core/src/exec/expression/ValueExpr.h index 044f46ac391e3..b2ccace22397c 100644 --- a/internal/core/src/exec/expression/ValueExpr.h +++ b/internal/core/src/exec/expression/ValueExpr.h @@ -49,11 +49,14 @@ class PhyValueExpr : public Expr { void MoveCursor() override { - int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_ - ? active_count_ - current_pos_ - : batch_size_; + if (!has_offset_input_) { + int64_t real_batch_size = + current_pos_ + batch_size_ >= active_count_ + ? active_count_ - current_pos_ + : batch_size_; - current_pos_ += real_batch_size; + current_pos_ += real_batch_size; + } } private: diff --git a/internal/core/src/exec/operator/FilterBitsNode.cpp b/internal/core/src/exec/operator/FilterBitsNode.cpp index 3bf6d0396811b..7ab1d99424563 100644 --- a/internal/core/src/exec/operator/FilterBitsNode.cpp +++ b/internal/core/src/exec/operator/FilterBitsNode.cpp @@ -64,8 +64,7 @@ PhyFilterBitsNode::GetOutput() { std::chrono::high_resolution_clock::time_point scalar_start = std::chrono::high_resolution_clock::now(); - EvalCtx eval_ctx( - operator_context_->get_exec_context(), exprs_.get(), input_.get()); + EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get()); TargetBitmap bitset; TargetBitmap valid_bitset; diff --git a/internal/core/src/exec/operator/FilterNode.cpp b/internal/core/src/exec/operator/FilterNode.cpp new file mode 100644 index 0000000000000..84d9e0c20bb14 --- /dev/null +++ b/internal/core/src/exec/operator/FilterNode.cpp @@ -0,0 +1,192 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
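+ +// PhyFilterNode implements the post-filter execution path: it pulls candidate +// (offset, distance) pairs from the vector search iterators, re-evaluates the +// scalar predicate on those offsets only, and inserts surviving hits into the +// per-query result slice until topk results are collected.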
+ +#include "FilterNode.h" + +namespace milvus { +namespace exec { +PhyFilterNode::PhyFilterNode( + int32_t operator_id, + DriverContext* driverctx, + const std::shared_ptr& filter) + : Operator(driverctx, + filter->output_type(), + operator_id, + filter->id(), + "PhyFilterNode") { + ExecContext* exec_context = operator_context_->get_exec_context(); + query_context_ = exec_context->get_query_context(); + std::vector filters; + filters.emplace_back(filter->filter()); + exprs_ = std::make_unique(filters, exec_context); + need_process_rows_ = query_context_->get_active_count(); + num_processed_rows_ = 0; +} + +void +PhyFilterNode::AddInput(RowVectorPtr& input) { + input_ = std::move(input); +} + +bool +PhyFilterNode::IsFinished() { + return is_finished_; +} + +template +inline size_t +find_binsert_position(const std::vector& distances, + size_t lo, + size_t hi, + float dist) { + while (lo < hi) { + size_t mid = lo + ((hi - lo) >> 1); + if constexpr (large_is_better) { + if (distances[mid] < dist) { + hi = mid; + } else { + lo = mid + 1; + } + } else { + if (distances[mid] > dist) { + hi = mid; + } else { + lo = mid + 1; + } + } + } + return lo; +} + +RowVectorPtr +PhyFilterNode::GetOutput() { + if (is_finished_ || !no_more_input_) { + return nullptr; + } + + DeferLambda([&]() { is_finished_ = true; }); + + if (input_ == nullptr) { + return nullptr; + } + + std::chrono::high_resolution_clock::time_point scalar_start = + std::chrono::high_resolution_clock::now(); + + milvus::SearchResult search_result = query_context_->get_search_result(); + int64_t nq = search_result.total_nq_; + int64_t unity_topk = search_result.unity_topK_; + knowhere::MetricType metric_type = query_context_->get_metric_type(); + bool large_is_better = PositivelyRelated(metric_type); + if (search_result.vector_iterators_.has_value()) { + AssertInfo(search_result.vector_iterators_.value().size() == + search_result.total_nq_, + "Vector Iterators' count must be equal to total_nq_, Check " + "your code"); + int nq_index = 0; + + AssertInfo(nq = search_result.vector_iterators_.value().size(), + "nq and iterator not equal size"); + search_result.seg_offsets_.resize(nq * unity_topk, INVALID_SEG_OFFSET); + search_result.distances_.resize(nq * unity_topk); + for (auto& iterator : search_result.vector_iterators_.value()) { + EvalCtx eval_ctx(operator_context_->get_exec_context(), + exprs_.get()); + int topk = 0; + while (iterator->HasNext() && topk < unity_topk) { + FixedVector offsets; + FixedVector distances; + // remain unfilled size as iterator batch size + int64_t batch_size = unity_topk - topk; + offsets.reserve(batch_size); + distances.reserve(batch_size); + while (iterator->HasNext()) { + auto offset_dis_pair = iterator->Next(); + AssertInfo( + offset_dis_pair.has_value(), + "Wrong state! 
iterator cannot return valid result " + "whereas it still" + "tells hasNext, terminate operation"); + auto offset = offset_dis_pair.value().first; + auto dis = offset_dis_pair.value().second; + offsets.emplace_back(offset); + distances.emplace_back(dis); + if (offsets.size() == batch_size) { + break; + } + } + eval_ctx.set_offset_input(&offsets); + exprs_->Eval(0, 1, true, eval_ctx, results_); + AssertInfo( + results_.size() == 1 && results_[0] != nullptr, + "PhyFilterNode result size should be size one and not " + "be nullptr"); + + auto col_vec = + std::dynamic_pointer_cast(results_[0]); + auto col_vec_size = col_vec->size(); + TargetBitmapView bitsetview(col_vec->GetRawData(), + col_vec_size); + Assert(bitsetview.size() <= batch_size); + for (auto i = 0; i < bitsetview.size(); ++i) { + if (bitsetview[i] > 0) { + auto pos = large_is_better + ? find_binsert_position( + search_result.distances_, + nq_index * unity_topk, + nq_index * unity_topk + topk, + distances[i]) + : find_binsert_position( + search_result.distances_, + nq_index * unity_topk, + nq_index * unity_topk + topk, + distances[i]); + if (topk > pos) { + std::memmove(&search_result.distances_[pos + 1], + &search_result.distances_[pos], + (topk - pos) * sizeof(float)); + std::memmove(&search_result.seg_offsets_[pos + 1], + &search_result.seg_offsets_[pos], + (topk - pos) * sizeof(int64_t)); + } + search_result.seg_offsets_[pos] = offsets[i]; + search_result.distances_[pos] = distances[i]; + ++topk; + if (topk == unity_topk) { + break; + } + } + } + if (topk == unity_topk) { + break; + } + } + nq_index++; + } + } + query_context_->set_search_result(std::move(search_result)); + std::chrono::high_resolution_clock::time_point scalar_end = + std::chrono::high_resolution_clock::now(); + double scalar_cost = + std::chrono::duration(scalar_end - scalar_start) + .count(); + monitor::internal_core_search_latency_postfilter.Observe(scalar_cost); + + return input_; +} + +} // namespace exec +} // namespace milvus diff --git a/internal/core/src/exec/operator/FilterNode.h b/internal/core/src/exec/operator/FilterNode.h new file mode 100644 index 0000000000000..d63dadb5d014d --- /dev/null +++ b/internal/core/src/exec/operator/FilterNode.h @@ -0,0 +1,81 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +#include "exec/Driver.h" +#include "exec/expression/Expr.h" +#include "exec/operator/Operator.h" +#include "exec/QueryContext.h" + +// The difference between FilterBitsNode and FilterNode: +// FilterBitsNode scans the whole segment and returns a bitset indicating, for every offset, whether it is filtered out, +// while FilterNode accepts an array of candidate offsets, evaluates the filter over them, and produces the surviving offsets. +namespace milvus { +namespace exec { +class PhyFilterNode : public Operator { + public: + PhyFilterNode(int32_t operator_id, + DriverContext* ctx, + const std::shared_ptr& filter); + + bool + IsFilter() override { + return true; + } + + bool + NeedInput() const override { + return !is_finished_; + } + + void + AddInput(RowVectorPtr& input) override; + + RowVectorPtr + GetOutput() override; + + bool + IsFinished() override; + + void + Close() override { + Operator::Close(); + exprs_->Clear(); + } + + BlockingReason + IsBlocked(ContinueFuture* /* unused */) override { + return BlockingReason::kNotBlocked; + } + + virtual std::string + ToString() const override { + return "PhyFilterNode"; + } + + private: + std::unique_ptr exprs_; + QueryContext* query_context_; + int64_t num_processed_rows_; + int64_t need_process_rows_; + bool is_finished_{false}; +}; +} // namespace exec +} // namespace milvus diff --git a/internal/core/src/exec/operator/Utils.h b/internal/core/src/exec/operator/Utils.h new file mode 100644 index 0000000000000..3fc1f097c2b8a --- /dev/null +++ b/internal/core/src/exec/operator/Utils.h @@ -0,0 +1,101 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
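+ +// Shared helpers for the search operators: decide whether a query needs +// knowhere vector iterators (group-by or post-filter execution) and prepare +// those iterators from a vector index.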
+ +#pragma once + +#include "common/QueryInfo.h" +#include "knowhere/index/index_node.h" +#include "segcore/SegmentInterface.h" +#include "segcore/SegmentGrowingImpl.h" +#include "segcore/SegmentSealedImpl.h" +#include "segcore/ConcurrentVector.h" +#include "common/Span.h" +#include "query/Utils.h" +#include "common/EasyAssert.h" + +namespace milvus { +namespace exec { + +static bool +UseVectorIterator(const SearchInfo& search_info) { + return search_info.group_by_field_id_.has_value() || + search_info.post_filter_execution; +} + +static bool +PrepareVectorIteratorsFromIndex(const SearchInfo& search_info, + int nq, + const DatasetPtr dataset, + SearchResult& search_result, + const BitsetView& bitset, + const index::VectorIndex& index) { + // for group by, we use the vector iterator to continuously fetch results and group on them; + // for post-filtered search, we use the vector iterator to continuously fetch results and check the scalar predicate on them, + // until enough valid topk results are collected + if (UseVectorIterator(search_info)) { + try { + auto search_conf = index.PrepareSearchParams(search_info); + knowhere::expected> + iterators_val = + index.VectorIterators(dataset, search_conf, bitset); + if (iterators_val.has_value()) { + search_result.AssembleChunkVectorIterators( + nq, 1, {0}, iterators_val.value()); + } else { + std::string operator_type = ""; + if (search_info.group_by_field_id_.has_value()) { + operator_type = "group_by"; + } else { + operator_type = "post filter"; + } + LOG_ERROR( + "Returned knowhere iterator has non-ready iterators " + "inside, terminate {} operation:{}", + operator_type, + knowhere::Status2String(iterators_val.error())); + PanicInfo( + ErrorCode::Unsupported, + fmt::format( + "Returned knowhere iterator has non-ready iterators " + "inside, terminate {} operation", + operator_type)); + } + search_result.total_nq_ = dataset->GetRows(); + search_result.unity_topK_ = search_info.topk_; + } catch (const std::runtime_error& e) { + std::string operator_type = ""; + if (search_info.group_by_field_id_.has_value()) { + operator_type = "group_by"; + } else { + operator_type = "post filter"; + } + LOG_ERROR( + "Caught error:{} when trying to initialize ann iterators for " + "{}: " + "operation will be terminated", + e.what(), + operator_type); + PanicInfo(ErrorCode::Unsupported, + fmt::format("Failed to {}, current index:" + + index.GetIndexType() + " doesn't support", + operator_type)); + } + return true; + } + return false; +} +} // namespace exec +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/exec/operator/VectorSearchNode.cpp b/internal/core/src/exec/operator/VectorSearchNode.cpp index 57aa0999c021f..6aa96b51e1418 100644 --- a/internal/core/src/exec/operator/VectorSearchNode.cpp +++ b/internal/core/src/exec/operator/VectorSearchNode.cpp @@ -86,6 +86,7 @@ PhyVectorSearchNode::GetOutput() { query_timestamp_, final_view, search_result); + search_result.total_data_cnt_ = final_view.size(); query_context_->set_search_result(std::move(search_result)); std::chrono::high_resolution_clock::time_point vector_end = diff --git a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h index 640789518cdf1..53942f088bd21 100644 --- a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h +++ b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h @@ -125,49 +125,6 @@ GetDataGetter(const segcore::SegmentInternalInterface& segment, } } -static bool 
-PrepareVectorIteratorsFromIndex(const SearchInfo& search_info, - int nq, - const DatasetPtr dataset, - SearchResult& search_result, - const BitsetView& bitset, - const index::VectorIndex& index) { - if (search_info.group_by_field_id_.has_value()) { - try { - auto search_conf = index.PrepareSearchParams(search_info); - knowhere::expected> - iterators_val = - index.VectorIterators(dataset, search_conf, bitset); - if (iterators_val.has_value()) { - search_result.AssembleChunkVectorIterators( - nq, 1, {0}, iterators_val.value()); - } else { - LOG_ERROR( - "Returned knowhere iterator has non-ready iterators " - "inside, terminate group_by operation:{}", - knowhere::Status2String(iterators_val.error())); - PanicInfo(ErrorCode::Unsupported, - "Returned knowhere iterator has non-ready iterators " - "inside, terminate group_by operation"); - } - search_result.total_nq_ = dataset->GetRows(); - search_result.unity_topK_ = search_info.topk_; - } catch (const std::runtime_error& e) { - LOG_ERROR( - "Caught error:{} when trying to initialize ann iterators for " - "group_by: " - "group_by operation will be terminated", - e.what()); - PanicInfo( - ErrorCode::Unsupported, - "Failed to groupBy, current index:" + index.GetIndexType() + - " doesn't support search_group_by"); - } - return true; - } - return false; -} - void SearchGroupBy(const std::vector>& iterators, const SearchInfo& searchInfo, diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h index 31255b60b53a8..f59be52adb698 100644 --- a/internal/core/src/mmap/ChunkedColumn.h +++ b/internal/core/src/mmap/ChunkedColumn.h @@ -147,6 +147,13 @@ class ChunkedColumnBase : public ColumnBase { "StringViews only supported for VariableColumn"); } + virtual std::pair, FixedVector> + ViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const { + PanicInfo(ErrorCode::Unsupported, + "viewsbyoffsets only supported for VariableColumn"); + } + std::pair GetChunkIDByOffset(int64_t offset) const { int chunk_id = 0; @@ -322,6 +329,13 @@ class ChunkedVariableColumn : public ChunkedColumnBase { return chunks_[chunk_id]; } + std::pair, FixedVector> + ViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->ViewsByOffsets(offsets); + } + BufferView GetBatchBuffer(int64_t start_offset, int64_t length) override { if (start_offset < 0 || start_offset > num_rows_ || diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index b50a582e90312..d4c67a53b9754 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -323,6 +323,12 @@ class SingleChunkColumnBase : public ColumnBase { "StringViews only supported for VariableColumn"); } + virtual std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets) const { + PanicInfo(ErrorCode::Unsupported, + "viewsbyoffsets only supported for VariableColumn"); + } + virtual void AppendBatch(const FieldDataPtr data) { size_t required_size = data_size_ + data->DataSize(); @@ -694,6 +700,19 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase { return std::make_pair(res, valid_data_); } + std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets) const { + std::vector res; + FixedVector valid; + res.reserve(offsets.size()); + valid.reserve(offsets.size()); + for (size_t i = 0; i < offsets.size(); ++i) { + res.emplace_back(RawAt(offsets[i])); + valid.emplace_back(IsValid(offsets[i])); + } + return {res, valid}; + } + [[nodiscard]] 
std::vector Views() const { std::vector res; diff --git a/internal/core/src/monitor/prometheus_client.cpp b/internal/core/src/monitor/prometheus_client.cpp index 213182ba3f063..2c13e67164407 100644 --- a/internal/core/src/monitor/prometheus_client.cpp +++ b/internal/core/src/monitor/prometheus_client.cpp @@ -177,6 +177,8 @@ std::map vectorLatencyLabels{ {"type", "vector_latency"}}; std::map groupbyLatencyLabels{ {"type", "groupby_latency"}}; +std::map postfilterLatencyLabels{ + {"type", "postfilter_latency"}}; std::map scalarProportionLabels{ {"type", "scalar_proportion"}}; DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency, @@ -190,6 +192,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector, DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby, internal_core_search_latency, groupbyLatencyLabels) +DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_postfilter, + internal_core_search_latency, + postfilterLatencyLabels) DEFINE_PROMETHEUS_HISTOGRAM_WITH_BUCKETS( internal_core_search_latency_scalar_proportion, internal_core_search_latency, diff --git a/internal/core/src/monitor/prometheus_client.h b/internal/core/src/monitor/prometheus_client.h index 7bde7a422933c..da50506cbcaf1 100644 --- a/internal/core/src/monitor/prometheus_client.h +++ b/internal/core/src/monitor/prometheus_client.h @@ -133,6 +133,7 @@ DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby); +DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_postfilter); DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar_proportion); } // namespace milvus::monitor diff --git a/internal/core/src/query/PlanProto.cpp b/internal/core/src/query/PlanProto.cpp index b3ddb01dc0f72..bcc09308b79c0 100644 --- a/internal/core/src/query/PlanProto.cpp +++ b/internal/core/src/query/PlanProto.cpp @@ -53,6 +53,12 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { nlohmann::json::parse(query_info_proto.search_params()); search_info.materialized_view_involved = query_info_proto.materialized_view_involved(); + // currently, post filter does not support range search + if (search_info.search_params_.contains(POST_FILTER) && + !search_info.search_params_.contains(RADIUS)) { + search_info.post_filter_execution = + search_info.search_params_[POST_FILTER]; + } if (query_info_proto.bm25_avgdl() > 0) { search_info.search_params_[knowhere::meta::BM25_AVGDL] = @@ -95,33 +101,60 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { milvus::plan::PlanNodePtr plannode; std::vector sources; if (anns_proto.has_predicates()) { - plannode = std::move(expr_parser()); - if (plan_node->search_info_.materialized_view_involved) { - const auto expr_info = plannode->GatherInfo(); - knowhere::MaterializedViewSearchInfo materialized_view_search_info; - for (const auto& [expr_field_id, vals] : - expr_info.field_id_to_values) { - materialized_view_search_info - .field_id_to_touched_categories_cnt[expr_field_id] = - vals.size(); + // currently limit post filter scope to search only + if (plan_node->search_info_.post_filter_execution && + plan_node->search_info_.group_by_field_id_ == std::nullopt) { + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId()); + sources = std::vector{plannode}; + plannode = std::make_shared( + 
milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; + + // add filter nodes after vector search node + auto expr = ParseExprs(anns_proto.predicates()); + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), expr, sources); + sources = std::vector{plannode}; + } else { + plannode = std::move(expr_parser()); + if (plan_node->search_info_.materialized_view_involved) { + const auto expr_info = plannode->GatherInfo(); + knowhere::MaterializedViewSearchInfo + materialized_view_search_info; + for (const auto& [expr_field_id, vals] : + expr_info.field_id_to_values) { + materialized_view_search_info + .field_id_to_touched_categories_cnt[expr_field_id] = + vals.size(); + } + materialized_view_search_info.is_pure_and = + expr_info.is_pure_and; + materialized_view_search_info.has_not = expr_info.has_not; + + plan_node->search_info_.search_params_ + [knowhere::meta::MATERIALIZED_VIEW_SEARCH_INFO] = + materialized_view_search_info; } - materialized_view_search_info.is_pure_and = expr_info.is_pure_and; - materialized_view_search_info.has_not = expr_info.has_not; + sources = std::vector{plannode}; + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; - plan_node->search_info_ - .search_params_[knowhere::meta::MATERIALIZED_VIEW_SEARCH_INFO] = - materialized_view_search_info; + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; } - sources = std::vector{plannode}; - } - plannode = std::make_shared( - milvus::plan::GetNextPlanNodeId(), sources); - sources = std::vector{plannode}; + } else { + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; - plannode = std::make_shared( - milvus::plan::GetNextPlanNodeId(), sources); - sources = std::vector{plannode}; + plannode = std::make_shared( + milvus::plan::GetNextPlanNodeId(), sources); + sources = std::vector{plannode}; + } if (plan_node->search_info_.group_by_field_id_ != std::nullopt) { plannode = std::make_shared( diff --git a/internal/core/src/query/SearchBruteForce.cpp b/internal/core/src/query/SearchBruteForce.cpp index b5f112c5b9da3..eb344d17702fc 100644 --- a/internal/core/src/query/SearchBruteForce.cpp +++ b/internal/core/src/query/SearchBruteForce.cpp @@ -241,12 +241,11 @@ BruteForceSearchIterators(const dataset::SearchDataset& dataset, "equal to nq:{} for single chunk", iterators_val.value().size(), nq); - SubSearchResult subSearchResult(dataset.num_queries, - dataset.topk, - dataset.metric_type, - dataset.round_decimal, - iterators_val.value()); - return std::move(subSearchResult); + return SubSearchResult(dataset.num_queries, + dataset.topk, + dataset.metric_type, + dataset.round_decimal, + iterators_val.value()); } else { LOG_ERROR( "Failed to get valid knowhere brute-force-iterators from chunk, " diff --git a/internal/core/src/query/SearchOnGrowing.cpp b/internal/core/src/query/SearchOnGrowing.cpp index f71efb7562a6b..6ced1ff6b8c9f 100644 --- a/internal/core/src/query/SearchOnGrowing.cpp +++ b/internal/core/src/query/SearchOnGrowing.cpp @@ -16,6 +16,7 @@ #include "SearchOnGrowing.h" #include "query/SearchBruteForce.h" #include "query/SearchOnIndex.h" +#include "exec/operator/Utils.h" namespace milvus::query { @@ -124,7 +125,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment, auto size_per_chunk = element_end - element_begin; auto sub_view = bitset.subview(element_begin, size_per_chunk); - if 
(info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(info)) { auto sub_qr = BruteForceSearchIterators(search_dataset, chunk_data, size_per_chunk, @@ -149,7 +150,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment, final_qr.merge(sub_qr); } } - if (info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(info)) { std::vector chunk_rows(max_chunk, 0); for (int i = 1; i < max_chunk; ++i) { chunk_rows[i] = i * vec_size_per_chunk; diff --git a/internal/core/src/query/SearchOnIndex.cpp b/internal/core/src/query/SearchOnIndex.cpp index 0204f791ce217..8556340a168c0 100644 --- a/internal/core/src/query/SearchOnIndex.cpp +++ b/internal/core/src/query/SearchOnIndex.cpp @@ -10,7 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include "SearchOnIndex.h" -#include "exec/operator/groupby/SearchGroupByOperator.h" +#include "exec/operator/Utils.h" namespace milvus::query { void diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index 2bd7e8edb8ac6..ed0b97708b17d 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -21,7 +21,7 @@ #include "query/SearchBruteForce.h" #include "query/SearchOnSealed.h" #include "query/helper.h" -#include "exec/operator/groupby/SearchGroupByOperator.h" +#include "exec/operator/Utils.h" namespace milvus::query { @@ -132,7 +132,7 @@ SearchOnSealed(const Schema& schema, } BitsetView bitset_view(bitset_ptr, chunk_size); - if (search_info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(search_info)) { auto sub_qr = BruteForceSearchIterators(dataset, vec_data, chunk_size, @@ -160,7 +160,7 @@ SearchOnSealed(const Schema& schema, } offset += chunk_size; } - if (search_info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(search_info)) { result.AssembleChunkVectorIterators(num_queries, num_chunk, column->GetNumRowsUntilChunk(), @@ -199,7 +199,7 @@ SearchOnSealed(const Schema& schema, auto data_type = field.get_data_type(); CheckBruteForceSearchParam(field, search_info); - if (search_info.group_by_field_id_.has_value()) { + if (milvus::exec::UseVectorIterator(search_info)) { auto sub_qr = BruteForceSearchIterators( dataset, vec_data, row_count, search_info, bitset, data_type); result.AssembleChunkVectorIterators( diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index b3719ed38df2b..42d9dfc226f30 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -825,6 +825,23 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id, "chunk_view_impl only used for variable column field "); } +std::pair, FixedVector> +ChunkedSegmentSealedImpl::chunk_view_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->ViewsByOffsets(chunk_id, offsets); + } + PanicInfo(ErrorCode::UnexpectedError, + "chunk_view_by_offsets only used for variable column field "); +} + const index::IndexBase* 
ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const { diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h index 60314f019ec26..1e1fd42475c4a 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -206,6 +206,11 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index bacfdab588774..2f2fe76123857 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -398,6 +398,15 @@ SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { "chunk view impl not implement for growing segment"); } +std::pair, FixedVector> +SegmentGrowingImpl::chunk_view_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + PanicInfo(ErrorCode::NotImplemented, + "chunk view by offsets not implemented for growing segment"); +} + int64_t SegmentGrowingImpl::num_chunk(FieldId field_id) const { auto size = get_insert_record().ack_responder_.GetAck(); diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index f90bba0f5df1d..940b82dd9d390 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -344,6 +344,11 @@ class SegmentGrowingImpl : public SegmentGrowing { std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index fecb45fec6641..33ec4196e05b9 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -200,6 +200,28 @@ class SegmentInternalInterface : public SegmentInterface { return std::make_pair(res, chunk_info.second); } + template + std::pair, FixedVector> + get_views_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + if (this->type() == SegmentType::Growing) { + PanicInfo(ErrorCode::Unsupported, + "get chunk views not supported for growing segment"); + } + auto chunk_view = chunk_view_by_offsets(field_id, chunk_id, offsets); + if constexpr (std::is_same_v) { + return chunk_view; + } else { + std::vector res; + res.reserve(chunk_view.first.size()); + for (const auto& view : chunk_view.first) { + res.emplace_back(view); + } + return {res, chunk_view.second}; + } + } + template const index::ScalarIndex& chunk_scalar_index(FieldId field_id, int64_t chunk_id) const { @@ -407,6 +429,11 @@ class SegmentInternalInterface : public SegmentInterface { int64_t start_offset, int64_t length) const = 0; + virtual std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const = 0; + // internal API: return chunk_index in span, support 
scalar index only virtual const index::IndexBase* chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index be2b2635e2917..0352e983aa6b8 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -796,6 +796,23 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { "chunk_view_impl only used for variable column field "); } +std::pair, FixedVector> +SegmentSealedImpl::chunk_view_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->ViewsByOffsets(offsets); + } + PanicInfo(ErrorCode::UnexpectedError, + "chunk_view_by_offsets only used for variable column field "); +} + const index::IndexBase* SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const { AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(), diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 520e82ec0d34f..cd577c0e20506 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -212,6 +212,11 @@ class SegmentSealedImpl : public SegmentSealed { std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + std::pair, FixedVector> + chunk_view_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 67d97b83c3fff..fc5994b836601 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -48,9 +48,9 @@ set(MILVUS_TEST_FILES test_expr.cpp test_expr_materialized_view.cpp test_float16.cpp - test_function.cpp test_futures.cpp test_group_by.cpp + test_post_filter.cpp test_growing.cpp test_growing_index.cpp test_hybrid_index.cpp diff --git a/internal/core/unittest/test_always_true_expr.cpp b/internal/core/unittest/test_always_true_expr.cpp index 2d54525e8a306..6ed1746146448 100644 --- a/internal/core/unittest/test_always_true_expr.cpp +++ b/internal/core/unittest/test_always_true_expr.cpp @@ -67,10 +67,29 @@ TEST_P(ExprAlwaysTrueTest, AlwaysTrue) { final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = age_col[i]; ASSERT_EQ(ans, true) << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], true) << "@" << i << "!!" 
<< val; + } } } diff --git a/internal/core/unittest/test_array_expr.cpp b/internal/core/unittest/test_array_expr.cpp index 14b42521891ee..87f83261dbff5 100644 --- a/internal/core/unittest/test_array_expr.cpp +++ b/internal/core/unittest/test_array_expr.cpp @@ -27,6 +27,7 @@ #include "segcore/SegmentGrowingImpl.h" #include "simdjson/padded_string.h" #include "test_utils/DataGen.h" +#include "test_utils/GenExprProto.h" using namespace milvus; using namespace milvus::query; @@ -611,11 +612,31 @@ TEST(Expr, TestArrayRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols[array_type][i]); auto ref = ref_func(array); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -728,6 +749,23 @@ TEST(Expr, TestArrayEqual) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(long_array_col[i]); @@ -737,6 +775,9 @@ TEST(Expr, TestArrayEqual) { } auto ref = ref_func(array_values); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -927,6 +968,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["bool"][i]); @@ -935,6 +989,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)) << "@" << i; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)) << "@" << i; + } } } @@ -982,6 +1039,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, 
MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["double"][i]); @@ -990,6 +1060,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1027,6 +1100,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["float"][i]); @@ -1035,6 +1121,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1082,6 +1171,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["int"][i]); @@ -1090,6 +1192,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1128,6 +1233,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["long"][i]); @@ -1136,6 +1254,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -1181,6 +1302,19 @@ TEST(Expr, TestArrayContains) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + 
EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["string"][i]); @@ -1189,6 +1323,9 @@ TEST(Expr, TestArrayContains) { res.push_back(array.get_data(j)); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -2127,11 +2264,31 @@ TEST(Expr, TestArrayBinaryArith) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols[array_type][i]); auto ref = ref_func(array); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -2217,10 +2374,26 @@ TEST(Expr, TestArrayStringMatch) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["string"][i]); ASSERT_EQ(ans, testcase.check_func(array)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], testcase.check_func(array)); + } } } } @@ -2420,10 +2593,30 @@ TEST(Expr, TestArrayInTerm) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols[array_type][i]); ASSERT_EQ(ans, ref_func(array)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref_func(array)); + } } } } @@ -2510,10 +2703,26 @@ TEST(Expr, TestTermInArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Array(array_cols["long"][i]); 
ASSERT_EQ(ans, testcase.check_func(array)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], testcase.check_func(array)); + } } } } diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index 8fbf60592bd28..ca6321d22db73 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -31,6 +31,7 @@ #include "simdjson/padded_string.h" #include "segcore/segment_c.h" #include "test_utils/DataGen.h" +#include "test_utils/GenExprProto.h" #include "index/IndexFactory.h" #include "exec/expression/Expr.h" #include "exec/Task.h" @@ -369,7 +370,22 @@ TEST_P(ExprTest, TestRange) { seg_promote, N * num_iters, MAX_TIMESTAMP); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < num_iters; ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(final.size(), N * num_iters); + EXPECT_EQ(view.size(), num_iters); for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -377,6 +393,10 @@ TEST_P(ExprTest, TestRange) { auto val = age_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + + if (i < num_iters) { + ASSERT_EQ(view[i], ref) << clause << "@" << i << "!!" << val; + } } } } @@ -731,7 +751,24 @@ TEST_P(ExprTest, TestRangeNullable) { seg_promote, N * num_iters, MAX_TIMESTAMP); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(final.size(), N * num_iters); + EXPECT_EQ(view.size(), int(N * num_iters / 2)); for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -741,6 +778,10 @@ TEST_P(ExprTest, TestRangeNullable) { auto ref = ref_func(val, valid_data); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val << "!!" << valid_data; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val << "!!" 
<< valid_data; + } } } } @@ -817,6 +858,22 @@ TEST_P(ExprTest, TestBinaryRangeJSON) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -828,6 +885,11 @@ TEST_P(ExprTest, TestBinaryRangeJSON) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at(pointer) @@ -836,6 +898,11 @@ TEST_P(ExprTest, TestBinaryRangeJSON) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } } } @@ -920,6 +987,22 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -931,6 +1014,11 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at(pointer) @@ -939,6 +1027,11 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) { ASSERT_EQ(ans, ref) << val << testcase.lower_inclusive << testcase.lower << testcase.upper_inclusive << testcase.upper; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << val << testcase.lower_inclusive << testcase.lower + << testcase.upper_inclusive << testcase.upper; + } } } } @@ -993,12 +1086,28 @@ TEST_P(ExprTest, TestExistsJson) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 
10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist(pointer); auto ref = check(val); ASSERT_EQ(ans, ref); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref); + } } } } @@ -1059,12 +1168,28 @@ TEST_P(ExprTest, TestExistsJsonNullable) { plannode, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res(plannode.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist(pointer); auto ref = check(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref); + } } } } @@ -1197,6 +1322,22 @@ TEST_P(ExprTest, TestUnaryRangeJson) { plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (testcase.nested_path[0] == "int") { @@ -1206,6 +1347,9 @@ TEST_P(ExprTest, TestUnaryRangeJson) { .value(); auto ref = f(val); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1213,6 +1357,9 @@ TEST_P(ExprTest, TestUnaryRangeJson) { .value(); auto ref = f(val); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1260,10 +1407,29 @@ TEST_P(ExprTest, TestUnaryRangeJson) { plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan.get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto ref = check(op); ASSERT_EQ(ans, ref) << "@" << i << "op" << op; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "op" << op; + } } } } @@ -1401,6 +1567,22 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) { plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res(plan.get(), + 
seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (testcase.nested_path[0] == "int") { @@ -1410,6 +1592,9 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) { .value(); auto ref = f(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } else { auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1417,6 +1602,9 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) { .value(); auto ref = f(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1537,6 +1725,19 @@ TEST_P(ExprTest, TestTermJson) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1544,6 +1745,9 @@ TEST_P(ExprTest, TestTermJson) { .value(); auto ref = check(val); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1616,6 +1820,20 @@ TEST_P(ExprTest, TestTermJsonNullable) { final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = milvus::Json(simdjson::padded_string(json_col[i])) @@ -1623,6 +1841,9 @@ TEST_P(ExprTest, TestTermJsonNullable) { .value(); auto ref = check(val, valid_data_col[i]); ASSERT_EQ(ans, ref); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref); + } } } } @@ -1713,12 +1934,29 @@ TEST_P(ExprTest, TestTerm) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = age_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) << clause << "@" << i << "!!" 
<< val; + } } } } @@ -1846,12 +2084,29 @@ TEST_P(ExprTest, TestTermNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = nullable_col[i]; auto ref = ref_func(val, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) << clause << "@" << i << "!!" << val; + } } } } @@ -1955,11 +2210,32 @@ TEST_P(ExprTest, TestCall) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, ref_func(address_col[i])) << "@" << i << "!!" << address_col[i]; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref_func(address_col[i])) + << "@" << i << "!!" << address_col[i]; + } } } @@ -2103,6 +2379,23 @@ TEST_P(ExprTest, TestCompare) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -2111,6 +2404,11 @@ TEST_P(ExprTest, TestCompare) { auto ref = ref_func(val1, val2); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2235,6 +2533,23 @@ TEST_P(ExprTest, TestCompareNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -2243,6 +2558,11 @@ TEST_P(ExprTest, TestCompareNullable) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2367,6 +2687,23 @@ TEST_P(ExprTest, TestCompareNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -2375,6 +2712,11 @@ TEST_P(ExprTest, TestCompareNullable2) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2464,6 +2806,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndex) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = age32_col[i]; @@ -2471,6 +2830,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndex) { auto ref = ref_func(val1, val2); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2600,6 +2964,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = nullable_col[i]; @@ -2607,6 +2988,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2736,6 +3122,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val2 = nullable_col[i]; @@ -2743,6 +3146,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexNullable2) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -2807,8 +3215,25 @@ TEST_P(ExprTest, test_term_pk_with_sorted) { plan = std::make_shared(DEFAULT_PLANNODE_ID, expr); final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], false); + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], false); + } } } @@ -3890,9 +4315,26 @@ TEST(Expr, TestExprNOT) { auto start = std::chrono::steady_clock::now(); final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; i++) { if (!valid_data[i]) { EXPECT_EQ(final[i], false); + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], false); + } } } }; @@ -4060,8 +4502,25 @@ TEST_P(ExprTest, test_term_pk) { plan = std::make_shared(DEFAULT_PLANNODE_ID, expr); final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], false); + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], false); + } } } @@ -4181,8 +4640,25 @@ TEST_P(ExprTest, TestConjuctExpr) { std::make_shared(DEFAULT_PLANNODE_ID, expr); BitsetType final; final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], pair.first < i && i < pair.second) << i; + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], pair.first < i && i < pair.second) + << i; + } } } } @@ -4252,8 +4728,25 @@ TEST_P(ExprTest, TestConjuctExprNullable) { std::make_shared(DEFAULT_PLANNODE_ID, expr); BitsetType final; final = ExecuteQueryExpr(plan, seg.get(), N, MAX_TIMESTAMP); + + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg.get(), N, MAX_TIMESTAMP, &offsets); + 
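Two offset-selection schemes recur throughout this file: every even row, and a short prefix of std::min(count, 10) rows. The sketch below (standard C++ only, helper names purely illustrative) spells out the view-index mapping each scheme implies, which is what the view[int(i / 2)] and view[i] assertions in these hunks rely on.

// Sketch of the two offset-selection schemes used by these tests and the
// slot in the restricted result that each one assigns to a source row.
#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

// Even rows: row i (i even) ends up at slot i / 2 of the restricted result.
std::vector<int32_t> EvenRowOffsets(int32_t count) {
    std::vector<int32_t> offsets;
    for (int32_t i = 0; i < count; ++i) {
        if (i % 2 == 0) {
            offsets.push_back(i);
        }
    }
    return offsets;
}

// First-K rows (K = std::min(count, 10) in several tests): row i (< K) ends
// up at slot i; rows at or beyond K are simply not evaluated.
std::vector<int32_t> PrefixOffsets(int32_t count, int32_t k) {
    std::vector<int32_t> offsets;
    for (int32_t i = 0; i < std::min(count, k); ++i) {
        offsets.push_back(i);
    }
    return offsets;
}

// Slot of row i under the even-row scheme, or nullopt if i was not requested.
std::optional<size_t> EvenRowSlot(int32_t i) {
    if (i % 2 != 0) {
        return std::nullopt;
    }
    return static_cast<size_t>(i / 2);
}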
BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); for (int i = 0; i < N; ++i) { EXPECT_EQ(final[i], pair.first < i && i < pair.second) << i; + if (i % 2 == 0) { + EXPECT_EQ(view[int(i / 2)], pair.first < i && i < pair.second) + << i; + } } } } @@ -4977,6 +5470,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMaris) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = str1_col[i]; @@ -4984,6 +5494,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMaris) { auto ref = ref_func(val1, val2); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -5108,6 +5623,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = str1_col[i]; @@ -5115,6 +5647,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" + << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -5239,6 +5776,23 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg.get(), + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; auto val1 = nullable_col[i]; @@ -5246,6 +5800,11 @@ TEST_P(ExprTest, TestCompareWithScalarIndexMarisNullable2) { auto ref = ref_func(val1, val2, valid_data_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
+ << boost::format("[%1%, %2%]") % val1 % val2; + } } } } @@ -5951,6 +6510,23 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { @@ -5958,26 +6534,50 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRange) { auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val << std::endl; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val << std::endl; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -6890,6 +7490,23 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { @@ -6897,26 +7514,50 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeNullable) { auto ref = ref_func(val, age8_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val << std::endl; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val << std::endl; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val, age16_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val, age32_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val, age64_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val, age_float_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val, age_double_valid_col[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -7728,11 +8369,32 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto ref = ref_func(milvus::Json(simdjson::padded_string(json_col[i]))); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << json_col[i]; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << json_col[i]; + } } } } @@ -8652,12 +9314,33 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto ref = ref_func(milvus::Json(simdjson::padded_string(json_col[i])), valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << json_col[i]; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< json_col[i]; + } } } } @@ -8731,6 +9414,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8740,6 +9433,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { auto ref = check(val); ASSERT_EQ(ans, ref) << testcase.value << " " << val << " " << testcase.op; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << val << " " << testcase.op; + } } } @@ -8774,6 +9471,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8786,6 +9493,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloat) { } auto ref = check(array_length); ASSERT_EQ(ans, ref) << testcase.value << " " << array_length; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << array_length; + } } } } @@ -8864,6 +9575,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8873,6 +9594,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { auto ref = check(val, valid_data[i]); ASSERT_EQ(ans, ref) << testcase.value << " " << val << " " << testcase.op; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << val << " " << testcase.op; + } } } @@ -8910,6 +9635,16 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * 
num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -8922,6 +9657,10 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeJSONFloatNullable) { } auto ref = check(array_length, valid_data[i]); ASSERT_EQ(ans, ref) << testcase.value << " " << array_length; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << testcase.value << " " << array_length; + } } } } @@ -9412,32 +10151,73 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeWithScalarSortIndex) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N / 2); + for (auto i = 0; i < N; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N / 2); + for (int i = 0; i < N; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { auto val = age8_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10116,32 +10896,70 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeWithScalarSortIndexNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N, 10)); + for (int i = 0; i < N; ++i) { auto ans = final[i]; if (dtype == DataType::INT8) { auto val = age8_col[i]; auto ref = ref_func(val, i8_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" 
<< val; + } } else if (dtype == DataType::INT16) { auto val = age16_col[i]; auto ref = ref_func(val, i16_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT32) { auto val = age32_col[i]; auto ref = ref_func(val, i32_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = age64_col[i]; auto ref = ref_func(val, i64_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::FLOAT) { auto val = age_float_col[i]; auto ref = ref_func(val, float_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = age_double_col[i]; auto ref = ref_func(val, double_valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10316,6 +11134,20 @@ TEST_P(ExprTest, TestUnaryRangeWithJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10324,24 +11156,40 @@ TEST_P(ExprTest, TestUnaryRangeWithJSON) { .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" 
<< val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10546,6 +11394,20 @@ TEST_P(ExprTest, TestUnaryRangeWithJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10554,24 +11416,40 @@ TEST_P(ExprTest, TestUnaryRangeWithJSONNullable) { .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10724,6 +11602,23 @@ TEST_P(ExprTest, TestTermWithJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10732,24 +11627,40 @@ TEST_P(ExprTest, TestTermWithJSON) { .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -10924,6 +11835,23 @@ TEST_P(ExprTest, TestTermWithJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -10932,24 +11860,40 @@ TEST_P(ExprTest, TestTermWithJSONNullable) { .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/int") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/double") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .template at("/string") .value(); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -11076,6 +12020,23 @@ TEST_P(ExprTest, TestExistsWithJSON) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -11083,26 +12044,46 @@ TEST_P(ExprTest, TestExistsWithJSON) { .exist("/bool"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/int"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/double"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/string"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::VARCHAR) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/varchar"); auto ref = ref_func(val); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -11267,6 +12248,23 @@ TEST_P(ExprTest, TestExistsWithJSONNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (dtype == DataType::BOOL) { @@ -11274,26 +12272,46 @@ TEST_P(ExprTest, TestExistsWithJSONNullable) { .exist("/bool"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" 
<< val; + } } else if (dtype == DataType::INT64) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/int"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::DOUBLE) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/double"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::STRING) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/string"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else if (dtype == DataType::VARCHAR) { auto val = milvus::Json(simdjson::padded_string(json_col[i])) .exist("/varchar"); auto ref = ref_func(val, valid_data[i]); ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << clause << "@" << i << "!!" << val; + } } else { ASSERT_TRUE(false) << "No test case defined for this data type"; } @@ -11361,13 +12379,21 @@ TEST_P(ExprTest, TestTermInFieldJson) { auto start = std::chrono::steady_clock::now(); final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP); - // std::cout << "cost" - // << std::chrono::duration_cast( - // std::chrono::steady_clock::now() - start) - // .count() - // << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11377,6 +12403,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -11417,6 +12446,19 @@ TEST_P(ExprTest, TestTermInFieldJson) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11426,6 +12468,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -11466,6 +12511,19 @@ TEST_P(ExprTest, TestTermInFieldJson) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some 
offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11475,6 +12533,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -11515,6 +12576,19 @@ TEST_P(ExprTest, TestTermInFieldJson) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11524,6 +12598,9 @@ TEST_P(ExprTest, TestTermInFieldJson) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -11593,6 +12670,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { // << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11602,6 +12692,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -11645,6 +12738,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11654,6 +12760,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, 
valid_data[i])); + } } } @@ -11697,6 +12806,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11706,6 +12828,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -11750,6 +12875,19 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11759,6 +12897,9 @@ TEST_P(ExprTest, TestTermInFieldJsonNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } } @@ -11966,6 +13107,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -11975,6 +13129,9 @@ TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12016,6 +13173,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12025,6 +13195,9 @@ 
TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12066,6 +13239,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12075,6 +13261,9 @@ TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12116,6 +13305,19 @@ TEST_P(ExprTest, TestJsonContainsAny) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12125,6 +13327,9 @@ TEST_P(ExprTest, TestJsonContainsAny) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -12195,6 +13400,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12204,6 +13422,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12248,6 +13469,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; 
++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12257,6 +13491,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12301,6 +13538,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12310,6 +13560,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12355,6 +13608,19 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12364,6 +13630,9 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } } @@ -12433,6 +13702,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12442,6 +13724,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12491,6 +13776,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), 
seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12500,6 +13798,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12548,6 +13849,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12557,6 +13871,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } @@ -12603,6 +13920,19 @@ TEST_P(ExprTest, TestJsonContainsAll) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12612,6 +13942,9 @@ TEST_P(ExprTest, TestJsonContainsAll) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res)); + } } } } @@ -12686,6 +14019,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12695,6 +14041,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12746,6 +14095,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + 
if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12755,6 +14117,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12806,6 +14171,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12815,6 +14193,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } @@ -12865,6 +14246,19 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto array = milvus::Json(simdjson::padded_string(json_col[i])) @@ -12874,6 +14268,9 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) { res.push_back(element.template get()); } ASSERT_EQ(ans, check(res, valid_data[i])); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, valid_data[i])); + } } } } @@ -12972,10 +14369,26 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], check(res, i)); + } } } @@ -13006,10 +14419,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * 
num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13059,9 +14485,22 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } @@ -13089,10 +14528,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13144,10 +14596,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13175,10 +14640,23 @@ TEST_P(ExprTest, TestJsonContainsArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } } @@ -13281,10 +14759,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and 
do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i, valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i, valid_data[i])); + } } } @@ -13318,10 +14809,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i, valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i, valid_data[i])); + } } } @@ -13376,9 +14880,22 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check(valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(valid_data[i])); + } } } @@ -13409,10 +14926,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i, valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i, valid_data[i])); + } } } @@ -13464,10 +14994,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if 
(i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } @@ -13495,10 +15038,23 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; std::vector res; ASSERT_EQ(ans, check(res, i)); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(res, i)); + } } } } @@ -13599,9 +15155,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } @@ -13627,9 +15196,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArray) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } } @@ -13710,9 +15292,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check(valid_data[i])); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check(valid_data[i])); + } } } @@ -13738,9 +15333,22 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArrayNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for 
(int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, check()); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], check()); + } } } } @@ -13819,9 +15427,22 @@ TEST_P(ExprTest, TestJsonContainsDiffType) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } @@ -13846,9 +15467,22 @@ TEST_P(ExprTest, TestJsonContainsDiffType) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } } @@ -13929,12 +15563,28 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], false); + } } else { ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } } @@ -13960,12 +15610,28 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeNullable) { << std::endl; EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + for (auto i = 0; i < std::min(N * num_iters, 10); ++i) { + offsets.emplace_back(i); + } + auto col_vec = milvus::test::gen_filter_res( + plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), std::min(N * num_iters, 10)); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], false); + } } else { ASSERT_EQ(ans, testcase.res); + if (i < std::min(N * num_iters, 10)) { + ASSERT_EQ(view[i], testcase.res); + } } } } diff --git a/internal/core/unittest/test_post_filter.cpp b/internal/core/unittest/test_post_filter.cpp new file mode 100644 index 0000000000000..312756c1e864c --- /dev/null +++ b/internal/core/unittest/test_post_filter.cpp @@ -0,0 +1,588 @@ 
+// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include "common/Schema.h" +#include "query/Plan.h" +#include "segcore/SegmentSealedImpl.h" +#include "segcore/reduce_c.h" +#include "segcore/plan_c.h" +#include "segcore/segment_c.h" +#include "test_utils/DataGen.h" +#include "test_utils/c_api_test_utils.h" + +using namespace milvus; +using namespace milvus::query; +using namespace milvus::segcore; +using namespace milvus::storage; +using namespace milvus::tracer; + +/** + * This UT covers the post-filtering execution path (knowhere iterator next() -> scalar filtering), + * so it does not exercise every expr type; a few representative cases are enough. + */ + +void +prepareSegmentFieldData(const std::unique_ptr& segment, + size_t row_count, + GeneratedData& data_set) { + auto field_data = + std::make_shared>(DataType::INT64, false); + field_data->FillFieldData(data_set.row_ids_.data(), row_count); + auto field_data_info = + FieldDataInfo{RowFieldID.get(), + row_count, + std::vector{field_data}}; + segment->LoadFieldData(RowFieldID, field_data_info); + + field_data = + std::make_shared>(DataType::INT64, false); + field_data->FillFieldData(data_set.timestamps_.data(), row_count); + field_data_info = + FieldDataInfo{TimestampFieldID.get(), + row_count, + std::vector{field_data}}; + segment->LoadFieldData(TimestampFieldID, field_data_info); +} + +void +CheckFilterSearchResult(const SearchResult& search_result_by_post_filter, + const SearchResult& search_result_by_pre_filter, + int topK, + int nq) { + ASSERT_EQ(search_result_by_pre_filter.seg_offsets_.size(), topK * nq); + ASSERT_EQ(search_result_by_pre_filter.distances_.size(), topK * nq); + ASSERT_EQ(search_result_by_post_filter.seg_offsets_.size(), topK * nq); + ASSERT_EQ(search_result_by_post_filter.distances_.size(), topK * nq); + + for (int i = 0; i < topK * nq; ++i) { + std::cout << search_result_by_pre_filter.seg_offsets_[i] << " " + << search_result_by_pre_filter.distances_[i] << " " + << search_result_by_post_filter.seg_offsets_[i] << " " + << search_result_by_post_filter.distances_[i] << std::endl; + ASSERT_EQ(search_result_by_pre_filter.seg_offsets_[i], + search_result_by_post_filter.seg_offsets_[i]); + } +} + +TEST(PostFilter, SealedIndex) { + using namespace milvus; + using namespace milvus::query; + using namespace milvus::segcore; + + //0.
prepare schema + int dim = 64; + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2); + auto int8_fid = schema->AddDebugField("int8", DataType::INT8); + auto int16_fid = schema->AddDebugField("int16", DataType::INT16); + auto int32_fid = schema->AddDebugField("int32", DataType::INT32); + auto int64_fid = schema->AddDebugField("int64", DataType::INT64); + auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR); + auto bool_fid = schema->AddDebugField("bool", DataType::BOOL); + schema->set_primary_field_id(str_fid); + auto segment = CreateSealedSegment(schema); + size_t N = 50; + + //2. load raw data + auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false); + auto fields = schema->get_fields(); + for (auto field_data : raw_data.raw_->fields_data()) { + int64_t field_id = field_data.field_id(); + + auto info = FieldDataInfo(field_data.field_id(), N); + auto field_meta = fields.at(FieldId(field_id)); + info.channel->push( + CreateFieldDataFromDataArray(N, &field_data, field_meta)); + info.channel->close(); + + segment->LoadFieldData(FieldId(field_id), info); + } + prepareSegmentFieldData(segment, N, raw_data); + + //3. load index + auto vector_data = raw_data.get_col(vec_fid); + auto indexing = GenVecIndexing( + N, dim, vector_data.data(), knowhere::IndexEnum::INDEX_HNSW); + LoadIndexInfo load_index_info; + load_index_info.field_id = vec_fid.get(); + load_index_info.index = std::move(indexing); + load_index_info.index_params["metric_type"] = knowhere::metric::L2; + segment->LoadIndex(load_index_info); + int topK = 10; + int group_size = 3; + + // int8 binaryRange + { + const char* raw_plan = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"post_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } + + // int16 Termexpr + { + const char* raw_plan = R"(vector_anns: < + field_id: 100 + predicates: < + term_expr: < + column_info: < + field_id: 102 + data_type: Int16 + > + values: values: + > + > + query_info: < + 
topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"post_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 100 + predicates: < + term_expr: < + column_info: < + field_id: 102 + data_type: Int16 + > + values: values: + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} + +TEST(PostFilter, SealedData) { + using namespace milvus; + using namespace milvus::query; + using namespace milvus::segcore; + + //0. prepare schema + int dim = 64; + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2); + auto int8_fid = schema->AddDebugField("int8", DataType::INT8); + auto int16_fid = schema->AddDebugField("int16", DataType::INT16); + auto int32_fid = schema->AddDebugField("int32", DataType::INT32); + auto int64_fid = schema->AddDebugField("int64", DataType::INT64); + auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR); + auto bool_fid = schema->AddDebugField("bool", DataType::BOOL); + schema->set_primary_field_id(str_fid); + auto segment = CreateSealedSegment(schema); + size_t N = 100; + + //2. 
load raw data + auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false); + auto fields = schema->get_fields(); + for (auto field_data : raw_data.raw_->fields_data()) { + int64_t field_id = field_data.field_id(); + + auto info = FieldDataInfo(field_data.field_id(), N); + auto field_meta = fields.at(FieldId(field_id)); + info.channel->push( + CreateFieldDataFromDataArray(N, &field_data, field_meta)); + info.channel->close(); + + segment->LoadFieldData(FieldId(field_id), info); + } + prepareSegmentFieldData(segment, N, raw_data); + + int topK = 10; + // int8 binaryRange + { + const char* raw_plan = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"post_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 100 + predicates: < + binary_range_expr: < + column_info: < + field_id: 101 + data_type: Int8 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 100 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} + +TEST(PostFilter, GrowingRawData) { + int dim = 128; + uint64_t seed = 512; + auto schema = std::make_shared(); + auto metric_type = knowhere::metric::L2; + auto int64_field_id = schema->AddDebugField("int64", DataType::INT64); + auto int32_field_id = schema->AddDebugField("int32", DataType::INT32); + auto vec_field_id = schema->AddDebugField( + "embeddings", DataType::VECTOR_FLOAT, 128, metric_type); + schema->set_primary_field_id(int64_field_id); + + auto config = SegcoreConfig::default_config(); + config.set_chunk_rows(8); + config.set_enable_interim_segment_index( + false); //no growing index, test brute force + auto segment_growing = CreateGrowingSegment(schema, nullptr, 1, config); + auto segment_growing_impl = + dynamic_cast(segment_growing.get()); + + int64_t rows_per_batch = 30; + int n_batch = 1; + for (int i = 0; i < n_batch; i++) { + auto data_set = + DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false); + auto offset = segment_growing_impl->PreInsert(rows_per_batch); + segment_growing_impl->Insert(offset, + rows_per_batch, + data_set.row_ids_.data(), + data_set.timestamps_.data(), + data_set.raw_); + } + + auto topK = 10; + // int8 binaryRange + { + const char* raw_plan = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + 
data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50, \"post_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"ef\": 50}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} + +TEST(PostFilter, GrowingIndex) { + int dim = 128; + uint64_t seed = 512; + auto schema = std::make_shared(); + auto metric_type = knowhere::metric::L2; + auto int64_field_id = schema->AddDebugField("int64", DataType::INT64); + auto int32_field_id = schema->AddDebugField("int32", DataType::INT32); + auto vec_field_id = schema->AddDebugField( + "embeddings", DataType::VECTOR_FLOAT, 128, metric_type); + schema->set_primary_field_id(int64_field_id); + + std::map index_params = { + {"index_type", "IVF_FLAT"}, + {"metric_type", metric_type}, + {"nlist", "4"}}; + std::map type_params = {{"dim", "128"}}; + FieldIndexMeta fieldIndexMeta( + vec_field_id, std::move(index_params), std::move(type_params)); + std::map fieldMap = { + {vec_field_id, fieldIndexMeta}}; + IndexMetaPtr metaPtr = + std::make_shared(10000, std::move(fieldMap)); + + auto config = SegcoreConfig::default_config(); + config.set_chunk_rows(16); + config.set_enable_interim_segment_index(true); // test growing inter index + config.set_nlist(4); + config.set_nlist(4); + auto segment_growing = CreateGrowingSegment(schema, metaPtr, 1, config); + auto segment_growing_impl = + dynamic_cast(segment_growing.get()); + + //1. 
prepare raw data in growing segment + int64_t rows_per_batch = 100; + int n_batch = 1; + for (int i = 0; i < n_batch; i++) { + auto data_set = + DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false); + auto offset = segment_growing_impl->PreInsert(rows_per_batch); + segment_growing_impl->Insert(offset, + rows_per_batch, + data_set.row_ids_.data(), + data_set.timestamps_.data(), + data_set.raw_); + } + + auto topK = 10; + { + const char* raw_plan = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"nprobe\": 4, \"post_filter\": true}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node; + auto ok = + google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node); + auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node); + auto num_queries = 1; + auto seed = 1024; + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + auto search_result = + segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63); + + const char* raw_plan2 = R"(vector_anns: < + field_id: 102 + predicates: < + binary_range_expr: < + column_info: < + field_id: 100 + data_type: Int64 + > + lower_inclusive: true, + upper_inclusive: false, + lower_value: < + int64_val: -1 + > + upper_value: < + int64_val: 1 + > + > + > + query_info: < + topk: 10 + metric_type: "L2" + search_params: "{\"nprobe\": 4}" + > + placeholder_tag: "$0">)"; + proto::plan::PlanNode plan_node2; + auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2, + &plan_node2); + auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2); + auto search_result2 = + segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63); + CheckFilterSearchResult( + *search_result, *search_result2, topK, num_queries); + } +} \ No newline at end of file diff --git a/internal/core/unittest/test_string_expr.cpp b/internal/core/unittest/test_string_expr.cpp index cb4ccf4131cbd..d4287114a6f40 100644 --- a/internal/core/unittest/test_string_expr.cpp +++ b/internal/core/unittest/test_string_expr.cpp @@ -290,12 +290,32 @@ TEST(StringExpr, Term) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; auto val = str_col[i]; auto ref = std::find(term.begin(), term.end(), val) != term.end(); ASSERT_EQ(ans, ref) << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" 
<< val; + } } } } @@ -363,6 +383,23 @@ TEST(StringExpr, TermNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { @@ -372,6 +409,9 @@ TEST(StringExpr, TermNullable) { auto val = str_col[i]; auto ref = std::find(term.begin(), term.end(), val) != term.end(); ASSERT_EQ(ans, ref) << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" << val; + } } } } @@ -481,6 +521,23 @@ TEST(StringExpr, Compare) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -488,6 +545,10 @@ TEST(StringExpr, Compare) { auto another_val = another_str_col[i]; auto ref = ref_func(val, another_val); ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << i << "!!" << val; + } } } } @@ -609,6 +670,23 @@ TEST(StringExpr, CompareNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { @@ -619,6 +697,10 @@ TEST(StringExpr, CompareNullable) { auto another_val = another_str_col[i]; auto ref = ref_func(val, another_val); ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << i << "!!" 
<< val; + } } } } @@ -741,16 +823,40 @@ TEST(StringExpr, CompareNullable2) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { ASSERT_EQ(ans, false); + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], false); + } continue; } auto val = str_col[i]; auto another_val = another_str_col[i]; auto ref = ref_func(val, another_val); ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << i << "!!" << val; + } } } } @@ -840,6 +946,23 @@ TEST(StringExpr, UnaryRange) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; @@ -847,6 +970,10 @@ TEST(StringExpr, UnaryRange) { auto ref = ref_func(val); ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << value << "@" << i << "!!" << val; + } } } } @@ -947,6 +1074,23 @@ TEST(StringExpr, UnaryRangeNullable) { MAX_TIMESTAMP); EXPECT_EQ(final.size(), N * num_iters); + // specify some offsets and do scalar filtering on these offsets + FixedVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { auto ans = final[i]; if (!valid_data[i]) { @@ -957,6 +1101,10 @@ TEST(StringExpr, UnaryRangeNullable) { auto ref = ref_func(val); ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val; + if (i % 2 == 0) { + ASSERT_EQ(view[int(i / 2)], ref) + << "@" << op << "@" << value << "@" << i << "!!" 
+            }
         }
     }
 }
@@ -1064,6 +1212,23 @@ TEST(StringExpr, BinaryRange) {
             MAX_TIMESTAMP);
         EXPECT_EQ(final.size(), N * num_iters);
 
+        // specify some offsets and do scalar filtering on these offsets
+        FixedVector<int32_t> offsets;
+        offsets.reserve(N * num_iters / 2);
+        for (auto i = 0; i < N * num_iters; ++i) {
+            if (i % 2 == 0) {
+                offsets.emplace_back(i);
+            }
+        }
+        auto col_vec = milvus::test::gen_filter_res(
+            plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
+            seg_promote,
+            N * num_iters,
+            MAX_TIMESTAMP,
+            &offsets);
+        BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
+        EXPECT_EQ(view.size(), N * num_iters / 2);
+
         for (int i = 0; i < N * num_iters; ++i) {
             auto ans = final[i];
@@ -1072,6 +1237,11 @@ TEST(StringExpr, BinaryRange) {
             ASSERT_EQ(ans, ref)
                 << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
                 << "@" << ub << "@" << i << "!!" << val;
+            if (i % 2 == 0) {
+                ASSERT_EQ(view[int(i / 2)], ref)
+                    << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
+                    << "@" << ub << "@" << i << "!!" << val;
+            }
         }
     }
 }
@@ -1191,10 +1361,30 @@ TEST(StringExpr, BinaryRangeNullable) {
             MAX_TIMESTAMP);
         EXPECT_EQ(final.size(), N * num_iters);
 
+        // specify some offsets and do scalar filtering on these offsets
+        FixedVector<int32_t> offsets;
+        offsets.reserve(N * num_iters / 2);
+        for (auto i = 0; i < N * num_iters; ++i) {
+            if (i % 2 == 0) {
+                offsets.emplace_back(i);
+            }
+        }
+        auto col_vec = milvus::test::gen_filter_res(
+            plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
+            seg_promote,
+            N * num_iters,
+            MAX_TIMESTAMP,
+            &offsets);
+        BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
+        EXPECT_EQ(view.size(), N * num_iters / 2);
+
         for (int i = 0; i < N * num_iters; ++i) {
             auto ans = final[i];
             if (!valid_data[i]) {
                 ASSERT_EQ(ans, false);
+                if (i % 2 == 0) {
+                    ASSERT_EQ(view[int(i / 2)], false);
+                }
                 continue;
             }
             auto val = str_col[i];
@@ -1202,6 +1392,11 @@ TEST(StringExpr, BinaryRangeNullable) {
             ASSERT_EQ(ans, ref)
                 << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
                 << "@" << ub << "@" << i << "!!" << val;
+            if (i % 2 == 0) {
+                ASSERT_EQ(view[int(i / 2)], ref)
+                    << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
+                    << "@" << ub << "@" << i << "!!" << val;
+            }
         }
     }
 }
diff --git a/internal/core/unittest/test_utils/GenExprProto.h b/internal/core/unittest/test_utils/GenExprProto.h
index a1744d3c5e268..eec423310e9ef 100644
--- a/internal/core/unittest/test_utils/GenExprProto.h
+++ b/internal/core/unittest/test_utils/GenExprProto.h
@@ -16,6 +16,7 @@
 
 #include "common/Consts.h"
 #include "expr/ITypeExpr.h"
+#include "exec/expression/Expr.h"
 #include "pb/plan.pb.h"
 #include "plan/PlanNode.h"
 
@@ -104,4 +105,30 @@ CreateSearchPlanByExpr(std::shared_ptr<milvus::expr::ITypeExpr> expr) {
     return plannode;
 }
 
+inline ColumnVectorPtr
+gen_filter_res(milvus::plan::PlanNode* plan_node,
+               const milvus::segcore::SegmentInternalInterface* segment,
+               uint64_t active_count,
+               uint64_t timestamp,
+               FixedVector<int32_t>* offsets = nullptr) {
+    auto filter_node = dynamic_cast<milvus::plan::FilterBitsNode*>(plan_node);
+    assert(filter_node != nullptr);
+    std::vector<milvus::expr::TypedExprPtr> filters;
+    filters.emplace_back(filter_node->filter());
+    auto query_context = std::make_shared<milvus::exec::QueryContext>(
+        DEAFULT_QUERY_ID, segment, active_count, timestamp);
+
+    std::unique_ptr<milvus::exec::ExecContext> exec_context =
+        std::make_unique<milvus::exec::ExecContext>(query_context.get());
+    auto exprs_ =
+        std::make_unique<milvus::exec::ExprSet>(filters, exec_context.get());
+    std::vector<VectorPtr> results_;
+    milvus::exec::EvalCtx eval_ctx(exec_context.get(), exprs_.get());
+    eval_ctx.set_offset_input(offsets);
+    exprs_->Eval(0, 1, true, eval_ctx, results_);
+
+    auto col_vec = std::dynamic_pointer_cast<ColumnVector>(results_[0]);
+    return col_vec;
+}
+
 }  // namespace milvus::test
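Note (not part of the diff): every test hunk above repeats the same pattern, so a condensed sketch of it may help review. It assumes the fixtures already present in these tests (plan, seg_promote, final, N * num_iters, MAX_TIMESTAMP); filter_plan_node is a hypothetical stand-in for plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), and bit k of the returned vector is assumed to correspond to row offsets[k].

    // Build the subset of row offsets to evaluate (every even row here).
    FixedVector<int32_t> offsets;
    for (int i = 0; i < N * num_iters; ++i) {
        if (i % 2 == 0) {
            offsets.emplace_back(i);
        }
    }
    // Re-run the scalar filter restricted to those offsets only.
    auto col_vec = milvus::test::gen_filter_res(
        filter_plan_node, seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
    // The result is compacted: one bit per requested offset.
    BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
    ASSERT_EQ(view.size(), offsets.size());
    for (size_t k = 0; k < offsets.size(); ++k) {
        // Bit k must agree with the full-segment result for row offsets[k].
        ASSERT_EQ(view[k], final[offsets[k]]);
    }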