Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support inverted index for array #33452

Merged
merged 10 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 95 additions & 3 deletions internal/core/src/exec/expression/JsonContainsExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@
void
PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
switch (expr_->column_.data_type_) {
case DataType::ARRAY:
case DataType::ARRAY: {
if (is_index_mode_) {
result = EvalArrayContainsForIndexSegment();

Check warning on line 28 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L28

Added line #L28 was not covered by tests
} else {
result = EvalJsonContainsForDataSegment();
}
break;
}
case DataType::JSON: {
if (is_index_mode_) {
PanicInfo(
Expand Down Expand Up @@ -94,7 +101,6 @@
return ExecJsonContainsWithDiffType();
}
}
break;
}
case proto::plan::JSONContainsExpr_JSONOp_ContainsAll: {
if (IsArrayDataType(data_type)) {
Expand Down Expand Up @@ -145,7 +151,6 @@
return ExecJsonContainsAllWithDiffType();
}
}
break;
}
default:
PanicInfo(ExprInvalid,
Expand Down Expand Up @@ -748,5 +753,92 @@
return res_vec;
}

VectorPtr
PhyJsonContainsFilterExpr::EvalArrayContainsForIndexSegment() {
switch (expr_->column_.element_type_) {
case DataType::BOOL: {
return ExecArrayContainsForIndexSegmentImpl<bool>();

Check warning on line 760 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L757-L760

Added lines #L757 - L760 were not covered by tests
}
case DataType::INT8: {
return ExecArrayContainsForIndexSegmentImpl<int8_t>();

Check warning on line 763 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L762-L763

Added lines #L762 - L763 were not covered by tests
}
case DataType::INT16: {
return ExecArrayContainsForIndexSegmentImpl<int16_t>();

Check warning on line 766 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L765-L766

Added lines #L765 - L766 were not covered by tests
}
case DataType::INT32: {
return ExecArrayContainsForIndexSegmentImpl<int32_t>();

Check warning on line 769 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L768-L769

Added lines #L768 - L769 were not covered by tests
}
case DataType::INT64: {
return ExecArrayContainsForIndexSegmentImpl<int64_t>();

Check warning on line 772 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L771-L772

Added lines #L771 - L772 were not covered by tests
}
case DataType::FLOAT: {
return ExecArrayContainsForIndexSegmentImpl<float>();

Check warning on line 775 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L774-L775

Added lines #L774 - L775 were not covered by tests
}
case DataType::DOUBLE: {
return ExecArrayContainsForIndexSegmentImpl<double>();

Check warning on line 778 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L777-L778

Added lines #L777 - L778 were not covered by tests
}
case DataType::VARCHAR:

Check warning on line 780 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L780

Added line #L780 was not covered by tests
case DataType::STRING: {
return ExecArrayContainsForIndexSegmentImpl<std::string>();

Check warning on line 782 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L782

Added line #L782 was not covered by tests
}
default:
PanicInfo(DataTypeInvalid,

Check warning on line 785 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L784-L785

Added lines #L784 - L785 were not covered by tests
fmt::format("unsupported data type for "
"ExecArrayContainsForIndexSegmentImpl: {}",
expr_->column_.element_type_));
}
}

template <typename ExprValueType>
VectorPtr
PhyJsonContainsFilterExpr::ExecArrayContainsForIndexSegmentImpl() {

Check warning on line 794 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L794

Added line #L794 was not covered by tests
typedef std::conditional_t<std::is_same_v<ExprValueType, std::string_view>,
std::string,
ExprValueType>
GetType;
using Index = index::ScalarIndex<GetType>;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;

Check warning on line 802 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L800-L802

Added lines #L800 - L802 were not covered by tests
}

std::unordered_set<GetType> elements;
for (auto const& element : expr_->vals_) {
elements.insert(GetValueFromProto<GetType>(element));

Check warning on line 807 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L805-L807

Added lines #L805 - L807 were not covered by tests
}
boost::container::vector<GetType> elems(elements.begin(), elements.end());
auto execute_sub_batch =
[this](Index* index_ptr,

Check warning on line 811 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L809-L811

Added lines #L809 - L811 were not covered by tests
const boost::container::vector<GetType>& vals) {
switch (expr_->op_) {
case proto::plan::JSONContainsExpr_JSONOp_Contains:

Check warning on line 814 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L813-L814

Added lines #L813 - L814 were not covered by tests
case proto::plan::JSONContainsExpr_JSONOp_ContainsAny: {
return index_ptr->In(vals.size(), vals.data());

Check warning on line 816 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L816

Added line #L816 was not covered by tests
}
case proto::plan::JSONContainsExpr_JSONOp_ContainsAll: {
TargetBitmap result(index_ptr->Count());
result.set();
for (size_t i = 0; i < vals.size(); i++) {
auto sub = index_ptr->In(1, &vals[i]);
result &= sub;

Check warning on line 823 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L818-L823

Added lines #L818 - L823 were not covered by tests
}
return result;

Check warning on line 825 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L825

Added line #L825 was not covered by tests
}
default:
PanicInfo(

Check warning on line 828 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L827-L828

Added lines #L827 - L828 were not covered by tests
ExprInvalid,
"unsupported array contains type {}",
proto::plan::JSONContainsExpr_JSONOp_Name(expr_->op_));
}
};
auto res = ProcessIndexChunks<GetType>(execute_sub_batch, elems);
AssertInfo(res.size() == real_batch_size,

Check warning on line 835 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L834-L835

Added lines #L834 - L835 were not covered by tests
"internal error: expr processed rows {} not equal "
"expect batch size {}",
res.size(),
real_batch_size);
return std::make_shared<ColumnVector>(std::move(res));

Check warning on line 840 in internal/core/src/exec/expression/JsonContainsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/JsonContainsExpr.cpp#L840

Added line #L840 was not covered by tests
}

} //namespace exec
} // namespace milvus
7 changes: 7 additions & 0 deletions internal/core/src/exec/expression/JsonContainsExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
VectorPtr
ExecJsonContainsWithDiffType();

VectorPtr
EvalArrayContainsForIndexSegment();

template <typename ExprValueType>
VectorPtr
ExecArrayContainsForIndexSegmentImpl();

private:
std::shared_ptr<const milvus::expr::JsonContainsExpr> expr_;
};
Expand Down
17 changes: 13 additions & 4 deletions internal/core/src/expr/ITypeExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,13 @@
struct ColumnInfo {
FieldId field_id_;
DataType data_type_;
DataType element_type_;
std::vector<std::string> nested_path_;

ColumnInfo(const proto::plan::ColumnInfo& column_info)
: field_id_(column_info.field_id()),
data_type_(static_cast<DataType>(column_info.data_type())),
element_type_(static_cast<DataType>(column_info.element_type())),
nested_path_(column_info.nested_path().begin(),
column_info.nested_path().end()) {
}
Expand All @@ -127,6 +129,7 @@
std::vector<std::string> nested_path = {})
: field_id_(field_id),
data_type_(data_type),
element_type_(DataType::NONE),
nested_path_(std::move(nested_path)) {
}

Expand All @@ -140,6 +143,10 @@
return false;
}

if (element_type_ != other.element_type_) {
return false;
}

for (int i = 0; i < nested_path_.size(); ++i) {
if (nested_path_[i] != other.nested_path_[i]) {
return false;
Expand All @@ -151,10 +158,12 @@

std::string
ToString() const {
return fmt::format("[FieldId:{}, data_type:{}, nested_path:{}]",
std::to_string(field_id_.get()),
data_type_,
milvus::Join<std::string>(nested_path_, ","));
return fmt::format(
"[FieldId:{}, data_type:{}, element_type:{}, nested_path:{}]",
std::to_string(field_id_.get()),
data_type_,
element_type_,
milvus::Join<std::string>(nested_path_, ","));

Check warning on line 166 in internal/core/src/expr/ITypeExpr.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/expr/ITypeExpr.h#L163-L166

Added lines #L163 - L166 were not covered by tests
}
};

Expand Down
105 changes: 47 additions & 58 deletions internal/core/src/index/IndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,9 @@
ScalarIndexPtr<T>
IndexFactory::CreateScalarIndex(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
DataType d_type) {
const storage::FileManagerContext& file_manager_context) {
if (index_type == INVERTED_INDEX_TYPE) {
TantivyConfig cfg;
cfg.data_type_ = d_type;
return std::make_unique<InvertedIndexTantivy<T>>(cfg,
file_manager_context);
return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context);
}
if (index_type == BITMAP_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<T>>(file_manager_context);
Expand All @@ -60,14 +56,11 @@
ScalarIndexPtr<std::string>
IndexFactory::CreateScalarIndex<std::string>(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
DataType d_type) {
const storage::FileManagerContext& file_manager_context) {
#if defined(__linux__) || defined(__APPLE__)
if (index_type == INVERTED_INDEX_TYPE) {
TantivyConfig cfg;
cfg.data_type_ = d_type;
return std::make_unique<InvertedIndexTantivy<std::string>>(
cfg, file_manager_context);
file_manager_context);
}
if (index_type == BITMAP_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<std::string>>(
Expand All @@ -84,13 +77,10 @@
IndexFactory::CreateScalarIndex(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space,
DataType d_type) {
std::shared_ptr<milvus_storage::Space> space) {
if (index_type == INVERTED_INDEX_TYPE) {
TantivyConfig cfg;
cfg.data_type_ = d_type;
return std::make_unique<InvertedIndexTantivy<T>>(
cfg, file_manager_context, space);
return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context,
space);
}
if (index_type == BITMAP_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<T>>(file_manager_context,
Expand All @@ -104,14 +94,11 @@
IndexFactory::CreateScalarIndex<std::string>(
const IndexType& index_type,
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space,
DataType d_type) {
std::shared_ptr<milvus_storage::Space> space) {
#if defined(__linux__) || defined(__APPLE__)
if (index_type == INVERTED_INDEX_TYPE) {
TantivyConfig cfg;
cfg.data_type_ = d_type;
return std::make_unique<InvertedIndexTantivy<std::string>>(
cfg, file_manager_context, space);
file_manager_context, space);

Check warning on line 101 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L101

Added line #L101 was not covered by tests
}
if (index_type == BITMAP_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<std::string>>(
Expand Down Expand Up @@ -148,48 +135,57 @@
}

IndexBasePtr
IndexFactory::CreateScalarIndex(
const CreateIndexInfo& create_index_info,
IndexFactory::CreatePrimitiveScalarIndex(
DataType data_type,
IndexType index_type,
const storage::FileManagerContext& file_manager_context) {
auto data_type = create_index_info.field_type;
auto index_type = create_index_info.index_type;

switch (data_type) {
// create scalar index
case DataType::BOOL:
return CreateScalarIndex<bool>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<bool>(index_type, file_manager_context);
case DataType::INT8:
return CreateScalarIndex<int8_t>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<int8_t>(index_type, file_manager_context);
case DataType::INT16:
return CreateScalarIndex<int16_t>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<int16_t>(index_type, file_manager_context);
case DataType::INT32:
return CreateScalarIndex<int32_t>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<int32_t>(index_type, file_manager_context);
case DataType::INT64:
return CreateScalarIndex<int64_t>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<int64_t>(index_type, file_manager_context);
case DataType::FLOAT:
return CreateScalarIndex<float>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<float>(index_type, file_manager_context);
case DataType::DOUBLE:
return CreateScalarIndex<double>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<double>(index_type, file_manager_context);

// create string index
case DataType::STRING:
case DataType::VARCHAR:
return CreateScalarIndex<std::string>(
index_type, file_manager_context, data_type);
return CreateScalarIndex<std::string>(index_type,
file_manager_context);
default:
throw SegcoreError(
DataTypeInvalid,
fmt::format("invalid data type to build index: {}", data_type));
}
}

IndexBasePtr
IndexFactory::CreateScalarIndex(
const CreateIndexInfo& create_index_info,
const storage::FileManagerContext& file_manager_context) {
switch (create_index_info.field_type) {
case DataType::ARRAY:

Check warning on line 176 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L176

Added line #L176 was not covered by tests
return CreatePrimitiveScalarIndex(
static_cast<DataType>(
file_manager_context.fieldDataMeta.schema.element_type()),
create_index_info.index_type,
file_manager_context);

Check warning on line 181 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L179-L181

Added lines #L179 - L181 were not covered by tests
default:
return CreatePrimitiveScalarIndex(create_index_info.field_type,
create_index_info.index_type,
file_manager_context);
}
}

IndexBasePtr
IndexFactory::CreateVectorIndex(
const CreateIndexInfo& create_index_info,
Expand Down Expand Up @@ -257,32 +253,25 @@
switch (data_type) {
// create scalar index
case DataType::BOOL:
return CreateScalarIndex<bool>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<bool>(index_type, file_manager, space);

Check warning on line 256 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L256

Added line #L256 was not covered by tests
case DataType::INT8:
return CreateScalarIndex<int8_t>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<int8_t>(index_type, file_manager, space);
case DataType::INT16:
return CreateScalarIndex<int16_t>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<int16_t>(index_type, file_manager, space);
case DataType::INT32:
return CreateScalarIndex<int32_t>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<int32_t>(index_type, file_manager, space);
case DataType::INT64:
return CreateScalarIndex<int64_t>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<int64_t>(index_type, file_manager, space);
case DataType::FLOAT:
return CreateScalarIndex<float>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<float>(index_type, file_manager, space);
case DataType::DOUBLE:
return CreateScalarIndex<double>(
index_type, file_manager, space, data_type);
return CreateScalarIndex<double>(index_type, file_manager, space);

// create string index
case DataType::STRING:
case DataType::VARCHAR:
return CreateScalarIndex<std::string>(
index_type, file_manager, space, data_type);
index_type, file_manager, space);

Check warning on line 274 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L274

Added line #L274 was not covered by tests
longjiquan marked this conversation as resolved.
Show resolved Hide resolved
default:
throw SegcoreError(
DataTypeInvalid,
Expand Down
Loading
Loading