From f1b2f7b6407e4d1dd6e379fbdba59d3621eb76a6 Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Thu, 18 Jul 2024 10:39:42 +0800 Subject: [PATCH] enhance: refactor bitmap index and internal hybrid index (#34450) #32900 Signed-off-by: luzhang Co-authored-by: luzhang --- internal/core/src/index/HybridScalarIndex.cpp | 10 ++-- internal/core/src/index/IndexFactory.cpp | 26 +++++++--- .../core/src/index/InvertedIndexTantivy.cpp | 24 +++++++--- .../core/src/index/InvertedIndexTantivy.h | 8 ++-- internal/core/src/index/Meta.h | 1 + .../core/src/storage/DiskFileManagerImpl.h | 6 +++ .../core/unittest/test_array_bitmap_index.cpp | 42 ++++++++++++++-- internal/core/unittest/test_hybrid_index.cpp | 4 +- internal/proxy/task_index.go | 48 ++++++++++++++----- internal/proxy/task_index_test.go | 19 +++++--- pkg/common/common.go | 2 +- .../indexparamcheck/bitmap_checker_test.go | 4 -- .../indexparamcheck/bitmap_index_checker.go | 5 -- pkg/util/indexparamcheck/conf_adapter_mgr.go | 1 + .../indexparamcheck/hybrid_checker_test.go | 36 ++++++++++++++ .../indexparamcheck/hybrid_index_checker.go | 46 ++++++++++++++++++ pkg/util/indexparamcheck/index_type.go | 1 + pkg/util/paramtable/autoindex_param.go | 29 ++++++++++- pkg/util/typeutil/schema.go | 4 ++ 19 files changed, 258 insertions(+), 58 deletions(-) create mode 100644 pkg/util/indexparamcheck/hybrid_checker_test.go create mode 100644 pkg/util/indexparamcheck/hybrid_index_checker.go diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp index f943798f3950e..4a7a38666523a 100644 --- a/internal/core/src/index/HybridScalarIndex.cpp +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -244,7 +244,7 @@ void HybridScalarIndex::BuildInternal( const std::vector& field_datas) { auto index = GetInternalIndex(); - LOG_INFO("build bitmap index with internal index:{}", + LOG_INFO("build hybrid index with internal index:{}", ToString(internal_index_type_)); index->BuildWithFieldData(field_datas); } @@ -406,7 +406,7 @@ HybridScalarIndex::Load(const BinarySet& binary_set, const Config& config) { DeserializeIndexType(binary_set); auto index = GetInternalIndex(); - LOG_INFO("load bitmap index with internal index:{}", + LOG_INFO("load hybrid index with internal index:{}", ToString(internal_index_type_)); index->Load(binary_set, config); @@ -420,7 +420,7 @@ HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, auto index_files = GetValueFromConfig>(config, "index_files"); AssertInfo(index_files.has_value(), - "index file paths is empty when load bitmap index"); + "index file paths is empty when load hybrid index"); auto index_type_file = GetRemoteIndexTypeFile(index_files.value()); @@ -439,7 +439,7 @@ HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, DeserializeIndexType(binary_set); auto index = GetInternalIndex(); - LOG_INFO("load bitmap index with internal index:{}", + LOG_INFO("load hybrid index with internal index:{}", ToString(internal_index_type_)); index->Load(ctx, config); @@ -456,4 +456,4 @@ template class HybridScalarIndex; template class HybridScalarIndex; } // namespace index -} // namespace milvus \ No newline at end of file +} // namespace milvus diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index d34ea3b03fd13..cb5656d9eb86b 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -40,6 +40,9 @@ IndexFactory::CreatePrimitiveScalarIndex( return std::make_unique>(file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { + return std::make_unique>(file_manager_context); + } + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>(file_manager_context); } return CreateScalarIndexSort(file_manager_context); @@ -63,6 +66,9 @@ IndexFactory::CreatePrimitiveScalarIndex( file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { + return std::make_unique>(file_manager_context); + } + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>( file_manager_context); } @@ -83,6 +89,9 @@ IndexFactory::CreatePrimitiveScalarIndex( space); } if (index_type == BITMAP_INDEX_TYPE) { + return std::make_unique>(file_manager_context, space); + } + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>(file_manager_context, space); } @@ -101,6 +110,10 @@ IndexFactory::CreatePrimitiveScalarIndex( file_manager_context, space); } if (index_type == BITMAP_INDEX_TYPE) { + return std::make_unique>(file_manager_context, + space); + } + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>( file_manager_context, space); } @@ -179,16 +192,17 @@ IndexBasePtr IndexFactory::CreateCompositeScalarIndex( IndexType index_type, const storage::FileManagerContext& file_manager_context) { - if (index_type == BITMAP_INDEX_TYPE) { - auto element_type = static_cast( - file_manager_context.fieldDataMeta.field_schema.element_type()); - return CreatePrimitiveScalarIndex( - element_type, index_type, file_manager_context); - } else if (index_type == INVERTED_INDEX_TYPE) { + if (index_type == HYBRID_INDEX_TYPE || index_type == BITMAP_INDEX_TYPE || + index_type == INVERTED_INDEX_TYPE) { auto element_type = static_cast( file_manager_context.fieldDataMeta.field_schema.element_type()); return CreatePrimitiveScalarIndex( element_type, index_type, file_manager_context); + } else { + PanicInfo( + Unsupported, + fmt::format("index type: {} for composite scalar not supported now", + index_type)); } } diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 0ee288f5599cc..6d4e4e6d1b8a7 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -42,6 +42,7 @@ get_tantivy_data_type(proto::schema::DataType data_type) { return TantivyDataType::F64; } + case proto::schema::DataType::String: case proto::schema::DataType::VarChar: { return TantivyDataType::Keyword; } @@ -152,7 +153,7 @@ InvertedIndexTantivy::Build(const Config& config) { AssertInfo(insert_files.has_value(), "insert_files were empty"); auto field_datas = mem_file_manager_->CacheRawDataToMemory(insert_files.value()); - build_index(field_datas); + BuildWithFieldData(field_datas); } template @@ -173,7 +174,7 @@ InvertedIndexTantivy::BuildV2(const Config& config) { field_data->FillFieldData(col_data); field_datas.push_back(field_data); } - build_index(field_datas); + BuildWithFieldData(field_datas); } template @@ -185,7 +186,17 @@ InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, AssertInfo(index_files.has_value(), "index file paths is empty when load disk ann index data"); auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix(); - disk_file_manager_->CacheIndexToDisk(index_files.value()); + auto files_value = index_files.value(); + // need erase the index type file that has been readed + auto index_type_file = + disk_file_manager_->GetRemoteIndexPrefix() + std::string("/index_type"); + files_value.erase(std::remove_if(files_value.begin(), + files_value.end(), + [&](const std::string& file) { + return file == index_type_file; + }), + files_value.end()); + disk_file_manager_->CacheIndexToDisk(files_value); wrapper_ = std::make_shared(prefix.c_str()); } @@ -398,7 +409,7 @@ InvertedIndexTantivy::BuildWithRawData(size_t n, template void -InvertedIndexTantivy::build_index( +InvertedIndexTantivy::BuildWithFieldData( const std::vector>& field_datas) { switch (schema_.data_type()) { case proto::schema::DataType::Bool: @@ -454,8 +465,9 @@ InvertedIndexTantivy::build_index_for_array( auto n = data->get_num_rows(); auto array_column = static_cast(data->Data()); for (int64_t i = 0; i < n; i++) { - assert(array_column[i].get_element_type() == - static_cast(schema_.element_type())); + Assert(IsStringDataType(array_column[i].get_element_type())); + Assert(IsStringDataType( + static_cast(schema_.element_type()))); std::vector output; for (int64_t j = 0; j < array_column[i].length(); j++) { output.push_back( diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index e3869809a50ee..faac636df24e7 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -74,7 +74,7 @@ class InvertedIndexTantivy : public ScalarIndex { GetIndexType() const override { return ScalarIndexType::INVERTED; } - + void Build(const Config& config = {}) override; @@ -170,12 +170,12 @@ class InvertedIndexTantivy : public ScalarIndex { const TargetBitmap RegexQuery(const std::string& pattern) override; - private: void - finish(); + BuildWithFieldData(const std::vector& datas) override; + private: void - build_index(const std::vector>& field_datas); + finish(); void build_index_for_array( diff --git a/internal/core/src/index/Meta.h b/internal/core/src/index/Meta.h index f1a01231b8825..1d427eb1debe6 100644 --- a/internal/core/src/index/Meta.h +++ b/internal/core/src/index/Meta.h @@ -44,6 +44,7 @@ constexpr const char* ASCENDING_SORT = "STL_SORT"; constexpr const char* MARISA_TRIE = "Trie"; constexpr const char* INVERTED_INDEX_TYPE = "INVERTED"; constexpr const char* BITMAP_INDEX_TYPE = "BITMAP"; +constexpr const char* HYBRID_INDEX_TYPE = "HYBRID"; // index meta constexpr const char* COLLECTION_ID = "collection_id"; diff --git a/internal/core/src/storage/DiskFileManagerImpl.h b/internal/core/src/storage/DiskFileManagerImpl.h index 9a6b27d591e69..b059f8399dfc2 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.h +++ b/internal/core/src/storage/DiskFileManagerImpl.h @@ -117,6 +117,12 @@ class DiskFileManagerImpl : public FileManagerImpl { const std::vector& remote_files, const std::vector& remote_file_sizes); + std::string + GetRemoteIndexPrefix() const { + return space_ != nullptr ? GetRemoteIndexObjectPrefixV2() + : GetRemoteIndexObjectPrefix(); + } + private: int64_t GetIndexBuildId() { diff --git a/internal/core/unittest/test_array_bitmap_index.cpp b/internal/core/unittest/test_array_bitmap_index.cpp index e1f58123777ea..78bf6fbcf1bbb 100644 --- a/internal/core/unittest/test_array_bitmap_index.cpp +++ b/internal/core/unittest/test_array_bitmap_index.cpp @@ -208,9 +208,9 @@ class ArrayBitmapIndexTest : public testing::Test { std::vector index_files; Config config; - config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; + config["index_type"] = milvus::index::HYBRID_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; - config["bitmap_cardinality_limit"] = "1000"; + config["bitmap_cardinality_limit"] = "100"; auto build_index = indexbuilder::IndexFactory::GetInstance().CreateIndex( @@ -223,7 +223,7 @@ class ArrayBitmapIndexTest : public testing::Test { } index::CreateIndexInfo index_info{}; - index_info.index_type = milvus::index::BITMAP_INDEX_TYPE; + index_info.index_type = milvus::index::HYBRID_INDEX_TYPE; index_info.field_type = DataType::ARRAY; config["index_files"] = index_files; @@ -233,11 +233,15 @@ class ArrayBitmapIndexTest : public testing::Test { index_->Load(milvus::tracer::TraceContext{}, config); } - void - SetUp() override { + virtual void + SetParam() { nb_ = 10000; cardinality_ = 30; + } + void + SetUp() override { + SetParam(); // if constexpr (std::is_same_v) { // type_ = DataType::INT8; // } else if constexpr (std::is_same_v) { @@ -338,3 +342,31 @@ REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTest, INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheck, ArrayBitmapIndexTest, BitmapType); + +template +class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest { + public: + virtual void + SetParam() override { + this->nb_ = 10000; + this->cardinality_ = 200; + } + + virtual ~ArrayBitmapIndexTestV1() { + } +}; + +TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1); + +TYPED_TEST_P(ArrayBitmapIndexTestV1, CountFuncTest) { + auto count = this->index_->Count(); + EXPECT_EQ(count, this->nb_); +} + +using BitmapTypeV1 = testing::Types; + +REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1, CountFuncTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheckV1, + ArrayBitmapIndexTestV1, + BitmapTypeV1); \ No newline at end of file diff --git a/internal/core/unittest/test_hybrid_index.cpp b/internal/core/unittest/test_hybrid_index.cpp index 1f6ea6aef8fbb..b4a8c6811d33d 100644 --- a/internal/core/unittest/test_hybrid_index.cpp +++ b/internal/core/unittest/test_hybrid_index.cpp @@ -120,7 +120,7 @@ class HybridIndexTestV1 : public testing::Test { std::vector index_files; Config config; - config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; + config["index_type"] = milvus::index::HYBRID_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; config["bitmap_cardinality_limit"] = "1000"; @@ -135,7 +135,7 @@ class HybridIndexTestV1 : public testing::Test { } index::CreateIndexInfo index_info{}; - index_info.index_type = milvus::index::BITMAP_INDEX_TYPE; + index_info.index_type = milvus::index::HYBRID_INDEX_TYPE; index_info.field_type = type_; config["index_files"] = index_files; diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 7eb5284496538..b860bf12da171 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -149,8 +149,9 @@ func (cit *createIndexTask) parseIndexParams() error { specifyIndexType, exist := indexParamsMap[common.IndexTypeKey] if exist && specifyIndexType != "" { - _, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(specifyIndexType) - if err != nil { + checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(specifyIndexType) + // not enable hybrid index for user, used in milvus internally + if err != nil || indexparamcheck.IsHYBRIDChecker(checker) { log.Ctx(cit.ctx).Warn("Failed to get index checker", zap.String(common.IndexTypeKey, specifyIndexType)) return merr.WrapErrParameterInvalid("valid index", fmt.Sprintf("invalid index type: %s", specifyIndexType)) } @@ -158,17 +159,38 @@ func (cit *createIndexTask) parseIndexParams() error { if !isVecIndex { specifyIndexType, exist := indexParamsMap[common.IndexTypeKey] - if Params.AutoIndexConfig.ScalarAutoIndexEnable.GetAsBool() || specifyIndexType == AutoIndexName || !exist { - if typeutil.IsArithmetic(cit.fieldSchema.DataType) { - indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarNumericIndexType.GetValue() - } else if typeutil.IsStringType(cit.fieldSchema.DataType) { - indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarVarcharIndexType.GetValue() - } else if typeutil.IsBoolType(cit.fieldSchema.DataType) { - indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarBoolIndexType.GetValue() - } else { - return merr.WrapErrParameterInvalid("supported field", - fmt.Sprintf("create auto index on %s field is not supported", cit.fieldSchema.DataType.String())) + autoIndexEnable := Params.AutoIndexConfig.ScalarAutoIndexEnable.GetAsBool() + + if autoIndexEnable || !exist || specifyIndexType == AutoIndexName { + getPrimitiveIndexType := func(dataType schemapb.DataType) string { + if typeutil.IsBoolType(dataType) { + return Params.AutoIndexConfig.ScalarBoolIndexType.GetValue() + } else if typeutil.IsIntegerType(dataType) { + return Params.AutoIndexConfig.ScalarIntIndexType.GetValue() + } else if typeutil.IsFloatingType(dataType) { + return Params.AutoIndexConfig.ScalarFloatIndexType.GetValue() + } else { + return Params.AutoIndexConfig.ScalarVarcharIndexType.GetValue() + } } + + indexType, err := func() (string, error) { + dataType := cit.fieldSchema.DataType + if typeutil.IsPrimitiveType(dataType) { + return getPrimitiveIndexType(dataType), nil + } else if typeutil.IsArrayType(dataType) { + return getPrimitiveIndexType(cit.fieldSchema.ElementType), nil + } else { + return "", fmt.Errorf("create auto index on type:%s is not supported", dataType.String()) + } + }() + + if err != nil { + return merr.WrapErrParameterInvalid("supported field", err.Error()) + } + + indexParamsMap[common.IndexTypeKey] = indexType + } } else { specifyIndexType, exist := indexParamsMap[common.IndexTypeKey] @@ -368,7 +390,7 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error { indexType := indexParams[common.IndexTypeKey] - if indexType == indexparamcheck.IndexBitmap { + if indexType == indexparamcheck.IndexHybrid { _, exist := indexParams[common.BitmapCardinalityLimitKey] if !exist { indexParams[common.BitmapCardinalityLimitKey] = paramtable.Get().CommonCfg.BitmapIndexCardinalityBound.GetValue() diff --git a/internal/proxy/task_index_test.go b/internal/proxy/task_index_test.go index 9976ffa8fb8c6..ae74096c66fb2 100644 --- a/internal/proxy/task_index_test.go +++ b/internal/proxy/task_index_test.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "os" + "strconv" "testing" "github.com/cockroachdb/errors" @@ -36,6 +37,7 @@ import ( "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/config" "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/indexparamcheck" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -577,7 +579,7 @@ func Test_parseIndexParams(t *testing.T) { ExtraParams: []*commonpb.KeyValuePair{ { Key: common.IndexTypeKey, - Value: DefaultStringIndexType, + Value: indexparamcheck.IndexINVERTED, }, }, IndexName: "", @@ -608,7 +610,8 @@ func Test_parseIndexParams(t *testing.T) { } err := cit.parseIndexParams() assert.NoError(t, err) - assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultStringIndexType}}) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: indexparamcheck.IndexHybrid}, + {Key: common.BitmapCardinalityLimitKey, Value: strconv.Itoa(paramtable.DefaultBitmapIndexCardinalityBound)}}) }) t.Run("create index on Arithmetic field", func(t *testing.T) { @@ -648,7 +651,8 @@ func Test_parseIndexParams(t *testing.T) { } err := cit.parseIndexParams() assert.NoError(t, err) - assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultArithmeticIndexType}}) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: indexparamcheck.IndexHybrid}, + {Key: common.BitmapCardinalityLimitKey, Value: strconv.Itoa(paramtable.DefaultBitmapIndexCardinalityBound)}}) }) // Compatible with the old version <= 2.3.0 @@ -873,7 +877,8 @@ func Test_parseIndexParams(t *testing.T) { err = cit.parseIndexParams() assert.NoError(t, err) - assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultArithmeticIndexType}}) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: indexparamcheck.IndexHybrid}, + {Key: common.BitmapCardinalityLimitKey, Value: strconv.Itoa(paramtable.DefaultBitmapIndexCardinalityBound)}}) }) t.Run("create auto index on numeric field", func(t *testing.T) { @@ -899,7 +904,8 @@ func Test_parseIndexParams(t *testing.T) { err := cit.parseIndexParams() assert.NoError(t, err) - assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultArithmeticIndexType}}) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: indexparamcheck.IndexHybrid}, + {Key: common.BitmapCardinalityLimitKey, Value: strconv.Itoa(paramtable.DefaultBitmapIndexCardinalityBound)}}) }) t.Run("create auto index on varchar field", func(t *testing.T) { @@ -925,7 +931,8 @@ func Test_parseIndexParams(t *testing.T) { err := cit.parseIndexParams() assert.NoError(t, err) - assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultStringIndexType}}) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: indexparamcheck.IndexHybrid}, + {Key: common.BitmapCardinalityLimitKey, Value: strconv.Itoa(paramtable.DefaultBitmapIndexCardinalityBound)}}) }) t.Run("create auto index on json field", func(t *testing.T) { diff --git a/pkg/common/common.go b/pkg/common/common.go index bf51e769294ae..b5a0bf3cb8bd7 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -121,9 +121,9 @@ const ( DropRatioBuildKey = "drop_ratio_build" - BitmapCardinalityLimitKey = "bitmap_cardinality_limit" IsSparseKey = "is_sparse" AutoIndexName = "AUTOINDEX" + BitmapCardinalityLimitKey = "bitmap_cardinality_limit" ) // Collection properties key diff --git a/pkg/util/indexparamcheck/bitmap_checker_test.go b/pkg/util/indexparamcheck/bitmap_checker_test.go index 5d76b3a586f14..6bf134854a491 100644 --- a/pkg/util/indexparamcheck/bitmap_checker_test.go +++ b/pkg/util/indexparamcheck/bitmap_checker_test.go @@ -11,8 +11,6 @@ import ( func Test_BitmapIndexChecker(t *testing.T) { c := newBITMAPChecker() - assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"})) - assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Bool})) assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int8})) assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int16})) @@ -31,6 +29,4 @@ func Test_BitmapIndexChecker(t *testing.T) { assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Double})) assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Float})) assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Double})) - assert.Error(t, c.CheckTrain(map[string]string{})) - assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) } diff --git a/pkg/util/indexparamcheck/bitmap_index_checker.go b/pkg/util/indexparamcheck/bitmap_index_checker.go index 9425557eff3ec..f19b472baa7e9 100644 --- a/pkg/util/indexparamcheck/bitmap_index_checker.go +++ b/pkg/util/indexparamcheck/bitmap_index_checker.go @@ -2,10 +2,8 @@ package indexparamcheck import ( "fmt" - "math" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -14,9 +12,6 @@ type BITMAPChecker struct { } func (c *BITMAPChecker) CheckTrain(params map[string]string) error { - if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) { - return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt") - } return c.scalarIndexChecker.CheckTrain(params) } diff --git a/pkg/util/indexparamcheck/conf_adapter_mgr.go b/pkg/util/indexparamcheck/conf_adapter_mgr.go index d79196f72a619..2ff7320c9b3a2 100644 --- a/pkg/util/indexparamcheck/conf_adapter_mgr.go +++ b/pkg/util/indexparamcheck/conf_adapter_mgr.go @@ -66,6 +66,7 @@ func (mgr *indexCheckerMgrImpl) registerIndexChecker() { mgr.checkers[IndexTRIE] = newTRIEChecker() mgr.checkers[IndexTrie] = newTRIEChecker() mgr.checkers[IndexBitmap] = newBITMAPChecker() + mgr.checkers[IndexHybrid] = newHYBRIDChecker() mgr.checkers["marisa-trie"] = newTRIEChecker() mgr.checkers[AutoIndex] = newAUTOINDEXChecker() } diff --git a/pkg/util/indexparamcheck/hybrid_checker_test.go b/pkg/util/indexparamcheck/hybrid_checker_test.go new file mode 100644 index 0000000000000..e418f12c5eee1 --- /dev/null +++ b/pkg/util/indexparamcheck/hybrid_checker_test.go @@ -0,0 +1,36 @@ +package indexparamcheck + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +) + +func Test_HybridIndexChecker(t *testing.T) { + c := newHYBRIDChecker() + + assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"})) + + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Bool})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int8})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int16})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int32})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int64})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_String})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Bool})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int8})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int16})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int64})) + assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_String})) + + assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_JSON})) + assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Float})) + assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Double})) + assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Float})) + assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Double})) + assert.Error(t, c.CheckTrain(map[string]string{})) + assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) +} diff --git a/pkg/util/indexparamcheck/hybrid_index_checker.go b/pkg/util/indexparamcheck/hybrid_index_checker.go new file mode 100644 index 0000000000000..84e2366d141ef --- /dev/null +++ b/pkg/util/indexparamcheck/hybrid_index_checker.go @@ -0,0 +1,46 @@ +package indexparamcheck + +import ( + "fmt" + "math" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type HYBRIDChecker struct { + scalarIndexChecker +} + +func (c *HYBRIDChecker) CheckTrain(params map[string]string) error { + if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) { + return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt") + } + return c.scalarIndexChecker.CheckTrain(params) +} + +func (c *HYBRIDChecker) CheckValidDataType(field *schemapb.FieldSchema) error { + mainType := field.GetDataType() + elemType := field.GetElementType() + if !typeutil.IsBoolType(mainType) && !typeutil.IsIntegerType(mainType) && + !typeutil.IsStringType(mainType) && !typeutil.IsArrayType(mainType) { + return fmt.Errorf("hybrid index are only supported on bool, int, string and array field") + } + if typeutil.IsArrayType(mainType) { + if !typeutil.IsBoolType(elemType) && !typeutil.IsIntegerType(elemType) && + !typeutil.IsStringType(elemType) { + return fmt.Errorf("hybrid index are only supported on bool, int, string for array field") + } + } + return nil +} + +func newHYBRIDChecker() *HYBRIDChecker { + return &HYBRIDChecker{} +} + +func IsHYBRIDChecker(checker interface{}) bool { + _, ok := checker.(*HYBRIDChecker) + return ok +} diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index a20db560bfdb0..eef92ac9de835 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -38,6 +38,7 @@ const ( IndexTRIE IndexType = "TRIE" IndexTrie IndexType = "Trie" IndexBitmap IndexType = "BITMAP" + IndexHybrid IndexType = "HYBRID" AutoIndex IndexType = "AUTOINDEX" ) diff --git a/pkg/util/paramtable/autoindex_param.go b/pkg/util/paramtable/autoindex_param.go index 0607e6d30b27d..31df71a4a358d 100644 --- a/pkg/util/paramtable/autoindex_param.go +++ b/pkg/util/paramtable/autoindex_param.go @@ -46,8 +46,10 @@ type autoIndexConfig struct { ScalarAutoIndexEnable ParamItem `refreshable:"true"` ScalarAutoIndexParams ParamItem `refreshable:"true"` ScalarNumericIndexType ParamItem `refreshable:"true"` + ScalarIntIndexType ParamItem `refreshable:"true"` ScalarVarcharIndexType ParamItem `refreshable:"true"` ScalarBoolIndexType ParamItem `refreshable:"true"` + ScalarFloatIndexType ParamItem `refreshable:"true"` } func (p *autoIndexConfig) init(base *BaseTable) { @@ -152,10 +154,11 @@ func (p *autoIndexConfig) init(base *BaseTable) { p.ScalarAutoIndexParams = ParamItem{ Key: "scalarAutoIndex.params.build", Version: "2.4.0", - DefaultValue: `{"numeric": "INVERTED","varchar": "INVERTED","bool": "INVERTED"}`, + DefaultValue: `{"int": "HYBRID","varchar": "HYBRID","bool": "BITMAP", "float": "INVERTED"}`, } p.ScalarAutoIndexParams.Init(base.mgr) + // Deprecated param p.ScalarNumericIndexType = ParamItem{ Version: "2.4.0", Formatter: func(v string) string { @@ -168,6 +171,30 @@ func (p *autoIndexConfig) init(base *BaseTable) { } p.ScalarNumericIndexType.Init(base.mgr) + p.ScalarIntIndexType = ParamItem{ + Version: "2.5.0", + Formatter: func(v string) string { + m := p.ScalarAutoIndexParams.GetAsJSONMap() + if m == nil { + return "" + } + return m["int"] + }, + } + p.ScalarIntIndexType.Init(base.mgr) + + p.ScalarFloatIndexType = ParamItem{ + Version: "2.5.0", + Formatter: func(v string) string { + m := p.ScalarAutoIndexParams.GetAsJSONMap() + if m == nil { + return "" + } + return m["float"] + }, + } + p.ScalarFloatIndexType.Init(base.mgr) + p.ScalarVarcharIndexType = ParamItem{ Version: "2.4.0", Formatter: func(v string) string { diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 9864972a2c9f8..dde76212e4a17 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -463,6 +463,10 @@ func IsVariableDataType(dataType schemapb.DataType) bool { return IsStringType(dataType) || IsArrayType(dataType) || IsJSONType(dataType) } +func IsPrimitiveType(dataType schemapb.DataType) bool { + return IsArithmetic(dataType) || IsStringType(dataType) || IsBoolType(dataType) +} + // PrepareResultFieldData construct this slice fo FieldData for final result reduce // this shall preallocate the space for field data internal slice prevent slice growing cost. func PrepareResultFieldData(sample []*schemapb.FieldData, topK int64) []*schemapb.FieldData {