diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp index f943798f3950e..4a7a38666523a 100644 --- a/internal/core/src/index/HybridScalarIndex.cpp +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -244,7 +244,7 @@ void HybridScalarIndex::BuildInternal( const std::vector& field_datas) { auto index = GetInternalIndex(); - LOG_INFO("build bitmap index with internal index:{}", + LOG_INFO("build hybrid index with internal index:{}", ToString(internal_index_type_)); index->BuildWithFieldData(field_datas); } @@ -406,7 +406,7 @@ HybridScalarIndex::Load(const BinarySet& binary_set, const Config& config) { DeserializeIndexType(binary_set); auto index = GetInternalIndex(); - LOG_INFO("load bitmap index with internal index:{}", + LOG_INFO("load hybrid index with internal index:{}", ToString(internal_index_type_)); index->Load(binary_set, config); @@ -420,7 +420,7 @@ HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, auto index_files = GetValueFromConfig>(config, "index_files"); AssertInfo(index_files.has_value(), - "index file paths is empty when load bitmap index"); + "index file paths is empty when load hybrid index"); auto index_type_file = GetRemoteIndexTypeFile(index_files.value()); @@ -439,7 +439,7 @@ HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, DeserializeIndexType(binary_set); auto index = GetInternalIndex(); - LOG_INFO("load bitmap index with internal index:{}", + LOG_INFO("load hybrid index with internal index:{}", ToString(internal_index_type_)); index->Load(ctx, config); @@ -456,4 +456,4 @@ template class HybridScalarIndex; template class HybridScalarIndex; } // namespace index -} // namespace milvus \ No newline at end of file +} // namespace milvus diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index d34ea3b03fd13..12a828189866f 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -39,7 
+39,7 @@ IndexFactory::CreatePrimitiveScalarIndex( if (index_type == INVERTED_INDEX_TYPE) { return std::make_unique>(file_manager_context); } - if (index_type == BITMAP_INDEX_TYPE) { + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>(file_manager_context); } return CreateScalarIndexSort(file_manager_context); @@ -62,7 +62,7 @@ IndexFactory::CreatePrimitiveScalarIndex( return std::make_unique>( file_manager_context); } - if (index_type == BITMAP_INDEX_TYPE) { + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>( file_manager_context); } @@ -82,7 +82,7 @@ IndexFactory::CreatePrimitiveScalarIndex( return std::make_unique>(file_manager_context, space); } - if (index_type == BITMAP_INDEX_TYPE) { + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>(file_manager_context, space); } @@ -100,7 +100,7 @@ IndexFactory::CreatePrimitiveScalarIndex( return std::make_unique>( file_manager_context, space); } - if (index_type == BITMAP_INDEX_TYPE) { + if (index_type == HYBRID_INDEX_TYPE) { return std::make_unique>( file_manager_context, space); } @@ -179,7 +179,7 @@ IndexBasePtr IndexFactory::CreateCompositeScalarIndex( IndexType index_type, const storage::FileManagerContext& file_manager_context) { - if (index_type == BITMAP_INDEX_TYPE) { + if (index_type == HYBRID_INDEX_TYPE) { auto element_type = static_cast( file_manager_context.fieldDataMeta.field_schema.element_type()); return CreatePrimitiveScalarIndex( @@ -189,6 +189,11 @@ IndexFactory::CreateCompositeScalarIndex( file_manager_context.fieldDataMeta.field_schema.element_type()); return CreatePrimitiveScalarIndex( element_type, index_type, file_manager_context); + } else { + PanicInfo( + Unsupported, + fmt::format("index type: {} for composite scalar not supported now", + index_type)); } } diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 0ee288f5599cc..c3f934f542009 100644 --- 
a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -42,6 +42,7 @@ get_tantivy_data_type(proto::schema::DataType data_type) { return TantivyDataType::F64; } + case proto::schema::DataType::String: case proto::schema::DataType::VarChar: { return TantivyDataType::Keyword; } @@ -152,7 +153,7 @@ InvertedIndexTantivy::Build(const Config& config) { AssertInfo(insert_files.has_value(), "insert_files were empty"); auto field_datas = mem_file_manager_->CacheRawDataToMemory(insert_files.value()); - build_index(field_datas); + BuildWithFieldData(field_datas); } template @@ -173,7 +174,7 @@ InvertedIndexTantivy::BuildV2(const Config& config) { field_data->FillFieldData(col_data); field_datas.push_back(field_data); } - build_index(field_datas); + BuildWithFieldData(field_datas); } template @@ -185,7 +186,17 @@ InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, AssertInfo(index_files.has_value(), "index file paths is empty when load disk ann index data"); auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix(); - disk_file_manager_->CacheIndexToDisk(index_files.value()); + auto files_value = index_files.value(); + // need to erase the index type file that has already been read + auto index_type_file = + disk_file_manager_->GetRemoteIndexPrefix() + std::string("/index_type"); + files_value.erase(std::remove_if(files_value.begin(), + files_value.end(), + [&](const std::string& file) { + return file == index_type_file; + }), + files_value.end()); + disk_file_manager_->CacheIndexToDisk(files_value); wrapper_ = std::make_shared(prefix.c_str()); } @@ -398,7 +409,7 @@ InvertedIndexTantivy::BuildWithRawData(size_t n, template void -InvertedIndexTantivy::build_index( +InvertedIndexTantivy::BuildWithFieldData( const std::vector>& field_datas) { switch (schema_.data_type()) { case proto::schema::DataType::Bool: @@ -454,7 +465,7 @@ InvertedIndexTantivy::build_index_for_array( auto n = data->get_num_rows(); auto array_column = 
static_cast(data->Data()); for (int64_t i = 0; i < n; i++) { - assert(array_column[i].get_element_type() == + Assert(array_column[i].get_element_type() == static_cast(schema_.element_type())); std::vector output; for (int64_t j = 0; j < array_column[i].length(); j++) { diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index e3869809a50ee..faac636df24e7 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -74,7 +74,7 @@ class InvertedIndexTantivy : public ScalarIndex { GetIndexType() const override { return ScalarIndexType::INVERTED; } - + void Build(const Config& config = {}) override; @@ -170,12 +170,12 @@ class InvertedIndexTantivy : public ScalarIndex { const TargetBitmap RegexQuery(const std::string& pattern) override; - private: void - finish(); + BuildWithFieldData(const std::vector& datas) override; + private: void - build_index(const std::vector>& field_datas); + finish(); void build_index_for_array( diff --git a/internal/core/src/index/Meta.h b/internal/core/src/index/Meta.h index f1a01231b8825..f37f77e100285 100644 --- a/internal/core/src/index/Meta.h +++ b/internal/core/src/index/Meta.h @@ -43,7 +43,7 @@ constexpr const char* METRIC_TYPE = "metric_type"; constexpr const char* ASCENDING_SORT = "STL_SORT"; constexpr const char* MARISA_TRIE = "Trie"; constexpr const char* INVERTED_INDEX_TYPE = "INVERTED"; -constexpr const char* BITMAP_INDEX_TYPE = "BITMAP"; +constexpr const char* HYBRID_INDEX_TYPE = "HYBRID"; // index meta constexpr const char* COLLECTION_ID = "collection_id"; diff --git a/internal/core/src/storage/DiskFileManagerImpl.h b/internal/core/src/storage/DiskFileManagerImpl.h index 9a6b27d591e69..b059f8399dfc2 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.h +++ b/internal/core/src/storage/DiskFileManagerImpl.h @@ -117,6 +117,12 @@ class DiskFileManagerImpl : public FileManagerImpl { const std::vector& remote_files, 
const std::vector& remote_file_sizes); + std::string + GetRemoteIndexPrefix() const { + return space_ != nullptr ? GetRemoteIndexObjectPrefixV2() + : GetRemoteIndexObjectPrefix(); + } + private: int64_t GetIndexBuildId() { diff --git a/internal/core/unittest/test_array_bitmap_index.cpp b/internal/core/unittest/test_array_bitmap_index.cpp index e1f58123777ea..78bf6fbcf1bbb 100644 --- a/internal/core/unittest/test_array_bitmap_index.cpp +++ b/internal/core/unittest/test_array_bitmap_index.cpp @@ -208,9 +208,9 @@ class ArrayBitmapIndexTest : public testing::Test { std::vector index_files; Config config; - config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; + config["index_type"] = milvus::index::HYBRID_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; - config["bitmap_cardinality_limit"] = "1000"; + config["bitmap_cardinality_limit"] = "100"; auto build_index = indexbuilder::IndexFactory::GetInstance().CreateIndex( @@ -223,7 +223,7 @@ class ArrayBitmapIndexTest : public testing::Test { } index::CreateIndexInfo index_info{}; - index_info.index_type = milvus::index::BITMAP_INDEX_TYPE; + index_info.index_type = milvus::index::HYBRID_INDEX_TYPE; index_info.field_type = DataType::ARRAY; config["index_files"] = index_files; @@ -233,11 +233,15 @@ class ArrayBitmapIndexTest : public testing::Test { index_->Load(milvus::tracer::TraceContext{}, config); } - void - SetUp() override { + virtual void + SetParam() { nb_ = 10000; cardinality_ = 30; + } + void + SetUp() override { + SetParam(); // if constexpr (std::is_same_v) { // type_ = DataType::INT8; // } else if constexpr (std::is_same_v) { @@ -338,3 +342,31 @@ REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTest, INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheck, ArrayBitmapIndexTest, BitmapType); + +template +class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest { + public: + virtual void + SetParam() override { + this->nb_ = 10000; + this->cardinality_ = 200; + } + + virtual ~ArrayBitmapIndexTestV1() { + } 
+}; + +TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1); + +TYPED_TEST_P(ArrayBitmapIndexTestV1, CountFuncTest) { + auto count = this->index_->Count(); + EXPECT_EQ(count, this->nb_); +} + +using BitmapTypeV1 = testing::Types; + +REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1, CountFuncTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheckV1, + ArrayBitmapIndexTestV1, + BitmapTypeV1); \ No newline at end of file diff --git a/internal/core/unittest/test_hybrid_index.cpp b/internal/core/unittest/test_hybrid_index.cpp index 1f6ea6aef8fbb..b4a8c6811d33d 100644 --- a/internal/core/unittest/test_hybrid_index.cpp +++ b/internal/core/unittest/test_hybrid_index.cpp @@ -120,7 +120,7 @@ class HybridIndexTestV1 : public testing::Test { std::vector index_files; Config config; - config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; + config["index_type"] = milvus::index::HYBRID_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; config["bitmap_cardinality_limit"] = "1000"; @@ -135,7 +135,7 @@ class HybridIndexTestV1 : public testing::Test { } index::CreateIndexInfo index_info{}; - index_info.index_type = milvus::index::BITMAP_INDEX_TYPE; + index_info.index_type = milvus::index::HYBRID_INDEX_TYPE; index_info.field_type = type_; config["index_files"] = index_files; diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h index a02c5cfe3b19a..ec23eeb337223 100644 --- a/internal/core/unittest/test_utils/indexbuilder_test_utils.h +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -478,14 +478,14 @@ GenDsFromPB(const google::protobuf::Message& msg) { template inline std::vector GetIndexTypes() { - return std::vector{"sort", milvus::index::BITMAP_INDEX_TYPE}; + return std::vector{"sort", milvus::index::HYBRID_INDEX_TYPE}; } template <> inline std::vector GetIndexTypes() { return std::vector{ - "sort", "marisa", milvus::index::BITMAP_INDEX_TYPE}; + "sort", "marisa", 
milvus::index::HYBRID_INDEX_TYPE}; } template diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 7eb5284496538..1d6c29dc603d9 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -368,7 +368,7 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error { indexType := indexParams[common.IndexTypeKey] - if indexType == indexparamcheck.IndexBitmap { + if indexType == indexparamcheck.IndexHybrid { _, exist := indexParams[common.BitmapCardinalityLimitKey] if !exist { indexParams[common.BitmapCardinalityLimitKey] = paramtable.Get().CommonCfg.BitmapIndexCardinalityBound.GetValue() diff --git a/pkg/common/common.go b/pkg/common/common.go index b0fbe73043ad3..5266c8b346902 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -119,9 +119,9 @@ const ( DropRatioBuildKey = "drop_ratio_build" - BitmapCardinalityLimitKey = "bitmap_cardinality_limit" IsSparseKey = "is_sparse" AutoIndexName = "AUTOINDEX" + BitmapCardinalityLimitKey = "bitmap_cardinality_limit" ) // Collection properties key diff --git a/pkg/util/indexparamcheck/bitmap_index_checker.go b/pkg/util/indexparamcheck/bitmap_index_checker.go deleted file mode 100644 index 9425557eff3ec..0000000000000 --- a/pkg/util/indexparamcheck/bitmap_index_checker.go +++ /dev/null @@ -1,41 +0,0 @@ -package indexparamcheck - -import ( - "fmt" - "math" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -type BITMAPChecker struct { - scalarIndexChecker -} - -func (c *BITMAPChecker) CheckTrain(params map[string]string) error { - if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) { - return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt") - } - return 
c.scalarIndexChecker.CheckTrain(params) -} - -func (c *BITMAPChecker) CheckValidDataType(field *schemapb.FieldSchema) error { - mainType := field.GetDataType() - elemType := field.GetElementType() - if !typeutil.IsBoolType(mainType) && !typeutil.IsIntegerType(mainType) && - !typeutil.IsStringType(mainType) && !typeutil.IsArrayType(mainType) { - return fmt.Errorf("bitmap index are only supported on bool, int, string and array field") - } - if typeutil.IsArrayType(mainType) { - if !typeutil.IsBoolType(elemType) && !typeutil.IsIntegerType(elemType) && - !typeutil.IsStringType(elemType) { - return fmt.Errorf("bitmap index are only supported on bool, int, string for array field") - } - } - return nil -} - -func newBITMAPChecker() *BITMAPChecker { - return &BITMAPChecker{} -} diff --git a/pkg/util/indexparamcheck/conf_adapter_mgr.go b/pkg/util/indexparamcheck/conf_adapter_mgr.go index d79196f72a619..b55e538abc7a5 100644 --- a/pkg/util/indexparamcheck/conf_adapter_mgr.go +++ b/pkg/util/indexparamcheck/conf_adapter_mgr.go @@ -65,7 +65,7 @@ func (mgr *indexCheckerMgrImpl) registerIndexChecker() { mgr.checkers["Asceneding"] = newSTLSORTChecker() mgr.checkers[IndexTRIE] = newTRIEChecker() mgr.checkers[IndexTrie] = newTRIEChecker() - mgr.checkers[IndexBitmap] = newBITMAPChecker() + mgr.checkers[IndexHybrid] = newHYBRIDChecker() mgr.checkers["marisa-trie"] = newTRIEChecker() mgr.checkers[AutoIndex] = newAUTOINDEXChecker() } diff --git a/pkg/util/indexparamcheck/bitmap_checker_test.go b/pkg/util/indexparamcheck/hybrid_checker_test.go similarity index 95% rename from pkg/util/indexparamcheck/bitmap_checker_test.go rename to pkg/util/indexparamcheck/hybrid_checker_test.go index 5d76b3a586f14..e07bb165a4e65 100644 --- a/pkg/util/indexparamcheck/bitmap_checker_test.go +++ b/pkg/util/indexparamcheck/hybrid_checker_test.go @@ -8,10 +8,10 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" ) -func Test_BitmapIndexChecker(t *testing.T) { - c := newBITMAPChecker() +func 
Test_HybridIndexChecker(t *testing.T) { + c := newHYBRIDChecker() - assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"})) + assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"})) assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Bool})) assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int8})) @@ -32,5 +32,5 @@ func Test_BitmapIndexChecker(t *testing.T) { assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Float})) assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Double})) assert.Error(t, c.CheckTrain(map[string]string{})) - assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) + assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) } diff --git a/pkg/util/indexparamcheck/hybrid_index_checker.go b/pkg/util/indexparamcheck/hybrid_index_checker.go new file mode 100644 index 0000000000000..7c7bada77a5c5 --- /dev/null +++ b/pkg/util/indexparamcheck/hybrid_index_checker.go @@ -0,0 +1,41 @@ +package indexparamcheck + +import ( + "fmt" + "math" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type HYBRIDChecker struct { + scalarIndexChecker +} + +func (c *HYBRIDChecker) CheckTrain(params map[string]string) error { + if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) { + return fmt.Errorf("failed to check hybrid cardinality limit, should be larger than 0 and smaller than math.MaxInt") + } + return c.scalarIndexChecker.CheckTrain(params) +} + +func (c *HYBRIDChecker) CheckValidDataType(field *schemapb.FieldSchema) error { + mainType := field.GetDataType() + elemType := field.GetElementType() + if 
!typeutil.IsBoolType(mainType) && !typeutil.IsIntegerType(mainType) && + !typeutil.IsStringType(mainType) && !typeutil.IsArrayType(mainType) { + return fmt.Errorf("hybrid index is only supported on bool, int, string and array field") + } + if typeutil.IsArrayType(mainType) { + if !typeutil.IsBoolType(elemType) && !typeutil.IsIntegerType(elemType) && + !typeutil.IsStringType(elemType) { + return fmt.Errorf("hybrid index is only supported on bool, int, string for array field") + } + } + return nil +} + +func newHYBRIDChecker() *HYBRIDChecker { + return &HYBRIDChecker{} +} diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index a20db560bfdb0..3cb78819a9678 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -37,7 +37,7 @@ const ( IndexSTLSORT IndexType = "STL_SORT" IndexTRIE IndexType = "TRIE" IndexTrie IndexType = "Trie" - IndexBitmap IndexType = "BITMAP" + IndexHybrid IndexType = "HYBRID" AutoIndex IndexType = "AUTOINDEX" )