From 8fd38c8eea5dbe1f1adf3adfb895afa88ebedf78 Mon Sep 17 00:00:00 2001 From: cqy123456 <39671710+cqy123456@users.noreply.github.com> Date: Thu, 7 Dec 2023 09:52:34 +0800 Subject: [PATCH] enhance:[cherry-pick] Use binlog index for better search performance (#29012) this pr is cherry-pick from master: pr: https://github.com/milvus-io/milvus/pull/28528 pr: https://github.com/milvus-io/milvus/pull/27673 related issue: issue: https://github.com/milvus-io/milvus/issues/27678 Signed-off-by: cqy123456 --- configs/milvus.yaml | 7 +- internal/core/src/segcore/FieldIndexing.cpp | 6 +- internal/core/src/segcore/FieldIndexing.h | 2 +- .../core/src/segcore/IndexConfigGenerator.cpp | 6 +- .../core/src/segcore/IndexConfigGenerator.h | 9 +- internal/core/src/segcore/SegcoreConfig.h | 16 +- .../core/src/segcore/SegmentGrowingImpl.cpp | 4 +- .../core/src/segcore/SegmentSealedImpl.cpp | 153 +++++++- internal/core/src/segcore/SegmentSealedImpl.h | 25 +- internal/core/src/segcore/segcore_init_c.cpp | 4 +- internal/core/src/segcore/segcore_init_c.h | 2 +- internal/core/src/segcore/segment_c.cpp | 4 +- internal/core/unittest/CMakeLists.txt | 1 + internal/core/unittest/bench/bench_search.cpp | 2 +- internal/core/unittest/test_binlog_index.cpp | 326 ++++++++++++++++++ internal/core/unittest/test_float16.cpp | 2 +- internal/core/unittest/test_growing.cpp | 2 +- internal/core/unittest/test_growing_index.cpp | 6 +- internal/proto/planpb/plan.pb.go | 3 + .../querynodev2/segments/segment_loader.go | 5 + internal/querynodev2/server.go | 8 +- internal/querynodev2/services_test.go | 19 +- pkg/util/paramtable/component_param.go | 44 ++- pkg/util/paramtable/component_param_test.go | 18 +- tests/python_client/testcases/test_search.py | 4 +- 25 files changed, 590 insertions(+), 88 deletions(-) create mode 100644 internal/core/unittest/test_binlog_index.cpp diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 0615632df0ea4..dbc467340402f 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -287,10 +287,11 @@ queryNode: # This parameter is only useful when enable-disk = true. # And this value should be a number greater than 1 and less than 32. chunkRows: 1024 # The number of vectors in a chunk. - growing: # growing a vector index for growing segment to accelerate search + interimIndex: # build a vector temperate index for growing segment or binlog to accelerate search enableIndex: true - nlist: 128 # growing segment index nlist - nprobe: 16 # nprobe to search growing segment, based on your accuracy requirement, must smaller than nlist + nlist: 128 # segment index nlist + nprobe: 16 # nprobe to search segment, based on your accuracy requirement, must smaller than nlist + memExpansionRate: 1.15 # the ratio of building interim index memory usage to raw data loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index maxDiskUsagePercentage: 95 diff --git a/internal/core/src/segcore/FieldIndexing.cpp b/internal/core/src/segcore/FieldIndexing.cpp index f86245f2e57f2..7cf9c0a7f45e8 100644 --- a/internal/core/src/segcore/FieldIndexing.cpp +++ b/internal/core/src/segcore/FieldIndexing.cpp @@ -31,8 +31,10 @@ VectorFieldIndexing::VectorFieldIndexing(const FieldMeta& field_meta, : FieldIndexing(field_meta, segcore_config), build(false), sync_with_index(false), - config_(std::make_unique( - segment_max_row_count, field_index_meta, segcore_config)) { + config_(std::make_unique(segment_max_row_count, + field_index_meta, + segcore_config, + SegmentType::Growing)) { index_ = std::make_unique( config_->GetIndexType(), config_->GetMetricType(), diff --git a/internal/core/src/segcore/FieldIndexing.h b/internal/core/src/segcore/FieldIndexing.h index c894c335abfbb..09613b6040fce 100644 --- a/internal/core/src/segcore/FieldIndexing.h +++ b/internal/core/src/segcore/FieldIndexing.h @@ -245,7 +245,7 @@ class IndexingRecord { for (auto& [field_id, field_meta] : schema_.get_fields()) { ++offset_id; if (field_meta.is_vector() && - segcore_config_.get_enable_growing_segment_index()) { + segcore_config_.get_enable_interim_segment_index()) { // TODO: skip binary small index now, reenable after config.yaml is ready if (field_meta.get_data_type() == DataType::VECTOR_BINARY) { continue; diff --git a/internal/core/src/segcore/IndexConfigGenerator.cpp b/internal/core/src/segcore/IndexConfigGenerator.cpp index 37f6f9563bdba..f40317d8be113 100644 --- a/internal/core/src/segcore/IndexConfigGenerator.cpp +++ b/internal/core/src/segcore/IndexConfigGenerator.cpp @@ -13,15 +13,15 @@ #include "log/Log.h" namespace milvus::segcore { - VecIndexConfig::VecIndexConfig(const int64_t max_index_row_cout, const FieldIndexMeta& index_meta_, - const SegcoreConfig& config) + const SegcoreConfig& config, + const SegmentType& segment_type) : max_index_row_count_(max_index_row_cout), config_(config) { origin_index_type_ = index_meta_.GetIndexType(); metric_type_ = index_meta_.GeMetricType(); - index_type_ = support_index_types[0]; + index_type_ = support_index_types.at(segment_type); build_params_[knowhere::meta::METRIC_TYPE] = metric_type_; build_params_[knowhere::indexparam::NLIST] = std::to_string(config_.get_nlist()); diff --git a/internal/core/src/segcore/IndexConfigGenerator.h b/internal/core/src/segcore/IndexConfigGenerator.h index cf31fc63a5e4a..563e95e4837b0 100644 --- a/internal/core/src/segcore/IndexConfigGenerator.h +++ b/internal/core/src/segcore/IndexConfigGenerator.h @@ -16,6 +16,7 @@ #include "knowhere/config.h" #include "SegcoreConfig.h" #include "common/QueryInfo.h" +#include "common/type_c.h" namespace milvus::segcore { @@ -27,8 +28,9 @@ enum class IndexConfigLevel { }; class VecIndexConfig { - inline static const std::vector support_index_types = { - knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC}; + inline static const std::map support_index_types = + {{SegmentType::Growing, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC}, + {SegmentType::Sealed, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC}}; inline static const std::map index_build_ratio = { {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, 0.1}}; @@ -39,7 +41,8 @@ class VecIndexConfig { public: VecIndexConfig(const int64_t max_index_row_count, const FieldIndexMeta& index_meta_, - const SegcoreConfig& config); + const SegcoreConfig& config, + const SegmentType& segment_type); int64_t GetBuildThreshold() const noexcept; diff --git a/internal/core/src/segcore/SegcoreConfig.h b/internal/core/src/segcore/SegcoreConfig.h index c7f3175119018..51a828125045b 100644 --- a/internal/core/src/segcore/SegcoreConfig.h +++ b/internal/core/src/segcore/SegcoreConfig.h @@ -64,20 +64,20 @@ class SegcoreConfig { } void - set_enable_growing_segment_index(bool enable_growing_segment_index) { - enable_growing_segment_index_ = enable_growing_segment_index; + set_enable_interim_segment_index(bool enable_interim_segment_index) { + this->enable_interim_segment_index_ = enable_interim_segment_index; } bool - get_enable_growing_segment_index() const { - return enable_growing_segment_index_; + get_enable_interim_segment_index() const { + return enable_interim_segment_index_; } private: - bool enable_growing_segment_index_ = false; - int64_t chunk_rows_ = 32 * 1024; - int64_t nlist_ = 100; - int64_t nprobe_ = 4; + inline static bool enable_interim_segment_index_ = false; + inline static int64_t chunk_rows_ = 32 * 1024; + inline static int64_t nlist_ = 100; + inline static int64_t nprobe_ = 4; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 9f0240ad3330e..425b316e7d306 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -121,7 +121,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset, field_meta); } //insert vector data into index - if (segcore_config_.get_enable_growing_segment_index()) { + if (segcore_config_.get_enable_interim_segment_index()) { indexing_record_.AppendingIndex( reserved_offset, num_rows, @@ -204,7 +204,7 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) { insert_record_.get_field_data_base(field_id)->set_data_raw( reserved_offset, field_data); } - if (segcore_config_.get_enable_growing_segment_index()) { + if (segcore_config_.get_enable_interim_segment_index()) { auto offset = reserved_offset; for (auto& data : field_data) { auto row_count = data->get_num_rows(); diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 5075f9607d78a..11ec0fc00ab27 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -43,6 +43,7 @@ #include "storage/ChunkCacheSingleton.h" #include "common/File.h" #include "common/Tracer.h" +#include "index/VectorMemIndex.h" namespace milvus::segcore { @@ -99,17 +100,19 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) { ") than other column's row count (" + std::to_string(num_rows_.value()) + ")"); } - AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready"); + if (get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } else if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } + update_row_count(row_count); vector_indexings_.append_field_indexing( field_id, metric_type, std::move(const_cast(info).index)); - set_bit(index_ready_bitset_, field_id, true); - update_row_count(row_count); - // release field column - fields_.erase(field_id); - set_bit(field_data_ready_bitset_, field_id, false); } void @@ -370,11 +373,29 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { insert_record_.seal_pks(); } + bool use_temp_index = false; + { + // update num_rows to build temperate binlog index + std::unique_lock lck(mutex_); + update_row_count(num_rows); + } + + if (generate_binlog_index(field_id)) { + std::unique_lock lck(mutex_); + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + use_temp_index = true; + } + + if (!use_temp_index) { + std::unique_lock lck(mutex_); + set_bit(field_data_ready_bitset_, field_id, true); + } + } + { std::unique_lock lck(mutex_); - set_bit(field_data_ready_bitset_, field_id, true); + update_row_count(num_rows); } - std::unique_lock lck(mutex_); - update_row_count(num_rows); } void @@ -613,7 +634,26 @@ SegmentSealedImpl::vector_search(SearchInfo& search_info, AssertInfo(field_meta.is_vector(), "The meta type of vector field is not vector type"); - if (get_bit(index_ready_bitset_, field_id)) { + if (get_bit(binlog_index_bitset_, field_id)) { + AssertInfo( + vec_binlog_config_.find(field_id) != vec_binlog_config_.end(), + "The binlog params is not generate."); + auto binlog_search_info = + vec_binlog_config_.at(field_id)->GetSearchConf(search_info); + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector indexes isn't ready for field " + + std::to_string(field_id.get())); + query::SearchOnSealedIndex(*schema_, + vector_indexings_, + binlog_search_info, + query_data, + query_count, + bitset, + output); + milvus::tracer::AddEvent( + "finish_searching_vector_temperate_binlog_index"); + } else if (get_bit(index_ready_bitset_, field_id)) { AssertInfo(vector_indexings_.is_ready(field_id), "vector indexes isn't ready for field " + std::to_string(field_id.get())); @@ -680,7 +720,8 @@ SegmentSealedImpl::get_vector(FieldId field_id, auto& field_meta = schema_->operator[](field_id); AssertInfo(field_meta.is_vector(), "vector field is not vector type"); - if (!get_bit(index_ready_bitset_, field_id)) { + if (!get_bit(index_ready_bitset_, field_id) && + !get_bit(binlog_index_bitset_, field_id)) { return fill_with_empty(field_id, count); } @@ -774,8 +815,14 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) { } else { auto& field_meta = schema_->operator[](field_id); std::unique_lock lck(mutex_); - set_bit(field_data_ready_bitset_, field_id, false); - insert_record_.drop_field_data(field_id); + if (get_bit(field_data_ready_bitset_, field_id)) { + set_bit(field_data_ready_bitset_, field_id, false); + insert_record_.drop_field_data(field_id); + } + if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } lck.unlock(); } } @@ -810,7 +857,8 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const { } auto& request_fields = plan->extra_info_opt_.value().involved_fields_; - auto field_ready_bitset = field_data_ready_bitset_ | index_ready_bitset_; + auto field_ready_bitset = + field_data_ready_bitset_ | index_ready_bitset_ | binlog_index_bitset_; AssertInfo(request_fields.size() == field_ready_bitset.size(), "Request fields size not equal to field ready bitset size when " "check search"); @@ -826,13 +874,19 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const { } } -SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id) - : field_data_ready_bitset_(schema->size()), +SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id) + : segcore_config_(segcore_config), + field_data_ready_bitset_(schema->size()), index_ready_bitset_(schema->size()), + binlog_index_bitset_(schema->size()), scalar_indexings_(schema->size()), insert_record_(*schema, MAX_ROW_COUNT), schema_(schema), - id_(segment_id) { + id_(segment_id), + col_index_meta_(index_meta) { } SegmentSealedImpl::~SegmentSealedImpl() { @@ -1135,7 +1189,8 @@ SegmentSealedImpl::bulk_subscript(FieldId field_id, bool SegmentSealedImpl::HasIndex(FieldId field_id) const { std::shared_lock lck(mutex_); - return get_bit(index_ready_bitset_, field_id); + return get_bit(index_ready_bitset_, field_id) | + get_bit(binlog_index_bitset_, field_id); } bool @@ -1154,7 +1209,8 @@ SegmentSealedImpl::HasRawData(int64_t field_id) const { auto fieldID = FieldId(field_id); const auto& field_meta = schema_->operator[](fieldID); if (datatype_is_vector(field_meta.get_data_type())) { - if (get_bit(index_ready_bitset_, fieldID)) { + if (get_bit(index_ready_bitset_, fieldID) | + get_bit(binlog_index_bitset_, fieldID)) { AssertInfo(vector_indexings_.is_ready(fieldID), "vector index is not ready"); auto field_indexing = vector_indexings_.get_field_indexing(fieldID); @@ -1303,4 +1359,63 @@ SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, bitset_chunk |= mask; } +bool +SegmentSealedImpl::generate_binlog_index(const FieldId field_id) { + if (col_index_meta_ == nullptr) + return false; + auto& field_meta = schema_->operator[](field_id); + + if (field_meta.is_vector() && + field_meta.get_data_type() == DataType::VECTOR_FLOAT && + segcore_config_.get_enable_interim_segment_index()) { + try { + auto& field_index_meta = + col_index_meta_->GetFieldIndexMeta(field_id); + auto& index_params = field_index_meta.GetIndexParams(); + if (index_params.find(knowhere::meta::INDEX_TYPE) == + index_params.end() || + index_params.at(knowhere::meta::INDEX_TYPE) == + knowhere::IndexEnum::INDEX_FAISS_IDMAP) { + return false; + } + // get binlog data and meta + auto row_count = num_rows_.value(); + auto dim = field_meta.get_dim(); + auto vec_data = fields_.at(field_id); + auto dataset = + knowhere::GenDataSet(row_count, dim, (void*)vec_data->Data()); + dataset->SetIsOwner(false); + // generate index params + auto field_binlog_config = std::unique_ptr( + new VecIndexConfig(row_count, + field_index_meta, + segcore_config_, + SegmentType::Sealed)); + auto build_config = field_binlog_config->GetBuildBaseParams(); + build_config[knowhere::meta::DIM] = std::to_string(dim); + build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1); + auto index_metric = field_binlog_config->GetMetricType(); + + index::IndexBasePtr vec_index = + std::make_unique( + field_binlog_config->GetIndexType(), + index_metric, + knowhere::Version::GetCurrentVersion().VersionNumber()); + vec_index->BuildWithDataset(dataset, build_config); + vector_indexings_.append_field_indexing( + field_id, index_metric, std::move(vec_index)); + { + std::unique_lock lck(mutex_); + vec_binlog_config_[field_id] = std::move(field_binlog_config); + set_bit(binlog_index_bitset_, field_id, true); + } + return true; + } catch (std::exception& e) { + return false; + } + } else { + return false; + } +} + } // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 94f3fa0443e40..ff3754a55a87f 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -34,12 +34,16 @@ #include "index/ScalarIndex.h" #include "sys/mman.h" #include "common/Types.h" +#include "common/IndexMeta.h" namespace milvus::segcore { class SegmentSealedImpl : public SegmentSealed { public: - explicit SegmentSealedImpl(SchemaPtr schema, int64_t segment_id); + explicit SegmentSealedImpl(SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id); ~SegmentSealedImpl() override; void LoadIndex(const LoadIndexInfo& info) override; @@ -249,10 +253,14 @@ class SegmentSealedImpl : public SegmentSealed { void LoadScalarIndex(const LoadIndexInfo& info); + bool + generate_binlog_index(const FieldId field_id); + private: // segment loading state BitsetType field_data_ready_bitset_; BitsetType index_ready_bitset_; + BitsetType binlog_index_bitset_; std::atomic system_ready_count_ = 0; // segment data @@ -275,11 +283,22 @@ class SegmentSealedImpl : public SegmentSealed { SchemaPtr schema_; int64_t id_; std::unordered_map> fields_; + + // only useful in binlog + IndexMetaPtr col_index_meta_; + SegcoreConfig segcore_config_; + std::unordered_map> + vec_binlog_config_; }; inline SegmentSealedPtr -CreateSealedSegment(SchemaPtr schema, int64_t segment_id = -1) { - return std::make_unique(schema, segment_id); +CreateSealedSegment( + SchemaPtr schema, + IndexMetaPtr index_meta = nullptr, + int64_t segment_id = -1, + const SegcoreConfig& segcore_config = SegcoreConfig::default_config()) { + return std::make_unique( + schema, index_meta, segcore_config, segment_id); } } // namespace milvus::segcore diff --git a/internal/core/src/segcore/segcore_init_c.cpp b/internal/core/src/segcore/segcore_init_c.cpp index 5ebf53f651f01..25c06422cb93a 100644 --- a/internal/core/src/segcore/segcore_init_c.cpp +++ b/internal/core/src/segcore/segcore_init_c.cpp @@ -32,10 +32,10 @@ SegcoreSetChunkRows(const int64_t value) { } extern "C" void -SegcoreSetEnableGrowingSegmentIndex(const bool value) { +SegcoreSetEnableInterimSegmentIndex(const bool value) { milvus::segcore::SegcoreConfig& config = milvus::segcore::SegcoreConfig::default_config(); - config.set_enable_growing_segment_index(value); + config.set_enable_interim_segment_index(value); } extern "C" void diff --git a/internal/core/src/segcore/segcore_init_c.h b/internal/core/src/segcore/segcore_init_c.h index ace8e9e723f75..77a33dcdb03be 100644 --- a/internal/core/src/segcore/segcore_init_c.h +++ b/internal/core/src/segcore/segcore_init_c.h @@ -22,7 +22,7 @@ void SegcoreSetChunkRows(const int64_t); void -SegcoreSetEnableGrowingSegmentIndex(const bool); +SegcoreSetEnableInterimSegmentIndex(const bool); void SegcoreSetNlist(const int64_t); diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index a9b933547db02..51e589b26f929 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -41,8 +41,8 @@ NewSegment(CCollection collection, SegmentType seg_type, int64_t segment_id) { } case Sealed: case Indexing: - segment = milvus::segcore::CreateSealedSegment(col->get_schema(), - segment_id); + segment = milvus::segcore::CreateSealedSegment( + col->get_schema(), col->GetIndexMeta(), segment_id); break; default: LOG_SEGCORE_ERROR_ << "invalid segment type " diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 445798cb134b9..537a2d26cc88d 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -58,6 +58,7 @@ set(MILVUS_TEST_FILES test_plan_proto.cpp test_chunk_cache.cpp test_storage.cpp + test_binlog_index.cpp ) if ( BUILD_DISK_ANN STREQUAL "ON" ) diff --git a/internal/core/unittest/bench/bench_search.cpp b/internal/core/unittest/bench/bench_search.cpp index d1ff6a3813fe6..9f63d61ed61ca 100644 --- a/internal/core/unittest/bench/bench_search.cpp +++ b/internal/core/unittest/bench/bench_search.cpp @@ -75,7 +75,7 @@ Search_GrowingIndex(benchmark::State& state) { FieldIndexMeta fieldIndexMeta(schema->get_field_id(FieldName("fakevec")), std::move(index_params), std::move(type_params)); - segconf.set_enable_growing_segment_index(true); + segconf.set_enable_interim_segment_index(true); std::map filedMap = { {schema->get_field_id(FieldName("fakevec")), fieldIndexMeta}}; IndexMetaPtr metaPtr = diff --git a/internal/core/unittest/test_binlog_index.cpp b/internal/core/unittest/test_binlog_index.cpp new file mode 100644 index 0000000000000..cb05d4752760b --- /dev/null +++ b/internal/core/unittest/test_binlog_index.cpp @@ -0,0 +1,326 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include + +#include "pb/plan.pb.h" +#include "segcore/segcore_init_c.h" +#include "segcore/SegmentSealed.h" +#include "segcore/SegmentSealedImpl.h" +#include "pb/schema.pb.h" +#include "test_utils/DataGen.h" +#include "index/IndexFactory.h" +#include "query/Plan.h" +#include "knowhere/comp/brute_force.h" + +using namespace milvus::segcore; +using namespace milvus; +namespace pb = milvus::proto; + +std::shared_ptr +GenRandomFloatVecData(int rows, int dim, int seed = 42) { + std::shared_ptr vecs = + std::shared_ptr(new float[rows * dim]); + std::mt19937 rng(seed); + std::uniform_int_distribution<> distrib(0.0, 100.0); + for (int i = 0; i < rows * dim; ++i) vecs[i] = (float)distrib(rng); + return std::move(vecs); +} + +inline float +GetKnnSearchRecall( + size_t nq, int64_t* gt_ids, size_t gt_k, int64_t* res_ids, size_t res_k) { + uint32_t matched_num = 0; + for (auto i = 0; i < nq; ++i) { + std::vector ids_0(gt_ids + i * gt_k, + gt_ids + i * gt_k + res_k); + std::vector ids_1(res_ids + i * res_k, + res_ids + i * res_k + res_k); + + std::sort(ids_0.begin(), ids_0.end()); + std::sort(ids_1.begin(), ids_1.end()); + + std::vector v(std::max(ids_0.size(), ids_1.size())); + std::vector::iterator it; + it = std::set_intersection( + ids_0.begin(), ids_0.end(), ids_1.begin(), ids_1.end(), v.begin()); + v.resize(it - v.begin()); + matched_num += v.size(); + } + return ((float)matched_num) / ((float)nq * res_k); +} + +using Param = const char*; +class BinlogIndexTest : public ::testing::TestWithParam { + void + SetUp() override { + auto param = GetParam(); + metricType = param; + + schema = std::make_shared(); + + auto metric_type = metricType; + vec_field_id = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, data_d, metric_type); + auto i64_fid = schema->AddDebugField("counter", DataType::INT64); + schema->set_primary_field_id(i64_fid); + + // generate vector field data + vec_data = GenRandomFloatVecData(data_n, data_d); + + vec_field_data = + storage::CreateFieldData(DataType::VECTOR_FLOAT, data_d); + vec_field_data->FillFieldData(vec_data.get(), data_n); + } + + public: + IndexMetaPtr + GetCollectionIndexMeta(std::string index_type) { + std::map index_params = { + {"index_type", index_type}, + {"metric_type", metricType}, + {"nlist", "1024"}}; + std::map type_params = {{"dim", "128"}}; + FieldIndexMeta fieldIndexMeta( + vec_field_id, std::move(index_params), std::move(type_params)); + auto& config = SegcoreConfig::default_config(); + config.set_chunk_rows(1024); + config.set_enable_interim_segment_index(true); + std::map filedMap = { + {vec_field_id, fieldIndexMeta}}; + IndexMetaPtr metaPtr = + std::make_shared(226985, std::move(filedMap)); + return std::move(metaPtr); + } + + void + LoadOtherFields() { + auto dataset = DataGen(schema, data_n); + // load id + LoadFieldDataInfo row_id_info; + FieldMeta row_id_field_meta( + FieldName("RowID"), RowFieldID, DataType::INT64); + auto field_data = std::make_shared>( + DataType::INT64); + field_data->FillFieldData(dataset.row_ids_.data(), data_n); + auto field_data_info = + FieldDataInfo{RowFieldID.get(), + data_n, + std::vector{field_data}}; + segment->LoadFieldData(RowFieldID, field_data_info); + // load ts + LoadFieldDataInfo ts_info; + FieldMeta ts_field_meta( + FieldName("Timestamp"), TimestampFieldID, DataType::INT64); + field_data = std::make_shared>( + DataType::INT64); + field_data->FillFieldData(dataset.timestamps_.data(), data_n); + field_data_info = + FieldDataInfo{TimestampFieldID.get(), + data_n, + std::vector{field_data}}; + segment->LoadFieldData(TimestampFieldID, field_data_info); + } + + protected: + milvus::SchemaPtr schema; + const char* metricType; + size_t data_n = 10000; + size_t data_d = 128; + size_t topk = 10; + milvus::storage::FieldDataPtr vec_field_data = nullptr; + milvus::segcore::SegmentSealedPtr segment = nullptr; + milvus::FieldId vec_field_id; + std::shared_ptr vec_data; +}; + +INSTANTIATE_TEST_CASE_P(MetricTypeParameters, + BinlogIndexTest, + ::testing::Values(knowhere::metric::L2)); + +TEST_P(BinlogIndexTest, Accuracy) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT); + + segment = CreateSealedSegment(schema, collection_index_meta); + LoadOtherFields(); + + auto& segcore_config = milvus::segcore::SegcoreConfig::default_config(); + segcore_config.set_enable_interim_segment_index(true); + segcore_config.set_nprobe(32); + // 1. load field data, and build binlog index for binlog data + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + //assert segment has been built binlog index + EXPECT_TRUE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_FALSE(segment->HasFieldData(vec_field_id)); + + // 2. search binlog index + auto num_queries = 10; + auto query_ptr = GenRandomFloatVecData(num_queries, data_d); + + milvus::proto::plan::PlanNode plan_node; + auto vector_anns = plan_node.mutable_vector_anns(); + vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector); + vector_anns->set_placeholder_tag("$0"); + vector_anns->set_field_id(vec_field_id.get()); + auto query_info = vector_anns->mutable_query_info(); + query_info->set_topk(topk); + query_info->set_round_decimal(3); + query_info->set_metric_type(metricType); + query_info->set_search_params(R"({"nprobe": 1024})"); + auto plan_str = plan_node.SerializeAsString(); + + auto ph_group_raw = + CreatePlaceholderGroupFromBlob(num_queries, data_d, query_ptr.get()); + + auto plan = milvus::query::CreateSearchPlanByExpr( + *schema, plan_str.data(), plan_str.size()); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + + std::vector ph_group_arr = { + ph_group.get()}; + auto nlist = segcore_config.get_nlist(); + auto binlog_index_sr = segment->Search(plan.get(), ph_group.get()); + ASSERT_EQ(binlog_index_sr->total_nq_, num_queries); + EXPECT_EQ(binlog_index_sr->unity_topK_, topk); + EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk); + EXPECT_EQ(binlog_index_sr->seg_offsets_.size(), num_queries * topk); + + // 3. update vector index + { + milvus::index::CreateIndexInfo create_index_info; + create_index_info.field_type = DataType::VECTOR_FLOAT; + create_index_info.metric_type = metricType; + create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT; + create_index_info.index_engine_version = + knowhere::Version::GetCurrentVersion().VersionNumber(); + auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, milvus::storage::FileManagerContext()); + + auto build_conf = + knowhere::Json{{knowhere::meta::METRIC_TYPE, metricType}, + {knowhere::meta::DIM, std::to_string(data_d)}, + {knowhere::indexparam::NLIST, "1024"}}; + + auto database = knowhere::GenDataSet(data_n, data_d, vec_data.get()); + indexing->BuildWithDataset(database, build_conf); + + LoadIndexInfo load_info; + load_info.field_id = vec_field_id.get(); + + load_info.index = std::move(indexing); + load_info.index_params["metric_type"] = metricType; + segment->DropFieldData(vec_field_id); + ASSERT_NO_THROW(segment->LoadIndex(load_info)); + EXPECT_TRUE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_FALSE(segment->HasFieldData(vec_field_id)); + auto ivf_sr = segment->Search(plan.get(), ph_group.get()); + auto similary = GetKnnSearchRecall(num_queries, + binlog_index_sr->seg_offsets_.data(), + topk, + ivf_sr->seg_offsets_.data(), + topk); + ASSERT_GT(similary, 0.45); + } +} + +TEST_P(BinlogIndexTest, DisableInterimIndex) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT); + + segment = CreateSealedSegment(schema, collection_index_meta); + LoadOtherFields(); + SegcoreSetEnableInterimSegmentIndex(false); + + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + + EXPECT_FALSE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_TRUE(segment->HasFieldData(vec_field_id)); + // load vector index + milvus::index::CreateIndexInfo create_index_info; + create_index_info.field_type = DataType::VECTOR_FLOAT; + create_index_info.metric_type = metricType; + create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT; + create_index_info.index_engine_version = + knowhere::Version::GetCurrentVersion().VersionNumber(); + auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, milvus::storage::FileManagerContext()); + + auto build_conf = + knowhere::Json{{knowhere::meta::METRIC_TYPE, metricType}, + {knowhere::meta::DIM, std::to_string(data_d)}, + {knowhere::indexparam::NLIST, "1024"}}; + + auto database = knowhere::GenDataSet(data_n, data_d, vec_data.get()); + indexing->BuildWithDataset(database, build_conf); + + LoadIndexInfo load_info; + load_info.field_id = vec_field_id.get(); + + load_info.index = std::move(indexing); + load_info.index_params["metric_type"] = metricType; + + segment->DropFieldData(vec_field_id); + ASSERT_NO_THROW(segment->LoadIndex(load_info)); + EXPECT_TRUE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_FALSE(segment->HasFieldData(vec_field_id)); +} + +TEST_P(BinlogIndexTest, LoadBingLogWihIDMAP) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IDMAP); + + segment = CreateSealedSegment(schema, collection_index_meta); + LoadOtherFields(); + + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + + EXPECT_FALSE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_TRUE(segment->HasFieldData(vec_field_id)); +} + +TEST_P(BinlogIndexTest, LoadBinlogWithoutIndexMeta) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IDMAP); + + segment = CreateSealedSegment(schema, collection_index_meta); + SegcoreSetEnableInterimSegmentIndex(true); + + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + + EXPECT_FALSE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_TRUE(segment->HasFieldData(vec_field_id)); +} \ No newline at end of file diff --git a/internal/core/unittest/test_float16.cpp b/internal/core/unittest/test_float16.cpp index 0951344fb3e31..9a5fa2a20c924 100644 --- a/internal/core/unittest/test_float16.cpp +++ b/internal/core/unittest/test_float16.cpp @@ -177,7 +177,7 @@ TEST(Float16, GetVector) { vec, std::move(index_params), std::move(type_params)); auto config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); diff --git a/internal/core/unittest/test_growing.cpp b/internal/core/unittest/test_growing.cpp index 3937f18dd64a7..671d5d23a78f9 100644 --- a/internal/core/unittest/test_growing.cpp +++ b/internal/core/unittest/test_growing.cpp @@ -134,7 +134,7 @@ TEST(Growing, FillData) { vec, std::move(index_params), std::move(type_params)); auto config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); diff --git a/internal/core/unittest/test_growing_index.cpp b/internal/core/unittest/test_growing_index.cpp index 8a690d96a7641..3666dc7cf0ddd 100644 --- a/internal/core/unittest/test_growing_index.cpp +++ b/internal/core/unittest/test_growing_index.cpp @@ -37,7 +37,7 @@ TEST(GrowingIndex, Correctness) { vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(226985, std::move(filedMap)); @@ -135,7 +135,7 @@ TEST(GrowingIndex, MissIndexMeta) { auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); auto segment = CreateGrowingSegment(schema, nullptr); } @@ -175,7 +175,7 @@ TEST_P(GrowingIndexGetVectorTest, GetVector) { vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); diff --git a/internal/proto/planpb/plan.pb.go b/internal/proto/planpb/plan.pb.go index 06de0040e91da..2edfb4c92706d 100644 --- a/internal/proto/planpb/plan.pb.go +++ b/internal/proto/planpb/plan.pb.go @@ -237,6 +237,7 @@ func (BinaryExpr_BinaryOp) EnumDescriptor() ([]byte, []int) { type GenericValue struct { // Types that are valid to be assigned to Val: + // // *GenericValue_BoolVal // *GenericValue_Int64Val // *GenericValue_FloatVal @@ -1297,6 +1298,7 @@ var xxx_messageInfo_AlwaysTrueExpr proto.InternalMessageInfo type Expr struct { // Types that are valid to be assigned to Expr: + // // *Expr_TermExpr // *Expr_UnaryExpr // *Expr_BinaryExpr @@ -1668,6 +1670,7 @@ func (m *QueryPlanNode) GetLimit() int64 { type PlanNode struct { // Types that are valid to be assigned to Node: + // // *PlanNode_VectorAnns // *PlanNode_Predicates // *PlanNode_Query diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index caf337cbd139d..bf793a2a38392 100644 --- a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -948,6 +948,11 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn predictDiskUsage += uint64(getBinlogDataSize(fieldBinlog)) } else { predictMemUsage += uint64(getBinlogDataSize(fieldBinlog)) + enableBinlogIndex := paramtable.Get().QueryNodeCfg.EnableInterimSegmentIndex.GetAsBool() + if enableBinlogIndex { + buildBinlogIndexRate := paramtable.Get().QueryNodeCfg.InterimIndexMemExpandRate.GetAsFloat() + predictMemUsage += uint64(float32(getBinlogDataSize(fieldBinlog)) * float32(buildBinlogIndexRate)) + } } } } diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go index 28f83d0842fbe..7b2fa15b21e40 100644 --- a/internal/querynodev2/server.go +++ b/internal/querynodev2/server.go @@ -197,13 +197,13 @@ func (node *QueryNode) InitSegcore() error { cKnowhereThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereThreadPoolSize.GetAsUint32()) C.SegcoreSetKnowhereSearchThreadPoolNum(cKnowhereThreadPoolSize) - enableGrowingIndex := C.bool(paramtable.Get().QueryNodeCfg.EnableGrowingSegmentIndex.GetAsBool()) - C.SegcoreSetEnableGrowingSegmentIndex(enableGrowingIndex) + enableInterimIndex := C.bool(paramtable.Get().QueryNodeCfg.EnableInterimSegmentIndex.GetAsBool()) + C.SegcoreSetEnableInterimSegmentIndex(enableInterimIndex) - nlist := C.int64_t(paramtable.Get().QueryNodeCfg.GrowingIndexNlist.GetAsInt64()) + nlist := C.int64_t(paramtable.Get().QueryNodeCfg.InterimIndexNlist.GetAsInt64()) C.SegcoreSetNlist(nlist) - nprobe := C.int64_t(paramtable.Get().QueryNodeCfg.GrowingIndexNProbe.GetAsInt64()) + nprobe := C.int64_t(paramtable.Get().QueryNodeCfg.InterimIndexNProbe.GetAsInt64()) C.SegcoreSetNprobe(nprobe) // override segcore SIMD type diff --git a/internal/querynodev2/services_test.go b/internal/querynodev2/services_test.go index 2ceda6bae243d..ec645400bd7d3 100644 --- a/internal/querynodev2/services_test.go +++ b/internal/querynodev2/services_test.go @@ -485,6 +485,21 @@ func (suite *ServiceSuite) TestUnsubDmChannels_Failed() { suite.Equal(commonpb.ErrorCode_NotReadyServe, status.GetErrorCode()) } +func (suite *ServiceSuite) genSegmentIndexInfos(loadInfo []*querypb.SegmentLoadInfo) []*indexpb.IndexInfo { + indexInfoList := make([]*indexpb.IndexInfo, 0) + seg0LoadInfo := loadInfo[0] + fieldIndexInfos := seg0LoadInfo.IndexInfos + for _, info := range fieldIndexInfos { + indexInfoList = append(indexInfoList, &indexpb.IndexInfo{ + CollectionID: suite.collectionID, + FieldID: info.GetFieldID(), + IndexName: info.GetIndexName(), + IndexParams: info.GetIndexParams(), + }) + } + return indexInfoList +} + func (suite *ServiceSuite) genSegmentLoadInfos(schema *schemapb.CollectionSchema) []*querypb.SegmentLoadInfo { ctx := context.Background() @@ -665,6 +680,8 @@ func (suite *ServiceSuite) TestLoadIndex_Success() { info.IndexInfos = nil return info }) + // generate indexinfos for setting index meta. + indexInfoList := suite.genSegmentIndexInfos(infos) req := &querypb.LoadSegmentsRequest{ Base: &commonpb.MsgBase{ MsgID: rand.Int63(), @@ -676,7 +693,7 @@ func (suite *ServiceSuite) TestLoadIndex_Success() { Schema: schema, NeedTransfer: false, LoadScope: querypb.LoadScope_Full, - IndexInfoList: []*indexpb.IndexInfo{{}}, + IndexInfoList: indexInfoList, } // Load segment diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 3f4cd6598d27d..3f2980f58e25b 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -1592,9 +1592,10 @@ type queryNodeConfig struct { // segcore KnowhereThreadPoolSize ParamItem `refreshable:"false"` ChunkRows ParamItem `refreshable:"false"` - EnableGrowingSegmentIndex ParamItem `refreshable:"false"` - GrowingIndexNlist ParamItem `refreshable:"false"` - GrowingIndexNProbe ParamItem `refreshable:"false"` + EnableInterimSegmentIndex ParamItem `refreshable:"false"` + InterimIndexNlist ParamItem `refreshable:"false"` + InterimIndexNProbe ParamItem `refreshable:"false"` + InterimIndexMemExpandRate ParamItem `refreshable:"false"` // memory limit LoadMemoryUsageFactor ParamItem `refreshable:"true"` @@ -1715,42 +1716,51 @@ func (p *queryNodeConfig) init(base *BaseTable) { } p.ChunkRows.Init(base.mgr) - p.EnableGrowingSegmentIndex = ParamItem{ - Key: "queryNode.segcore.growing.enableIndex", + p.EnableInterimSegmentIndex = ParamItem{ + Key: "queryNode.segcore.interimIndex.enableIndex", Version: "2.0.0", DefaultValue: "false", - Doc: "Enable segment growing with index to accelerate vector search.", + Doc: "Enable segment build with index to accelerate vector search when segment is in growing or binlog.", Export: true, } - p.EnableGrowingSegmentIndex.Init(base.mgr) + p.EnableInterimSegmentIndex.Init(base.mgr) - p.GrowingIndexNlist = ParamItem{ - Key: "queryNode.segcore.growing.nlist", + p.InterimIndexNlist = ParamItem{ + Key: "queryNode.segcore.interimIndex.nlist", Version: "2.0.0", DefaultValue: "128", - Doc: "growing index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8", + Doc: "temp index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8", Export: true, } - p.GrowingIndexNlist.Init(base.mgr) + p.InterimIndexNlist.Init(base.mgr) - p.GrowingIndexNProbe = ParamItem{ - Key: "queryNode.segcore.growing.nprobe", + p.InterimIndexMemExpandRate = ParamItem{ + Key: "queryNode.segcore.interimIndex.memExpansionRate", + Version: "2.0.0", + DefaultValue: "1.15", + Doc: "extra memory needed by building interim index", + Export: true, + } + p.InterimIndexMemExpandRate.Init(base.mgr) + + p.InterimIndexNProbe = ParamItem{ + Key: "queryNode.segcore.interimIndex.nprobe", Version: "2.0.0", Formatter: func(v string) string { - defaultNprobe := p.GrowingIndexNlist.GetAsInt64() / 8 + defaultNprobe := p.InterimIndexNlist.GetAsInt64() / 8 nprobe := getAsInt64(v) if nprobe == 0 { nprobe = defaultNprobe } - if nprobe > p.GrowingIndexNlist.GetAsInt64() { - return p.GrowingIndexNlist.GetValue() + if nprobe > p.InterimIndexNlist.GetAsInt64() { + return p.InterimIndexNlist.GetValue() } return strconv.FormatInt(nprobe, 10) }, Doc: "nprobe to search small index, based on your accuracy requirement, must smaller than nlist", Export: true, } - p.GrowingIndexNProbe.Init(base.mgr) + p.InterimIndexNProbe.Init(base.mgr) p.LoadMemoryUsageFactor = ParamItem{ Key: "queryNode.loadMemoryUsageFactor", diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index 9493c50b92a8d..c8127188e8ee8 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -299,10 +299,10 @@ func TestComponentParam(t *testing.T) { chunkRows := Params.ChunkRows.GetAsInt64() assert.Equal(t, int64(1024), chunkRows) - nlist := Params.GrowingIndexNlist.GetAsInt64() + nlist := Params.InterimIndexNlist.GetAsInt64() assert.Equal(t, int64(128), nlist) - nprobe := Params.GrowingIndexNProbe.GetAsInt64() + nprobe := Params.InterimIndexNProbe.GetAsInt64() assert.Equal(t, int64(16), nprobe) assert.Equal(t, true, Params.GroupEnabled.GetAsBool()) @@ -321,17 +321,17 @@ func TestComponentParam(t *testing.T) { chunkRows = Params.ChunkRows.GetAsInt64() assert.Equal(t, int64(8192), chunkRows) - enableGrowingIndex := Params.EnableGrowingSegmentIndex.GetAsBool() - assert.Equal(t, true, enableGrowingIndex) + enableInterimIndex := Params.EnableInterimSegmentIndex.GetAsBool() + assert.Equal(t, true, enableInterimIndex) - params.Save("queryNode.segcore.growing.enableIndex", "true") - enableGrowingIndex = Params.EnableGrowingSegmentIndex.GetAsBool() - assert.Equal(t, true, enableGrowingIndex) + params.Save("queryNode.segcore.interimIndex.enableIndex", "true") + enableInterimIndex = Params.EnableInterimSegmentIndex.GetAsBool() + assert.Equal(t, true, enableInterimIndex) - nlist = Params.GrowingIndexNlist.GetAsInt64() + nlist = Params.InterimIndexNlist.GetAsInt64() assert.Equal(t, int64(128), nlist) - nprobe = Params.GrowingIndexNProbe.GetAsInt64() + nprobe = Params.InterimIndexNProbe.GetAsInt64() assert.Equal(t, int64(16), nprobe) params.Remove("queryNode.segcore.growing.nlist") diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index c5fbcc04bd4ad..48c51fa29a06d 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -2986,7 +2986,7 @@ def test_search_with_expression(self, dim, expression, _async, enable_dynamic_fi filter_ids.append(_id) # 2. create index - index_param = {"index_type": "IVF_FLAT", "metric_type": "COSINE", "params": {"nlist": 100}} + index_param = {"index_type": "FLAT", "metric_type": "COSINE", "params": {}} collection_w.create_index("float_vector", index_param) collection_w.load() @@ -7379,7 +7379,7 @@ def test_range_search_with_expression(self, dim, expression, _async, enable_dyna filter_ids.append(_id) # 2. create index - index_param = {"index_type": "IVF_FLAT", + index_param = {"index_type": "FLAT", "metric_type": "L2", "params": {"nlist": 100}} collection_w.create_index("float_vector", index_param) collection_w.load()