diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.cpp b/knowhere/index/vector_index/IndexDiskANNConfig.cpp index c57d6bfbc..07eb16847 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.cpp +++ b/knowhere/index/vector_index/IndexDiskANNConfig.cpp @@ -220,13 +220,13 @@ void from_json(const Config& config, DiskANNQueryConfig& query_conf) { CheckNumericParamAndSet(config, kK, kKMinValue, kKMaxValue, query_conf.k); auto search_list_threshold = query_conf.k < kKThreshold ? kKThreshold : query_conf.k; - if (config.contains(kSearchListSize) && config[kSearchListSize].get() >= search_list_threshold) { + if (config.contains(kSearchListSize)) { // The search_list_size should be no less than the k. CheckNumericParamAndSet(config, kSearchListSize, query_conf.k, std::max(kSearchListSizeMaxValue, static_cast(10 * query_conf.k)), query_conf.search_list_size); } else { - // if search_list_size not set (==0), not in json string or smaller than k, modify the value. + // search_list_size is absent from the json config: fall back to max(k, kKThreshold). query_conf.search_list_size = search_list_threshold; } CheckNumericParamAndSet(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth); diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.h b/knowhere/index/vector_index/IndexDiskANNConfig.h index 24be378ff..a6d3a9459 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.h +++ b/knowhere/index/vector_index/IndexDiskANNConfig.h @@ -86,7 +86,7 @@ struct DiskANNQueryConfig { uint64_t k; // A list of search_list sizes to perform searches with. Larger parameters will result in slower latencies, but // higher accuracies. Must be at least the value of k. - uint32_t search_list_size = 0; + uint32_t search_list_size = 128; // The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per // iteration of search code. 
Larger beamwidth will result in fewer IO round-trips per query but might result in // slightly higher total number of IO requests to SSD per query. For the highest query throughput with a fixed SSD diff --git a/knowhere/index/vector_index/IndexHNSW.cpp b/knowhere/index/vector_index/IndexHNSW.cpp index 9bd0ca3ad..98740da45 100644 --- a/knowhere/index/vector_index/IndexHNSW.cpp +++ b/knowhere/index/vector_index/IndexHNSW.cpp @@ -29,6 +29,21 @@ #include "index/vector_index/helpers/RangeUtil.h" namespace knowhere { +namespace { + inline int64_t + CheckAndGetEfValue(const Config& config) { + auto topk_val = GetMetaTopk(config); + if (CheckKeyInConfig(config, indexparam::EF)) { + auto ef_val = GetIndexParamEf(config); + if (ef_val < topk_val) { + KNOWHERE_THROW_MSG("ef is smaller than topk in hnsw."); + } + return ef_val; + } else { + return std::max(knowhere::DEFAULT_HNSW_EF, topk_val); + } + } +} // namespace BinarySet IndexHNSW::Serialize(const Config& config) { @@ -252,7 +267,8 @@ IndexHNSW::QueryImpl(int64_t n, const float* xq, int64_t k, float* distances, in feder = std::make_unique(); } - size_t ef = GetIndexParamEf(config); + size_t ef = CheckAndGetEfValue(config); + hnswlib::SearchParam param{ef}; bool transform = (index_->metric_type_ == 1); // InnerProduct: 1 @@ -293,7 +309,7 @@ IndexHNSW::QueryByRangeImpl(int64_t n, const float* xq, float*& distances, int64 feder = std::make_unique(); } - size_t ef = GetIndexParamEf(config); + size_t ef = CheckAndGetEfValue(config); hnswlib::SearchParam param{ef}; float radius = GetMetaRadius(config); diff --git a/knowhere/index/vector_index/helpers/IndexParameter.h b/knowhere/index/vector_index/helpers/IndexParameter.h index 4ba24846e..3bfe3071d 100644 --- a/knowhere/index/vector_index/helpers/IndexParameter.h +++ b/knowhere/index/vector_index/helpers/IndexParameter.h @@ -176,7 +176,7 @@ DEFINE_CONFIG_SETTER(SetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, in 
DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamHNSWM, indexparam::HNSW_M, DEFAULT_HNSW_M, int64_t) DEFINE_CONFIG_SETTER(SetIndexParamHNSWM, indexparam::HNSW_M, int64_t) -DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamEf, indexparam::EF, DEFAULT_HNSW_EF, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamEf, indexparam::EF, int64_t) DEFINE_CONFIG_SETTER(SetIndexParamEf, indexparam::EF, int64_t) DEFINE_CONFIG_GETTER(GetIndexParamOverviewLevels, indexparam::OVERVIEW_LEVELS, int64_t) diff --git a/unittest/test_diskann.cpp b/unittest/test_diskann.cpp index f5e941c83..cd8f98381 100644 --- a/unittest/test_diskann.cpp +++ b/unittest/test_diskann.cpp @@ -477,7 +477,7 @@ TEST_P(DiskANNTest, search_without_search_list_size) { cfg.clear(); knowhere::DiskANNQueryConfig::Set(cfg, tmp_config); search_list_size = knowhere::DiskANNQueryConfig::Get(cfg).search_list_size; - EXPECT_EQ(search_list_size, 16); + EXPECT_EQ(search_list_size, 128); } TEST_P(DiskANNTest, knn_search_test) {