diff --git a/knowhere/index/vector_index/ConfAdapter.cpp b/knowhere/index/vector_index/ConfAdapter.cpp index 5a9591313..232faed14 100644 --- a/knowhere/index/vector_index/ConfAdapter.cpp +++ b/knowhere/index/vector_index/ConfAdapter.cpp @@ -39,7 +39,6 @@ static const int64_t HNSW_MIN_EFCONSTRUCTION = 8; static const int64_t HNSW_MAX_EFCONSTRUCTION = 512; static const int64_t HNSW_MIN_M = 4; static const int64_t HNSW_MAX_M = 64; -static const int64_t HNSW_MAX_EF = 32768; static const std::vector default_metric_array{metric::L2, metric::IP}; static const std::vector default_binary_metric_array{metric::HAMMING, metric::JACCARD, metric::TANIMOTO, @@ -237,14 +236,7 @@ HNSWConfAdapter::CheckTrain(Config& cfg, const IndexMode mode) { bool HNSWConfAdapter::CheckSearch(Config& cfg, const IndexType type, const IndexMode mode) { - auto topk = GetMetaTopk(cfg); - if (topk < HNSW_MAX_EF) { - // normal case if topk is not large - CheckIntegerRange(cfg, indexparam::EF, GetMetaTopk(cfg), HNSW_MAX_EF); - } else { - // if topk is large - CheckIntegerRange(cfg, indexparam::EF, topk, topk * 2); - } + CheckIntegerRange(cfg, indexparam::EF, GetMetaTopk(cfg), std::numeric_limits::max()); return ConfAdapter::CheckSearch(cfg, type, mode); } diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.cpp b/knowhere/index/vector_index/IndexDiskANNConfig.cpp index 07eb16847..a909eb7fa 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.cpp +++ b/knowhere/index/vector_index/IndexDiskANNConfig.cpp @@ -12,6 +12,7 @@ #include "knowhere/index/vector_index/IndexDiskANNConfig.h" #include +#include #include #include #include @@ -63,8 +64,9 @@ static constexpr uint32_t kBuildNumThreadsMinValue = 1; static constexpr uint32_t kBuildNumThreadsMaxValue = 128; static constexpr uint32_t kDiskPqBytesMinValue = 0; static constexpr std::optional kDiskPqBytesMaxValue = std::nullopt; -static constexpr uint32_t kSearchListSizeMaxValue = 200; -static constexpr uint32_t kKThreshold = 16; +static constexpr std::optional kSearchListSizeMaxValue = std::nullopt; +static constexpr uint32_t kDefaultSearchListSizeDivider = 16; +static constexpr uint32_t kInvalideSearchListSize = 0; static constexpr uint32_t kBeamwidthMinValue = 1; static constexpr uint32_t kBeamwidthMaxValue = 128; static constexpr float kFilterThresholdMinValue = -1; @@ -142,6 +144,17 @@ CheckNumericParamAndSet(const Config& config, const std::string& key, std::optio config.at(key).get_to(to_be_set); } +template +void +CheckNumericParamAndSetWithDefault(const Config& config, const std::string& key, std::optional min_o, + std::optional max_o, T default_value, T& to_be_set) { + if (!config.contains(key)) { + to_be_set = default_value; + return; + } + CheckNumericParamAndSet(config, key, min_o, max_o, to_be_set); +} + /** * @brief Check the non-numeric param's existence and type, and allocate it to the config. */ @@ -196,11 +209,8 @@ to_json(Config& config, const DiskANNPrepareConfig& prep_conf) { void from_json(const Config& config, DiskANNPrepareConfig& prep_conf) { - if (config.contains(kAioMaxnr)) { - CheckNumericParamAndSet(config, kAioMaxnr, kAioMaxnrMinValue, kAioMaxnrMaxValue, prep_conf.aio_maxnr); - } else { - prep_conf.aio_maxnr = kAioMaxnrDefaultValue; - } + CheckNumericParamAndSetWithDefault(config, kAioMaxnr, kAioMaxnrMinValue, kAioMaxnrMaxValue, + kAioMaxnrDefaultValue, prep_conf.aio_maxnr); CheckNumericParamAndSet(config, kCacheDramBudgetGb, kCacheDramBudgetGbMinValue, kCacheDramBudgetGbMaxValue, prep_conf.search_cache_budget_gb); @@ -219,23 +229,21 @@ to_json(Config& config, const DiskANNQueryConfig& query_conf) { void from_json(const Config& config, DiskANNQueryConfig& query_conf) { CheckNumericParamAndSet(config, kK, kKMinValue, kKMaxValue, query_conf.k); - auto search_list_threshold = query_conf.k < kKThreshold ? kKThreshold : query_conf.k; - if (config.contains(kSearchListSize)) { - // The search_list_size should be no less than the k. - CheckNumericParamAndSet(config, kSearchListSize, query_conf.k, - std::max(kSearchListSizeMaxValue, static_cast(10 * query_conf.k)), - query_conf.search_list_size); + uint32_t default_search_list_size = + query_conf.k <= kDefaultSearchListSizeDivider ? kDefaultSearchListSizeDivider : query_conf.k; + + if (config.contains(kSearchListSize) && + kInvalideSearchListSize == GetValueFromConfig(config, kSearchListSize)) { // Exist but invalid + query_conf.search_list_size = default_search_list_size; } else { - // if search_list_size not set (==0), not in json string, modify the value. - query_conf.search_list_size = search_list_threshold; + CheckNumericParamAndSetWithDefault(config, kSearchListSize, query_conf.k, kSearchListSizeMaxValue, + default_search_list_size, query_conf.search_list_size); } + CheckNumericParamAndSet(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth); - if (config.contains(kFilterThreshold)) { - CheckNumericParamAndSet(config, kFilterThreshold, kFilterThresholdMinValue, kFilterThresholdMaxValue, - query_conf.filter_threshold); - } else { - query_conf.filter_threshold = kFilterThresholdMinValue; - } + CheckNumericParamAndSetWithDefault(config, kFilterThreshold, kFilterThresholdMinValue, + kFilterThresholdMaxValue, kFilterThresholdMinValue, + query_conf.filter_threshold); } void @@ -261,12 +269,9 @@ from_json(const Config& config, DiskANNQueryByRangeConfig& query_conf) { CheckNumericParamAndSet(config, kMinK, kMinKMinValue, kMinKMaxValue, query_conf.min_k); CheckNumericParamAndSet(config, kMaxK, query_conf.min_k, kMaxKMaxValue, query_conf.max_k); CheckNumericParamAndSet(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth); - if (config.contains(kSearchListAndKRatio)) { - CheckNumericParamAndSet(config, kSearchListAndKRatio, kSearchListAndKRatioMinValue, - kSearchListAndKRatioMaxValue, query_conf.search_list_and_k_ratio); - } else { - query_conf.search_list_and_k_ratio = kSearchListAndKRatioDefaultValue; - } + CheckNumericParamAndSetWithDefault(config, kSearchListAndKRatio, kSearchListAndKRatioMinValue, + kSearchListAndKRatioMaxValue, kSearchListAndKRatioDefaultValue, + query_conf.search_list_and_k_ratio); } DiskANNBuildConfig @@ -309,10 +314,11 @@ DiskANNQueryByRangeConfig::Set(Config& config, const DiskANNQueryByRangeConfig& config[kDiskANNQueryByRangeConfig] = query_conf; } -const DiskANNPrepareConfig kSanityCheckDiskANNPrepareConfig; // use default +const DiskANNPrepareConfig kSanityCheckDiskANNPrepareConfig; // use default const DiskANNQueryConfig kSanityCheckDiskANNQueryConfig{kSanityCheckMinTopK, kSanityCheckMinTopK}; -Config GenSanityCheckDiskANNConfig(const Config& build_config) { +Config +GenSanityCheckDiskANNConfig(const Config& build_config) { Config config = build_config; DiskANNPrepareConfig::Set(config, kSanityCheckDiskANNPrepareConfig); DiskANNQueryConfig::Set(config, kSanityCheckDiskANNQueryConfig); diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.h b/knowhere/index/vector_index/IndexDiskANNConfig.h index a6d3a9459..16329cebf 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.h +++ b/knowhere/index/vector_index/IndexDiskANNConfig.h @@ -85,8 +85,9 @@ struct DiskANNPrepareConfig { struct DiskANNQueryConfig { uint64_t k; // A list of search_list sizes to perform searches with. Larger parameters will result in slower latencies, but - // higher accuracies. Must be at least the value of k. - uint32_t search_list_size = 128; + // higher accuracies. Must be at least the value of k. Default to 0, meaning Knowhere need to take care of the + // default value + uint32_t search_list_size = 0; // The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per // iteration of search code. Larger beamwidth will result in fewer IO round-trips per query but might result in // slightly higher total number of IO requests to SSD per query. For the highest query throughput with a fixed SSD @@ -125,5 +126,6 @@ struct DiskANNQueryByRangeConfig { Set(Config& config, const DiskANNQueryByRangeConfig& query_conf); }; -Config GenSanityCheckDiskANNConfig(const Config& build_config); +Config +GenSanityCheckDiskANNConfig(const Config& build_config); } // namespace knowhere diff --git a/knowhere/index/vector_index/IndexHNSW.cpp b/knowhere/index/vector_index/IndexHNSW.cpp index 98740da45..760fde183 100644 --- a/knowhere/index/vector_index/IndexHNSW.cpp +++ b/knowhere/index/vector_index/IndexHNSW.cpp @@ -29,21 +29,11 @@ #include "index/vector_index/helpers/RangeUtil.h" namespace knowhere { + namespace { - inline int64_t - CheckAndGetEfValue(const Config& config) { - auto topk_val = GetMetaTopk(config); - if (CheckKeyInConfig(config, indexparam::EF)) { - auto ef_val = GetIndexParamEf(config); - if (ef_val < topk_val) { - KNOWHERE_THROW_MSG("ef is smaller than topk in hnsw."); - } - return ef_val; - } else { - return std::max(knowhere::DEFAULT_HNSW_EF, topk_val); - } - } -} // namespace +static constexpr int64_t kDefaultEfDivider = 16; +static constexpr int64_t kDefaultRangeSearchEf = 16; +} BinarySet IndexHNSW::Serialize(const Config& config) { @@ -267,7 +257,7 @@ IndexHNSW::QueryImpl(int64_t n, const float* xq, int64_t k, float* distances, in feder = std::make_unique(); } - size_t ef = CheckAndGetEfValue(config); + size_t ef = GetIndexParamEf(config, k <= kDefaultEfDivider ? kDefaultEfDivider : k); hnswlib::SearchParam param{ef}; bool transform = (index_->metric_type_ == 1); // InnerProduct: 1 @@ -309,7 +299,8 @@ IndexHNSW::QueryByRangeImpl(int64_t n, const float* xq, float*& distances, int64 feder = std::make_unique(); } - size_t ef = CheckAndGetEfValue(config); + size_t ef = GetIndexParamEf(config, kDefaultRangeSearchEf); + hnswlib::SearchParam param{ef}; float radius = GetMetaRadius(config); diff --git a/knowhere/index/vector_index/helpers/IndexParameter.h b/knowhere/index/vector_index/helpers/IndexParameter.h index 3bfe3071d..4ac6cdff8 100644 --- a/knowhere/index/vector_index/helpers/IndexParameter.h +++ b/knowhere/index/vector_index/helpers/IndexParameter.h @@ -108,6 +108,11 @@ SetValueToConfig(Config& cfg, const std::string& key, const T value) { return GetValueFromConfigWithDefaultValue(cfg, key, value); \ } +#define DEFINE_CONFIG_GETTER_WITH_CUSTOMIZED_DEFAULT_VALUE(func_name, key, T) \ + inline T func_name(const Config& cfg, T value) { \ + return GetValueFromConfigWithDefaultValue(cfg, key, value); \ + } + #define DEFINE_CONFIG_SETTER(func_name, key, T) \ inline void func_name(Config& cfg, T value) { \ SetValueToConfig(cfg, key, (T)(value)); \ @@ -153,7 +158,6 @@ static const int64_t DEFAULT_PQ_M = 4; static const int64_t DEFAULT_PQ_NBITS = 8; static const int64_t DEFAULT_HNSW_EFCONSTRUCTION = 360; static const int64_t DEFAULT_HNSW_M = 30; -static const int64_t DEFAULT_HNSW_EF = 16; DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamNprobe, indexparam::NPROBE, DEFAULT_NPROBE, int64_t) DEFINE_CONFIG_SETTER(SetIndexParamNprobe, indexparam::NPROBE, int64_t) @@ -176,7 +180,7 @@ DEFINE_CONFIG_SETTER(SetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, in DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamHNSWM, indexparam::HNSW_M, DEFAULT_HNSW_M, int64_t) DEFINE_CONFIG_SETTER(SetIndexParamHNSWM, indexparam::HNSW_M, int64_t) -DEFINE_CONFIG_GETTER(GetIndexParamEf, indexparam::EF, int64_t) +DEFINE_CONFIG_GETTER_WITH_CUSTOMIZED_DEFAULT_VALUE(GetIndexParamEf, indexparam::EF, int64_t) DEFINE_CONFIG_SETTER(SetIndexParamEf, indexparam::EF, int64_t) DEFINE_CONFIG_GETTER(GetIndexParamOverviewLevels, indexparam::OVERVIEW_LEVELS, int64_t) diff --git a/unittest/test_diskann.cpp b/unittest/test_diskann.cpp index cd8f98381..f5e941c83 100644 --- a/unittest/test_diskann.cpp +++ b/unittest/test_diskann.cpp @@ -477,7 +477,7 @@ TEST_P(DiskANNTest, search_without_search_list_size) { cfg.clear(); knowhere::DiskANNQueryConfig::Set(cfg, tmp_config); search_list_size = knowhere::DiskANNQueryConfig::Get(cfg).search_list_size; - EXPECT_EQ(search_list_size, 128); + EXPECT_EQ(search_list_size, 16); } TEST_P(DiskANNTest, knn_search_test) {