From 1dc984d6009bb09facc3a49b6e1379f7166d7c53 Mon Sep 17 00:00:00 2001 From: cqy123456 <39671710+cqy123456@users.noreply.github.com> Date: Mon, 29 May 2023 12:33:24 +0800 Subject: [PATCH] Modify the search list size if this value not set in struct or too small (#911) Signed-off-by: cqy123456 --- knowhere/index/vector_index/IndexDiskANNConfig.cpp | 6 ++++-- knowhere/index/vector_index/IndexDiskANNConfig.h | 2 +- unittest/test_diskann.cpp | 10 +++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.cpp b/knowhere/index/vector_index/IndexDiskANNConfig.cpp index 9bb709fbe..c57d6bfbc 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.cpp +++ b/knowhere/index/vector_index/IndexDiskANNConfig.cpp @@ -219,13 +219,15 @@ to_json(Config& config, const DiskANNQueryConfig& query_conf) { void from_json(const Config& config, DiskANNQueryConfig& query_conf) { CheckNumericParamAndSet(config, kK, kKMinValue, kKMaxValue, query_conf.k); - if (config.contains(kSearchListSize)) { + auto search_list_threshold = query_conf.k < kKThreshold ? kKThreshold : query_conf.k; + if (config.contains(kSearchListSize) && config[kSearchListSize].get() >= search_list_threshold) { // The search_list_size should be no less than the k. CheckNumericParamAndSet(config, kSearchListSize, query_conf.k, std::max(kSearchListSizeMaxValue, static_cast(10 * query_conf.k)), query_conf.search_list_size); } else { - query_conf.search_list_size = query_conf.k < kKThreshold ? kKThreshold : query_conf.k; + // If search_list_size is unset (== 0), absent from the JSON config, or smaller than max(k, kKThreshold), fall back to that threshold. 
+ query_conf.search_list_size = search_list_threshold; } CheckNumericParamAndSet(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth); if (config.contains(kFilterThreshold)) { diff --git a/knowhere/index/vector_index/IndexDiskANNConfig.h b/knowhere/index/vector_index/IndexDiskANNConfig.h index ffc25866c..24be378ff 100644 --- a/knowhere/index/vector_index/IndexDiskANNConfig.h +++ b/knowhere/index/vector_index/IndexDiskANNConfig.h @@ -86,7 +86,7 @@ struct DiskANNQueryConfig { uint64_t k; // A list of search_list sizes to perform searches with. Larger parameters will result in slower latencies, but // higher accuracies. Must be at least the value of k. - uint32_t search_list_size; + uint32_t search_list_size = 0; // The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per // iteration of search code. Larger beamwidth will result in fewer IO round-trips per query but might result in // slightly higher total number of IO requests to SSD per query. 
For the highest query throughput with a fixed SSD diff --git a/unittest/test_diskann.cpp b/unittest/test_diskann.cpp index 65b949d2e..f5e941c83 100644 --- a/unittest/test_diskann.cpp +++ b/unittest/test_diskann.cpp @@ -471,6 +471,13 @@ TEST_P(DiskANNTest, search_without_search_list_size) { query_cfg["k"] = 32; search_list_size = knowhere::DiskANNQueryConfig::Get(cfg).search_list_size; EXPECT_EQ(search_list_size, 32); + + knowhere::DiskANNQueryConfig tmp_config; + tmp_config.k = 10; + cfg.clear(); + knowhere::DiskANNQueryConfig::Set(cfg, tmp_config); + search_list_size = knowhere::DiskANNQueryConfig::Get(cfg).search_list_size; + EXPECT_EQ(search_list_size, 16); } TEST_P(DiskANNTest, knn_search_test) { @@ -719,9 +726,6 @@ TEST_P(DiskANNTest, config_test) { // query config knowhere::DiskANNQueryConfig query_conf_to_test = query_conf; - query_conf_to_test.k = 10; - query_conf_to_test.search_list_size = 9; - CheckConfigError(query_conf_to_test); query_conf_to_test = query_conf; query_conf_to_test.beamwidth = 0;