Skip to content
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.

Commit

Permalink
set search_list(ef) default size and throw exception if search list(e…
Browse files Browse the repository at this point in the history
…f) < topk (#923)

Signed-off-by: cqy123456 <[email protected]>
  • Loading branch information
cqy123456 authored Jun 9, 2023
1 parent 1dc984d commit ab9799f
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 7 deletions.
4 changes: 2 additions & 2 deletions knowhere/index/vector_index/IndexDiskANNConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,13 @@ void
from_json(const Config& config, DiskANNQueryConfig& query_conf) {
CheckNumericParamAndSet<uint64_t>(config, kK, kKMinValue, kKMaxValue, query_conf.k);
auto search_list_threshold = query_conf.k < kKThreshold ? kKThreshold : query_conf.k;
if (config.contains(kSearchListSize) && config[kSearchListSize].get<uint32_t>() >= search_list_threshold) {
if (config.contains(kSearchListSize)) {
// The search_list_size should be no less than the k.
CheckNumericParamAndSet<uint32_t>(config, kSearchListSize, query_conf.k,
std::max(kSearchListSizeMaxValue, static_cast<uint32_t>(10 * query_conf.k)),
query_conf.search_list_size);
} else {
// if search_list_size not set (==0), not in json string or smaller than k, modify the value.
// if search_list_size not set (==0), not in json string, modify the value.
query_conf.search_list_size = search_list_threshold;
}
CheckNumericParamAndSet<uint32_t>(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth);
Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/IndexDiskANNConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ struct DiskANNQueryConfig {
uint64_t k;
// A list of search_list sizes to perform searches with. Larger parameters will result in slower latencies, but
// higher accuracies. Must be at least the value of k.
uint32_t search_list_size = 0;
uint32_t search_list_size = 128;
// The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per
// iteration of search code. Larger beamwidth will result in fewer IO round-trips per query but might result in
// slightly higher total number of IO requests to SSD per query. For the highest query throughput with a fixed SSD
Expand Down
20 changes: 18 additions & 2 deletions knowhere/index/vector_index/IndexHNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@
#include "index/vector_index/helpers/RangeUtil.h"

namespace knowhere {
namespace {
/// Resolve the search-time `ef` value for an HNSW query from `config`.
///
/// - If `ef` is present in `config`, it is returned as-is, unless it is smaller
///   than topk, in which case the error is reported via KNOWHERE_THROW_MSG.
/// - If `ef` is absent, the larger of DEFAULT_HNSW_EF and topk is returned.
///
/// @param config  query configuration; topk is read via GetMetaTopk.
/// @return the `ef` value to hand to the HNSW search.
inline int64_t
CheckAndGetEfValue(const Config& config) {
    const auto topk = GetMetaTopk(config);

    // No explicit ef supplied: use the default, but never go below topk.
    if (!CheckKeyInConfig(config, indexparam::EF)) {
        return std::max(knowhere::DEFAULT_HNSW_EF, topk);
    }

    // An explicit ef must be at least topk.
    const auto ef = GetIndexParamEf(config);
    if (ef < topk) {
        KNOWHERE_THROW_MSG("ef is smaller than topk in hnsw.");
    }
    return ef;
}
}  // namespace

BinarySet
IndexHNSW::Serialize(const Config& config) {
Expand Down Expand Up @@ -252,7 +267,8 @@ IndexHNSW::QueryImpl(int64_t n, const float* xq, int64_t k, float* distances, in
feder = std::make_unique<feder::hnsw::FederResult>();
}

size_t ef = GetIndexParamEf(config);
size_t ef = CheckAndGetEfValue(config);

hnswlib::SearchParam param{ef};
bool transform = (index_->metric_type_ == 1); // InnerProduct: 1

Expand Down Expand Up @@ -293,7 +309,7 @@ IndexHNSW::QueryByRangeImpl(int64_t n, const float* xq, float*& distances, int64
feder = std::make_unique<feder::hnsw::FederResult>();
}

size_t ef = GetIndexParamEf(config);
size_t ef = CheckAndGetEfValue(config);
hnswlib::SearchParam param{ef};

float radius = GetMetaRadius(config);
Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/helpers/IndexParameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ DEFINE_CONFIG_SETTER(SetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, in
DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamHNSWM, indexparam::HNSW_M, DEFAULT_HNSW_M, int64_t)
DEFINE_CONFIG_SETTER(SetIndexParamHNSWM, indexparam::HNSW_M, int64_t)

DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamEf, indexparam::EF, DEFAULT_HNSW_EF, int64_t)
DEFINE_CONFIG_GETTER(GetIndexParamEf, indexparam::EF, int64_t)
DEFINE_CONFIG_SETTER(SetIndexParamEf, indexparam::EF, int64_t)

DEFINE_CONFIG_GETTER(GetIndexParamOverviewLevels, indexparam::OVERVIEW_LEVELS, int64_t)
Expand Down
2 changes: 1 addition & 1 deletion unittest/test_diskann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ TEST_P(DiskANNTest, search_without_search_list_size) {
cfg.clear();
knowhere::DiskANNQueryConfig::Set(cfg, tmp_config);
search_list_size = knowhere::DiskANNQueryConfig::Get(cfg).search_list_size;
EXPECT_EQ(search_list_size, 16);
EXPECT_EQ(search_list_size, 128);
}

TEST_P(DiskANNTest, knn_search_test) {
Expand Down

0 comments on commit ab9799f

Please sign in to comment.