Skip to content
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.

Commit

Permalink
Sync perform of HNSW/DiskANN (#924)
Browse files Browse the repository at this point in the history
Signed-off-by: liliu-z <[email protected]>
  • Loading branch information
liliu-z authored Jun 10, 2023
1 parent ab9799f commit 998a8b8
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 60 deletions.
10 changes: 1 addition & 9 deletions knowhere/index/vector_index/ConfAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ static const int64_t HNSW_MIN_EFCONSTRUCTION = 8;
static const int64_t HNSW_MAX_EFCONSTRUCTION = 512;
static const int64_t HNSW_MIN_M = 4;
static const int64_t HNSW_MAX_M = 64;
static const int64_t HNSW_MAX_EF = 32768;

static const std::vector<MetricType> default_metric_array{metric::L2, metric::IP};
static const std::vector<MetricType> default_binary_metric_array{metric::HAMMING, metric::JACCARD, metric::TANIMOTO,
Expand Down Expand Up @@ -237,14 +236,7 @@ HNSWConfAdapter::CheckTrain(Config& cfg, const IndexMode mode) {

bool
HNSWConfAdapter::CheckSearch(Config& cfg, const IndexType type, const IndexMode mode) {
auto topk = GetMetaTopk(cfg);
if (topk < HNSW_MAX_EF) {
// normal case if topk is not large
CheckIntegerRange(cfg, indexparam::EF, GetMetaTopk(cfg), HNSW_MAX_EF);
} else {
// if topk is large
CheckIntegerRange(cfg, indexparam::EF, topk, topk * 2);
}
CheckIntegerRange(cfg, indexparam::EF, GetMetaTopk(cfg), std::numeric_limits<int64_t>::max());
return ConfAdapter::CheckSearch(cfg, type, mode);
}

Expand Down
64 changes: 35 additions & 29 deletions knowhere/index/vector_index/IndexDiskANNConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "knowhere/index/vector_index/IndexDiskANNConfig.h"

#include <algorithm>
#include <iostream>
#include <limits>
#include <optional>
#include <sstream>
Expand Down Expand Up @@ -63,8 +64,9 @@ static constexpr uint32_t kBuildNumThreadsMinValue = 1;
static constexpr uint32_t kBuildNumThreadsMaxValue = 128;
static constexpr uint32_t kDiskPqBytesMinValue = 0;
static constexpr std::optional<uint32_t> kDiskPqBytesMaxValue = std::nullopt;
static constexpr uint32_t kSearchListSizeMaxValue = 200;
static constexpr uint32_t kKThreshold = 16;
static constexpr std::optional<uint32_t> kSearchListSizeMaxValue = std::nullopt;
static constexpr uint32_t kDefaultSearchListSizeDivider = 16;
static constexpr uint32_t kInvalideSearchListSize = 0;
static constexpr uint32_t kBeamwidthMinValue = 1;
static constexpr uint32_t kBeamwidthMaxValue = 128;
static constexpr float kFilterThresholdMinValue = -1;
Expand Down Expand Up @@ -142,6 +144,17 @@ CheckNumericParamAndSet(const Config& config, const std::string& key, std::optio
config.at(key).get_to(to_be_set);
}

/**
 * @brief Set a numeric param from the config, or fall back to a caller-supplied default when the key is absent.
 *
 * @param config        Config object that is queried for `key`.
 * @param key           Name of the param to look up.
 * @param min_o         Optional lower bound, passed through unchanged to CheckNumericParamAndSet.
 * @param max_o         Optional upper bound, passed through unchanged to CheckNumericParamAndSet.
 * @param default_value Value written to `to_be_set` when `config` does not contain `key`. It is assigned as-is and is
 *                      not compared against `min_o` / `max_o` here.
 * @param to_be_set     Output: receives either the default or whatever CheckNumericParamAndSet stores.
 */
template <typename T>
void
CheckNumericParamAndSetWithDefault(const Config& config, const std::string& key, std::optional<T> min_o,
                                   std::optional<T> max_o, T default_value, T& to_be_set) {
    if (config.contains(key)) {
        // Key is present: hand over to the mandatory-param helper together with the optional bounds.
        CheckNumericParamAndSet(config, key, min_o, max_o, to_be_set);
    } else {
        // Key is missing: use the default supplied by the caller.
        to_be_set = default_value;
    }
}

/**
* @brief Check the non-numeric param's existence and type, and allocate it to the config.
*/
Expand Down Expand Up @@ -196,11 +209,8 @@ to_json(Config& config, const DiskANNPrepareConfig& prep_conf) {

void
from_json(const Config& config, DiskANNPrepareConfig& prep_conf) {
if (config.contains(kAioMaxnr)) {
CheckNumericParamAndSet<uint64_t>(config, kAioMaxnr, kAioMaxnrMinValue, kAioMaxnrMaxValue, prep_conf.aio_maxnr);
} else {
prep_conf.aio_maxnr = kAioMaxnrDefaultValue;
}
CheckNumericParamAndSetWithDefault<uint64_t>(config, kAioMaxnr, kAioMaxnrMinValue, kAioMaxnrMaxValue,
kAioMaxnrDefaultValue, prep_conf.aio_maxnr);

CheckNumericParamAndSet<float>(config, kCacheDramBudgetGb, kCacheDramBudgetGbMinValue, kCacheDramBudgetGbMaxValue,
prep_conf.search_cache_budget_gb);
Expand All @@ -219,23 +229,21 @@ to_json(Config& config, const DiskANNQueryConfig& query_conf) {
void
from_json(const Config& config, DiskANNQueryConfig& query_conf) {
CheckNumericParamAndSet<uint64_t>(config, kK, kKMinValue, kKMaxValue, query_conf.k);
auto search_list_threshold = query_conf.k < kKThreshold ? kKThreshold : query_conf.k;
if (config.contains(kSearchListSize)) {
// The search_list_size should be no less than the k.
CheckNumericParamAndSet<uint32_t>(config, kSearchListSize, query_conf.k,
std::max(kSearchListSizeMaxValue, static_cast<uint32_t>(10 * query_conf.k)),
query_conf.search_list_size);
uint32_t default_search_list_size =
query_conf.k <= kDefaultSearchListSizeDivider ? kDefaultSearchListSizeDivider : query_conf.k;

if (config.contains(kSearchListSize) &&
kInvalideSearchListSize == GetValueFromConfig<uint32_t>(config, kSearchListSize)) { // Exist but invalid
query_conf.search_list_size = default_search_list_size;
} else {
// if search_list_size not set (==0), not in json string, modify the value.
query_conf.search_list_size = search_list_threshold;
CheckNumericParamAndSetWithDefault<uint32_t>(config, kSearchListSize, query_conf.k, kSearchListSizeMaxValue,
default_search_list_size, query_conf.search_list_size);
}

CheckNumericParamAndSet<uint32_t>(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth);
if (config.contains(kFilterThreshold)) {
CheckNumericParamAndSet<float>(config, kFilterThreshold, kFilterThresholdMinValue, kFilterThresholdMaxValue,
query_conf.filter_threshold);
} else {
query_conf.filter_threshold = kFilterThresholdMinValue;
}
CheckNumericParamAndSetWithDefault<float>(config, kFilterThreshold, kFilterThresholdMinValue,
kFilterThresholdMaxValue, kFilterThresholdMinValue,
query_conf.filter_threshold);
}

void
Expand All @@ -261,12 +269,9 @@ from_json(const Config& config, DiskANNQueryByRangeConfig& query_conf) {
CheckNumericParamAndSet<uint64_t>(config, kMinK, kMinKMinValue, kMinKMaxValue, query_conf.min_k);
CheckNumericParamAndSet<uint64_t>(config, kMaxK, query_conf.min_k, kMaxKMaxValue, query_conf.max_k);
CheckNumericParamAndSet<uint32_t>(config, kBeamwidth, kBeamwidthMinValue, kBeamwidthMaxValue, query_conf.beamwidth);
if (config.contains(kSearchListAndKRatio)) {
CheckNumericParamAndSet<float>(config, kSearchListAndKRatio, kSearchListAndKRatioMinValue,
kSearchListAndKRatioMaxValue, query_conf.search_list_and_k_ratio);
} else {
query_conf.search_list_and_k_ratio = kSearchListAndKRatioDefaultValue;
}
CheckNumericParamAndSetWithDefault<float>(config, kSearchListAndKRatio, kSearchListAndKRatioMinValue,
kSearchListAndKRatioMaxValue, kSearchListAndKRatioDefaultValue,
query_conf.search_list_and_k_ratio);
}

DiskANNBuildConfig
Expand Down Expand Up @@ -309,10 +314,11 @@ DiskANNQueryByRangeConfig::Set(Config& config, const DiskANNQueryByRangeConfig&
config[kDiskANNQueryByRangeConfig] = query_conf;
}

const DiskANNPrepareConfig kSanityCheckDiskANNPrepareConfig; // use default
const DiskANNPrepareConfig kSanityCheckDiskANNPrepareConfig; // use default
const DiskANNQueryConfig kSanityCheckDiskANNQueryConfig{kSanityCheckMinTopK, kSanityCheckMinTopK};

Config GenSanityCheckDiskANNConfig(const Config& build_config) {
Config
GenSanityCheckDiskANNConfig(const Config& build_config) {
Config config = build_config;
DiskANNPrepareConfig::Set(config, kSanityCheckDiskANNPrepareConfig);
DiskANNQueryConfig::Set(config, kSanityCheckDiskANNQueryConfig);
Expand Down
8 changes: 5 additions & 3 deletions knowhere/index/vector_index/IndexDiskANNConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ struct DiskANNPrepareConfig {
struct DiskANNQueryConfig {
uint64_t k;
// A list of search_list sizes to perform searches with. Larger parameters will result in slower latencies, but
// higher accuracies. Must be at least the value of k.
uint32_t search_list_size = 128;
// higher accuracies. Must be at least the value of k. Defaults to 0, meaning Knowhere needs to take care of the
// default value.
uint32_t search_list_size = 0;
// The beamwidth to be used for search. This is the maximum number of IO requests each query will issue per
// iteration of search code. Larger beamwidth will result in fewer IO round-trips per query but might result in
// slightly higher total number of IO requests to SSD per query. For the highest query throughput with a fixed SSD
Expand Down Expand Up @@ -125,5 +126,6 @@ struct DiskANNQueryByRangeConfig {
Set(Config& config, const DiskANNQueryByRangeConfig& query_conf);
};

Config GenSanityCheckDiskANNConfig(const Config& build_config);
Config
GenSanityCheckDiskANNConfig(const Config& build_config);
} // namespace knowhere
23 changes: 7 additions & 16 deletions knowhere/index/vector_index/IndexHNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,11 @@
#include "index/vector_index/helpers/RangeUtil.h"

namespace knowhere {

namespace {
inline int64_t
CheckAndGetEfValue(const Config& config) {
auto topk_val = GetMetaTopk(config);
if (CheckKeyInConfig(config, indexparam::EF)) {
auto ef_val = GetIndexParamEf(config);
if (ef_val < topk_val) {
KNOWHERE_THROW_MSG("ef is smaller than topk in hnsw.");
}
return ef_val;
} else {
return std::max(knowhere::DEFAULT_HNSW_EF, topk_val);
}
}
} // namespace
static constexpr int64_t kDefaultEfDivider = 16;
static constexpr int64_t kDefaultRangeSearchEf = 16;
}

BinarySet
IndexHNSW::Serialize(const Config& config) {
Expand Down Expand Up @@ -267,7 +257,7 @@ IndexHNSW::QueryImpl(int64_t n, const float* xq, int64_t k, float* distances, in
feder = std::make_unique<feder::hnsw::FederResult>();
}

size_t ef = CheckAndGetEfValue(config);
size_t ef = GetIndexParamEf(config, k <= kDefaultEfDivider ? kDefaultEfDivider : k);

hnswlib::SearchParam param{ef};
bool transform = (index_->metric_type_ == 1); // InnerProduct: 1
Expand Down Expand Up @@ -309,7 +299,8 @@ IndexHNSW::QueryByRangeImpl(int64_t n, const float* xq, float*& distances, int64
feder = std::make_unique<feder::hnsw::FederResult>();
}

size_t ef = CheckAndGetEfValue(config);
size_t ef = GetIndexParamEf(config, kDefaultRangeSearchEf);

hnswlib::SearchParam param{ef};

float radius = GetMetaRadius(config);
Expand Down
8 changes: 6 additions & 2 deletions knowhere/index/vector_index/helpers/IndexParameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ SetValueToConfig(Config& cfg, const std::string& key, const T value) {
return GetValueFromConfigWithDefaultValue<T>(cfg, key, value); \
}

// Defines an inline getter `T func_name(const Config& cfg, T value)` that forwards `cfg`, `key` and the
// caller-supplied `value` to GetValueFromConfigWithDefaultValue<T>. Unlike a getter with a fixed default, the
// fallback is chosen by the caller on every call.
// NOTE(review): presumably `value` is returned when `key` is missing from `cfg` -- confirm against
// GetValueFromConfigWithDefaultValue.
#define DEFINE_CONFIG_GETTER_WITH_CUSTOMIZED_DEFAULT_VALUE(func_name, key, T) \
inline T func_name(const Config& cfg, T value) { \
return GetValueFromConfigWithDefaultValue<T>(cfg, key, value); \
}

#define DEFINE_CONFIG_SETTER(func_name, key, T) \
inline void func_name(Config& cfg, T value) { \
SetValueToConfig<T>(cfg, key, (T)(value)); \
Expand Down Expand Up @@ -153,7 +158,6 @@ static const int64_t DEFAULT_PQ_M = 4;
static const int64_t DEFAULT_PQ_NBITS = 8;
static const int64_t DEFAULT_HNSW_EFCONSTRUCTION = 360;
static const int64_t DEFAULT_HNSW_M = 30;
static const int64_t DEFAULT_HNSW_EF = 16;

DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamNprobe, indexparam::NPROBE, DEFAULT_NPROBE, int64_t)
DEFINE_CONFIG_SETTER(SetIndexParamNprobe, indexparam::NPROBE, int64_t)
Expand All @@ -176,7 +180,7 @@ DEFINE_CONFIG_SETTER(SetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, in
DEFINE_CONFIG_GETTER_WITH_DEFAULT_VALUE(GetIndexParamHNSWM, indexparam::HNSW_M, DEFAULT_HNSW_M, int64_t)
DEFINE_CONFIG_SETTER(SetIndexParamHNSWM, indexparam::HNSW_M, int64_t)

DEFINE_CONFIG_GETTER(GetIndexParamEf, indexparam::EF, int64_t)
DEFINE_CONFIG_GETTER_WITH_CUSTOMIZED_DEFAULT_VALUE(GetIndexParamEf, indexparam::EF, int64_t)
DEFINE_CONFIG_SETTER(SetIndexParamEf, indexparam::EF, int64_t)

DEFINE_CONFIG_GETTER(GetIndexParamOverviewLevels, indexparam::OVERVIEW_LEVELS, int64_t)
Expand Down
2 changes: 1 addition & 1 deletion unittest/test_diskann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ TEST_P(DiskANNTest, search_without_search_list_size) {
cfg.clear();
knowhere::DiskANNQueryConfig::Set(cfg, tmp_config);
search_list_size = knowhere::DiskANNQueryConfig::Get(cfg).search_list_size;
EXPECT_EQ(search_list_size, 128);
EXPECT_EQ(search_list_size, 16);
}

TEST_P(DiskANNTest, knn_search_test) {
Expand Down

0 comments on commit 998a8b8

Please sign in to comment.