Skip to content

Commit

Permalink
enhance:[cherry-pick] Use binlog index for better search performance (#…
Browse files Browse the repository at this point in the history
…29012)

this pr is cherry-pick from master:
pr: #28528
pr: #27673
related issue:
issue: #27678

Signed-off-by: cqy123456 <[email protected]>
  • Loading branch information
cqy123456 authored Dec 7, 2023
1 parent cdd1305 commit 8fd38c8
Show file tree
Hide file tree
Showing 25 changed files with 590 additions and 88 deletions.
7 changes: 4 additions & 3 deletions configs/milvus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,11 @@ queryNode:
# This parameter is only useful when enable-disk = true.
# And this value should be a number greater than 1 and less than 32.
chunkRows: 1024 # The number of vectors in a chunk.
growing: # growing a vector index for growing segment to accelerate search
interimIndex: # build a temporary vector index for growing segment or binlog to accelerate search
enableIndex: true
nlist: 128 # growing segment index nlist
nprobe: 16 # nprobe to search growing segment, based on your accuracy requirement, must smaller than nlist
nlist: 128 # segment index nlist
nprobe: 16 # nprobe to search segment, based on your accuracy requirement, must be smaller than nlist
memExpansionRate: 1.15 # the ratio of building interim index memory usage to raw data
loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
enableDisk: false # enable querynode load disk index, and search on disk index
maxDiskUsagePercentage: 95
Expand Down
6 changes: 4 additions & 2 deletions internal/core/src/segcore/FieldIndexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ VectorFieldIndexing::VectorFieldIndexing(const FieldMeta& field_meta,
: FieldIndexing(field_meta, segcore_config),
build(false),
sync_with_index(false),
config_(std::make_unique<VecIndexConfig>(
segment_max_row_count, field_index_meta, segcore_config)) {
config_(std::make_unique<VecIndexConfig>(segment_max_row_count,
field_index_meta,
segcore_config,
SegmentType::Growing)) {
index_ = std::make_unique<index::VectorMemIndex>(
config_->GetIndexType(),
config_->GetMetricType(),
Expand Down
2 changes: 1 addition & 1 deletion internal/core/src/segcore/FieldIndexing.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ class IndexingRecord {
for (auto& [field_id, field_meta] : schema_.get_fields()) {
++offset_id;
if (field_meta.is_vector() &&
segcore_config_.get_enable_growing_segment_index()) {
segcore_config_.get_enable_interim_segment_index()) {
// TODO: skip binary small index now, reenable after config.yaml is ready
if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
continue;
Expand Down
6 changes: 3 additions & 3 deletions internal/core/src/segcore/IndexConfigGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
#include "log/Log.h"

namespace milvus::segcore {

VecIndexConfig::VecIndexConfig(const int64_t max_index_row_cout,
const FieldIndexMeta& index_meta_,
const SegcoreConfig& config)
const SegcoreConfig& config,
const SegmentType& segment_type)
: max_index_row_count_(max_index_row_cout), config_(config) {
origin_index_type_ = index_meta_.GetIndexType();
metric_type_ = index_meta_.GeMetricType();

index_type_ = support_index_types[0];
index_type_ = support_index_types.at(segment_type);
build_params_[knowhere::meta::METRIC_TYPE] = metric_type_;
build_params_[knowhere::indexparam::NLIST] =
std::to_string(config_.get_nlist());
Expand Down
9 changes: 6 additions & 3 deletions internal/core/src/segcore/IndexConfigGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "knowhere/config.h"
#include "SegcoreConfig.h"
#include "common/QueryInfo.h"
#include "common/type_c.h"

namespace milvus::segcore {

Expand All @@ -27,8 +28,9 @@ enum class IndexConfigLevel {
};

class VecIndexConfig {
inline static const std::vector<std::string> support_index_types = {
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC};
inline static const std::map<SegmentType, std::string> support_index_types =
{{SegmentType::Growing, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC},
{SegmentType::Sealed, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC}};

inline static const std::map<std::string, double> index_build_ratio = {
{knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, 0.1}};
Expand All @@ -39,7 +41,8 @@ class VecIndexConfig {
public:
VecIndexConfig(const int64_t max_index_row_count,
const FieldIndexMeta& index_meta_,
const SegcoreConfig& config);
const SegcoreConfig& config,
const SegmentType& segment_type);

int64_t
GetBuildThreshold() const noexcept;
Expand Down
16 changes: 8 additions & 8 deletions internal/core/src/segcore/SegcoreConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,20 @@ class SegcoreConfig {
}

void
set_enable_growing_segment_index(bool enable_growing_segment_index) {
enable_growing_segment_index_ = enable_growing_segment_index;
set_enable_interim_segment_index(bool enable_interim_segment_index) {
this->enable_interim_segment_index_ = enable_interim_segment_index;
}

bool
get_enable_growing_segment_index() const {
return enable_growing_segment_index_;
get_enable_interim_segment_index() const {
return enable_interim_segment_index_;
}

private:
bool enable_growing_segment_index_ = false;
int64_t chunk_rows_ = 32 * 1024;
int64_t nlist_ = 100;
int64_t nprobe_ = 4;
inline static bool enable_interim_segment_index_ = false;
inline static int64_t chunk_rows_ = 32 * 1024;
inline static int64_t nlist_ = 100;
inline static int64_t nprobe_ = 4;
};

} // namespace milvus::segcore
4 changes: 2 additions & 2 deletions internal/core/src/segcore/SegmentGrowingImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
field_meta);
}
//insert vector data into index
if (segcore_config_.get_enable_growing_segment_index()) {
if (segcore_config_.get_enable_interim_segment_index()) {
indexing_record_.AppendingIndex(
reserved_offset,
num_rows,
Expand Down Expand Up @@ -204,7 +204,7 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
insert_record_.get_field_data_base(field_id)->set_data_raw(
reserved_offset, field_data);
}
if (segcore_config_.get_enable_growing_segment_index()) {
if (segcore_config_.get_enable_interim_segment_index()) {
auto offset = reserved_offset;
for (auto& data : field_data) {
auto row_count = data->get_num_rows();
Expand Down
153 changes: 134 additions & 19 deletions internal/core/src/segcore/SegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "storage/ChunkCacheSingleton.h"
#include "common/File.h"
#include "common/Tracer.h"
#include "index/VectorMemIndex.h"

namespace milvus::segcore {

Expand Down Expand Up @@ -99,17 +100,19 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
") than other column's row count (" +
std::to_string(num_rows_.value()) + ")");
}
AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready");
if (get_bit(field_data_ready_bitset_, field_id)) {
fields_.erase(field_id);
set_bit(field_data_ready_bitset_, field_id, false);
} else if (get_bit(binlog_index_bitset_, field_id)) {
set_bit(binlog_index_bitset_, field_id, false);
vector_indexings_.drop_field_indexing(field_id);
}
update_row_count(row_count);
vector_indexings_.append_field_indexing(
field_id,
metric_type,
std::move(const_cast<LoadIndexInfo&>(info).index));

set_bit(index_ready_bitset_, field_id, true);
update_row_count(row_count);
// release field column
fields_.erase(field_id);
set_bit(field_data_ready_bitset_, field_id, false);
}

void
Expand Down Expand Up @@ -370,11 +373,29 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
insert_record_.seal_pks();
}

bool use_temp_index = false;
{
// update num_rows to build temperate binlog index
std::unique_lock lck(mutex_);
update_row_count(num_rows);
}

if (generate_binlog_index(field_id)) {
std::unique_lock lck(mutex_);
fields_.erase(field_id);
set_bit(field_data_ready_bitset_, field_id, false);
use_temp_index = true;
}

if (!use_temp_index) {
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
}
}
{
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
update_row_count(num_rows);
}
std::unique_lock lck(mutex_);
update_row_count(num_rows);
}

void
Expand Down Expand Up @@ -613,7 +634,26 @@ SegmentSealedImpl::vector_search(SearchInfo& search_info,

AssertInfo(field_meta.is_vector(),
"The meta type of vector field is not vector type");
if (get_bit(index_ready_bitset_, field_id)) {
if (get_bit(binlog_index_bitset_, field_id)) {
AssertInfo(
vec_binlog_config_.find(field_id) != vec_binlog_config_.end(),
"The binlog params is not generate.");
auto binlog_search_info =
vec_binlog_config_.at(field_id)->GetSearchConf(search_info);

AssertInfo(vector_indexings_.is_ready(field_id),
"vector indexes isn't ready for field " +
std::to_string(field_id.get()));
query::SearchOnSealedIndex(*schema_,
vector_indexings_,
binlog_search_info,
query_data,
query_count,
bitset,
output);
milvus::tracer::AddEvent(
"finish_searching_vector_temperate_binlog_index");
} else if (get_bit(index_ready_bitset_, field_id)) {
AssertInfo(vector_indexings_.is_ready(field_id),
"vector indexes isn't ready for field " +
std::to_string(field_id.get()));
Expand Down Expand Up @@ -680,7 +720,8 @@ SegmentSealedImpl::get_vector(FieldId field_id,
auto& field_meta = schema_->operator[](field_id);
AssertInfo(field_meta.is_vector(), "vector field is not vector type");

if (!get_bit(index_ready_bitset_, field_id)) {
if (!get_bit(index_ready_bitset_, field_id) &&
!get_bit(binlog_index_bitset_, field_id)) {
return fill_with_empty(field_id, count);
}

Expand Down Expand Up @@ -774,8 +815,14 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) {
} else {
auto& field_meta = schema_->operator[](field_id);
std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, false);
insert_record_.drop_field_data(field_id);
if (get_bit(field_data_ready_bitset_, field_id)) {
set_bit(field_data_ready_bitset_, field_id, false);
insert_record_.drop_field_data(field_id);
}
if (get_bit(binlog_index_bitset_, field_id)) {
set_bit(binlog_index_bitset_, field_id, false);
vector_indexings_.drop_field_indexing(field_id);
}
lck.unlock();
}
}
Expand Down Expand Up @@ -810,7 +857,8 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const {
}

auto& request_fields = plan->extra_info_opt_.value().involved_fields_;
auto field_ready_bitset = field_data_ready_bitset_ | index_ready_bitset_;
auto field_ready_bitset =
field_data_ready_bitset_ | index_ready_bitset_ | binlog_index_bitset_;
AssertInfo(request_fields.size() == field_ready_bitset.size(),
"Request fields size not equal to field ready bitset size when "
"check search");
Expand All @@ -826,13 +874,19 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const {
}
}

SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id)
: field_data_ready_bitset_(schema->size()),
SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema,
IndexMetaPtr index_meta,
const SegcoreConfig& segcore_config,
int64_t segment_id)
: segcore_config_(segcore_config),
field_data_ready_bitset_(schema->size()),
index_ready_bitset_(schema->size()),
binlog_index_bitset_(schema->size()),
scalar_indexings_(schema->size()),
insert_record_(*schema, MAX_ROW_COUNT),
schema_(schema),
id_(segment_id) {
id_(segment_id),
col_index_meta_(index_meta) {
}

SegmentSealedImpl::~SegmentSealedImpl() {
Expand Down Expand Up @@ -1135,7 +1189,8 @@ SegmentSealedImpl::bulk_subscript(FieldId field_id,
bool
SegmentSealedImpl::HasIndex(FieldId field_id) const {
std::shared_lock lck(mutex_);
return get_bit(index_ready_bitset_, field_id);
return get_bit(index_ready_bitset_, field_id) |
get_bit(binlog_index_bitset_, field_id);
}

bool
Expand All @@ -1154,7 +1209,8 @@ SegmentSealedImpl::HasRawData(int64_t field_id) const {
auto fieldID = FieldId(field_id);
const auto& field_meta = schema_->operator[](fieldID);
if (datatype_is_vector(field_meta.get_data_type())) {
if (get_bit(index_ready_bitset_, fieldID)) {
if (get_bit(index_ready_bitset_, fieldID) |
get_bit(binlog_index_bitset_, fieldID)) {
AssertInfo(vector_indexings_.is_ready(fieldID),
"vector index is not ready");
auto field_indexing = vector_indexings_.get_field_indexing(fieldID);
Expand Down Expand Up @@ -1303,4 +1359,63 @@ SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk,
bitset_chunk |= mask;
}

bool
SegmentSealedImpl::generate_binlog_index(const FieldId field_id) {
if (col_index_meta_ == nullptr)
return false;
auto& field_meta = schema_->operator[](field_id);

if (field_meta.is_vector() &&
field_meta.get_data_type() == DataType::VECTOR_FLOAT &&
segcore_config_.get_enable_interim_segment_index()) {
try {
auto& field_index_meta =
col_index_meta_->GetFieldIndexMeta(field_id);
auto& index_params = field_index_meta.GetIndexParams();
if (index_params.find(knowhere::meta::INDEX_TYPE) ==
index_params.end() ||
index_params.at(knowhere::meta::INDEX_TYPE) ==
knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
return false;
}
// get binlog data and meta
auto row_count = num_rows_.value();
auto dim = field_meta.get_dim();
auto vec_data = fields_.at(field_id);
auto dataset =
knowhere::GenDataSet(row_count, dim, (void*)vec_data->Data());
dataset->SetIsOwner(false);
// generate index params
auto field_binlog_config = std::unique_ptr<VecIndexConfig>(
new VecIndexConfig(row_count,
field_index_meta,
segcore_config_,
SegmentType::Sealed));
auto build_config = field_binlog_config->GetBuildBaseParams();
build_config[knowhere::meta::DIM] = std::to_string(dim);
build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1);
auto index_metric = field_binlog_config->GetMetricType();

index::IndexBasePtr vec_index =
std::make_unique<index::VectorMemIndex>(
field_binlog_config->GetIndexType(),
index_metric,
knowhere::Version::GetCurrentVersion().VersionNumber());
vec_index->BuildWithDataset(dataset, build_config);
vector_indexings_.append_field_indexing(
field_id, index_metric, std::move(vec_index));
{
std::unique_lock lck(mutex_);
vec_binlog_config_[field_id] = std::move(field_binlog_config);
set_bit(binlog_index_bitset_, field_id, true);
}
return true;
} catch (std::exception& e) {
return false;
}
} else {
return false;
}
}

} // namespace milvus::segcore
Loading

0 comments on commit 8fd38c8

Please sign in to comment.