Skip to content

Commit

Permalink
enhance: remove timestamp_filter after retrieve
Browse files Browse the repository at this point in the history
Signed-off-by: luzhang <[email protected]>
  • Loading branch information
luzhang committed Aug 2, 2024
1 parent b4d0f4d commit 033d58c
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 140 deletions.
11 changes: 4 additions & 7 deletions internal/core/src/exec/expression/TermExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ PhyTermFilterExpr::CanSkipSegment() {
if (segment_->type() == SegmentType::Sealed &&
skip_index.CanSkipBinaryRange<T>(field_id_, 0, min, max, true, true)) {
cached_bits_.resize(active_count_, false);
cached_offsets_ = std::make_shared<ColumnVector>(DataType::INT64, 0);
cached_offsets_inited_ = true;
return true;
}
Expand Down Expand Up @@ -178,14 +177,9 @@ PhyTermFilterExpr::InitPkCacheOffset() {
auto [uids, seg_offsets] =
segment_->search_ids(*id_array, query_timestamp_);
cached_bits_.resize(active_count_, false);
cached_offsets_ =
std::make_shared<ColumnVector>(DataType::INT64, seg_offsets.size());
int64_t* cached_offsets_ptr = (int64_t*)cached_offsets_->GetRawData();
int i = 0;
for (const auto& offset : seg_offsets) {
auto _offset = (int64_t)offset.get();
cached_bits_[_offset] = true;
cached_offsets_ptr[i++] = _offset;
}
cached_offsets_inited_ = true;
}
Expand Down Expand Up @@ -214,7 +208,10 @@ PhyTermFilterExpr::ExecPkTermImpl() {
}

if (use_cache_offsets_) {
std::vector<VectorPtr> vecs{res_vec, cached_offsets_};
auto cache_bits_copy = cached_bits_.clone();
std::vector<VectorPtr> vecs{
res_vec,
std::make_shared<ColumnVector>(std::move(cache_bits_copy))};
return std::make_shared<RowVector>(vecs);
} else {
return res_vec;
Expand Down
36 changes: 1 addition & 35 deletions internal/core/src/query/generated/ExecPlanNodeVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,6 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
return ret;
}

void
SetExprCacheOffsets(std::vector<int64_t>&& offsets) {
expr_cached_pk_id_offsets_ = std::move(offsets);
}

void
AddExprCacheOffset(int64_t offset) {
expr_cached_pk_id_offsets_.push_back(offset);
}

const std::vector<int64_t>&
GetExprCacheOffsets() {
return expr_cached_pk_id_offsets_;
}

void
SetExprUsePkIndex(bool use_pk_index) {
expr_use_pk_index_ = use_pk_index;
Expand All @@ -103,29 +88,11 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
return expr_use_pk_index_;
}

void
ExecuteExprNodeInternal(
const std::shared_ptr<milvus::plan::PlanNode>& plannode,
const milvus::segcore::SegmentInternalInterface* segment,
int64_t active_count,
BitsetType& result,
bool& cache_offset_getted,
std::vector<int64_t>& cache_offset);

void
ExecuteExprNode(const std::shared_ptr<milvus::plan::PlanNode>& plannode,
const milvus::segcore::SegmentInternalInterface* segment,
int64_t active_count,
BitsetType& result) {
bool get_cache_offset;
std::vector<int64_t> cache_offsets;
ExecuteExprNodeInternal(plannode,
segment,
active_count,
result,
get_cache_offset,
cache_offsets);
}
BitsetType& result);

private:
template <typename VectorType>
Expand All @@ -140,6 +107,5 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
SearchResultOpt search_result_opt_;
RetrieveResultOpt retrieve_result_opt_;
bool expr_use_pk_index_ = false;
std::vector<int64_t> expr_cached_pk_id_offsets_;
};
} // namespace milvus::query
1 change: 0 additions & 1 deletion internal/core/src/query/visitors/ExecExprVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2547,7 +2547,6 @@ ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
// If enable plan_visitor pk index cache, pass offsets_ to it
if (plan_visitor_ != nullptr) {
plan_visitor_->SetExprUsePkIndex(true);
plan_visitor_->SetExprCacheOffsets(std::move(cached_offsets));
}
AssertInfo(bitset.size() == row_count_,
"[ExecExprVisitor]Size of results not equal row count");
Expand Down
44 changes: 13 additions & 31 deletions internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,11 @@ empty_search_result(int64_t num_queries, SearchInfo& search_info) {
}

void
ExecPlanNodeVisitor::ExecuteExprNodeInternal(
ExecPlanNodeVisitor::ExecuteExprNode(
const std::shared_ptr<milvus::plan::PlanNode>& plannode,
const milvus::segcore::SegmentInternalInterface* segment,
int64_t active_count,
BitsetType& bitset_holder,
bool& cache_offset_getted,
std::vector<int64_t>& cache_offset) {
BitsetType& bitset_holder) {
bitset_holder.clear();
LOG_DEBUG("plannode: {}, active_count: {}, timestamp: {}",
plannode->ToString(),
Expand All @@ -94,6 +92,7 @@ ExecPlanNodeVisitor::ExecuteExprNodeInternal(

auto task =
milvus::exec::Task::Create(DEFAULT_TASK_ID, plan, 0, query_context);
bool cache_offset_getted = false;
for (;;) {
auto result = task->Next();
if (!result) {
Expand All @@ -115,20 +114,17 @@ ExecPlanNodeVisitor::ExecuteExprNodeInternal(

if (!cache_offset_getted) {
// offset cache only get once because not support iterator batch
auto cache_offset_vec =
auto cache_bits_vec =
std::dynamic_pointer_cast<ColumnVector>(row->child(1));
// If get empty cached offsets. mean no record hits in this segment
TargetBitmapView view(cache_bits_vec->GetRawData(),
cache_bits_vec->size());
// If get empty cached bits. mean no record hits in this segment
// no need to get next batch.
if (cache_offset_vec->size() == 0) {
if (view.count() == 0) {
bitset_holder.resize(active_count);
task->RequestCancel();
break;
}
auto cache_offset_vec_ptr =
(int64_t*)(cache_offset_vec->GetRawData());
for (size_t i = 0; i < cache_offset_vec->size(); ++i) {
cache_offset.push_back(cache_offset_vec_ptr[i]);
}
cache_offset_getted = true;
}
} else {
Expand Down Expand Up @@ -281,17 +277,12 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
bitset_holder.resize(active_count);
}

// This flag used to indicate whether to get offset from expr module that
// speeds up mvcc filter in the next interface: "timestamp_filter"
bool get_cache_offset = false;
std::vector<int64_t> cache_offsets;
if (node.filter_plannode_.has_value()) {
ExecuteExprNodeInternal(node.filter_plannode_.value(),
segment,
active_count,
bitset_holder,
get_cache_offset,
cache_offsets);
ExecuteExprNode(node.filter_plannode_.value(),
segment,
active_count,
bitset_holder);
bitset_holder.flip();
}

Expand All @@ -313,16 +304,7 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
}

retrieve_result.total_data_cnt_ = bitset_holder.size();
bool false_filtered_out = false;
if (get_cache_offset) {
segment->timestamp_filter(bitset_holder, cache_offsets, timestamp_);
} else {
bitset_holder.flip();
false_filtered_out = true;
segment->timestamp_filter(bitset_holder, timestamp_);
}
auto results_pair =
segment->find_first(node.limit_, bitset_holder, false_filtered_out);
auto results_pair = segment->find_first(node.limit_, bitset_holder);
retrieve_result.result_offsets_ = std::move(results_pair.first);
retrieve_result.has_more_result = results_pair.second;
retrieve_result_opt_ = std::move(retrieve_result);
Expand Down
38 changes: 11 additions & 27 deletions internal/core/src/segcore/InsertRecord.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ class OffsetMap {
using OffsetType = int64_t;
// TODO: in fact, we can retrieve the pk here. Not sure which way is more efficient.
virtual std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const = 0;
find_first(int64_t limit, const BitsetType& bitset) const = 0;

virtual void
clear() = 0;
Expand Down Expand Up @@ -169,9 +167,7 @@ class OffsetOrderedMap : public OffsetMap {
}

std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const override {
find_first(int64_t limit, const BitsetType& bitset) const override {
std::shared_lock<std::shared_mutex> lck(mtx_);

if (limit == Unlimited || limit == NoLimit) {
Expand All @@ -180,7 +176,7 @@ class OffsetOrderedMap : public OffsetMap {

// TODO: we can't retrieve pk by offset very conveniently.
// Selectivity should be done outside.
return find_first_by_index(limit, bitset, false_filtered_out);
return find_first_by_index(limit, bitset);
}

void
Expand All @@ -191,15 +187,10 @@ class OffsetOrderedMap : public OffsetMap {

private:
std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first_by_index(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const {
find_first_by_index(int64_t limit, const BitsetType& bitset) const {
int64_t hit_num = 0; // avoid counting the number everytime.
int64_t cnt = bitset.count();
auto size = bitset.size();
if (!false_filtered_out) {
cnt = size - bitset.count();
}
int64_t cnt = size - bitset.count();
limit = std::min(limit, cnt);
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
Expand All @@ -214,7 +205,7 @@ class OffsetOrderedMap : public OffsetMap {
continue;
}

if (!(bitset[seg_offset] ^ false_filtered_out)) {
if (!bitset[seg_offset]) {
seg_offsets.push_back(seg_offset);
hit_num++;
// PK hit, no need to continue traversing offsets with the same PK.
Expand Down Expand Up @@ -346,9 +337,7 @@ class OffsetOrderedArray : public OffsetMap {
}

std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const override {
find_first(int64_t limit, const BitsetType& bitset) const override {
check_search();

if (limit == Unlimited || limit == NoLimit) {
Expand All @@ -357,7 +346,7 @@ class OffsetOrderedArray : public OffsetMap {

// TODO: we can't retrieve pk by offset very conveniently.
// Selectivity should be done outside.
return find_first_by_index(limit, bitset, false_filtered_out);
return find_first_by_index(limit, bitset);
}

void
Expand All @@ -368,15 +357,10 @@ class OffsetOrderedArray : public OffsetMap {

private:
std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first_by_index(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const {
find_first_by_index(int64_t limit, const BitsetType& bitset) const {
int64_t hit_num = 0; // avoid counting the number everytime.
int64_t cnt = bitset.count();
auto size = bitset.size();
if (!false_filtered_out) {
cnt = size - bitset.count();
}
int64_t cnt = size - bitset.count();
auto more_hit_than_limit = cnt > limit;
limit = std::min(limit, cnt);
std::vector<int64_t> seg_offsets;
Expand All @@ -389,7 +373,7 @@ class OffsetOrderedArray : public OffsetMap {
continue;
}

if (!(bitset[seg_offset] ^ false_filtered_out)) {
if (!bitset[seg_offset]) {
seg_offsets.push_back(seg_offset);
hit_num++;
}
Expand Down
7 changes: 2 additions & 5 deletions internal/core/src/segcore/SegmentGrowingImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,11 +299,8 @@ class SegmentGrowingImpl : public SegmentGrowing {
}

std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const override {
return insert_record_.pk2offset_->find_first(
limit, bitset, false_filtered_out);
find_first(int64_t limit, const BitsetType& bitset) const override {
return insert_record_.pk2offset_->find_first(limit, bitset);
}

bool
Expand Down
4 changes: 1 addition & 3 deletions internal/core/src/segcore/SegmentInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,7 @@ class SegmentInternalInterface : public SegmentInterface {
* @return All candidates offsets.
*/
virtual std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const = 0;
find_first(int64_t limit, const BitsetType& bitset) const = 0;

void
FillTargetEntry(
Expand Down
7 changes: 2 additions & 5 deletions internal/core/src/segcore/SegmentSealedImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,8 @@ class SegmentSealedImpl : public SegmentSealed {
const Timestamp* timestamps) override;

std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit,
const BitsetType& bitset,
bool false_filtered_out) const override {
return insert_record_.pk2offset_->find_first(
limit, bitset, false_filtered_out);
find_first(int64_t limit, const BitsetType& bitset) const override {
return insert_record_.pk2offset_->find_first(limit, bitset);
}

// Calculate: output[i] = Vec[seg_offset[i]]
Expand Down
Loading

0 comments on commit 033d58c

Please sign in to comment.