-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
enhance: refactor delete mvcc function
Signed-off-by: luzhang <[email protected]>
- Loading branch information
luzhang
committed
Nov 28, 2024
1 parent
e247ff9
commit 9d2606d
Showing
17 changed files
with
671 additions
and
316 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
// Copyright (C) 2019-2020 Zilliz. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software distributed under the License | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
// or implied. See the License for the specific language governing permissions and limitations under the License | ||
|
||
#include "segcore/DeletedRecord.h" | ||
#include "segcore/SegmentInterface.h" | ||
|
||
namespace milvus::segcore { | ||
|
||
// Explicit instantiation definitions for both variants of DeletedRecord
// (is_sealed == true and is_sealed == false).
// NOTE(review): these appear before the member function definitions in this
// file. An explicit instantiation definition of a class template only
// instantiates the members already defined at that point, so some toolchains
// may not emit the members defined below -- confirm, and consider moving these
// two lines to the end of the namespace.
template class DeletedRecord<true>; | ||
template class DeletedRecord<false>; | ||
|
||
template <bool is_sealed> | ||
void | ||
DeletedRecord<is_sealed>::LoadPush(const std::vector<PkType>& pks, | ||
const Timestamp* timestamps) { | ||
InternalPush(pks, timestamps); | ||
|
||
SortedDeleteList::Accessor accessor(deleted_lists_); | ||
auto* last = accessor.last(); | ||
Assert(last != nullptr); | ||
max_load_timestamp_ = last->first; | ||
} | ||
|
||
template <bool is_sealed> | ||
void | ||
DeletedRecord<is_sealed>::StreamPush(const std::vector<PkType>& pks, | ||
const Timestamp* timestamps) { | ||
InternalPush(pks, timestamps); | ||
|
||
bool can_dump = timestamps[0] >= max_load_timestamp_; | ||
if (can_dump) { | ||
DumpSnapshot(); | ||
} | ||
} | ||
|
||
template <bool is_sealed> | ||
void | ||
DeletedRecord<is_sealed>::InternalPush(const std::vector<PkType>& pks, | ||
const Timestamp* timestamps) { | ||
int64_t removed_num = 0; | ||
int64_t mem_add = 0; | ||
|
||
SortedDeleteList::Accessor accessor(deleted_lists_); | ||
for (size_t i = 0; i < pks.size(); ++i) { | ||
auto deleted_pk = pks[i]; | ||
auto deleted_ts = timestamps[i]; | ||
std::vector<SegOffset> offsets; | ||
if (segment_) { | ||
offsets = std::move(segment_->search_pk(deleted_pk, deleted_ts)); | ||
} else { | ||
// only for testing | ||
offsets = | ||
std::move(insert_record_->search_pk(deleted_pk, deleted_ts)); | ||
} | ||
for (auto& offset : offsets) { | ||
auto row_id = offset.get(); | ||
// if alreay deleted, no need to add new record | ||
if (deleted_mask_.size() > row_id && deleted_mask_[row_id]) { | ||
continue; | ||
} | ||
// if insert record and delete record is same timestamp, | ||
// delete not take effect on this record. | ||
if (deleted_ts == insert_record_->timestamps_[row_id]) { | ||
continue; | ||
} | ||
accessor.insert(std::make_pair(deleted_ts, row_id)); | ||
if constexpr (is_sealed) { | ||
Assert(deleted_mask_.size() > 0); | ||
deleted_mask_.set(row_id); | ||
} else { | ||
// need to add mask size firstly for growing segment | ||
deleted_mask_.resize(insert_record_->size()); | ||
deleted_mask_.set(row_id); | ||
} | ||
removed_num++; | ||
mem_add += DELETE_PAIR_SIZE; | ||
} | ||
} | ||
|
||
n_.fetch_add(removed_num); | ||
mem_size_.fetch_add(mem_add); | ||
} | ||
|
||
template <bool is_sealed> | ||
void | ||
DeletedRecord<is_sealed>::DumpSnapshot() { | ||
SortedDeleteList::Accessor accessor(deleted_lists_); | ||
auto total_size = accessor.size(); | ||
auto dumped_size = | ||
snapshots_.empty() ? 0 : snapshots_.size() * DUMP_BATCH_SIZE; | ||
|
||
while (total_size - dumped_size > DUMP_BATCH_SIZE) { | ||
int32_t bitsize = 0; | ||
if constexpr (is_sealed) { | ||
bitsize = sealed_row_count_; | ||
} else { | ||
bitsize = insert_record_->size(); | ||
} | ||
BitsetType bitmap(bitsize, false); | ||
|
||
auto it = accessor.begin(); | ||
Timestamp last_dump_ts = 0; | ||
if (!snapshots_.empty()) { | ||
it = snap_next_iter_.back(); | ||
last_dump_ts = snapshots_.back().first; | ||
bitmap.inplace_and_with_count(snapshots_.back().second, | ||
snapshots_.back().second.size()); | ||
} | ||
|
||
while (total_size - dumped_size > DUMP_BATCH_SIZE && | ||
it != accessor.end()) { | ||
Timestamp dump_ts = 0; | ||
|
||
for (auto size = 0; size < DUMP_BATCH_SIZE; ++it, ++size) { | ||
bitmap.set(it->second); | ||
if (size == DUMP_BATCH_SIZE - 1) { | ||
dump_ts = it->first; | ||
} | ||
} | ||
|
||
{ | ||
std::unique_lock<std::shared_mutex> lock(snap_lock_); | ||
if (dump_ts == last_dump_ts) { | ||
// only update | ||
snapshots_.back().second = std::move(bitmap.clone()); | ||
snap_next_iter_.back() = it; | ||
} else { | ||
// add new snapshot | ||
snapshots_.push_back( | ||
std::make_pair(dump_ts, std::move(bitmap.clone()))); | ||
Assert(it != accessor.end() && it.good()); | ||
snap_next_iter_.push_back(it); | ||
} | ||
|
||
LOG_INFO( | ||
"dump delete record snapshot at ts: {}, cursor: {}, " | ||
"current snapshot size: {}", | ||
dump_ts, | ||
dumped_size + DUMP_BATCH_SIZE, | ||
snapshots_.size()); | ||
last_dump_ts = dump_ts; | ||
// std::cout | ||
// << fmt::format( | ||
// "dump delete record snapshot at ts: {}, cursor: {}, " | ||
// "current snapshot size: {}", | ||
// dump_ts, | ||
// dumped_size + DUMP_BATCH_SIZE, | ||
// snapshots_.size()) | ||
// << std::endl; | ||
} | ||
|
||
dumped_size += DUMP_BATCH_SIZE; | ||
} | ||
} | ||
} | ||
|
||
template <bool is_sealed> | ||
void | ||
DeletedRecord<is_sealed>::Query(BitsetTypeView& bitset, | ||
int64_t insert_barrier, | ||
Timestamp query_timestamp) { | ||
Assert(bitset.size() == insert_barrier); | ||
|
||
SortedDeleteList::Accessor accessor(deleted_lists_); | ||
if (accessor.size() == 0) { | ||
return; | ||
} | ||
|
||
// try use snapshot to skip iterations | ||
bool hit_snapshot = false; | ||
SortedDeleteList::iterator next_iter; | ||
if (!snapshots_.empty()) { | ||
int loc = snapshots_.size() - 1; | ||
// find last meeted snapshot | ||
{ | ||
std::shared_lock<std::shared_mutex> lock(snap_lock_); | ||
while (snapshots_[loc].first > query_timestamp && loc >= 0) { | ||
loc--; | ||
} | ||
if (loc >= 0) { | ||
next_iter = snap_next_iter_[loc]; | ||
Assert(snapshots_[loc].second.size() >= bitset.size()); | ||
bitset.inplace_and_with_count(snapshots_[loc].second, | ||
bitset.size()); | ||
hit_snapshot = true; | ||
} | ||
} | ||
} | ||
|
||
auto start_iter = hit_snapshot ? next_iter : accessor.begin(); | ||
auto end_iter = accessor.lower_bound(std::make_pair(query_timestamp, 0)); | ||
|
||
auto it = start_iter; | ||
while (it != accessor.end() && it != end_iter) { | ||
AssertInfo(it->second <= insert_barrier, | ||
"delete record beyond insert barrier, {} : {}", | ||
it->second, | ||
insert_barrier); | ||
bitset.set(it->second); | ||
it++; | ||
} | ||
while (it != accessor.end() && it->first == query_timestamp) { | ||
AssertInfo(it->second <= insert_barrier, | ||
"delete record beyond insert barrier, {} : {}", | ||
it->second, | ||
insert_barrier); | ||
bitset.set(it->second); | ||
it++; | ||
} | ||
} | ||
|
||
} // namespace milvus::segcore |
Oops, something went wrong.