Skip to content

Commit

Permalink
enhance: simplify segment interface by removing chunk_view method
Browse files Browse the repository at this point in the history
Signed-off-by: Ted Xu <[email protected]>
  • Loading branch information
tedxu committed Dec 6, 2024
1 parent 8636392 commit 107982c
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 95 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
#include "segcore/SegmentSealedImpl.h"
#include "query/Utils.h"

namespace milvus {
namespace exec {
namespace milvus::exec {

void
SearchGroupBy(const std::vector<std::shared_ptr<VectorIterator>>& iterators,
Expand Down Expand Up @@ -211,5 +210,4 @@ GroupIteratorResult(const std::shared_ptr<VectorIterator>& iterator,
}
}

} // namespace exec
} // namespace milvus
} // namespace milvus::exec
19 changes: 7 additions & 12 deletions internal/core/src/exec/operator/groupby/SearchGroupByOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <memory>
#include "common/QueryInfo.h"
#include "knowhere/index/index_node.h"
#include "segcore/SegmentInterface.h"
Expand All @@ -25,8 +26,7 @@
#include "common/Span.h"
#include "query/Utils.h"

namespace milvus {
namespace exec {
namespace milvus::exec {

template <typename T>
class DataGetter {
Expand Down Expand Up @@ -58,17 +58,15 @@ template <typename T>
class SealedDataGetter : public DataGetter<T> {
private:
std::shared_ptr<Span<T>> field_data_;
std::shared_ptr<std::vector<std::string_view>> str_field_data_;
std::shared_ptr<Span<std::string>> str_field_data_;
const index::ScalarIndex<T>* field_index_;

public:
SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id) {
if (segment.HasFieldData(field_id)) {
if constexpr (std::is_same_v<T, std::string>) {
str_field_data_ =
std::make_shared<std::vector<std::string_view>>(
segment.chunk_view<std::string_view>(field_id, 0)
.first);
str_field_data_ = std::make_shared<Span<std::string>>(
segment.chunk_data<std::string>(field_id, 0));
} else {
auto span = segment.chunk_data<T>(field_id, 0);
field_data_ = std::make_shared<Span<T>>(
Expand All @@ -94,9 +92,7 @@ class SealedDataGetter : public DataGetter<T> {
Get(int64_t idx) const {
if (field_data_ || str_field_data_) {
if constexpr (std::is_same_v<T, std::string>) {
std::string_view str_val_view =
str_field_data_->operator[](idx);
return std::string(str_val_view.data(), str_val_view.length());
return str_field_data_->data()[idx];
}
return field_data_->operator[](idx);
} else {
Expand Down Expand Up @@ -250,5 +246,4 @@ GroupIteratorResult(const std::shared_ptr<VectorIterator>& iterator,
std::vector<float>& distances,
const knowhere::MetricType& metrics_type);

} // namespace exec
} // namespace milvus
} // namespace milvus::exec
15 changes: 0 additions & 15 deletions internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,21 +812,6 @@ ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id,
return field_data->get_span_base(0);
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id,
int64_t chunk_id) const {
std::shared_lock lck(mutex_);
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
auto& field_meta = schema_->operator[](field_id);
if (auto it = fields_.find(field_id); it != fields_.end()) {
auto& field_data = it->second;
return field_data->StringViews(chunk_id);
}
PanicInfo(ErrorCode::UnexpectedError,
"chunk_view_impl only used for variable column field ");
}

const index::IndexBase*
ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id,
int64_t chunk_id) const {
Expand Down
3 changes: 0 additions & 3 deletions internal/core/src/segcore/ChunkedSegmentSealedImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,6 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
SpanBase
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
int64_t chunk_id,
Expand Down
28 changes: 14 additions & 14 deletions internal/core/src/segcore/SegmentChunkReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ SegmentChunkReader::GetChunkDataAccessor<std::string>(
return chunk_data[current_chunk_pos++];
};
} else {
auto chunk_info =
segment_->chunk_view<std::string_view>(field_id, current_chunk_id);

auto span =
segment_->chunk_data<std::string>(field_id, current_chunk_id);
auto chunk_data = span.data();
auto chunk_valid_data = span.valid_data();
auto current_chunk_size =
segment_->chunk_size(field_id, current_chunk_id);
return [=,
Expand All @@ -133,15 +134,14 @@ SegmentChunkReader::GetChunkDataAccessor<std::string>(
if (current_chunk_pos >= current_chunk_size) {
current_chunk_id++;
current_chunk_pos = 0;
chunk_info = segment_->chunk_view<std::string_view>(
field_id, current_chunk_id);
auto span = segment_->chunk_data<std::string>(field_id,
current_chunk_id);
chunk_data = span.data();
chunk_valid_data = span.valid_data();
current_chunk_size =
segment_->chunk_size(field_id, current_chunk_id);
}
auto chunk_data = chunk_info.first;
auto chunk_valid_data = chunk_info.second;
if (current_chunk_pos < chunk_valid_data.size() &&
!chunk_valid_data[current_chunk_pos]) {
if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
current_chunk_pos++;
return std::nullopt;
}
Expand Down Expand Up @@ -250,11 +250,11 @@ SegmentChunkReader::GetChunkDataAccessor<std::string>(FieldId field_id,
};
} else {
auto chunk_info =
segment_->chunk_view<std::string_view>(field_id, chunk_id);
return [chunk_data = std::move(chunk_info.first),
chunk_valid_data = std::move(chunk_info.second)](
int i) -> const data_access_type {
if (i < chunk_valid_data.size() && !chunk_valid_data[i]) {
segment_->chunk_data<std::string_view>(field_id, chunk_id);
return [chunk_data = chunk_info.data(),
chunk_valid_data =
chunk_info.valid_data()](int i) -> const data_access_type {
if (chunk_valid_data && !chunk_valid_data[i]) {
return std::nullopt;
}
return std::string(chunk_data[i]);
Expand Down
6 changes: 0 additions & 6 deletions internal/core/src/segcore/SegmentGrowingImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,6 @@ SegmentGrowingImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
return get_insert_record().get_span_base(field_id, chunk_id);
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
PanicInfo(ErrorCode::NotImplemented,
"chunk view impl not implement for growing segment");
}

int64_t
SegmentGrowingImpl::num_chunk(FieldId field_id) const {
auto size = get_insert_record().ack_responder_.GetAck();
Expand Down
3 changes: 0 additions & 3 deletions internal/core/src/segcore/SegmentGrowingImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,6 @@ class SegmentGrowingImpl : public SegmentGrowing {
SpanBase
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
int64_t chunk_id,
Expand Down
21 changes: 0 additions & 21 deletions internal/core/src/segcore/SegmentInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,23 +146,6 @@ class SegmentInternalInterface : public SegmentInterface {
return static_cast<Span<T>>(chunk_data_impl(field_id, chunk_id));
}

template <typename ViewType>
std::pair<std::vector<ViewType>, FixedVector<bool>>
chunk_view(FieldId field_id, int64_t chunk_id) const {
auto [string_views, valid_data] = chunk_view_impl(field_id, chunk_id);
if constexpr (std::is_same_v<ViewType, std::string_view>) {
return std::make_pair(std::move(string_views),
std::move(valid_data));
} else {
std::vector<ViewType> res;
res.reserve(string_views.size());
for (const auto& view : string_views) {
res.emplace_back(view);
}
return std::make_pair(res, valid_data);
}
}

template <typename ViewType>
std::pair<std::vector<ViewType>, FixedVector<bool>>
get_batch_views(FieldId field_id,
Expand Down Expand Up @@ -403,10 +386,6 @@ class SegmentInternalInterface : public SegmentInterface {
virtual SpanBase
chunk_data_impl(FieldId field_id, int64_t chunk_id) const = 0;

// internal API: return chunk string views in vector
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const = 0;

// internal API: return buffer reference to field chunk data located from start_offset
virtual std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
Expand Down
14 changes: 0 additions & 14 deletions internal/core/src/segcore/SegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,20 +782,6 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
return field_data->get_span_base(0);
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
std::shared_lock lck(mutex_);
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
auto& field_meta = schema_->operator[](field_id);
if (auto it = fields_.find(field_id); it != fields_.end()) {
auto& field_data = it->second;
return field_data->StringViews();
}
PanicInfo(ErrorCode::UnexpectedError,
"chunk_view_impl only used for variable column field ");
}

const index::IndexBase*
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(),
Expand Down
3 changes: 0 additions & 3 deletions internal/core/src/segcore/SegmentSealedImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,6 @@ class SegmentSealedImpl : public SegmentSealed {
SpanBase
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
int64_t chunk_id,
Expand Down

0 comments on commit 107982c

Please sign in to comment.