Skip to content

Commit

Permalink
support string type in span
Browse files Browse the repository at this point in the history
Signed-off-by: Ted Xu <[email protected]>
  • Loading branch information
tedxu committed Dec 6, 2024
1 parent 107982c commit c12738b
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 23 deletions.
20 changes: 20 additions & 0 deletions internal/core/src/common/Span.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cassert>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
Expand All @@ -43,6 +44,24 @@ class SpanBase {
element_sizeof_(element_sizeof) {
}

// Builds a span over `row_count` elements starting at `data`, and records
// whether this span is responsible for releasing that buffer.
//
//   data            pointer to the first element; stored as-is.
//   valid_data      per-row bool array, stored as-is (presumably validity
//                   flags, one per row — TODO confirm against callers).
//   row_count       number of rows covered by the span.
//   element_sizeof  size in bytes of a single element.
//   owned_data      when true, ~SpanBase() frees `data` with delete[];
//                   when false the buffer is never released by the span.
//
// The initializer list follows the member declaration order (data_,
// owned_data_, valid_data_, row_count_, element_sizeof_). Members are always
// initialized in declaration order, so listing them that way keeps the written
// order truthful and avoids -Wreorder diagnostics.
explicit SpanBase(const void* data,
                  const bool* valid_data,
                  int64_t row_count,
                  int64_t element_sizeof,
                  bool owned_data)
    : data_(data),
      owned_data_(owned_data),
      valid_data_(valid_data),
      row_count_(row_count),
      element_sizeof_(element_sizeof) {
}

// Frees the underlying buffer, but only for spans constructed with
// owned_data == true; non-owning spans leave data_ untouched.
//
// NOTE(review): the buffer is released as a char array. The owning caller
// visible in this change (SingleChunkVariableColumn::Span) allocates it with
// `new std::string_view[]`, so the delete[] type does not match the allocated
// type — confirm this is intended or align the two sides.
// NOTE(review): no user-declared copy/move operations are visible here; if
// SpanBase can be copied, two objects would share data_ and both would
// delete it — TODO confirm how copies of an owning span are prevented.
~SpanBase() {
    if (!owned_data_) {
        return;
    }
    delete[] static_cast<const char*>(data_);
}

int64_t
row_count() const {
return row_count_;
Expand All @@ -65,6 +84,7 @@ class SpanBase {

private:
// Pointer to the first element of the spanned buffer.
const void* data_;
// True when this span must release data_ in its destructor (delete[]).
// Defaults to false, i.e. non-owning, for constructors that do not set it.
const bool owned_data_ = false;
// Per-row bool array supplied by the caller; nullptr when none was given.
// Presumably validity (non-null) flags — TODO confirm.
const bool* valid_data_{nullptr};
// Number of rows covered by the span (returned by row_count()).
int64_t row_count_;
// Size in bytes of one element.
int64_t element_sizeof_;
Expand Down
10 changes: 6 additions & 4 deletions internal/core/src/exec/operator/groupby/SearchGroupByOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ template <typename T>
class SealedDataGetter : public DataGetter<T> {
private:
std::shared_ptr<Span<T>> field_data_;
std::shared_ptr<Span<std::string>> str_field_data_;
std::shared_ptr<Span<std::string_view>> str_field_data_;
const index::ScalarIndex<T>* field_index_;

public:
SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id) {
if (segment.HasFieldData(field_id)) {
if constexpr (std::is_same_v<T, std::string>) {
str_field_data_ = std::make_shared<Span<std::string>>(
segment.chunk_data<std::string>(field_id, 0));
str_field_data_ = std::make_shared<Span<std::string_view>>(
segment.chunk_data<std::string_view>(field_id, 0));
} else {
auto span = segment.chunk_data<T>(field_id, 0);
field_data_ = std::make_shared<Span<T>>(
Expand All @@ -92,7 +92,9 @@ class SealedDataGetter : public DataGetter<T> {
Get(int64_t idx) const {
if (field_data_ || str_field_data_) {
if constexpr (std::is_same_v<T, std::string>) {
return str_field_data_->data()[idx];
std::string_view str_val_view =
str_field_data_->operator[](idx);
return std::string(str_val_view.data(), str_val_view.length());
}
return field_data_->operator[](idx);
} else {
Expand Down
20 changes: 4 additions & 16 deletions internal/core/src/mmap/Column.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,6 @@ class SingleChunkColumnBase : public ColumnBase {
"GetBatchBuffer only supported for VariableColumn");
}

virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
StringViews() const {
PanicInfo(ErrorCode::Unsupported,
"StringViews only supported for VariableColumn");
}

virtual void
AppendBatch(const FieldDataPtr data) {
size_t required_size = data_size_ + data->DataSize();
Expand Down Expand Up @@ -675,23 +669,17 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase {

SpanBase
Span() const override {
PanicInfo(ErrorCode::NotImplemented,
"span() interface is not implemented for variable column");
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
StringViews() const override {
std::vector<std::string_view> res;
res.reserve(num_rows_);
const auto res = new std::string_view[num_rows_];
char* pos = data_;
for (size_t i = 0; i < num_rows_; ++i) {
uint32_t size;
size = *reinterpret_cast<uint32_t*>(pos);
pos += sizeof(uint32_t);
res.emplace_back(std::string_view(pos, size));
res[i] = std::string_view(pos, size);
pos += size;
}
return std::make_pair(res, valid_data_);
return SpanBase(
res, valid_data_.data(), num_rows_, sizeof(std::string_view), true);
}

[[nodiscard]] std::vector<ViewType>
Expand Down
6 changes: 3 additions & 3 deletions internal/core/src/segcore/SegmentChunkReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ SegmentChunkReader::GetChunkDataAccessor<std::string>(
};
} else {
auto span =
segment_->chunk_data<std::string>(field_id, current_chunk_id);
segment_->chunk_data<std::string_view>(field_id, current_chunk_id);
auto chunk_data = span.data();
auto chunk_valid_data = span.valid_data();
auto current_chunk_size =
Expand All @@ -134,8 +134,8 @@ SegmentChunkReader::GetChunkDataAccessor<std::string>(
if (current_chunk_pos >= current_chunk_size) {
current_chunk_id++;
current_chunk_pos = 0;
auto span = segment_->chunk_data<std::string>(field_id,
current_chunk_id);
auto span = segment_->chunk_data<std::string_view>(
field_id, current_chunk_id);
chunk_data = span.data();
chunk_valid_data = span.valid_data();
current_chunk_size =
Expand Down

0 comments on commit c12738b

Please sign in to comment.