From 55ae720390b0da3f043c8b844f2e9bb188ade312 Mon Sep 17 00:00:00 2001 From: Ted Xu Date: Fri, 6 Dec 2024 18:21:35 +0800 Subject: [PATCH] support string type in span Signed-off-by: Ted Xu --- internal/core/src/common/Span.h | 20 +++++++++++++++++++ .../operator/groupby/SearchGroupByOperator.h | 10 ++++++---- internal/core/src/mmap/Column.h | 20 ++++--------------- .../core/src/segcore/SegmentChunkReader.cpp | 6 +++--- 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/internal/core/src/common/Span.h b/internal/core/src/common/Span.h index 3334b8b44e72e..afec85e26f027 100644 --- a/internal/core/src/common/Span.h +++ b/internal/core/src/common/Span.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -43,6 +44,24 @@ class SpanBase { element_sizeof_(element_sizeof) { } + explicit SpanBase(const void* data, + const bool* valid_data, + int64_t row_count, + int64_t element_sizeof, + bool owned_data) + : data_(data), + valid_data_(valid_data), + row_count_(row_count), + element_sizeof_(element_sizeof), + owned_data_(owned_data) { + } + + ~SpanBase() { + if (owned_data_) { + delete[] static_cast(data_); + } + } + int64_t row_count() const { return row_count_; @@ -65,6 +84,7 @@ class SpanBase { private: const void* data_; + const bool owned_data_ = false; const bool* valid_data_{nullptr}; int64_t row_count_; int64_t element_sizeof_; diff --git a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h index 64c5087fd81c8..9c97b3a3e61e7 100644 --- a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h +++ b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h @@ -58,15 +58,15 @@ template class SealedDataGetter : public DataGetter { private: std::shared_ptr> field_data_; - std::shared_ptr> str_field_data_; + std::shared_ptr> str_field_data_; const index::ScalarIndex* field_index_; public: SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id) { if (segment.HasFieldData(field_id)) { if constexpr (std::is_same_v) { - str_field_data_ = std::make_shared>( - segment.chunk_data(field_id, 0)); + str_field_data_ = std::make_shared>( + segment.chunk_data(field_id, 0)); } else { auto span = segment.chunk_data(field_id, 0); field_data_ = std::make_shared>( @@ -92,7 +92,9 @@ class SealedDataGetter : public DataGetter { Get(int64_t idx) const { if (field_data_ || str_field_data_) { if constexpr (std::is_same_v) { - return str_field_data_->data()[idx]; + std::string_view str_val_view = + str_field_data_->operator[](idx); + return std::string(str_val_view.data(), str_val_view.length()); } return field_data_->operator[](idx); } else { diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index b50a582e90312..84f336657b8e8 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -317,12 +317,6 @@ class SingleChunkColumnBase : public ColumnBase { "GetBatchBuffer only supported for VariableColumn"); } - virtual std::pair, FixedVector> - StringViews() const { - PanicInfo(ErrorCode::Unsupported, - "StringViews only supported for VariableColumn"); - } - virtual void AppendBatch(const FieldDataPtr data) { size_t required_size = data_size_ + data->DataSize(); @@ -675,23 +669,17 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase { SpanBase Span() const override { - PanicInfo(ErrorCode::NotImplemented, - "span() interface is not implemented for variable column"); - } - - std::pair, FixedVector> - StringViews() const override { - std::vector res; - res.reserve(num_rows_); + const auto res = new std::string_view[num_rows_]; char* pos = data_; for (size_t i = 0; i < num_rows_; ++i) { uint32_t size; size = *reinterpret_cast(pos); pos += sizeof(uint32_t); - res.emplace_back(std::string_view(pos, size)); + res[i] = std::string_view(pos, size); pos += size; } - return std::make_pair(res, valid_data_); + return SpanBase( + res, valid_data_.data(), num_rows_, sizeof(std::string_view), true); } [[nodiscard]] std::vector diff --git a/internal/core/src/segcore/SegmentChunkReader.cpp b/internal/core/src/segcore/SegmentChunkReader.cpp index 744c2df22396e..51a3498d50dac 100644 --- a/internal/core/src/segcore/SegmentChunkReader.cpp +++ b/internal/core/src/segcore/SegmentChunkReader.cpp @@ -123,7 +123,7 @@ SegmentChunkReader::GetChunkDataAccessor( }; } else { auto span = - segment_->chunk_data(field_id, current_chunk_id); + segment_->chunk_data(field_id, current_chunk_id); auto chunk_data = span.data(); auto chunk_valid_data = span.valid_data(); auto current_chunk_size = @@ -134,8 +134,8 @@ SegmentChunkReader::GetChunkDataAccessor( if (current_chunk_pos >= current_chunk_size) { current_chunk_id++; current_chunk_pos = 0; - auto span = segment_->chunk_data(field_id, - current_chunk_id); + auto span = segment_->chunk_data( + field_id, current_chunk_id); chunk_data = span.data(); chunk_valid_data = span.valid_data(); current_chunk_size =