diff --git a/internal/core/src/common/Span.h b/internal/core/src/common/Span.h index cc6cbf2b727ad..3334b8b44e72e 100644 --- a/internal/core/src/common/Span.h +++ b/internal/core/src/common/Span.h @@ -33,6 +33,15 @@ class SpanBase { int64_t element_sizeof) : data_(data), row_count_(row_count), element_sizeof_(element_sizeof) { } + explicit SpanBase(const void* data, + const bool* valid_data, + int64_t row_count, + int64_t element_sizeof) + : data_(data), + valid_data_(valid_data), + row_count_(row_count), + element_sizeof_(element_sizeof) { + } int64_t row_count() const { @@ -49,8 +58,14 @@ class SpanBase { return data_; } + const bool* + valid_data() const { + return valid_data_; + } + private: const void* data_; + const bool* valid_data_{nullptr}; int64_t row_count_; int64_t element_sizeof_; }; @@ -65,20 +80,22 @@ class Span>> { public: using embedded_type = T; - explicit Span(const T* data, int64_t row_count) - : data_(data), row_count_(row_count) { + explicit Span(const T* data, const bool* valid_data, int64_t row_count) + : data_(data), valid_data_(valid_data), row_count_(row_count) { } - explicit Span(std::string_view data) { - Span(data.data(), data.size()); + explicit Span(std::string_view data, bool* valid_data) + : Span(data.data(), valid_data, data.size()) { /* delegating constructor: a bare `Span(...)` statement in the body only builds a discarded temporary and leaves data_, valid_data_ and row_count_ of this object unset */ } operator SpanBase() const { - return SpanBase(data_, row_count_, sizeof(T)); + return SpanBase(data_, valid_data_, row_count_, sizeof(T)); } explicit Span(const SpanBase& base) - : Span(reinterpret_cast(base.data()), base.row_count()) { + : Span(reinterpret_cast(base.data()), + base.valid_data(), + base.row_count()) { assert(base.element_sizeof() == sizeof(T)); } @@ -92,6 +109,11 @@ class Span(FieldId field_id, return [chunk_data](int i) -> const number { return chunk_data[i]; }; } else { auto chunk_data = - segment_->chunk_view(field_id, chunk_id).data(); + segment_->chunk_view(field_id, chunk_id) + .first.data(); return [chunk_data](int i) -> const number { return std::string(chunk_data[i]); }; 
diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index faaedbea259fb..aa0d70731edf5 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -206,8 +206,11 @@ class SegmentExpr : public Expr { auto& skip_index = segment_->GetSkipIndex(); if (!skip_func || !skip_func(skip_index, field_id_, 0)) { - auto data_vec = segment_->get_batch_views( - field_id_, 0, current_data_chunk_pos_, need_size); + auto data_vec = + segment_ + ->get_batch_views( + field_id_, 0, current_data_chunk_pos_, need_size) + .first; func(data_vec.data(), need_size, res, values...); } diff --git a/internal/core/src/mmap/ChunkVector.h b/internal/core/src/mmap/ChunkVector.h index 49377217ecc8b..ed9ec7c0cee02 100644 --- a/internal/core/src/mmap/ChunkVector.h +++ b/internal/core/src/mmap/ChunkVector.h @@ -34,6 +34,10 @@ class ChunkVectorBase { get_chunk_size(int64_t index) = 0; virtual Type get_element(int64_t chunk_id, int64_t chunk_offset) = 0; + virtual int64_t + get_element_size() = 0; + virtual int64_t + get_element_offset(int64_t index) = 0; virtual ChunkViewType view_element(int64_t chunk_id, int64_t chunk_offset) = 0; int64_t @@ -166,6 +170,25 @@ class ThreadSafeChunkVector : public ChunkVectorBase { vec_.clear(); } + int64_t + get_element_size() override { + std::shared_lock lck(mutex_); + if constexpr (IsMmap && std::is_same_v) { + return sizeof(ChunkViewType); + } + return sizeof(Type); + } + + int64_t + get_element_offset(int64_t index) override { + std::shared_lock lck(mutex_); + int64_t offset = 0; /* number of elements stored in the chunks that precede chunk `index` */ + for (int i = 0; i < index; i++) { /* sum chunks [0, index); the former bound `index - 1` skipped the chunk right before `index` */ + offset += vec_[i].size(); + } + return offset; + } + SpanBase get_span(int64_t chunk_id) override { std::shared_lock lck(mutex_); diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index d7e7158c82537..a6c309f472529 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -72,6 +72,10 @@ class ColumnBase 
{ SetPaddingSize(data_type); if (IsVariableDataType(data_type)) { + if (field_meta.is_nullable()) { + nullable_ = true; + valid_data_.reserve(reserve); + } return; } @@ -214,7 +218,7 @@ class ColumnBase { ColumnBase(ColumnBase&& column) noexcept : data_(column.data_), nullable_(column.nullable_), - valid_data_(column.valid_data_), + valid_data_(std::move(column.valid_data_)), padding_(column.padding_), type_size_(column.type_size_), num_rows_(column.num_rows_), @@ -282,7 +286,7 @@ class ColumnBase { "GetBatchBuffer only supported for VariableColumn"); } - virtual std::vector + virtual std::pair, FixedVector> StringViews() const { PanicInfo(ErrorCode::Unsupported, "StringViews only supported for VariableColumn"); @@ -519,7 +523,8 @@ class Column : public ColumnBase { SpanBase Span() const override { - return SpanBase(data_, num_rows_, data_cap_size_ / num_rows_); + return SpanBase( + data_, valid_data_.data(), num_rows_, data_cap_size_ / num_rows_); } }; @@ -681,7 +686,7 @@ class VariableColumn : public ColumnBase { "span() interface is not implemented for variable column"); } - std::vector + std::pair, FixedVector> StringViews() const override { std::vector res; char* pos = data_; @@ -692,7 +697,7 @@ class VariableColumn : public ColumnBase { res.emplace_back(std::string_view(pos, size)); pos += size; } - return res; + return std::make_pair(res, valid_data_); } [[nodiscard]] std::vector @@ -861,7 +866,10 @@ class ArrayColumn : public ColumnBase { SpanBase Span() const override { - return SpanBase(views_.data(), views_.size(), sizeof(ArrayView)); + return SpanBase(views_.data(), + valid_data_.data(), + views_.size(), + sizeof(ArrayView)); } [[nodiscard]] const std::vector& @@ -885,8 +893,8 @@ class ArrayColumn : public ColumnBase { element_indices_.emplace_back(array.get_offsets()); if (nullable_) { return ColumnBase::Append(static_cast(array.data()), - array.byte_size(), - valid_data); + valid_data, + array.byte_size()); } 
ColumnBase::Append(static_cast(array.data()), array.byte_size()); diff --git a/internal/core/src/query/groupby/SearchGroupByOperator.h b/internal/core/src/query/groupby/SearchGroupByOperator.h index dc3d9d6777cea..dfc51d318ebc6 100644 --- a/internal/core/src/query/groupby/SearchGroupByOperator.h +++ b/internal/core/src/query/groupby/SearchGroupByOperator.h @@ -68,11 +68,12 @@ class SealedDataGetter : public DataGetter { if constexpr (std::is_same_v) { str_field_data_ = std::make_shared>( - segment.chunk_view(field_id, 0)); + segment.chunk_view(field_id, 0) + .first); } else { auto span = segment.chunk_data(field_id, 0); - field_data_ = - std::make_shared>(span.data(), span.row_count()); + field_data_ = std::make_shared>( + span.data(), span.valid_data(), span.row_count()); } } else if (segment.HasIndex(field_id)) { this->field_index_ = &(segment.chunk_scalar_index(field_id, 0)); diff --git a/internal/core/src/segcore/ConcurrentVector.h b/internal/core/src/segcore/ConcurrentVector.h index 37167f232a3b4..740c88d0ac82b 100644 --- a/internal/core/src/segcore/ConcurrentVector.h +++ b/internal/core/src/segcore/ConcurrentVector.h @@ -128,6 +128,12 @@ class VectorBase { virtual int64_t get_chunk_size(ssize_t chunk_index) const = 0; + virtual int64_t + get_element_size() const = 0; + + virtual int64_t + get_element_offset(ssize_t chunk_index) const = 0; + virtual ssize_t num_chunk() const = 0; @@ -245,6 +251,26 @@ class ConcurrentVectorImpl : public VectorBase { return chunks_ptr_->get_chunk_size(chunk_index); } + int64_t + get_element_size() const override { + if constexpr (is_type_entire_row) { + return chunks_ptr_->get_element_size(); + } else if constexpr (std::is_same_v || // NOLINT + std::is_same_v) { + // only for testing + PanicInfo(NotImplemented, "unimplemented"); + } else { + static_assert( + std::is_same_v); + return elements_per_row_; + } + } + + int64_t + get_element_offset(ssize_t chunk_index) const override { + return 
chunks_ptr_->get_element_offset(chunk_index); + } + // just for fun, don't use it directly const Type* get_element(ssize_t element_index) const { diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 0f6c231393e64..d590e3dfb731c 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -460,6 +460,13 @@ class ThreadSafeValidData { return data_[offset]; } + bool* + get_chunk_data(size_t offset) { + std::shared_lock lck(mutex_); + Assert(offset < length_); + return &data_[offset]; + } + private: mutable std::shared_mutex mutex_{}; FixedVector data_; @@ -770,10 +777,30 @@ struct InsertRecord { } bool - is_valid_data_exist(FieldId field_id) { + is_data_exist(FieldId field_id) const { + return data_.find(field_id) != data_.end(); + } + + bool + is_valid_data_exist(FieldId field_id) const { return valid_data_.find(field_id) != valid_data_.end(); } + SpanBase + get_span_base(FieldId field_id, int64_t chunk_id) const { + auto data = get_data_base(field_id); + if (is_valid_data_exist(field_id)) { + auto size = data->get_chunk_size(chunk_id); + auto element_offset = data->get_element_offset(chunk_id); + return SpanBase( + data->get_chunk_data(chunk_id), + get_valid_data(field_id)->get_chunk_data(element_offset), + size, + data->get_element_size()); + } + return data->get_span_base(chunk_id); + } + // append a column of scalar or sparse float vector type template void diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index dabe2f5029a57..c6cc0fa35b86d 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -345,11 +345,10 @@ SegmentGrowingImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) { SpanBase SegmentGrowingImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const { - auto vec = get_insert_record().get_data_base(field_id); - return 
vec->get_span_base(chunk_id); + return get_insert_record().get_span_base(field_id, chunk_id); } -std::vector +std::pair, FixedVector> SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { PanicInfo(ErrorCode::NotImplemented, "chunk view impl not implement for growing segment"); diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 37eaff6cb4710..8715ca488f8cf 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -76,6 +76,14 @@ class SegmentGrowingImpl : public SegmentGrowing { return id_; } + bool + is_nullable(FieldId field_id) const override { + AssertInfo(insert_record_.is_data_exist(field_id), + "Cannot find field_data with field_id: " + + std::to_string(field_id.get())); + return insert_record_.is_valid_data_exist(field_id); + }; + public: const InsertRecord<>& get_insert_record() const { @@ -318,10 +326,10 @@ class SegmentGrowingImpl : public SegmentGrowing { SpanBase chunk_data_impl(FieldId field_id, int64_t chunk_id) const override; - std::vector + std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; - BufferView + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, int64_t start_offset, diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 958fe73127c37..7d2bbab6dd95d 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -126,6 +126,9 @@ class SegmentInterface { virtual bool HasRawData(int64_t field_id) const = 0; + + virtual bool + is_nullable(FieldId field_id) const = 0; }; // internal API for DSL calculation @@ -139,23 +142,26 @@ class SegmentInternalInterface : public SegmentInterface { } template - std::vector + std::pair, FixedVector> chunk_view(FieldId field_id, int64_t chunk_id) const { - auto string_views = chunk_view_impl(field_id, 
chunk_id); + auto chunk_info = chunk_view_impl(field_id, chunk_id); + auto& string_views = chunk_info.first; /* bind by reference: the std::move calls below then take the buffers out of chunk_info instead of moving from a fresh copy */ + auto& valid_data = chunk_info.second; if constexpr (std::is_same_v) { - return std::move(string_views); + return std::make_pair(std::move(string_views), + std::move(valid_data)); } else { std::vector res; res.reserve(string_views.size()); for (const auto& view : string_views) { res.emplace_back(view); } - return res; + return std::make_pair(std::move(res), std::move(valid_data)); } } template - std::vector + std::pair, FixedVector> get_batch_views(FieldId field_id, int64_t chunk_id, int64_t start_offset, @@ -164,8 +170,9 @@ class SegmentInternalInterface : public SegmentInterface { PanicInfo(ErrorCode::Unsupported, "get chunk views not supported for growing segment"); } - BufferView buffer = + auto chunk_info = get_chunk_buffer(field_id, chunk_id, start_offset, length); + BufferView buffer = chunk_info.first; std::vector res; res.reserve(length); char* pos = buffer.data_; @@ -176,7 +183,7 @@ class SegmentInternalInterface : public SegmentInterface { res.emplace_back(ViewType(pos, size)); pos += size; } - return res; + return std::make_pair(std::move(res), std::move(chunk_info.second)); /* hand both containers to the result pair without copying */ } template @@ -352,16 +359,17 @@ class SegmentInternalInterface : public SegmentInterface { is_mmap_field(FieldId field_id) const = 0; protected: + // todo: use a unified struct for all types in growing/sealed segments to store data and valid_data. 
// internal API: return chunk_data in span virtual SpanBase chunk_data_impl(FieldId field_id, int64_t chunk_id) const = 0; // internal API: return chunk string views in vector - virtual std::vector + virtual std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const = 0; // internal API: return buffer reference to field chunk data located from start_offset - virtual BufferView + virtual std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, int64_t start_offset, diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 3d1c215b753e3..e8878ea6979bb 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -644,7 +644,7 @@ SegmentSealedImpl::size_per_chunk() const { return get_row_count(); } -BufferView +std::pair> SegmentSealedImpl::get_chunk_buffer(FieldId field_id, int64_t chunk_id, int64_t start_offset, @@ -655,7 +655,15 @@ SegmentSealedImpl::get_chunk_buffer(FieldId field_id, auto& field_meta = schema_->operator[](field_id); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; - return field_data->GetBatchBuffer(start_offset, length); + FixedVector valid_data; + if (field_data->IsNullable()) { + valid_data.reserve(length); + for (int i = 0; i < length; i++) { + valid_data.push_back(field_data->IsValid(start_offset + i)); + } + } + return std::make_pair(field_data->GetBatchBuffer(start_offset, length), + valid_data); } PanicInfo(ErrorCode::UnexpectedError, "get_chunk_buffer only used for variable column field"); @@ -680,10 +688,11 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const { auto field_data = insert_record_.get_data_base(field_id); AssertInfo(field_data->num_chunk() == 1, "num chunk not equal to 1 for sealed segment"); + // system field return field_data->get_span_base(0); } -std::vector +std::pair, FixedVector> 
SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { std::shared_lock lck(mutex_); AssertInfo(get_bit(field_data_ready_bitset_, field_id), diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 2059ba1c6d513..cf4340cc24071 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -117,6 +117,15 @@ class SegmentSealedImpl : public SegmentSealed { return insert_record_.search_pk(pk, ts); } + bool + is_nullable(FieldId field_id) const override { + auto it = fields_.find(field_id); + AssertInfo(it != fields_.end(), + "Cannot find field with field_id: " + + std::to_string(field_id.get())); + return it->second->IsNullable(); + }; + public: int64_t num_chunk_index(FieldId field_id) const override; @@ -167,10 +176,10 @@ class SegmentSealedImpl : public SegmentSealed { SpanBase chunk_data_impl(FieldId field_id, int64_t chunk_id) const override; - std::vector + std::pair, FixedVector> chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; - BufferView + std::pair> get_chunk_buffer(FieldId field_id, int64_t chunk_id, int64_t start_offset, diff --git a/internal/core/unittest/test_common.cpp b/internal/core/unittest/test_common.cpp index 73bb0a5ecf296..581a07e50600a 100644 --- a/internal/core/unittest/test_common.cpp +++ b/internal/core/unittest/test_common.cpp @@ -19,7 +19,7 @@ TEST(Common, Span) { using namespace milvus; using namespace milvus::segcore; - Span s1(nullptr, 100); + Span s1(nullptr, nullptr, 100); Span s2(nullptr, 10, 16 * sizeof(float)); SpanBase b1 = s1; SpanBase b2 = s2; diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index f4fbec25c024a..5494e0bfcf552 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -408,6 +408,20 @@ TEST(Sealed, LoadFieldData) { schema->AddDebugField("json", DataType::JSON); 
schema->AddDebugField("array", DataType::ARRAY, DataType::INT64); schema->set_primary_field_id(counter_id); + auto int8_nullable_id = + schema->AddDebugField("int8_null", DataType::INT8, true); + auto int16_nullable_id = + schema->AddDebugField("int16_null", DataType::INT16, true); + auto int32_nullable_id = + schema->AddDebugField("int32_null", DataType::INT32, true); + auto int64_nullable_id = + schema->AddDebugField("int64_null", DataType::INT64, true); + auto double_nullable_id = + schema->AddDebugField("double_null", DataType::DOUBLE, true); + auto str_nullable_id = + schema->AddDebugField("str_null", DataType::VARCHAR, true); + auto float_nullable_id = + schema->AddDebugField("float_null", DataType::FLOAT, true); auto dataset = DataGen(schema, N); @@ -500,13 +514,49 @@ TEST(Sealed, LoadFieldData) { auto chunk_span2 = segment->chunk_data(double_id, 0); auto chunk_span3 = segment->get_batch_views(str_id, 0, 0, N); + auto chunk_span4 = segment->chunk_data(int8_nullable_id, 0); + auto chunk_span5 = segment->chunk_data(int16_nullable_id, 0); + auto chunk_span6 = segment->chunk_data(int32_nullable_id, 0); + auto chunk_span7 = segment->chunk_data(int64_nullable_id, 0); + auto chunk_span8 = segment->chunk_data(double_nullable_id, 0); + auto chunk_span9 = + segment->get_batch_views(str_nullable_id, 0, 0, N); + auto ref1 = dataset.get_col(counter_id); auto ref2 = dataset.get_col(double_id); auto ref3 = dataset.get_col(str_id)->scalars().string_data().data(); + auto ref4 = dataset.get_col(int8_nullable_id); + auto ref5 = dataset.get_col(int16_nullable_id); + auto ref6 = dataset.get_col(int32_nullable_id); + auto ref7 = dataset.get_col(int64_nullable_id); + auto ref8 = dataset.get_col(double_nullable_id); + auto ref9 = + dataset.get_col(str_nullable_id)->scalars().string_data().data(); + auto valid4 = dataset.get_col_valid(int8_nullable_id); + auto valid5 = dataset.get_col_valid(int16_nullable_id); + auto valid6 = dataset.get_col_valid(int32_nullable_id); + auto valid7 = 
dataset.get_col_valid(int64_nullable_id); + auto valid8 = dataset.get_col_valid(double_nullable_id); + auto valid9 = dataset.get_col_valid(str_nullable_id); + ASSERT_EQ(chunk_span1.valid_data(), nullptr); + ASSERT_EQ(chunk_span2.valid_data(), nullptr); + ASSERT_EQ(chunk_span3.second.size(), 0); for (int i = 0; i < N; ++i) { - ASSERT_EQ(chunk_span1[i], ref1[i]); - ASSERT_EQ(chunk_span2[i], ref2[i]); - ASSERT_EQ(chunk_span3[i], ref3[i]); + ASSERT_EQ(chunk_span1.data()[i], ref1[i]); + ASSERT_EQ(chunk_span2.data()[i], ref2[i]); + ASSERT_EQ(chunk_span3.first[i], ref3[i]); + ASSERT_EQ(chunk_span4.data()[i], ref4[i]); + ASSERT_EQ(chunk_span5.data()[i], ref5[i]); + ASSERT_EQ(chunk_span6.data()[i], ref6[i]); + ASSERT_EQ(chunk_span7.data()[i], ref7[i]); + ASSERT_EQ(chunk_span8.data()[i], ref8[i]); + ASSERT_EQ(chunk_span9.first[i], ref9[i]); + ASSERT_EQ(chunk_span4.valid_data()[i], valid4[i]); + ASSERT_EQ(chunk_span5.valid_data()[i], valid5[i]); + ASSERT_EQ(chunk_span6.valid_data()[i], valid6[i]); + ASSERT_EQ(chunk_span7.valid_data()[i], valid7[i]); + ASSERT_EQ(chunk_span8.valid_data()[i], valid8[i]); + ASSERT_EQ(chunk_span9.second[i], valid9[i]); } auto sr = segment->Search(plan.get(), ph_group.get(), timestamp); @@ -630,10 +680,11 @@ TEST(Sealed, ClearData) { auto ref1 = dataset.get_col(counter_id); auto ref2 = dataset.get_col(double_id); auto ref3 = dataset.get_col(str_id)->scalars().string_data().data(); + ASSERT_EQ(chunk_span3.second.size(), 0); for (int i = 0; i < N; ++i) { ASSERT_EQ(chunk_span1[i], ref1[i]); ASSERT_EQ(chunk_span2[i], ref2[i]); - ASSERT_EQ(chunk_span3[i], ref3[i]); + ASSERT_EQ(chunk_span3.first[i], ref3[i]); } auto sr = segment->Search(plan.get(), ph_group.get(), timestamp); @@ -733,10 +784,11 @@ TEST(Sealed, LoadFieldDataMmap) { auto ref1 = dataset.get_col(counter_id); auto ref2 = dataset.get_col(double_id); auto ref3 = dataset.get_col(str_id)->scalars().string_data().data(); + ASSERT_EQ(chunk_span3.second.size(), 0); for (int i = 0; i < N; ++i) { 
ASSERT_EQ(chunk_span1[i], ref1[i]); ASSERT_EQ(chunk_span2[i], ref2[i]); - ASSERT_EQ(chunk_span3[i], ref3[i]); + ASSERT_EQ(chunk_span3.first[i], ref3[i]); } auto sr = segment->Search(plan.get(), ph_group.get(), timestamp); diff --git a/internal/core/unittest/test_span.cpp b/internal/core/unittest/test_span.cpp index 7c5e29c14e5fc..f0cca40d0b858 100644 --- a/internal/core/unittest/test_span.cpp +++ b/internal/core/unittest/test_span.cpp @@ -29,6 +29,8 @@ TEST(Span, Naive) { auto float_vec_fid = schema->AddDebugField( "floatvec", DataType::VECTOR_FLOAT, 32, knowhere::metric::L2); auto i64_fid = schema->AddDebugField("counter", DataType::INT64); + auto nullable_fid = + schema->AddDebugField("nullable", DataType::INT64, true); schema->set_primary_field_id(i64_fid); auto dataset = DataGen(schema, N); @@ -42,6 +44,8 @@ TEST(Span, Naive) { auto vec_ptr = dataset.get_col(bin_vec_fid); auto age_ptr = dataset.get_col(float_fid); auto float_ptr = dataset.get_col(float_vec_fid); + auto nullable_data_ptr = dataset.get_col(nullable_fid); + auto nullable_valid_data_ptr = dataset.get_col_valid(nullable_fid); auto num_chunk = segment->num_chunk(); ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk)); auto row_count = segment->get_row_count(); @@ -52,9 +56,12 @@ TEST(Span, Naive) { auto age_span = segment->chunk_data(float_fid, chunk_id); auto float_span = segment->chunk_data(float_vec_fid, chunk_id); + auto null_field_span = + segment->chunk_data(nullable_fid, chunk_id); auto begin = chunk_id * size_per_chunk; auto end = std::min((chunk_id + 1) * size_per_chunk, N); auto size_of_chunk = end - begin; + ASSERT_EQ(age_span.valid_data(), nullptr); for (int i = 0; i < size_of_chunk * 512 / 8; ++i) { ASSERT_EQ(vec_span.data()[i], vec_ptr[i + begin * 512 / 8]); } @@ -64,5 +71,12 @@ TEST(Span, Naive) { for (int i = 0; i < size_of_chunk; ++i) { ASSERT_EQ(float_span.data()[i], float_ptr[i + begin * 32]); } + for (int i = 0; i < size_of_chunk; ++i) { + ASSERT_EQ(null_field_span.data()[i], 
nullable_data_ptr[i + begin]); + } + for (int i = 0; i < size_of_chunk; ++i) { + ASSERT_EQ(null_field_span.valid_data()[i], + nullable_valid_data_ptr[i + begin]); + } } -} +} \ No newline at end of file