diff --git a/configs/milvus.yaml b/configs/milvus.yaml index f67fbb16be318..55939c50098da 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -400,6 +400,7 @@ queryNode: nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist memExpansionRate: 1.15 # extra memory needed by building interim index buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num + multipleChunkedEnable: false # Enable multiple chunked search knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index diff --git a/internal/core/src/bitset/detail/element_wise.h b/internal/core/src/bitset/detail/element_wise.h index 62e49b5a93ae1..120657908cab6 100644 --- a/internal/core/src/bitset/detail/element_wise.h +++ b/internal/core/src/bitset/detail/element_wise.h @@ -25,6 +25,7 @@ #include "ctz.h" #include "popcount.h" +#include "bitset/common.h" namespace milvus { namespace bitset { namespace detail { diff --git a/internal/core/src/common/Chunk.cpp b/internal/core/src/common/Chunk.cpp index 8e957afd18748..6032c6b930d9c 100644 --- a/internal/core/src/common/Chunk.cpp +++ b/internal/core/src/common/Chunk.cpp @@ -18,15 +18,13 @@ namespace milvus { -std::vector -StringChunk::StringViews() const { +std::pair, FixedVector> +StringChunk::StringViews() { std::vector ret; - for (int i = 0; i < row_nums_ - 1; i++) { + for (int i = 0; i < row_nums_; i++) { ret.emplace_back(data_ + offsets_[i], offsets_[i + 1] - offsets_[i]); } - ret.emplace_back(data_ + offsets_[row_nums_ - 1], - size_ - MMAP_STRING_PADDING - offsets_[row_nums_ - 1]); - return ret; + return {ret, valid_}; } void @@ -34,20 +32,22 @@ ArrayChunk::ConstructViews() { views_.reserve(row_nums_); for (int i = 0; i < row_nums_; ++i) { - auto data_ptr = data_ + offsets_[i]; - auto next_data_ptr = i == row_nums_ - 1 - ? data_ + size_ - MMAP_ARRAY_PADDING - : data_ + offsets_[i + 1]; - auto offsets_len = lens_[i] * sizeof(uint64_t); + int offset = offsets_lens_[2 * i]; + int next_offset = offsets_lens_[2 * (i + 1)]; + int len = offsets_lens_[2 * i + 1]; + + auto data_ptr = data_ + offset; + auto offsets_len = 0; std::vector element_indices = {}; if (IsStringDataType(element_type_)) { + offsets_len = len * sizeof(uint64_t); std::vector tmp( reinterpret_cast(data_ptr), reinterpret_cast(data_ptr + offsets_len)); element_indices = std::move(tmp); } views_.emplace_back(data_ptr + offsets_len, - next_data_ptr - data_ptr - offsets_len, + next_offset - offset - offsets_len, element_type_, std::move(element_indices)); } @@ -55,7 +55,10 @@ ArrayChunk::ConstructViews() { SpanBase ArrayChunk::Span() const { - return SpanBase(views_.data(), views_.size(), sizeof(ArrayView)); + return SpanBase(views_.data(), + nullable_ ? valid_.data() : nullptr, + views_.size(), + sizeof(ArrayView)); } } // namespace milvus diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h index facc0cd4c0408..24db41dcd0198 100644 --- a/internal/core/src/common/Chunk.h +++ b/internal/core/src/common/Chunk.h @@ -21,60 +21,126 @@ #include "arrow/record_batch.h" #include "common/Array.h" #include "common/ChunkTarget.h" +#include "common/EasyAssert.h" #include "common/FieldDataInterface.h" #include "common/Json.h" #include "common/Span.h" #include "knowhere/sparse_utils.h" #include "simdjson/common_defs.h" #include "sys/mman.h" +#include "common/Types.h" namespace milvus { -constexpr size_t MMAP_STRING_PADDING = 1; -constexpr size_t MMAP_ARRAY_PADDING = 1; +constexpr uint64_t MMAP_STRING_PADDING = 1; +constexpr uint64_t MMAP_ARRAY_PADDING = 1; class Chunk { public: Chunk() = default; - Chunk(int64_t row_nums, char* data, size_t size) - : row_nums_(row_nums), data_(data), size_(size) { + Chunk(int64_t row_nums, char* data, uint64_t size, bool nullable) + : row_nums_(row_nums), data_(data), size_(size), nullable_(nullable) { + if (nullable) { + valid_.reserve(row_nums); + for (int i = 0; i < row_nums; i++) { + valid_.push_back((data[i >> 3] >> (i & 0x07)) & 1); + } + } } virtual ~Chunk() { munmap(data_, size_); } + uint64_t + Size() const { + return size_; + } + + int64_t + RowNums() const { + return row_nums_; + } + + virtual const char* + ValueAt(int64_t idx) const = 0; + + virtual const char* + Data() const { + return data_; + } + + virtual bool + isValid(int offset) { + return valid_[offset]; + }; + protected: char* data_; int64_t row_nums_; - size_t size_; + uint64_t size_; + bool nullable_; + FixedVector + valid_; // parse null bitmap to valid_ to be compatible with SpanBase }; // for fixed size data, includes fixed size array -template class FixedWidthChunk : public Chunk { public: - FixedWidthChunk(int32_t row_nums, int32_t dim, char* data, size_t size) - : Chunk(row_nums, data, size), dim_(dim){}; + FixedWidthChunk(int32_t row_nums, + int32_t dim, + char* data, + uint64_t size, + uint64_t element_size, + bool nullable) + : Chunk(row_nums, data, size, nullable), + dim_(dim), + element_size_(element_size){}; milvus::SpanBase Span() const { auto null_bitmap_bytes_num = (row_nums_ + 7) / 8; - return milvus::SpanBase( - data_ + null_bitmap_bytes_num, row_nums_, sizeof(T) * dim_); + return milvus::SpanBase(data_ + null_bitmap_bytes_num, + nullable_ ? valid_.data() : nullptr, + row_nums_, + element_size_ * dim_); + } + + const char* + ValueAt(int64_t idx) const override { + auto null_bitmap_bytes_num = (row_nums_ + 7) / 8; + return data_ + null_bitmap_bytes_num + idx * element_size_ * dim_; + } + + const char* + Data() const override { + auto null_bitmap_bytes_num = (row_nums_ + 7) / 8; + return data_ + null_bitmap_bytes_num; } private: int dim_; + int element_size_; }; class StringChunk : public Chunk { public: StringChunk() = default; - StringChunk(int32_t row_nums, char* data, size_t size) - : Chunk(row_nums, data, size) { + StringChunk(int32_t row_nums, char* data, uint64_t size, bool nullable) + : Chunk(row_nums, data, size, nullable) { auto null_bitmap_bytes_num = (row_nums + 7) / 8; offsets_ = reinterpret_cast(data + null_bitmap_bytes_num); } - std::vector - StringViews() const; + std::pair, FixedVector> + StringViews(); + + const char* + ValueAt(int64_t idx) const override { + PanicInfo(ErrorCode::Unsupported, + "StringChunk::ValueAt is not supported"); + } + + uint64_t* + Offsets() { + return offsets_; + } protected: uint64_t* offsets_; @@ -86,63 +152,83 @@ class ArrayChunk : public Chunk { public: ArrayChunk(int32_t row_nums, char* data, - size_t size, - milvus::DataType element_type) - : Chunk(row_nums, data, size), element_type_(element_type) { + uint64_t size, + milvus::DataType element_type, + bool nullable) + : Chunk(row_nums, data, size, nullable), element_type_(element_type) { auto null_bitmap_bytes_num = (row_nums + 7) / 8; - offsets_ = reinterpret_cast(data + null_bitmap_bytes_num); - lens_ = offsets_ + row_nums; + offsets_lens_ = + reinterpret_cast(data + null_bitmap_bytes_num); ConstructViews(); } SpanBase Span() const; + ArrayView + View(int64_t idx) const { + return views_[idx]; + } + void ConstructViews(); + const char* + ValueAt(int64_t idx) const override { + PanicInfo(ErrorCode::Unsupported, + "ArrayChunk::ValueAt is not supported"); + } + private: milvus::DataType element_type_; - uint64_t* offsets_; - uint64_t* lens_; + uint64_t* offsets_lens_; std::vector views_; }; class SparseFloatVectorChunk : public Chunk { public: - SparseFloatVectorChunk(int32_t row_nums, char* data, size_t size) - : Chunk(row_nums, data, size) { + SparseFloatVectorChunk(int32_t row_nums, + char* data, + uint64_t size, + bool nullable) + : Chunk(row_nums, data, size, nullable) { vec_.resize(row_nums); auto null_bitmap_bytes_num = (row_nums + 7) / 8; auto offsets_ptr = reinterpret_cast(data + null_bitmap_bytes_num); for (int i = 0; i < row_nums; i++) { - int vec_size = 0; - if (i == row_nums - 1) { - vec_size = size - offsets_ptr[i]; - } else { - vec_size = offsets_ptr[i + 1] - offsets_ptr[i]; - } - - vec_[i] = { - vec_size / knowhere::sparse::SparseRow::element_size(), - (uint8_t*)(data + offsets_ptr[i]), - false}; + vec_[i] = {(offsets_ptr[i + 1] - offsets_ptr[i]) / + knowhere::sparse::SparseRow::element_size(), + (uint8_t*)(data + offsets_ptr[i]), + false}; + dim_ = std::max(dim_, vec_[i].dim()); } } const char* - Data() const { + Data() const override { return static_cast(static_cast(vec_.data())); } + const char* + ValueAt(int64_t i) const override { + return static_cast( + static_cast(vec_.data() + i)); + } + // only for test std::vector>& Vec() { return vec_; } + int64_t + Dim() { + return dim_; + } + private: + int64_t dim_ = 0; std::vector> vec_; }; } // namespace milvus \ No newline at end of file diff --git a/internal/core/src/common/ChunkTarget.cpp b/internal/core/src/common/ChunkTarget.cpp index abe47dd819f8d..068b1ccabb147 100644 --- a/internal/core/src/common/ChunkTarget.cpp +++ b/internal/core/src/common/ChunkTarget.cpp @@ -10,10 +10,13 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include +#include #include #include "common/EasyAssert.h" #include +#include +const auto PAGE_SIZE = sysconf(_SC_PAGE_SIZE); namespace milvus { void MemChunkTarget::write(const void* data, size_t size, bool append) { @@ -42,8 +45,33 @@ MemChunkTarget::tell() { return size_; } +void +MmapChunkTarget::flush() { + if (buffer_.pos == 0) { + return; + } + + auto n = file_.Write(buffer_.buf, buffer_.pos); + AssertInfo(n != -1, "failed to write data to file"); + buffer_.clear(); +} + void MmapChunkTarget::write(const void* data, size_t size, bool append) { + if (buffer_.sufficient(size)) { + buffer_.write(data, size); + size_ += append ? size : 0; + return; + } + + flush(); + + if (buffer_.sufficient(size)) { + buffer_.write(data, size); + size_ += append ? size : 0; + return; + } + auto n = file_.Write(data, size); AssertInfo(n != -1, "failed to write data to file"); size_ += append ? size : 0; @@ -51,19 +79,35 @@ MmapChunkTarget::write(const void* data, size_t size, bool append) { void MmapChunkTarget::skip(size_t size) { + flush(); file_.Seek(size, SEEK_CUR); size_ += size; } void MmapChunkTarget::seek(size_t offset) { + flush(); file_.Seek(offset_ + offset, SEEK_SET); } std::pair MmapChunkTarget::get() { + // Write padding to align with the page size, ensuring the offset_ aligns with the page size. + auto padding_size = + (size_ / PAGE_SIZE + (size_ % PAGE_SIZE != 0)) * PAGE_SIZE - size_; + char padding[padding_size]; + memset(padding, 0, sizeof(padding)); + write(padding, padding_size); + + flush(); + auto m = mmap( nullptr, size_, PROT_READ, MAP_SHARED, file_.Descriptor(), offset_); + AssertInfo(m != MAP_FAILED, + "failed to map: {}, map_size={}, offset={}", + strerror(errno), + size_, + offset_); return {(char*)m, size_}; } diff --git a/internal/core/src/common/ChunkTarget.h b/internal/core/src/common/ChunkTarget.h index 3419e40cb202a..91b0655c63373 100644 --- a/internal/core/src/common/ChunkTarget.h +++ b/internal/core/src/common/ChunkTarget.h @@ -37,9 +37,34 @@ class ChunkTarget { }; class MmapChunkTarget : public ChunkTarget { + struct Buffer { + char buf[1 << 14]; + size_t pos = 0; + + bool + sufficient(size_t size) { + return pos + size <= sizeof(buf); + } + + void + write(const void* data, size_t size) { + memcpy(buf + pos, data, size); + pos += size; + } + + void + clear() { + pos = 0; + } + }; + public: MmapChunkTarget(File& file, size_t offset) : file_(file), offset_(offset) { } + + void + flush(); + void write(const void* data, size_t size, bool append = true) override; @@ -59,17 +84,23 @@ class MmapChunkTarget : public ChunkTarget { File& file_; size_t offset_ = 0; size_t size_ = 0; + Buffer buffer_; }; class MemChunkTarget : public ChunkTarget { public: MemChunkTarget(size_t cap) : cap_(cap) { - data_ = reinterpret_cast(mmap(nullptr, - cap, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, - -1, - 0)); + auto m = mmap(nullptr, + cap, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, + -1, + 0); + AssertInfo(m != MAP_FAILED, + "failed to map: {}, map_size={}", + strerror(errno), + size_); + data_ = reinterpret_cast(m); } void diff --git a/internal/core/src/common/ChunkWriter.cpp b/internal/core/src/common/ChunkWriter.cpp index 52b339feb2a23..d7ad87db17834 100644 --- a/internal/core/src/common/ChunkWriter.cpp +++ b/internal/core/src/common/ChunkWriter.cpp @@ -44,7 +44,7 @@ StringChunkWriter::write(std::shared_ptr data) { size += null_bitmap_n; row_nums_ += array->length(); } - size += sizeof(uint64_t) * row_nums_ + MMAP_STRING_PADDING; + size += sizeof(uint64_t) * (row_nums_ + 1) + MMAP_STRING_PADDING; if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -63,11 +63,19 @@ StringChunkWriter::write(std::shared_ptr data) { } // write data - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_); + int offset_num = row_nums_ + 1; + int offset_start_pos = target_->tell() + sizeof(uint64_t) * offset_num; + std::vector offsets; + + for (auto str : strs) { + offsets.push_back(offset_start_pos); + offset_start_pos += str.size(); + } + offsets.push_back(offset_start_pos); + + target_->write(offsets.data(), offsets.size() * sizeof(uint64_t)); for (auto str : strs) { - offsets_.push_back(target_->tell()); target_->write(str.data(), str.size()); } } @@ -78,12 +86,8 @@ StringChunkWriter::finish() { // FIXME char padding[MMAP_STRING_PADDING]; target_->write(padding, MMAP_STRING_PADDING); - - // seek back to write offsets - target_->seek(offsets_pos_); - target_->write(offsets_.data(), offsets_.size() * sizeof(uint64_t)); auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size); + return std::make_shared(row_nums_, data, size, nullable_); } void @@ -101,14 +105,14 @@ JSONChunkWriter::write(std::shared_ptr data) { size += json.data().size(); jsons.push_back(std::move(json)); } - AssertInfo(data->length() % 8 == 0, - "String length should be multiple of 8"); + // AssertInfo(data->length() % 8 == 0, + // "String length should be multiple of 8"); auto null_bitmap_n = (data->length() + 7) / 8; null_bitmaps.emplace_back(data->null_bitmap_data(), null_bitmap_n); size += null_bitmap_n; row_nums_ += array->length(); } - size += sizeof(uint64_t) * row_nums_ + simdjson::SIMDJSON_PADDING; + size += sizeof(uint64_t) * (row_nums_ + 1) + simdjson::SIMDJSON_PADDING; if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -126,12 +130,20 @@ JSONChunkWriter::write(std::shared_ptr data) { } } - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_); + int offset_num = row_nums_ + 1; + int offset_start_pos = target_->tell() + sizeof(uint64_t) * offset_num; + std::vector offsets; + + for (auto json : jsons) { + offsets.push_back(offset_start_pos); + offset_start_pos += json.data().size(); + } + offsets.push_back(offset_start_pos); + + target_->write(offsets.data(), offsets.size() * sizeof(uint64_t)); // write data for (auto json : jsons) { - offsets_.push_back(target_->tell()); target_->write(json.data().data(), json.data().size()); } } @@ -141,17 +153,15 @@ JSONChunkWriter::finish() { char padding[simdjson::SIMDJSON_PADDING]; target_->write(padding, simdjson::SIMDJSON_PADDING); - // write offsets and padding - target_->seek(offsets_pos_); - target_->write(offsets_.data(), offsets_.size() * sizeof(uint64_t)); auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size); + return std::make_shared(row_nums_, data, size, nullable_); } void ArrayChunkWriter::write(std::shared_ptr data) { auto size = 0; + auto is_string = IsStringDataType(element_type_); std::vector arrays; std::vector> null_bitmaps; for (auto batch : *data) { @@ -164,8 +174,10 @@ ArrayChunkWriter::write(std::shared_ptr data) { auto arr = Array(scalar_array); size += arr.byte_size(); arrays.push_back(std::move(arr)); - // element offsets size - size += sizeof(uint64_t) * arr.length(); + if (is_string) { + // element offsets size + size += sizeof(uint64_t) * arr.length(); + } } row_nums_ += array->length(); auto null_bitmap_n = (data->length() + 7) / 8; @@ -173,10 +185,8 @@ ArrayChunkWriter::write(std::shared_ptr data) { size += null_bitmap_n; } - auto is_string = IsStringDataType(element_type_); // offsets + lens - size += is_string ? sizeof(uint64_t) * row_nums_ * 2 + MMAP_ARRAY_PADDING - : sizeof(uint64_t) * row_nums_ + MMAP_ARRAY_PADDING; + size += sizeof(uint64_t) * (row_nums_ * 2 + 1) + MMAP_ARRAY_PADDING; if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -193,16 +203,35 @@ ArrayChunkWriter::write(std::shared_ptr data) { } } - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_ * 2); + int offsets_num = row_nums_ + 1; + int len_num = row_nums_; + int offset_start_pos = + target_->tell() + sizeof(uint64_t) * (offsets_num + len_num); + std::vector offsets; + std::vector lens; + for (auto& arr : arrays) { + offsets.push_back(offset_start_pos); + lens.push_back(arr.length()); + offset_start_pos += + is_string ? sizeof(uint64_t) * arr.get_offsets().size() : 0; + offset_start_pos += arr.byte_size(); + } + offsets.push_back(offset_start_pos); + + for (int i = 0; i < offsets.size(); i++) { + if (i == offsets.size() - 1) { + target_->write(&offsets[i], sizeof(uint64_t)); + break; + } + target_->write(&offsets[i], sizeof(uint64_t)); + target_->write(&lens[i], sizeof(uint64_t)); + } + for (auto& arr : arrays) { - // write elements offsets - offsets_.push_back(target_->tell()); if (is_string) { target_->write(arr.get_offsets().data(), arr.get_offsets().size() * sizeof(uint64_t)); } - lens_.push_back(arr.length()); target_->write(arr.data(), arr.byte_size()); } } @@ -212,14 +241,9 @@ ArrayChunkWriter::finish() { char padding[MMAP_ARRAY_PADDING]; target_->write(padding, MMAP_ARRAY_PADDING); - // write offsets and lens - target_->seek(offsets_pos_); - for (size_t i = 0; i < offsets_.size(); i++) { - target_->write(&offsets_[i], sizeof(uint64_t)); - target_->write(&lens_[i], sizeof(uint64_t)); - } auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size, element_type_); + return std::make_shared( + row_nums_, data, size, element_type_, nullable_); } void @@ -241,7 +265,7 @@ SparseFloatVectorChunkWriter::write( size += null_bitmap_n; row_nums_ += array->length(); } - size += sizeof(uint64_t) * row_nums_; + size += sizeof(uint64_t) * (row_nums_ + 1); if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -260,95 +284,210 @@ SparseFloatVectorChunkWriter::write( } // write data - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_); + + int offset_num = row_nums_ + 1; + int offset_start_pos = target_->tell() + sizeof(uint64_t) * offset_num; + std::vector offsets; + + for (auto str : strs) { + offsets.push_back(offset_start_pos); + offset_start_pos += str.size(); + } + offsets.push_back(offset_start_pos); + + target_->write(offsets.data(), offsets.size() * sizeof(uint64_t)); for (auto str : strs) { - offsets_.push_back(target_->tell()); target_->write(str.data(), str.size()); } } std::shared_ptr SparseFloatVectorChunkWriter::finish() { - // seek back to write offsets - target_->seek(offsets_pos_); - target_->write(offsets_.data(), offsets_.size() * sizeof(uint64_t)); auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size); + return std::make_shared( + row_nums_, data, size, nullable_); +} + +std::shared_ptr +create_chunk(const FieldMeta& field_meta, + int dim, + std::shared_ptr r) { + std::shared_ptr w; + bool nullable = field_meta.is_nullable(); + + switch (field_meta.get_data_type()) { + case milvus::DataType::BOOL: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT8: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT16: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT32: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT64: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::FLOAT: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::DOUBLE: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::VECTOR_FLOAT: { + w = std::make_shared< + ChunkWriter>(dim, nullable); + break; + } + case milvus::DataType::VECTOR_BINARY: { + w = std::make_shared< + ChunkWriter>(dim / 8, + nullable); + break; + } + case milvus::DataType::VECTOR_FLOAT16: { + w = std::make_shared< + ChunkWriter>( + dim, nullable); + break; + } + case milvus::DataType::VECTOR_BFLOAT16: { + w = std::make_shared< + ChunkWriter>( + dim, nullable); + break; + } + case milvus::DataType::VARCHAR: + case milvus::DataType::STRING: { + w = std::make_shared(nullable); + break; + } + case milvus::DataType::JSON: { + w = std::make_shared(nullable); + break; + } + case milvus::DataType::ARRAY: { + w = std::make_shared( + field_meta.get_element_type(), nullable); + break; + } + case milvus::DataType::VECTOR_SPARSE_FLOAT: { + w = std::make_shared(nullable); + break; + } + default: + PanicInfo(Unsupported, "Unsupported data type"); + } + + w->write(r); + return w->finish(); } std::shared_ptr create_chunk(const FieldMeta& field_meta, int dim, + File& file, + size_t file_offset, std::shared_ptr r) { std::shared_ptr w; + bool nullable = field_meta.is_nullable(); switch (field_meta.get_data_type()) { case milvus::DataType::BOOL: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT8: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT16: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT32: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT64: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::FLOAT: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::DOUBLE: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_FLOAT: { w = std::make_shared< - ChunkWriter>(dim); + ChunkWriter>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_BINARY: { w = std::make_shared< - ChunkWriter>(dim / 8); + ChunkWriter>( + dim / 8, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_FLOAT16: { w = std::make_shared< - ChunkWriter>(dim); + ChunkWriter>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_BFLOAT16: { w = std::make_shared< - ChunkWriter>(dim); + ChunkWriter>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VARCHAR: case milvus::DataType::STRING: { - w = std::make_shared(); + w = std::make_shared( + file, file_offset, nullable); break; } case milvus::DataType::JSON: { - w = std::make_shared(); + w = std::make_shared(file, file_offset, nullable); break; } case milvus::DataType::ARRAY: { w = std::make_shared( - field_meta.get_element_type()); + field_meta.get_element_type(), file, file_offset, nullable); break; } case milvus::DataType::VECTOR_SPARSE_FLOAT: { - w = std::make_shared(); + w = std::make_shared( + file, file_offset, nullable); break; } default: diff --git a/internal/core/src/common/ChunkWriter.h b/internal/core/src/common/ChunkWriter.h index a16b9bae47448..c389b0e799096 100644 --- a/internal/core/src/common/ChunkWriter.h +++ b/internal/core/src/common/ChunkWriter.h @@ -25,10 +25,11 @@ namespace milvus { class ChunkWriterBase { public: - ChunkWriterBase() = default; + explicit ChunkWriterBase(bool nullable) : nullable_(nullable) { + } - ChunkWriterBase(File& file, size_t offset) - : file_(&file), file_offset_(offset) { + ChunkWriterBase(File& file, size_t offset, bool nullable) + : file_(&file), file_offset_(offset), nullable_(nullable) { } virtual void @@ -46,17 +47,18 @@ class ChunkWriterBase { int row_nums_ = 0; File* file_ = nullptr; size_t file_offset_ = 0; + bool nullable_ = false; std::shared_ptr target_; }; template class ChunkWriter : public ChunkWriterBase { public: - ChunkWriter(int dim) : dim_(dim) { + ChunkWriter(int dim, bool nullable) : ChunkWriterBase(nullable), dim_(dim) { } - ChunkWriter(int dim, File& file, size_t offset) - : ChunkWriterBase(file, offset), dim_(dim){}; + ChunkWriter(int dim, File& file, size_t offset, bool nullable) + : ChunkWriterBase(file, offset, nullable), dim_(dim){}; void write(std::shared_ptr data) override { @@ -104,8 +106,8 @@ class ChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override { auto [data, size] = target_->get(); - return std::make_shared>( - row_nums_, dim_, data, size); + return std::make_shared( + row_nums_, dim_, data, size, sizeof(T), nullable_); } private: @@ -165,10 +167,6 @@ class StringChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override; - - protected: - std::vector offsets_; - size_t offsets_pos_ = 0; }; class JSONChunkWriter : public ChunkWriterBase { @@ -180,21 +178,18 @@ class JSONChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override; - - private: - std::vector offsets_; - size_t offsets_pos_ = 0; }; class ArrayChunkWriter : public ChunkWriterBase { public: - ArrayChunkWriter(const milvus::DataType element_type) - : element_type_(element_type) { + ArrayChunkWriter(const milvus::DataType element_type, bool nullable) + : ChunkWriterBase(nullable), element_type_(element_type) { } ArrayChunkWriter(const milvus::DataType element_type, File& file, - size_t offset) - : ChunkWriterBase(file, offset), element_type_(element_type) { + size_t offset, + bool nullable) + : ChunkWriterBase(file, offset, nullable), element_type_(element_type) { } void @@ -205,9 +200,6 @@ class ArrayChunkWriter : public ChunkWriterBase { private: const milvus::DataType element_type_; - std::vector offsets_; - std::vector lens_; - size_t offsets_pos_; }; class SparseFloatVectorChunkWriter : public ChunkWriterBase { @@ -219,10 +211,6 @@ class SparseFloatVectorChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override; - - private: - uint64_t offsets_pos_ = 0; - std::vector offsets_; }; std::shared_ptr diff --git a/internal/core/src/common/Common.h b/internal/core/src/common/Common.h index c398c161d58ea..49fcbcb7c8592 100644 --- a/internal/core/src/common/Common.h +++ b/internal/core/src/common/Common.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include "common/Consts.h" namespace milvus { @@ -47,11 +49,14 @@ void SetDefaultExecEvalExprBatchSize(int64_t val); struct BufferView { - char* data_; - size_t size_; - - BufferView(char* data_ptr, size_t size) : data_(data_ptr), size_(size) { - } + struct Element { + const char* data_; + uint64_t* offsets_; + int start_; + int end_; + }; + + std::variant, std::pair> data_; }; } // namespace milvus diff --git a/internal/core/src/common/FieldData.h b/internal/core/src/common/FieldData.h index de796fa3c5e8a..334a46190f02e 100644 --- a/internal/core/src/common/FieldData.h +++ b/internal/core/src/common/FieldData.h @@ -23,6 +23,7 @@ #include "common/FieldDataInterface.h" #include "common/Channel.h" +#include "parquet/arrow/reader.h" namespace milvus { @@ -143,6 +144,21 @@ using FieldDataPtr = std::shared_ptr; using FieldDataChannel = Channel; using FieldDataChannelPtr = std::shared_ptr; +struct ArrowDataWrapper { + ArrowDataWrapper() = default; + ArrowDataWrapper(std::shared_ptr reader, + std::shared_ptr arrow_reader, + std::shared_ptr file_data) + : reader(reader), arrow_reader(arrow_reader), file_data(file_data) { + } + std::shared_ptr reader; + // file reader must outlive the record batch reader + std::shared_ptr arrow_reader; + // underlying file data memory, must outlive the arrow reader + std::shared_ptr file_data; +}; +using ArrowReaderChannel = Channel>; + FieldDataPtr InitScalarFieldData(const DataType& type, bool nullable, int64_t cap_rows); diff --git a/internal/core/src/common/FieldDataInterface.h b/internal/core/src/common/FieldDataInterface.h index 926a1bb16e3d9..72aff36da8b82 100644 --- a/internal/core/src/common/FieldDataInterface.h +++ b/internal/core/src/common/FieldDataInterface.h @@ -395,21 +395,6 @@ class FieldDataImpl : public FieldDataBase { return &data_[offset]; } - // std::optional - // Value(ssize_t offset) { - // if (!is_type_entire_row) { - // return RawValue(offset); - // } - // AssertInfo(offset < get_num_rows(), - // "field data subscript out of range"); - // AssertInfo(offset < length(), - // "subscript position don't has valid value"); - // if (nullable_ && !valid_data_[offset]) { - // return std::nullopt; - // } - // return &field_data_[offset]; - // } - int64_t Size() const override { return DataSize() + ValidDataSize(); diff --git a/internal/core/src/common/type_c.h b/internal/core/src/common/type_c.h index bf19e0dac962d..77bc563698933 100644 --- a/internal/core/src/common/type_c.h +++ b/internal/core/src/common/type_c.h @@ -28,6 +28,7 @@ enum SegmentType { Growing = 1, Sealed = 2, Indexing = 3, + ChunkedSealed = 4, }; typedef enum SegmentType SegmentType; diff --git a/internal/core/src/exec/expression/CompareExpr.cpp b/internal/core/src/exec/expression/CompareExpr.cpp index 0c412ac82b64b..467df6654a929 100644 --- a/internal/core/src/exec/expression/CompareExpr.cpp +++ b/internal/core/src/exec/expression/CompareExpr.cpp @@ -15,6 +15,7 @@ // limitations under the License. #include "CompareExpr.h" +#include "common/type_c.h" #include "query/Relational.h" namespace milvus { @@ -28,15 +29,248 @@ PhyCompareFilterExpr::IsStringExpr() { int64_t PhyCompareFilterExpr::GetNextBatchSize() { - auto current_rows = - segment_->type() == SegmentType::Growing - ? current_chunk_id_ * size_per_chunk_ + current_chunk_pos_ - : current_chunk_pos_; + auto current_rows = GetCurrentRows(); + return current_rows + batch_size_ >= active_count_ ? active_count_ - current_rows : batch_size_; } +template +MultipleChunkDataAccessor +PhyCompareFilterExpr::GetChunkData(FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos) { + if (index) { + auto& indexing = const_cast&>( + segment_->chunk_scalar_index(field_id, current_chunk_id)); + auto current_chunk_size = segment_->type() == SegmentType::Growing + ? size_per_chunk_ + : active_count_; + + if (indexing.HasRawData()) { + return [&, current_chunk_size]() -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + indexing = const_cast&>( + segment_->chunk_scalar_index(field_id, + current_chunk_id)); + } + return indexing.Reverse_Lookup(current_chunk_pos++); + }; + } + } + auto chunk_data = + segment_->chunk_data(field_id, current_chunk_id).data(); + auto current_chunk_size = segment_->chunk_size(field_id, current_chunk_id); + return + [=, ¤t_chunk_id, ¤t_chunk_pos]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + chunk_data = + segment_->chunk_data(field_id, current_chunk_id).data(); + current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + } + + return chunk_data[current_chunk_pos++]; + }; +} + +template <> +MultipleChunkDataAccessor +PhyCompareFilterExpr::GetChunkData(FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos) { + if (index) { + auto& indexing = const_cast&>( + segment_->chunk_scalar_index(field_id, + current_chunk_id)); + auto current_chunk_size = segment_->type() == SegmentType::Growing + ? size_per_chunk_ + : active_count_; + + if (indexing.HasRawData()) { + return [&, current_chunk_size]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + indexing = const_cast&>( + segment_->chunk_scalar_index( + field_id, current_chunk_id)); + } + return indexing.Reverse_Lookup(current_chunk_pos++); + }; + } + } + if (segment_->type() == SegmentType::Growing && + !storage::MmapManager::GetInstance() + .GetMmapConfig() + .growing_enable_mmap) { + auto chunk_data = + segment_->chunk_data(field_id, current_chunk_id) + .data(); + auto current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + return [=, + ¤t_chunk_id, + ¤t_chunk_pos]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + chunk_data = + segment_ + ->chunk_data(field_id, current_chunk_id) + .data(); + current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + } + + return chunk_data[current_chunk_pos++]; + }; + } else { + auto chunk_data = + segment_->chunk_view(field_id, current_chunk_id) + .first.data(); + auto current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + return [=, + ¤t_chunk_id, + ¤t_chunk_pos]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + chunk_data = segment_ + ->chunk_view( + field_id, current_chunk_id) + .first.data(); + current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + } + + return std::string(chunk_data[current_chunk_pos++]); + }; + } +} + +MultipleChunkDataAccessor +PhyCompareFilterExpr::GetChunkData(DataType data_type, + FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos) { + switch (data_type) { + case DataType::BOOL: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT8: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT16: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT32: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT64: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::FLOAT: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::DOUBLE: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::VARCHAR: { + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + } + default: + PanicInfo(DataTypeInvalid, "unsupported data type: {}", data_type); + } +} + +template +VectorPtr +PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { + if (segment_->is_chunked()) { + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = + std::make_shared(TargetBitmap(real_batch_size)); + TargetBitmapView res(res_vec->GetRawData(), real_batch_size); + + auto left = GetChunkData(expr_->left_data_type_, + expr_->left_field_id_, + is_left_indexed_, + left_current_chunk_id_, + left_current_chunk_pos_); + auto right = GetChunkData(expr_->right_data_type_, + expr_->right_field_id_, + is_right_indexed_, + right_current_chunk_id_, + right_current_chunk_pos_); + for (int i = 0; i < real_batch_size; ++i) { + res[i] = boost::apply_visitor( + milvus::query::Relational{}, left(), right()); + } + return res_vec; + } else { + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = + std::make_shared(TargetBitmap(real_batch_size)); + TargetBitmapView res(res_vec->GetRawData(), real_batch_size); + + auto left_data_barrier = + segment_->num_chunk_data(expr_->left_field_id_); + auto right_data_barrier = + segment_->num_chunk_data(expr_->right_field_id_); + + int64_t processed_rows = 0; + for (int64_t chunk_id = current_chunk_id_; chunk_id < num_chunk_; + ++chunk_id) { + auto chunk_size = chunk_id == num_chunk_ - 1 + ? active_count_ - chunk_id * size_per_chunk_ + : size_per_chunk_; + auto left = GetChunkData(expr_->left_data_type_, + expr_->left_field_id_, + chunk_id, + left_data_barrier); + auto right = GetChunkData(expr_->right_data_type_, + expr_->right_field_id_, + chunk_id, + right_data_barrier); + + for (int i = chunk_id == current_chunk_id_ ? current_chunk_pos_ : 0; + i < chunk_size; + ++i) { + res[processed_rows++] = boost::apply_visitor( + milvus::query::Relational{}, + left(i), + right(i)); + + if (processed_rows >= batch_size_) { + current_chunk_id_ = chunk_id; + current_chunk_pos_ = i + 1; + return res_vec; + } + } + } + return res_vec; + } +} + template ChunkDataAccessor PhyCompareFilterExpr::GetChunkData(FieldId field_id, @@ -113,52 +347,6 @@ PhyCompareFilterExpr::GetChunkData(DataType data_type, } } -template -VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { - auto real_batch_size = GetNextBatchSize(); - if (real_batch_size == 0) { - return nullptr; - } - - auto res_vec = - std::make_shared(TargetBitmap(real_batch_size)); - TargetBitmapView res(res_vec->GetRawData(), real_batch_size); - - auto left_data_barrier = segment_->num_chunk_data(expr_->left_field_id_); - auto right_data_barrier = segment_->num_chunk_data(expr_->right_field_id_); - - int64_t processed_rows = 0; - for (int64_t chunk_id = current_chunk_id_; chunk_id < num_chunk_; - ++chunk_id) { - auto chunk_size = chunk_id == num_chunk_ - 1 - ? active_count_ - chunk_id * size_per_chunk_ - : size_per_chunk_; - auto left = GetChunkData(expr_->left_data_type_, - expr_->left_field_id_, - chunk_id, - left_data_barrier); - auto right = GetChunkData(expr_->right_data_type_, - expr_->right_field_id_, - chunk_id, - right_data_barrier); - - for (int i = chunk_id == current_chunk_id_ ? current_chunk_pos_ : 0; - i < chunk_size; - ++i) { - res[processed_rows++] = boost::apply_visitor( - milvus::query::Relational{}, left(i), right(i)); - - if (processed_rows >= batch_size_) { - current_chunk_id_ = chunk_id; - current_chunk_pos_ = i + 1; - return res_vec; - } - } - } - return res_vec; -} - void PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { // For segment both fields has no index, can use SIMD to speed up. diff --git a/internal/core/src/exec/expression/CompareExpr.h b/internal/core/src/exec/expression/CompareExpr.h index ff6069665182f..fd9ef751387cb 100644 --- a/internal/core/src/exec/expression/CompareExpr.h +++ b/internal/core/src/exec/expression/CompareExpr.h @@ -22,6 +22,7 @@ #include "common/EasyAssert.h" #include "common/Types.h" #include "common/Vector.h" +#include "common/type_c.h" #include "exec/expression/Expr.h" #include "segcore/SegmentInterface.h" @@ -37,6 +38,7 @@ using number = boost::variant; using ChunkDataAccessor = std::function; +using MultipleChunkDataAccessor = std::function; template struct CompareElementFunc { @@ -114,9 +116,26 @@ class PhyCompareFilterExpr : public Expr { is_left_indexed_ = segment_->HasIndex(left_field_); is_right_indexed_ = segment_->HasIndex(right_field_); size_per_chunk_ = segment_->size_per_chunk(); - num_chunk_ = is_left_indexed_ - ? segment_->num_chunk_index(expr_->left_field_id_) - : upper_div(active_count_, size_per_chunk_); + if (segment_->is_chunked()) { + left_num_chunk_ = + is_left_indexed_ + ? segment_->num_chunk_index(expr_->left_field_id_) + : segment_->type() == SegmentType::Growing + ? upper_div(active_count_, size_per_chunk_) + : segment_->num_chunk_data(left_field_); + right_num_chunk_ = + is_right_indexed_ + ? segment_->num_chunk_index(expr_->right_field_id_) + : segment_->type() == SegmentType::Growing + ? upper_div(active_count_, size_per_chunk_) + : segment_->num_chunk_data(right_field_); + num_chunk_ = left_num_chunk_; + } else { + num_chunk_ = is_left_indexed_ + ? segment_->num_chunk_index(expr_->left_field_id_) + : upper_div(active_count_, size_per_chunk_); + } + AssertInfo( batch_size_ > 0, fmt::format("expr batch size should greater than zero, but now: {}", @@ -128,6 +147,67 @@ class PhyCompareFilterExpr : public Expr { void MoveCursor() override { + if (segment_->is_chunked()) { + MoveCursorForMultipleChunk(); + } else { + MoveCursorForSingleChunk(); + } + } + + void + MoveCursorForMultipleChunk() { + int64_t processed_rows = 0; + for (int64_t chunk_id = left_current_chunk_id_; + chunk_id < left_num_chunk_; + ++chunk_id) { + auto chunk_size = 0; + if (segment_->type() == SegmentType::Growing) { + chunk_size = chunk_id == left_num_chunk_ - 1 + ? active_count_ - chunk_id * size_per_chunk_ + : size_per_chunk_; + } else { + chunk_size = segment_->chunk_size(left_field_, chunk_id); + } + + for (int i = chunk_id == left_current_chunk_id_ + ? left_current_chunk_pos_ + : 0; + i < chunk_size; + ++i) { + if (++processed_rows >= batch_size_) { + left_current_chunk_id_ = chunk_id; + left_current_chunk_pos_ = i + 1; + } + } + } + processed_rows = 0; + for (int64_t chunk_id = right_current_chunk_id_; + chunk_id < right_num_chunk_; + ++chunk_id) { + auto chunk_size = 0; + if (segment_->type() == SegmentType::Growing) { + chunk_size = chunk_id == right_num_chunk_ - 1 + ? active_count_ - chunk_id * size_per_chunk_ + : size_per_chunk_; + } else { + chunk_size = segment_->chunk_size(right_field_, chunk_id); + } + + for (int i = chunk_id == right_current_chunk_id_ + ? right_current_chunk_pos_ + : 0; + i < chunk_size; + ++i) { + if (++processed_rows >= batch_size_) { + right_current_chunk_id_ = chunk_id; + right_current_chunk_pos_ = i + 1; + } + } + } + } + + void + MoveCursorForSingleChunk() { int64_t processed_rows = 0; for (int64_t chunk_id = current_chunk_id_; chunk_id < num_chunk_; ++chunk_id) { @@ -146,6 +226,24 @@ class PhyCompareFilterExpr : public Expr { } } + int64_t + GetCurrentRows() { + if (segment_->is_chunked()) { + auto current_rows = + is_left_indexed_ && segment_->type() == SegmentType::Sealed + ? left_current_chunk_pos_ + : segment_->num_rows_until_chunk(left_field_, + left_current_chunk_id_) + + left_current_chunk_pos_; + return current_rows; + } else { + return segment_->type() == SegmentType::Growing + ? current_chunk_id_ * size_per_chunk_ + + current_chunk_pos_ + : current_chunk_pos_; + } + } + private: int64_t GetNextBatchSize(); @@ -153,6 +251,13 @@ class PhyCompareFilterExpr : public Expr { bool IsStringExpr(); + template + MultipleChunkDataAccessor + GetChunkData(FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos); + template ChunkDataAccessor GetChunkData(FieldId field_id, int chunk_id, int data_barrier); @@ -160,6 +265,23 @@ class PhyCompareFilterExpr : public Expr { template int64_t ProcessBothDataChunks(FUNC func, TargetBitmapView res, ValTypes... values) { + if (segment_->is_chunked()) { + return ProcessBothDataChunksForMultipleChunk( + func, res, values...); + } else { + return ProcessBothDataChunksForSingleChunk( + func, res, values...); + } + } + + template + int64_t + ProcessBothDataChunksForSingleChunk(FUNC func, + TargetBitmapView res, + ValTypes... values) { int64_t processed_size = 0; for (size_t i = current_chunk_id_; i < num_chunk_; i++) { @@ -194,6 +316,56 @@ class PhyCompareFilterExpr : public Expr { return processed_size; } + template + int64_t + ProcessBothDataChunksForMultipleChunk(FUNC func, + TargetBitmapView res, + ValTypes... values) { + int64_t processed_size = 0; + + // only call this function when left and right are not indexed, so they have the same number of chunks + for (size_t i = left_current_chunk_id_; i < left_num_chunk_; i++) { + auto left_chunk = segment_->chunk_data(left_field_, i); + auto right_chunk = segment_->chunk_data(right_field_, i); + auto data_pos = + (i == left_current_chunk_id_) ? left_current_chunk_pos_ : 0; + auto size = 0; + if (segment_->type() == SegmentType::Growing) { + size = (i == (left_num_chunk_ - 1)) + ? (active_count_ % size_per_chunk_ == 0 + ? size_per_chunk_ - data_pos + : active_count_ % size_per_chunk_ - data_pos) + : size_per_chunk_ - data_pos; + } else { + size = segment_->chunk_size(left_field_, i) - data_pos; + } + + if (processed_size + size >= batch_size_) { + size = batch_size_ - processed_size; + } + + const T* left_data = left_chunk.data() + data_pos; + const U* right_data = right_chunk.data() + data_pos; + func(left_data, right_data, size, res + processed_size, values...); + processed_size += size; + + if (processed_size >= batch_size_) { + left_current_chunk_id_ = i; + left_current_chunk_pos_ = data_pos + size; + break; + } + } + + return processed_size; + } + + MultipleChunkDataAccessor + GetChunkData(DataType data_type, + FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos); + ChunkDataAccessor GetChunkData(DataType data_type, FieldId field_id, @@ -225,6 +397,12 @@ class PhyCompareFilterExpr : public Expr { bool is_right_indexed_; int64_t active_count_{0}; int64_t num_chunk_{0}; + int64_t left_num_chunk_{0}; + int64_t right_num_chunk_{0}; + int64_t left_current_chunk_id_{0}; + int64_t left_current_chunk_pos_{0}; + int64_t right_current_chunk_id_{0}; + int64_t right_current_chunk_pos_{0}; int64_t current_chunk_id_{0}; int64_t current_chunk_pos_{0}; int64_t size_per_chunk_{0}; diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index b80d376c78ede..25f90db4a249f 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -122,12 +122,43 @@ class SegmentExpr : public Expr { } // if index not include raw data, also need load data if (segment_->HasFieldData(field_id_)) { - num_data_chunk_ = upper_div(active_count_, size_per_chunk_); + if (segment_->is_chunked()) { + num_data_chunk_ = segment_->num_chunk_data(field_id_); + } else { + num_data_chunk_ = upper_div(active_count_, size_per_chunk_); + } } } void - MoveCursorForData() { + MoveCursorForDataMultipleChunk() { + int64_t processed_size = 0; + for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) { + auto data_pos = + (i == current_data_chunk_) ? current_data_chunk_pos_ : 0; + int64_t size = 0; + if (segment_->type() == SegmentType::Growing) { + size = (i == (num_data_chunk_ - 1) && + active_count_ % size_per_chunk_ != 0) + ? active_count_ % size_per_chunk_ - data_pos + : size_per_chunk_ - data_pos; + } else { + size = segment_->chunk_size(field_id_, i) - data_pos; + } + + size = std::min(size, batch_size_ - processed_size); + + processed_size += size; + if (processed_size >= batch_size_) { + current_data_chunk_ = i; + current_data_chunk_pos_ = data_pos + size; + break; + } + // } + } + } + void + MoveCursorForDataSingleChunk() { if (segment_->type() == SegmentType::Sealed) { auto size = std::min(active_count_ - current_data_chunk_pos_, batch_size_); @@ -154,6 +185,15 @@ class SegmentExpr : public Expr { } } + void + MoveCursorForData() { + if (segment_->is_chunked()) { + MoveCursorForDataMultipleChunk(); + } else { + MoveCursorForDataSingleChunk(); + } + } + void MoveCursorForIndex() { AssertInfo(segment_->type() == SegmentType::Sealed, @@ -183,7 +223,17 @@ class SegmentExpr : public Expr { auto current_chunk_pos = is_index_mode_ && use_index_ ? current_index_chunk_pos_ : current_data_chunk_pos_; - auto current_rows = current_chunk * size_per_chunk_ + current_chunk_pos; + auto current_rows = 0; + if (segment_->is_chunked()) { + current_rows = + is_index_mode_ && use_index_ && + segment_->type() == SegmentType::Sealed + ? current_chunk_pos + : segment_->num_rows_until_chunk(field_id_, current_chunk) + + current_chunk_pos; + } else { + current_rows = current_chunk * size_per_chunk_ + current_chunk_pos; + } return current_rows + batch_size_ >= active_count_ ? active_count_ - current_rows : batch_size_; @@ -220,7 +270,7 @@ class SegmentExpr : public Expr { template int64_t - ProcessDataChunks( + ProcessDataChunksForSingleChunk( FUNC func, std::function skip_func, TargetBitmapView res, @@ -266,6 +316,90 @@ class SegmentExpr : public Expr { return processed_size; } + template + int64_t + ProcessDataChunksForMultipleChunk( + FUNC func, + std::function skip_func, + TargetBitmapView res, + ValTypes... values) { + int64_t processed_size = 0; + + // if constexpr (std::is_same_v || + // std::is_same_v) { + // if (segment_->type() == SegmentType::Sealed) { + // return ProcessChunkForSealedSeg( + // func, skip_func, res, values...); + // } + // } + + for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) { + auto data_pos = + (i == current_data_chunk_) ? current_data_chunk_pos_ : 0; + + int64_t size = 0; + if (segment_->type() == SegmentType::Growing) { + size = (i == (num_data_chunk_ - 1)) + ? (active_count_ % size_per_chunk_ == 0 + ? size_per_chunk_ - data_pos + : active_count_ % size_per_chunk_ - data_pos) + : size_per_chunk_ - data_pos; + } else { + size = segment_->chunk_size(field_id_, i) - data_pos; + } + + size = std::min(size, batch_size_ - processed_size); + + auto& skip_index = segment_->GetSkipIndex(); + if (!skip_func || !skip_func(skip_index, field_id_, i)) { + bool is_seal = false; + if constexpr (std::is_same_v || + std::is_same_v) { + if (segment_->type() == SegmentType::Sealed) { + auto data_vec = segment_ + ->get_batch_views( + field_id_, i, data_pos, size) + .first; + func(data_vec.data(), + size, + res + processed_size, + values...); + is_seal = true; + } + } + if (!is_seal) { + auto chunk = segment_->chunk_data(field_id_, i); + const T* data = chunk.data() + data_pos; + func(data, size, res + processed_size, values...); + } + } + + processed_size += size; + if (processed_size >= batch_size_) { + current_data_chunk_ = i; + current_data_chunk_pos_ = data_pos + size; + break; + } + } + + return processed_size; + } + + template + int64_t + ProcessDataChunks( + FUNC func, + std::function skip_func, + TargetBitmapView res, + ValTypes... values) { + if (segment_->is_chunked()) { + return ProcessDataChunksForMultipleChunk( + func, skip_func, res, values...); + } else { + return ProcessDataChunksForSingleChunk( + func, skip_func, res, values...); + } + } int ProcessIndexOneChunk(TargetBitmap& result, diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index f53475e14e192..3b7c2116244fb 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -363,15 +363,27 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) { } // filtering by index, get candidates. - auto size_per_chunk = segment_->size_per_chunk(); - auto retrieve = [ size_per_chunk, this ](int64_t offset) -> auto { - auto chunk_idx = offset / size_per_chunk; - auto chunk_offset = offset % size_per_chunk; - const auto& chunk = - segment_->template chunk_data(field_id_, - chunk_idx); - return chunk.data() + chunk_offset; - }; + std::function retrieve; + if (segment_->is_chunked()) { + retrieve = [this](int64_t offset) -> const milvus::ArrayView* { + auto [chunk_idx, chunk_offset] = + segment_->get_chunk_by_offset(field_id_, offset); + const auto& chunk = + segment_->template chunk_data( + field_id_, chunk_idx); + return chunk.data() + chunk_offset; + }; + } else { + auto size_per_chunk = segment_->size_per_chunk(); + retrieve = [ size_per_chunk, this ](int64_t offset) -> auto { + auto chunk_idx = offset / size_per_chunk; + auto chunk_offset = offset % size_per_chunk; + const auto& chunk = + segment_->template chunk_data( + field_id_, chunk_idx); + return chunk.data() + chunk_offset; + }; + } // compare the array via the raw data. auto filter = [&retrieve, &val, reverse](size_t offset) -> bool { diff --git a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h index 61888ae2874ee..78833a8d34cd5 100644 --- a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h +++ b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h @@ -62,8 +62,7 @@ class SealedDataGetter : public DataGetter { const index::ScalarIndex* field_index_; public: - SealedDataGetter(const segcore::SegmentSealedImpl& segment, - FieldId& field_id) { + SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id) { if (segment.HasFieldData(field_id)) { if constexpr (std::is_same_v) { str_field_data_ = @@ -114,8 +113,8 @@ GetDataGetter(const segcore::SegmentInternalInterface& segment, dynamic_cast(&segment)) { return std::make_shared>(*growing_segment, fieldId); - } else if (const segcore::SegmentSealedImpl* sealed_segment = - dynamic_cast(&segment)) { + } else if (const segcore::SegmentSealed* sealed_segment = + dynamic_cast(&segment)) { return std::make_shared>(*sealed_segment, fieldId); } else { PanicInfo(UnexpectedError, diff --git a/internal/core/src/index/SkipIndex.cpp b/internal/core/src/index/SkipIndex.cpp index 20780a4bbc159..82357e5b6ff21 100644 --- a/internal/core/src/index/SkipIndex.cpp +++ b/internal/core/src/index/SkipIndex.cpp @@ -111,29 +111,4 @@ SkipIndex::LoadPrimitive(milvus::FieldId field_id, fieldChunkMetrics_[field_id].emplace(chunk_id, std::move(chunkMetrics)); } -void -SkipIndex::LoadString(milvus::FieldId field_id, - int64_t chunk_id, - const milvus::VariableColumn& var_column) { - int num_rows = var_column.NumRows(); - auto chunkMetrics = std::make_unique(); - if (num_rows > 0) { - auto info = ProcessStringFieldMetrics(var_column); - chunkMetrics->min_ = Metrics(std::move(info.min_)); - chunkMetrics->max_ = Metrics(std::move(info.max_)); - chunkMetrics->null_count_ = info.null_count_; - } - - chunkMetrics->hasValue_ = - chunkMetrics->null_count_ == num_rows ? false : true; - - std::unique_lock lck(mutex_); - if (fieldChunkMetrics_.count(field_id) == 0) { - fieldChunkMetrics_.insert(std::make_pair( - field_id, - std::unordered_map>())); - } - fieldChunkMetrics_[field_id].emplace(chunk_id, std::move(chunkMetrics)); -} - } // namespace milvus diff --git a/internal/core/src/index/SkipIndex.h b/internal/core/src/index/SkipIndex.h index 754a18b8dd1b4..92ee34fd53ea4 100644 --- a/internal/core/src/index/SkipIndex.h +++ b/internal/core/src/index/SkipIndex.h @@ -16,6 +16,7 @@ #include "common/Types.h" #include "log/Log.h" #include "mmap/Column.h" +#include "mmap/ChunkedColumn.h" namespace milvus { @@ -100,10 +101,32 @@ class SkipIndex { const bool* valid_data, int64_t count); + template void LoadString(milvus::FieldId field_id, int64_t chunk_id, - const milvus::VariableColumn& var_column); + const T& var_column) { + int num_rows = var_column.NumRows(); + auto chunkMetrics = std::make_unique(); + if (num_rows > 0) { + auto info = ProcessStringFieldMetrics(var_column); + chunkMetrics->min_ = Metrics(info.min_); + chunkMetrics->max_ = Metrics(info.max_); + chunkMetrics->null_count_ = info.null_count_; + } + + chunkMetrics->hasValue_ = + chunkMetrics->null_count_ == num_rows ? false : true; + + std::unique_lock lck(mutex_); + if (fieldChunkMetrics_.count(field_id) == 0) { + fieldChunkMetrics_.insert(std::make_pair( + field_id, + std::unordered_map>())); + } + fieldChunkMetrics_[field_id].emplace(chunk_id, std::move(chunkMetrics)); + } private: const FieldChunkMetrics& @@ -269,9 +292,9 @@ class SkipIndex { return {minValue, maxValue, null_count}; } + template metricInfo - ProcessStringFieldMetrics( - const milvus::VariableColumn& var_column) { + ProcessStringFieldMetrics(const T& var_column) { int num_rows = var_column.NumRows(); // find first not null value int64_t start = 0; diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h new file mode 100644 index 0000000000000..a069c9cf8c4a4 --- /dev/null +++ b/internal/core/src/mmap/ChunkedColumn.h @@ -0,0 +1,427 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/Array.h" +#include "common/Chunk.h" +#include "common/Common.h" +#include "common/EasyAssert.h" +#include "common/File.h" +#include "common/FieldMeta.h" +#include "common/FieldData.h" +#include "common/Span.h" +#include "fmt/format.h" +#include "log/Log.h" +#include "mmap/Utils.h" +#include "common/FieldData.h" +#include "common/FieldDataInterface.h" +#include "common/Array.h" +#include "knowhere/dataset.h" +#include "monitor/prometheus_client.h" +#include "storage/MmapChunkManager.h" + +#include "mmap/Column.h" +namespace milvus { + +class ChunkedColumnBase : public ColumnBase { + public: + ChunkedColumnBase() = default; + // memory mode ctor + ChunkedColumnBase(const FieldMeta& field_meta) { + if (field_meta.is_nullable()) { + nullable_ = true; + } + } + + virtual ~ChunkedColumnBase(){}; + + ChunkedColumnBase(ChunkedColumnBase&& column) noexcept + : nullable_(column.nullable_), num_rows_(column.num_rows_) { + column.num_rows_ = 0; + column.nullable_ = false; + } + + virtual void + AppendBatch(const FieldDataPtr data) override { + PanicInfo(ErrorCode::Unsupported, "AppendBatch not supported"); + } + + virtual const char* + Data(int chunk_id) const override { + chunks_[chunk_id]->Data(); + } + + virtual const char* + ValueAt(int64_t offset) const { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(offset); + return chunks_[chunk_id]->ValueAt(offset_in_chunk); + }; + + // MmappedData() returns the mmaped address + const char* + MmappedData() const override { + AssertInfo(chunks_.size() == 1, + "only support one chunk, but got {} chunk(s)", + chunks_.size()); + return chunks_[0]->Data(); + } + + bool + IsValid(size_t offset) const { + if (nullable_) { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(offset); + return chunks_[chunk_id]->isValid(offset_in_chunk); + } + return true; + } + + bool + IsNullable() const { + return nullable_; + } + + size_t + NumRows() const { + return num_rows_; + }; + + int64_t + num_chunks() const { + return chunks_.size(); + } + + virtual void + AddChunk(std::shared_ptr chunk) { + num_rows_until_chunk_.push_back(num_rows_); + num_rows_ += chunk->RowNums(); + chunks_.push_back(chunk); + } + + virtual uint64_t + DataByteSize() const override { + auto size = 0; + for (auto& chunk : chunks_) { + size += chunk->Size(); + } + return size; + } + + int64_t + chunk_row_nums(int64_t chunk_id) const { + return chunks_[chunk_id]->RowNums(); + } + + virtual SpanBase + Span(int64_t chunk_id) const = 0; + + // used for sequential access for search + virtual BufferView + GetBatchBuffer(int64_t start_offset, int64_t length) { + PanicInfo(ErrorCode::Unsupported, + "GetBatchBuffer only supported for VariableColumn"); + } + + virtual std::pair, FixedVector> + StringViews(int64_t chunk_id) const { + PanicInfo(ErrorCode::Unsupported, + "StringViews only supported for VariableColumn"); + } + + std::pair + GetChunkIDByOffset(int64_t offset) const { + int chunk_id = 0; + for (auto& chunk : chunks_) { + if (offset < chunk->RowNums()) { + break; + } + offset -= chunk->RowNums(); + chunk_id++; + } + return {chunk_id, offset}; + } + + int64_t + GetNumRowsUntilChunk(int64_t chunk_id) const { + return num_rows_until_chunk_[chunk_id]; + } + + protected: + bool nullable_{false}; + size_t num_rows_{0}; + std::vector num_rows_until_chunk_; + + private: + // void + // UpdateMetricWhenMmap(size_t mmaped_size) { + // UpdateMetricWhenMmap(mapping_type_, mmaped_size); + // } + + // void + // UpdateMetricWhenMmap(bool is_map_anonymous, size_t mapped_size) { + // if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) { + // milvus::monitor::internal_mmap_allocated_space_bytes_anon.Observe( + // mapped_size); + // milvus::monitor::internal_mmap_in_used_space_bytes_anon.Increment( + // mapped_size); + // } else { + // milvus::monitor::internal_mmap_allocated_space_bytes_file.Observe( + // mapped_size); + // milvus::monitor::internal_mmap_in_used_space_bytes_file.Increment( + // mapped_size); + // } + // } + + // void + // UpdateMetricWhenMunmap(size_t mapped_size) { + // if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) { + // milvus::monitor::internal_mmap_in_used_space_bytes_anon.Decrement( + // mapped_size); + // } else { + // milvus::monitor::internal_mmap_in_used_space_bytes_file.Decrement( + // mapped_size); + // } + // } + + private: + storage::MmapChunkManagerPtr mcm_ = nullptr; + + protected: + std::vector> chunks_; +}; + +class ChunkedColumn : public ChunkedColumnBase { + public: + // memory mode ctor + ChunkedColumn(const FieldMeta& field_meta) : ChunkedColumnBase(field_meta) { + } + + ChunkedColumn(ChunkedColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)) { + } + + ChunkedColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ~ChunkedColumn() override = default; + + virtual SpanBase + Span(int64_t chunk_id) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->Span(); + } +}; + +// when mmap is used, size_, data_ and num_rows_ of ColumnBase are used. +class ChunkedSparseFloatColumn : public ChunkedColumnBase { + public: + // memory mode ctor + ChunkedSparseFloatColumn(const FieldMeta& field_meta) + : ChunkedColumnBase(field_meta) { + } + + ChunkedSparseFloatColumn(ChunkedSparseFloatColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)), + dim_(column.dim_), + vec_(std::move(column.vec_)) { + } + + ChunkedSparseFloatColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ~ChunkedSparseFloatColumn() override = default; + + void + AddChunk(std::shared_ptr chunk) override { + num_rows_until_chunk_.push_back(num_rows_); + num_rows_ += chunk->RowNums(); + chunks_.push_back(chunk); + dim_ = std::max( + dim_, + std::dynamic_pointer_cast(chunk)->Dim()); + } + + // This is used to advice mmap prefetch, we don't currently support mmap for + // sparse float vector thus not implemented for now. + size_t + DataByteSize() const override { + PanicInfo(ErrorCode::Unsupported, + "ByteSize not supported for sparse float column"); + } + + SpanBase + Span(int64_t chunk_id) const override { + PanicInfo(ErrorCode::Unsupported, + "Span not supported for sparse float column"); + } + + int64_t + Dim() const { + return dim_; + } + + private: + int64_t dim_ = 0; + std::vector> vec_; +}; + +template +class ChunkedVariableColumn : public ChunkedColumnBase { + public: + using ViewType = + std::conditional_t, std::string_view, T>; + + // memory mode ctor + ChunkedVariableColumn(const FieldMeta& field_meta) + : ChunkedColumnBase(field_meta) { + } + + ChunkedVariableColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ChunkedVariableColumn(ChunkedVariableColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)) { + } + + ~ChunkedVariableColumn() override = default; + + SpanBase + Span(int64_t chunk_id) const override { + PanicInfo(ErrorCode::NotImplemented, + "span() interface is not implemented for variable column"); + } + + std::pair, FixedVector> + StringViews(int64_t chunk_id) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->StringViews(); + } + + BufferView + GetBatchBuffer(int64_t start_offset, int64_t length) override { + if (start_offset < 0 || start_offset > num_rows_ || + start_offset + length > num_rows_) { + PanicInfo(ErrorCode::OutOfRange, "index out of range"); + } + + int chunk_num = chunks_.size(); + + auto [start_chunk_id, start_offset_in_chunk] = + GetChunkIDByOffset(start_offset); + BufferView buffer_view; + + std::vector elements; + for (; start_chunk_id < chunk_num && length > 0; ++start_chunk_id) { + int chunk_size = chunks_[start_chunk_id]->RowNums(); + int len = + std::min(int64_t(chunk_size - start_offset_in_chunk), length); + elements.push_back( + {chunks_[start_chunk_id]->Data(), + std::dynamic_pointer_cast(chunks_[start_chunk_id]) + ->Offsets(), + start_offset_in_chunk, + start_offset_in_chunk + len}); + + start_offset_in_chunk = 0; + length -= len; + } + + buffer_view.data_ = elements; + return buffer_view; + } + + ViewType + operator[](const int i) const { + if (i < 0 || i > num_rows_) { + PanicInfo(ErrorCode::OutOfRange, "index out of range"); + } + + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i); + auto data = chunks_[chunk_id]->Data(); + auto offsets = std::dynamic_pointer_cast(chunks_[chunk_id]) + ->Offsets(); + auto len = offsets[offset_in_chunk + 1] - offsets[offset_in_chunk]; + + return ViewType(data + offsets[offset_in_chunk], len); + } + + std::string_view + RawAt(const int i) const { + return std::string_view((*this)[i]); + } +}; + +class ChunkedArrayColumn : public ChunkedColumnBase { + public: + // memory mode ctor + ChunkedArrayColumn(const FieldMeta& field_meta) + : ChunkedColumnBase(field_meta) { + } + + ChunkedArrayColumn(ChunkedArrayColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)) { + } + + ChunkedArrayColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ~ChunkedArrayColumn() override = default; + + SpanBase + Span(int64_t chunk_id) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id])->Span(); + } + + ArrayView + operator[](const int i) const { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i); + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->View(offset_in_chunk); + } + + ScalarArray + RawAt(const int i) const { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i); + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->View(offset_in_chunk) + .output_data(); + } +}; +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 7049c01f2e7b2..698097d30677e 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "common/Array.h" @@ -121,13 +122,27 @@ class ColumnBase { * */ public: - enum class MappingType { + virtual size_t + DataByteSize() const = 0; + + virtual const char* + MmappedData() const = 0; + + virtual void + AppendBatch(const FieldDataPtr data) = 0; + + virtual const char* + Data(int chunk_id = 0) const = 0; +}; +class SingleChunkColumnBase : public ColumnBase { + public: + enum MappingType { MAP_WITH_ANONYMOUS = 0, MAP_WITH_FILE = 1, MAP_WITH_MANAGER = 2, }; // MAP_WITH_ANONYMOUS ctor - ColumnBase(size_t reserve_rows, const FieldMeta& field_meta) + SingleChunkColumnBase(size_t reserve_rows, const FieldMeta& field_meta) : mapping_type_(MappingType::MAP_WITH_ANONYMOUS) { auto data_type = field_meta.get_data_type(); SetPaddingSize(data_type); @@ -161,11 +176,11 @@ class ColumnBase { // MAP_WITH_MANAGER ctor // reserve is number of bytes to allocate(without padding) - ColumnBase(size_t reserve, - const DataType& data_type, - storage::MmapChunkManagerPtr mcm, - storage::MmapChunkDescriptorPtr descriptor, - bool nullable) + SingleChunkColumnBase(size_t reserve, + const DataType& data_type, + storage::MmapChunkManagerPtr mcm, + storage::MmapChunkDescriptorPtr descriptor, + bool nullable) : mcm_(mcm), mmap_descriptor_(descriptor), num_rows_(0), @@ -193,7 +208,9 @@ class ColumnBase { // !!! The incoming file must have padding written at the end of the file. // Subclasses of variable length data type, if they used this constructor, // must set num_rows_ by themselves. - ColumnBase(const File& file, size_t size, const FieldMeta& field_meta) + SingleChunkColumnBase(const File& file, + size_t size, + const FieldMeta& field_meta) : nullable_(field_meta.is_nullable()), mapping_type_(MappingType::MAP_WITH_FILE) { auto data_type = field_meta.get_data_type(); @@ -229,7 +246,7 @@ class ColumnBase { UpdateMetricWhenMmap(size); } - virtual ~ColumnBase() { + virtual ~SingleChunkColumnBase() { if (data_ != nullptr) { size_t mapped_size = data_cap_size_ + padding_; if (mapping_type_ != MappingType::MAP_WITH_MANAGER) { @@ -246,17 +263,17 @@ class ColumnBase { } } - ColumnBase(ColumnBase&&) = delete; + SingleChunkColumnBase(ColumnBase&&) = delete; // Data() points at an addr that contains the elements virtual const char* - Data() const { + Data(int chunk_id = 0) const override { return data_; } // MmappedData() returns the mmaped address const char* - MmappedData() const { + MmappedData() const override { return data_; } @@ -481,28 +498,30 @@ class ColumnBase { storage::MmapChunkManagerPtr mcm_ = nullptr; }; -class Column : public ColumnBase { +class SingleChunkColumn : public SingleChunkColumnBase { public: // MAP_WITH_ANONYMOUS ctor - Column(size_t cap, const FieldMeta& field_meta) - : ColumnBase(cap, field_meta) { + SingleChunkColumn(size_t cap, const FieldMeta& field_meta) + : SingleChunkColumnBase(cap, field_meta) { } // MAP_WITH_FILE ctor - Column(const File& file, size_t size, const FieldMeta& field_meta) - : ColumnBase(file, size, field_meta) { + SingleChunkColumn(const File& file, + size_t size, + const FieldMeta& field_meta) + : SingleChunkColumnBase(file, size, field_meta) { } // MAP_WITH_MANAGER ctor - Column(size_t reserve, - const DataType& data_type, - storage::MmapChunkManagerPtr mcm, - storage::MmapChunkDescriptorPtr descriptor, - bool nullable) - : ColumnBase(reserve, data_type, mcm, descriptor, nullable) { + SingleChunkColumn(size_t reserve, + const DataType& data_type, + storage::MmapChunkManagerPtr mcm, + storage::MmapChunkDescriptorPtr descriptor, + bool nullable) + : SingleChunkColumnBase(reserve, data_type, mcm, descriptor, nullable) { } - ~Column() override = default; + ~SingleChunkColumn() override = default; SpanBase Span() const override { @@ -511,19 +530,18 @@ class Column : public ColumnBase { } }; -class SparseFloatColumn : public ColumnBase { +class SingleChunkSparseFloatColumn : public SingleChunkColumnBase { public: // MAP_WITH_ANONYMOUS ctor - SparseFloatColumn(const FieldMeta& field_meta) - : ColumnBase(/*reserve_rows= */ 0, field_meta) { + SingleChunkSparseFloatColumn(const FieldMeta& field_meta) + : SingleChunkColumnBase(0, field_meta) { } - // MAP_WITH_FILE ctor - SparseFloatColumn(const File& file, - size_t size, - const FieldMeta& field_meta, - std::vector&& indices = {}) - : ColumnBase(file, size, field_meta) { + SingleChunkSparseFloatColumn(const File& file, + size_t size, + const FieldMeta& field_meta, + std::vector&& indices = {}) + : SingleChunkColumnBase(file, size, field_meta) { AssertInfo(!indices.empty(), "SparseFloatColumn indices should not be empty."); num_rows_ = indices.size(); @@ -545,22 +563,18 @@ class SparseFloatColumn : public ColumnBase { dim_ = std::max(dim_, vec_.back().dim()); } } - // MAP_WITH_MANAGER ctor - SparseFloatColumn(storage::MmapChunkManagerPtr mcm, - storage::MmapChunkDescriptorPtr descriptor) - : ColumnBase(/*reserve= */ 0, - DataType::VECTOR_SPARSE_FLOAT, - mcm, - descriptor, - false) { + SingleChunkSparseFloatColumn(storage::MmapChunkManagerPtr mcm, + storage::MmapChunkDescriptorPtr descriptor) + : SingleChunkColumnBase( + 0, DataType::VECTOR_SPARSE_FLOAT, mcm, descriptor, false) { } - ~SparseFloatColumn() override = default; + ~SingleChunkSparseFloatColumn() override = default; // returned pointer points at a list of knowhere::sparse::SparseRow const char* - Data() const override { + Data(int chunk_id = 0) const override { return static_cast(static_cast(vec_.data())); } @@ -635,27 +649,29 @@ class SparseFloatColumn : public ColumnBase { }; template -class VariableColumn : public ColumnBase { +class SingleChunkVariableColumn : public SingleChunkColumnBase { public: using ViewType = std::conditional_t, std::string_view, T>; // MAP_WITH_ANONYMOUS ctor - VariableColumn(size_t reserve_rows, - const FieldMeta& field_meta, - size_t block_size) - : ColumnBase(reserve_rows, field_meta), block_size_(block_size) { + SingleChunkVariableColumn(size_t reserve_rows, + const FieldMeta& field_meta, + size_t block_size) + : SingleChunkColumnBase(reserve_rows, field_meta), + block_size_(block_size) { } // MAP_WITH_FILE ctor - VariableColumn(const File& file, - size_t size, - const FieldMeta& field_meta, - size_t block_size) - : ColumnBase(file, size, field_meta), block_size_(block_size) { + SingleChunkVariableColumn(const File& file, + size_t size, + const FieldMeta& field_meta, + size_t block_size) + : SingleChunkColumnBase(file, size, field_meta), + block_size_(block_size) { } - ~VariableColumn() override = default; + ~SingleChunkVariableColumn() override = default; SpanBase Span() const override { @@ -705,7 +721,9 @@ class VariableColumn : public ColumnBase { pos += sizeof(uint32_t) + size; } - return BufferView{pos, data_size_ - (pos - data_)}; + BufferView res; + res.data_ = std::pair{pos, 0}; + return res; } ViewType @@ -809,21 +827,23 @@ class VariableColumn : public ColumnBase { std::vector indices_{}; }; -class ArrayColumn : public ColumnBase { +class SingleChunkArrayColumn : public SingleChunkColumnBase { public: // MAP_WITH_ANONYMOUS ctor - ArrayColumn(size_t reserve_rows, const FieldMeta& field_meta) - : ColumnBase(reserve_rows, field_meta), + SingleChunkArrayColumn(size_t reserve_rows, const FieldMeta& field_meta) + : SingleChunkColumnBase(reserve_rows, field_meta), element_type_(field_meta.get_element_type()) { } // MAP_WITH_FILE ctor - ArrayColumn(const File& file, size_t size, const FieldMeta& field_meta) - : ColumnBase(file, size, field_meta), + SingleChunkArrayColumn(const File& file, + size_t size, + const FieldMeta& field_meta) + : SingleChunkColumnBase(file, size, field_meta), element_type_(field_meta.get_element_type()) { } - ~ArrayColumn() override = default; + ~SingleChunkArrayColumn() override = default; SpanBase Span() const override { @@ -853,12 +873,13 @@ class ArrayColumn : public ColumnBase { indices_.emplace_back(data_size_); element_indices_.emplace_back(array.get_offsets()); if (nullable_) { - return ColumnBase::Append(static_cast(array.data()), - valid_data, - array.byte_size()); + return SingleChunkColumnBase::Append( + static_cast(array.data()), + valid_data, + array.byte_size()); } - ColumnBase::Append(static_cast(array.data()), - array.byte_size()); + SingleChunkColumnBase::Append(static_cast(array.data()), + array.byte_size()); } void diff --git a/internal/core/src/mmap/Types.h b/internal/core/src/mmap/Types.h index c2f8c1a9e45f2..77c6ec3ee88bc 100644 --- a/internal/core/src/mmap/Types.h +++ b/internal/core/src/mmap/Types.h @@ -19,22 +19,30 @@ #include #include #include +#include "arrow/record_batch.h" #include "common/FieldData.h" +#include "storage/DataCodec.h" namespace milvus { struct FieldDataInfo { FieldDataInfo() { channel = std::make_shared(); + arrow_reader_channel = std::make_shared(); } FieldDataInfo(int64_t field_id, size_t row_count, - std::string mmap_dir_path = "") + std::string mmap_dir_path = "", + bool growing = true) : field_id(field_id), row_count(row_count), mmap_dir_path(std::move(mmap_dir_path)) { - channel = std::make_shared(); + if (growing) { + channel = std::make_shared(); + } else { + arrow_reader_channel = std::make_shared(); + } } FieldDataInfo(int64_t field_id, @@ -66,6 +74,18 @@ struct FieldDataInfo { channel->close(); } + FieldDataInfo( + int64_t field_id, + size_t row_count, + const std::vector>& batch) + : field_id(field_id), row_count(row_count) { + arrow_reader_channel = std::make_shared(); + for (auto& data : batch) { + arrow_reader_channel->push(data); + } + arrow_reader_channel->close(); + } + FieldDataInfo(int64_t field_id, size_t row_count, std::string mmap_dir_path, @@ -84,5 +104,6 @@ struct FieldDataInfo { size_t row_count; std::string mmap_dir_path; FieldDataChannelPtr channel; + std::shared_ptr arrow_reader_channel; }; } // namespace milvus diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index ba0554ca872f7..c519f480ca696 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -9,11 +9,15 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License +#include #include #include +#include "bitset/detail/element_wise.h" +#include "common/BitsetView.h" #include "common/QueryInfo.h" #include "common/Types.h" +#include "mmap/Column.h" #include "query/SearchBruteForce.h" #include "query/SearchOnSealed.h" #include "query/helper.h" @@ -73,6 +77,95 @@ SearchOnSealedIndex(const Schema& schema, search_result.unity_topK_ = topK; } +void +SearchOnSealed(const Schema& schema, + std::shared_ptr column, + const SearchInfo& search_info, + const void* query_data, + int64_t num_queries, + int64_t row_count, + const BitsetView& bitview, + SearchResult& result) { + auto field_id = search_info.field_id_; + auto& field = schema[field_id]; + + // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder. + auto dim = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT + ? 0 + : field.get_dim(); + + query::dataset::SearchDataset dataset{search_info.metric_type_, + num_queries, + search_info.topk_, + search_info.round_decimal_, + dim, + query_data}; + + auto data_type = field.get_data_type(); + CheckBruteForceSearchParam(field, search_info); + auto num_chunk = column->num_chunks(); + + SubSearchResult final_qr(num_queries, + search_info.topk_, + search_info.metric_type_, + search_info.round_decimal_); + + auto offset = 0; + for (int i = 0; i < num_chunk; ++i) { + auto vec_data = column->Data(i); + auto chunk_size = column->chunk_row_nums(i); + const uint8_t* bitset_ptr = nullptr; + bool aligned = false; + if ((offset & 0x7) == 0) { + bitset_ptr = bitview.data() + (offset >> 3); + aligned = true; + } else { + char* bitset_data = new char[(chunk_size + 7) / 8]; + std::fill(bitset_data, bitset_data + sizeof(bitset_data), 0); + bitset::detail::ElementWiseBitsetPolicy::op_copy( + reinterpret_cast(bitview.data()), + offset, + bitset_data, + 0, + chunk_size); + bitset_ptr = reinterpret_cast(bitset_data); + } + offset += chunk_size; + BitsetView bitset_view(bitset_ptr, chunk_size); + + if (search_info.group_by_field_id_.has_value()) { + auto sub_qr = BruteForceSearchIterators(dataset, + vec_data, + row_count, + search_info, + bitset_view, + data_type); + final_qr.merge(sub_qr); + } else { + auto sub_qr = BruteForceSearch(dataset, + vec_data, + row_count, + search_info, + bitset_view, + data_type); + final_qr.merge(sub_qr); + } + + if (!aligned) { + delete[] bitset_ptr; + } + } + if (search_info.group_by_field_id_.has_value()) { + result.AssembleChunkVectorIterators( + num_queries, 1, -1, final_qr.chunk_iterators()); + } else { + result.distances_ = std::move(final_qr.mutable_distances()); + result.seg_offsets_ = std::move(final_qr.mutable_seg_offsets()); + } + result.unity_topK_ = dataset.topk; + result.total_nq_ = dataset.num_queries; +} + void SearchOnSealed(const Schema& schema, const void* vec_data, diff --git a/internal/core/src/query/SearchOnSealed.h b/internal/core/src/query/SearchOnSealed.h index 73528c4b60fb5..a9261c793f88e 100644 --- a/internal/core/src/query/SearchOnSealed.h +++ b/internal/core/src/query/SearchOnSealed.h @@ -27,6 +27,16 @@ SearchOnSealedIndex(const Schema& schema, const BitsetView& view, SearchResult& search_result); +void +SearchOnSealed(const Schema& schema, + std::shared_ptr column, + const SearchInfo& search_info, + const void* query_data, + int64_t num_queries, + int64_t row_count, + const BitsetView& bitset, + SearchResult& result); + void SearchOnSealed(const Schema& schema, const void* vec_data, diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp new file mode 100644 index 0000000000000..a95ae1ecd1665 --- /dev/null +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -0,0 +1,2222 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include "ChunkedSegmentSealedImpl.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Utils.h" +#include "Types.h" +#include "common/Array.h" +#include "common/Chunk.h" +#include "common/ChunkWriter.h" +#include "common/Consts.h" +#include "common/EasyAssert.h" +#include "common/FieldData.h" +#include "common/FieldMeta.h" +#include "common/File.h" +#include "common/Json.h" +#include "common/LoadInfo.h" +#include "common/Tracer.h" +#include "common/Types.h" +#include "google/protobuf/message_lite.h" +#include "index/VectorMemIndex.h" +#include "mmap/ChunkedColumn.h" +#include "mmap/Utils.h" +#include "mmap/Types.h" +#include "log/Log.h" +#include "pb/schema.pb.h" +#include "query/ScalarIndex.h" +#include "query/SearchBruteForce.h" +#include "query/SearchOnSealed.h" +#include "storage/DataCodec.h" +#include "storage/Util.h" +#include "storage/ThreadPools.h" +#include "storage/MmapManager.h" + +namespace milvus::segcore { + +static inline void +set_bit(BitsetType& bitset, FieldId field_id, bool flag = true) { + auto pos = field_id.get() - START_USER_FIELDID; + AssertInfo(pos >= 0, "invalid field id"); + bitset[pos] = flag; +} + +static inline bool +get_bit(const BitsetType& bitset, FieldId field_id) { + auto pos = field_id.get() - START_USER_FIELDID; + AssertInfo(pos >= 0, "invalid field id"); + + return bitset[pos]; +} + +void +ChunkedSegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) { + // print(info); + // NOTE: lock only when data is ready to avoid starvation + auto field_id = FieldId(info.field_id); + auto& field_meta = schema_->operator[](field_id); + + if (field_meta.is_vector()) { + LoadVecIndex(info); + } else { + LoadScalarIndex(info); + } +} + +void +ChunkedSegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) { + // NOTE: lock only when data is ready to avoid starvation + auto field_id = FieldId(info.field_id); + auto& field_meta = schema_->operator[](field_id); + + AssertInfo(info.index_params.count("metric_type"), + "Can't get metric_type in index_params"); + auto metric_type = info.index_params.at("metric_type"); + auto row_count = info.index->Count(); + AssertInfo(row_count > 0, "Index count is 0"); + + std::unique_lock lck(mutex_); + AssertInfo( + !get_bit(index_ready_bitset_, field_id), + "vector index has been exist at " + std::to_string(field_id.get())); + if (num_rows_.has_value()) { + AssertInfo(num_rows_.value() == row_count, + "field (" + std::to_string(field_id.get()) + + ") data has different row count (" + + std::to_string(row_count) + + ") than other column's row count (" + + std::to_string(num_rows_.value()) + ")"); + } + LOG_INFO( + "Before setting field_bit for field index, fieldID:{}. segmentID:{}, ", + info.field_id, + id_); + if (get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } else if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } + update_row_count(row_count); + vector_indexings_.append_field_indexing( + field_id, + metric_type, + std::move(const_cast(info).index)); + set_bit(index_ready_bitset_, field_id, true); + LOG_INFO("Has load vec index done, fieldID:{}. segmentID:{}, ", + info.field_id, + id_); +} + +void +ChunkedSegmentSealedImpl::WarmupChunkCache(const FieldId field_id, + bool mmap_enabled) { + auto& field_meta = schema_->operator[](field_id); + AssertInfo(field_meta.is_vector(), "vector field is not vector type"); + + if (!get_bit(index_ready_bitset_, field_id) && + !get_bit(binlog_index_bitset_, field_id)) { + return; + } + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector index is not ready"); + auto field_indexing = vector_indexings_.get_field_indexing(field_id); + auto vec_index = + dynamic_cast(field_indexing->indexing_.get()); + AssertInfo(vec_index, "invalid vector indexing"); + + auto it = field_data_info_.field_infos.find(field_id.get()); + AssertInfo(it != field_data_info_.field_infos.end(), + "cannot find binlog file for field: {}, seg: {}", + field_id.get(), + id_); + auto field_info = it->second; + + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + for (const auto& data_path : field_info.insert_files) { + auto column = cc->Read(data_path, mmap_descriptor_, field_meta); + } +} + +void +ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) { + // NOTE: lock only when data is ready to avoid starvation + auto field_id = FieldId(info.field_id); + auto& field_meta = schema_->operator[](field_id); + + auto row_count = info.index->Count(); + AssertInfo(row_count > 0, "Index count is 0"); + + std::unique_lock lck(mutex_); + AssertInfo( + !get_bit(index_ready_bitset_, field_id), + "scalar index has been exist at " + std::to_string(field_id.get())); + if (num_rows_.has_value()) { + AssertInfo(num_rows_.value() == row_count, + "field (" + std::to_string(field_id.get()) + + ") data has different row count (" + + std::to_string(row_count) + + ") than other column's row count (" + + std::to_string(num_rows_.value()) + ")"); + } + + scalar_indexings_[field_id] = + std::move(const_cast(info).index); + // reverse pk from scalar index and set pks to offset + if (schema_->get_primary_field_id() == field_id) { + AssertInfo(field_id.get() != -1, "Primary key is -1"); + switch (field_meta.get_data_type()) { + case DataType::INT64: { + auto int64_index = dynamic_cast*>( + scalar_indexings_[field_id].get()); + if (!is_sorted_by_pk_ && insert_record_.empty_pks() && + int64_index->HasRawData()) { + for (int i = 0; i < row_count; ++i) { + insert_record_.insert_pk(int64_index->Reverse_Lookup(i), + i); + } + insert_record_.seal_pks(); + } + break; + } + case DataType::VARCHAR: { + auto string_index = + dynamic_cast*>( + scalar_indexings_[field_id].get()); + if (!is_sorted_by_pk_ && insert_record_.empty_pks() && + string_index->HasRawData()) { + for (int i = 0; i < row_count; ++i) { + insert_record_.insert_pk( + string_index->Reverse_Lookup(i), i); + } + insert_record_.seal_pks(); + } + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported primary key type {}", + field_meta.get_data_type())); + } + } + } + + set_bit(index_ready_bitset_, field_id, true); + update_row_count(row_count); + // release field column if the index contains raw data + if (scalar_indexings_[field_id]->HasRawData() && + get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } + + lck.unlock(); +} + +void +ChunkedSegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) { + // NOTE: lock only when data is ready to avoid starvation + // only one field for now, parallel load field data in golang + size_t num_rows = storage::GetNumRowsForLoadInfo(load_info); + + for (auto& [id, info] : load_info.field_infos) { + AssertInfo(info.row_count > 0, "The row count of field data is 0"); + + auto field_id = FieldId(id); + auto insert_files = info.insert_files; + std::sort(insert_files.begin(), + insert_files.end(), + [](const std::string& a, const std::string& b) { + return std::stol(a.substr(a.find_last_of('/') + 1)) < + std::stol(b.substr(b.find_last_of('/') + 1)); + }); + + auto field_data_info = FieldDataInfo( + field_id.get(), num_rows, load_info.mmap_dir_path, false); + LOG_INFO("segment {} loads field {} with num_rows {}", + this->get_segment_id(), + field_id.get(), + num_rows); + + auto parallel_degree = static_cast( + DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE); + field_data_info.arrow_reader_channel->set_capacity(parallel_degree * 2); + auto& pool = + ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE); + pool.Submit(LoadArrowReaderFromRemote, + insert_files, + field_data_info.arrow_reader_channel); + + LOG_INFO("segment {} submits load field {} task to thread pool", + this->get_segment_id(), + field_id.get()); + bool use_mmap = false; + if (!info.enable_mmap || + SystemProperty::Instance().IsSystem(field_id)) { + LoadFieldData(field_id, field_data_info); + } else { + MapFieldData(field_id, field_data_info); + use_mmap = true; + } + LOG_INFO("segment {} loads field {} mmap {} done", + this->get_segment_id(), + field_id.get(), + use_mmap); + } +} + +void +ChunkedSegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { + auto num_rows = data.row_count; + if (SystemProperty::Instance().IsSystem(field_id)) { + auto system_field_type = + SystemProperty::Instance().GetSystemFieldType(field_id); + if (system_field_type == SystemFieldType::Timestamp) { + std::vector timestamps(num_rows); + int64_t offset = 0; + FieldMeta field_meta( + FieldName(""), FieldId(0), DataType::INT64, false); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = std::dynamic_pointer_cast( + create_chunk(field_meta, 1, r->reader)); + std::copy_n(static_cast(chunk->Span().data()), + chunk->Span().row_count(), + timestamps.data() + offset); + offset += chunk->Span().row_count(); + } + + // for (auto& data : field_data) { + // int64_t row_count = data->get_num_rows(); + // std::copy_n(static_cast(data->Data()), + // row_count, + // timestamps.data() + offset); + // offset += row_count; + // } + + TimestampIndex index; + auto min_slice_length = num_rows < 4096 ? 1 : 4096; + auto meta = GenerateFakeSlices( + timestamps.data(), num_rows, min_slice_length); + index.set_length_meta(std::move(meta)); + // todo ::opt to avoid copy timestamps from field data + index.build_with(timestamps.data(), num_rows); + + // use special index + std::unique_lock lck(mutex_); + AssertInfo(insert_record_.timestamps_.empty(), "already exists"); + insert_record_.timestamps_.set_data_raw( + 0, timestamps.data(), timestamps.size()); + insert_record_.timestamp_index_ = std::move(index); + AssertInfo(insert_record_.timestamps_.num_chunk() == 1, + "num chunk not equal to 1 for sealed segment"); + stats_.mem_size += sizeof(Timestamp) * data.row_count; + } else { + AssertInfo(system_field_type == SystemFieldType::RowId, + "System field type of id column is not RowId"); + // Consume rowid field data but not really load it + // storage::CollectFieldDataChannel(data.arrow_reader_channel); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + } + } + ++system_ready_count_; + } else { + // prepare data + auto& field_meta = (*schema_)[field_id]; + auto data_type = field_meta.get_data_type(); + + // Don't allow raw data and index exist at the same time + // AssertInfo(!get_bit(index_ready_bitset_, field_id), + // "field data can't be loaded when indexing exists"); + auto get_block_size = [&]() -> size_t { + return schema_->get_primary_field_id() == field_id + ? DEFAULT_PK_VRCOL_BLOCK_SIZE + : DEFAULT_MEM_VRCOL_BLOCK_SIZE; + }; + + std::shared_ptr column{}; + if (IsVariableDataType(data_type)) { + int64_t field_data_size = 0; + switch (data_type) { + case milvus::DataType::STRING: + case milvus::DataType::VARCHAR: { + auto var_column = + std::make_shared>( + field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = create_chunk(field_meta, 1, r->reader); + var_column->AddChunk(chunk); + } + // var_column->Seal(); + field_data_size = var_column->DataByteSize(); + stats_.mem_size += var_column->DataByteSize(); + LoadStringSkipIndex(field_id, 0, *var_column); + column = std::move(var_column); + break; + } + case milvus::DataType::JSON: { + auto var_column = + std::make_shared>( + field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = create_chunk(field_meta, 1, r->reader); + var_column->AddChunk(chunk); + } + // var_column->Seal(); + stats_.mem_size += var_column->DataByteSize(); + field_data_size = var_column->DataByteSize(); + column = std::move(var_column); + break; + } + case milvus::DataType::ARRAY: { + auto var_column = + std::make_shared(field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + // for (auto i = 0; i < field_data->get_num_rows(); i++) { + // auto rawValue = field_data->RawValue(i); + // auto array = + // static_cast(rawValue); + // if (field_data->IsNullable()) { + // var_column->Append(*array, + // field_data->is_valid(i)); + // } else { + // var_column->Append(*array); + // } + + // // we stores the offset for each array element, so there is a additional uint64_t for each array element + // field_data_size = + // array->byte_size() + sizeof(uint64_t); + // stats_.mem_size += + // array->byte_size() + sizeof(uint64_t); + // } + + auto chunk = create_chunk(field_meta, 1, r->reader); + var_column->AddChunk(chunk); + } + // var_column->Seal(); + column = std::move(var_column); + break; + } + case milvus::DataType::VECTOR_SPARSE_FLOAT: { + auto col = + std::make_shared(field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = create_chunk(field_meta, 1, r->reader); + col->AddChunk(chunk); + } + column = std::move(col); + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type", data_type)); + } + } + + // update average row data size + SegmentInternalInterface::set_field_avg_size( + field_id, num_rows, field_data_size); + } else { + column = std::make_shared(field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = + create_chunk(field_meta, + IsVectorDataType(field_meta.get_data_type()) + ? field_meta.get_dim() + : 1, + r->reader); + // column->AppendBatch(field_data); + // stats_.mem_size += field_data->Size(); + column->AddChunk(chunk); + } + + auto num_chunk = column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + LoadPrimitiveSkipIndex(field_id, + i, + data_type, + column->Span(i).data(), + column->Span(i).valid_data(), + column->Span(i).row_count()); + } + } + + AssertInfo(column->NumRows() == num_rows, + fmt::format("data lost while loading column {}: loaded " + "num rows {} but expected {}", + data.field_id, + column->NumRows(), + num_rows)); + + { + std::unique_lock lck(mutex_); + fields_.emplace(field_id, column); + } + + // set pks to offset + if (schema_->get_primary_field_id() == field_id && !is_sorted_by_pk_) { + AssertInfo(field_id.get() != -1, "Primary key is -1"); + AssertInfo(insert_record_.empty_pks(), "already exists"); + insert_record_.insert_pks(data_type, column); + insert_record_.seal_pks(); + } + + bool use_temp_index = false; + { + // update num_rows to build temperate binlog index + std::unique_lock lck(mutex_); + update_row_count(num_rows); + } + + if (generate_interim_index(field_id)) { + std::unique_lock lck(mutex_); + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + use_temp_index = true; + } + + if (!use_temp_index) { + std::unique_lock lck(mutex_); + set_bit(field_data_ready_bitset_, field_id, true); + } + } + { + std::unique_lock lck(mutex_); + update_row_count(num_rows); + } +} + +void +ChunkedSegmentSealedImpl::MapFieldData(const FieldId field_id, + FieldDataInfo& data) { + auto filepath = std::filesystem::path(data.mmap_dir_path) / + std::to_string(get_segment_id()) / + std::to_string(field_id.get()); + auto dir = filepath.parent_path(); + std::filesystem::create_directories(dir); + + auto file = File::Open(filepath.string(), O_CREAT | O_TRUNC | O_RDWR); + + auto& field_meta = (*schema_)[field_id]; + auto data_type = field_meta.get_data_type(); + + // write the field data to disk + uint64_t total_written = 0; + std::vector indices{}; + std::vector> element_indices{}; + // FixedVector valid_data{}; + std::shared_ptr r; + + size_t file_offset = 0; + std::vector> chunks; + while (data.arrow_reader_channel->pop(r)) { + // WriteFieldData(file, + // data_type, + // field_data, + // total_written, + // indices, + // element_indices, + // valid_data); + auto chunk = create_chunk(field_meta, + IsVectorDataType(field_meta.get_data_type()) + ? field_meta.get_dim() + : 1, + file, + file_offset, + r->reader); + file_offset += chunk->Size(); + chunks.push_back(chunk); + } + // WriteFieldPadding(file, data_type, total_written); + std::shared_ptr column{}; + auto num_rows = data.row_count; + if (IsVariableDataType(data_type)) { + switch (data_type) { + case milvus::DataType::STRING: + case milvus::DataType::VARCHAR: { + // auto var_column = std::make_shared>( + // file, + // total_written, + // field_meta, + // DEFAULT_MMAP_VRCOL_BLOCK_SIZE); + auto var_column = + std::make_shared>( + chunks); + // var_column->Seal(std::move(indices)); + column = std::move(var_column); + break; + } + case milvus::DataType::JSON: { + auto var_column = + std::make_shared>( + chunks); + // var_column->Seal(std::move(indices)); + column = std::move(var_column); + break; + } + case milvus::DataType::ARRAY: { + auto arr_column = std::make_shared(chunks); + // arr_column->Seal(std::move(indices), + // std::move(element_indices)); + column = std::move(arr_column); + break; + } + case milvus::DataType::VECTOR_SPARSE_FLOAT: { + auto sparse_column = + std::make_shared(chunks); + // sparse_column->Seal(std::move(indices)); + column = std::move(sparse_column); + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", data_type)); + } + } + } else { + column = std::make_shared(chunks); + } + + // column->SetValidData(std::move(valid_data)); + + { + std::unique_lock lck(mutex_); + fields_.emplace(field_id, column); + mmap_fields_.insert(field_id); + } + + auto ok = unlink(filepath.c_str()); + AssertInfo(ok == 0, + fmt::format("failed to unlink mmap data file {}, err: {}", + filepath.c_str(), + strerror(errno))); + + // set pks to offset + if (schema_->get_primary_field_id() == field_id && !is_sorted_by_pk_) { + AssertInfo(field_id.get() != -1, "Primary key is -1"); + AssertInfo(insert_record_.empty_pks(), "already exists"); + insert_record_.insert_pks(data_type, column); + insert_record_.seal_pks(); + } + + std::unique_lock lck(mutex_); + set_bit(field_data_ready_bitset_, field_id, true); +} + +void +ChunkedSegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) { + AssertInfo(info.row_count > 0, "The row count of deleted record is 0"); + AssertInfo(info.primary_keys, "Deleted primary keys is null"); + AssertInfo(info.timestamps, "Deleted timestamps is null"); + // step 1: get pks and timestamps + auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(field_id.get() != -1, "Primary key is -1"); + auto& field_meta = schema_->operator[](field_id); + int64_t size = info.row_count; + std::vector pks(size); + ParsePksFromIDs(pks, field_meta.get_data_type(), *info.primary_keys); + auto timestamps = reinterpret_cast(info.timestamps); + + std::vector> ordering(size); + for (int i = 0; i < size; i++) { + ordering[i] = std::make_tuple(timestamps[i], pks[i]); + } + + if (!insert_record_.empty_pks()) { + auto end = std::remove_if( + ordering.begin(), + ordering.end(), + [&](const std::tuple& record) { + return !insert_record_.contain(std::get<1>(record)); + }); + size = end - ordering.begin(); + ordering.resize(size); + } + + // all record filtered + if (size == 0) { + return; + } + + std::sort(ordering.begin(), ordering.end()); + std::vector sort_pks(size); + std::vector sort_timestamps(size); + + for (int i = 0; i < size; i++) { + auto [t, pk] = ordering[i]; + sort_timestamps[i] = t; + sort_pks[i] = pk; + } + + deleted_record_.push(sort_pks, sort_timestamps.data()); +} + +void +ChunkedSegmentSealedImpl::AddFieldDataInfoForSealed( + const LoadFieldDataInfo& field_data_info) { + // copy assignment + field_data_info_ = field_data_info; +} + +// internal API: support scalar index only +int64_t +ChunkedSegmentSealedImpl::num_chunk_index(FieldId field_id) const { + auto& field_meta = schema_->operator[](field_id); + if (field_meta.is_vector()) { + return int64_t(vector_indexings_.is_ready(field_id)); + } + + return scalar_indexings_.count(field_id); +} + +int64_t +ChunkedSegmentSealedImpl::num_chunk_data(FieldId field_id) const { + return fields_.at(field_id)->num_chunks(); +} + +int64_t +ChunkedSegmentSealedImpl::num_chunk(FieldId field_id) const { + return get_bit(field_data_ready_bitset_, field_id) + ? fields_.find(field_id) != fields_.end() + ? fields_.at(field_id)->num_chunks() + : 1 + : 0; +} + +int64_t +ChunkedSegmentSealedImpl::size_per_chunk() const { + return get_row_count(); +} + +int64_t +ChunkedSegmentSealedImpl::chunk_size(FieldId field_id, int64_t chunk_id) const { + return get_bit(field_data_ready_bitset_, field_id) + ? fields_.find(field_id) != fields_.end() + ? fields_.at(field_id)->chunk_row_nums(chunk_id) + : num_rows_.value() + : 0; +} + +std::pair +ChunkedSegmentSealedImpl::get_chunk_by_offset(FieldId field_id, + int64_t offset) const { + return fields_.at(field_id)->GetChunkIDByOffset(offset); +} + +int64_t +ChunkedSegmentSealedImpl::num_rows_until_chunk(FieldId field_id, + int64_t chunk_id) const { + return fields_.at(field_id)->GetNumRowsUntilChunk(chunk_id); +} + +std::pair> +ChunkedSegmentSealedImpl::get_chunk_buffer(FieldId field_id, + int64_t chunk_id, + int64_t start_offset, + int64_t length) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + FixedVector valid_data; + if (field_data->IsNullable()) { + valid_data.reserve(length); + for (int i = 0; i < length; i++) { + valid_data.push_back(field_data->IsValid(start_offset + i)); + } + } + return std::make_pair(field_data->GetBatchBuffer(start_offset, length), + valid_data); + } + PanicInfo(ErrorCode::UnexpectedError, + "get_chunk_buffer only used for variable column field"); +} + +bool +ChunkedSegmentSealedImpl::is_mmap_field(FieldId field_id) const { + std::shared_lock lck(mutex_); + return mmap_fields_.find(field_id) != mmap_fields_.end(); +} + +SpanBase +ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id, + int64_t chunk_id) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->Span(chunk_id); + } + auto field_data = insert_record_.get_data_base(field_id); + AssertInfo(field_data->num_chunk() == 1, + "num chunk not equal to 1 for sealed segment"); + // system field + return field_data->get_span_base(0); +} + +std::pair, FixedVector> +ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id, + int64_t chunk_id) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->StringViews(chunk_id); + } + PanicInfo(ErrorCode::UnexpectedError, + "chunk_view_impl only used for variable column field "); +} + +const index::IndexBase* +ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id, + int64_t chunk_id) const { + AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(), + "Cannot find scalar_indexing with field_id: " + + std::to_string(field_id.get())); + auto ptr = scalar_indexings_.at(field_id).get(); + return ptr; +} + +int64_t +ChunkedSegmentSealedImpl::get_row_count() const { + std::shared_lock lck(mutex_); + return num_rows_.value_or(0); +} + +int64_t +ChunkedSegmentSealedImpl::get_deleted_count() const { + std::shared_lock lck(mutex_); + return deleted_record_.size(); +} + +const Schema& +ChunkedSegmentSealedImpl::get_schema() const { + return *schema_; +} + +void +ChunkedSegmentSealedImpl::mask_with_delete(BitsetTypeView& bitset, + int64_t ins_barrier, + Timestamp timestamp) const { + auto del_barrier = get_barrier(get_deleted_record(), timestamp); + if (del_barrier == 0) { + return; + } + + auto bitmap_holder = std::shared_ptr(); + + if (!is_sorted_by_pk_) { + bitmap_holder = get_deleted_bitmap(del_barrier, + ins_barrier, + deleted_record_, + insert_record_, + timestamp); + } else { + bitmap_holder = get_deleted_bitmap_s( + del_barrier, ins_barrier, deleted_record_, timestamp); + } + if (!bitmap_holder || !bitmap_holder->bitmap_ptr) { + return; + } + auto& delete_bitset = *bitmap_holder->bitmap_ptr; + AssertInfo( + delete_bitset.size() == bitset.size(), + fmt::format( + "Deleted bitmap size:{} not equal to filtered bitmap size:{}", + delete_bitset.size(), + bitset.size())); + bitset |= delete_bitset; +} + +void +ChunkedSegmentSealedImpl::vector_search(SearchInfo& search_info, + const void* query_data, + int64_t query_count, + Timestamp timestamp, + const BitsetView& bitset, + SearchResult& output) const { + AssertInfo(is_system_field_ready(), "System field is not ready"); + auto field_id = search_info.field_id_; + auto& field_meta = schema_->operator[](field_id); + + AssertInfo(field_meta.is_vector(), + "The meta type of vector field is not vector type"); + if (get_bit(binlog_index_bitset_, field_id)) { + AssertInfo( + vec_binlog_config_.find(field_id) != vec_binlog_config_.end(), + "The binlog params is not generate."); + auto binlog_search_info = + vec_binlog_config_.at(field_id)->GetSearchConf(search_info); + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector indexes isn't ready for field " + + std::to_string(field_id.get())); + query::SearchOnSealedIndex(*schema_, + vector_indexings_, + binlog_search_info, + query_data, + query_count, + bitset, + output); + milvus::tracer::AddEvent( + "finish_searching_vector_temperate_binlog_index"); + } else if (get_bit(index_ready_bitset_, field_id)) { + AssertInfo(vector_indexings_.is_ready(field_id), + "vector indexes isn't ready for field " + + std::to_string(field_id.get())); + query::SearchOnSealedIndex(*schema_, + vector_indexings_, + search_info, + query_data, + query_count, + bitset, + output); + milvus::tracer::AddEvent("finish_searching_vector_index"); + } else { + AssertInfo( + get_bit(field_data_ready_bitset_, field_id), + "Field Data is not loaded: " + std::to_string(field_id.get())); + AssertInfo(num_rows_.has_value(), "Can't get row count value"); + auto row_count = num_rows_.value(); + auto vec_data = fields_.at(field_id); + query::SearchOnSealed(*schema_, + vec_data, + search_info, + query_data, + query_count, + row_count, + bitset, + output); + milvus::tracer::AddEvent("finish_searching_vector_data"); + } +} + +std::tuple +ChunkedSegmentSealedImpl::GetFieldDataPath(FieldId field_id, + int64_t offset) const { + auto offset_in_binlog = offset; + auto data_path = std::string(); + auto it = field_data_info_.field_infos.find(field_id.get()); + AssertInfo(it != field_data_info_.field_infos.end(), + fmt::format("cannot find binlog file for field: {}, seg: {}", + field_id.get(), + id_)); + auto field_info = it->second; + + for (auto i = 0; i < field_info.insert_files.size(); i++) { + if (offset_in_binlog < field_info.entries_nums[i]) { + data_path = field_info.insert_files[i]; + break; + } else { + offset_in_binlog -= field_info.entries_nums[i]; + } + } + return {data_path, offset_in_binlog}; +} + +std::tuple< + std::string, + std::shared_ptr< + ChunkedColumnBase>> static ReadFromChunkCache(const storage:: + ChunkCachePtr& cc, + const std::string& + data_path, + const storage:: + MmapChunkDescriptorPtr& + descriptor, + const FieldMeta& + field_meta) { + auto column = cc->Read(data_path, descriptor, field_meta); + cc->Prefetch(data_path); + return {data_path, std::dynamic_pointer_cast(column)}; +} + +std::unique_ptr +ChunkedSegmentSealedImpl::get_vector(FieldId field_id, + const int64_t* ids, + int64_t count) const { + auto& field_meta = schema_->operator[](field_id); + AssertInfo(field_meta.is_vector(), "vector field is not vector type"); + + if (!get_bit(index_ready_bitset_, field_id) && + !get_bit(binlog_index_bitset_, field_id)) { + return fill_with_empty(field_id, count); + } + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector index is not ready"); + auto field_indexing = vector_indexings_.get_field_indexing(field_id); + auto vec_index = + dynamic_cast(field_indexing->indexing_.get()); + AssertInfo(vec_index, "invalid vector indexing"); + + auto index_type = vec_index->GetIndexType(); + auto metric_type = vec_index->GetMetricType(); + auto has_raw_data = vec_index->HasRawData(); + + if (has_raw_data && !TEST_skip_index_for_retrieve_) { + // If index has raw data, get vector from memory. + auto ids_ds = GenIdsDataset(count, ids); + if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) { + auto res = vec_index->GetSparseVector(ids_ds); + return segcore::CreateVectorDataArrayFrom( + res.get(), count, field_meta); + } else { + // dense vector: + auto vector = vec_index->GetVector(ids_ds); + return segcore::CreateVectorDataArrayFrom( + vector.data(), count, field_meta); + } + } + + // If index doesn't have raw data, get vector from chunk cache. + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + + // group by data_path + auto id_to_data_path = + std::unordered_map>{}; + auto path_to_column = + std::unordered_map>{}; + for (auto i = 0; i < count; i++) { + const auto& tuple = GetFieldDataPath(field_id, ids[i]); + id_to_data_path.emplace(ids[i], tuple); + path_to_column.emplace(std::get<0>(tuple), nullptr); + } + + // read and prefetch + auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH); + std::vector>>> + futures; + futures.reserve(path_to_column.size()); + for (const auto& iter : path_to_column) { + const auto& data_path = iter.first; + futures.emplace_back(pool.Submit( + ReadFromChunkCache, cc, data_path, mmap_descriptor_, field_meta)); + } + + for (int i = 0; i < futures.size(); ++i) { + const auto& [data_path, column] = futures[i].get(); + path_to_column[data_path] = column; + } + + if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) { + auto buf = std::vector>(count); + for (auto i = 0; i < count; ++i) { + const auto& [data_path, offset_in_binlog] = + id_to_data_path.at(ids[i]); + const auto& column = path_to_column.at(data_path); + AssertInfo( + offset_in_binlog < column->NumRows(), + "column idx out of range, idx: {}, size: {}, data_path: {}", + offset_in_binlog, + column->NumRows(), + data_path); + auto sparse_column = + std::dynamic_pointer_cast(column); + AssertInfo(sparse_column, "incorrect column created"); + buf[i] = *static_cast*>( + static_cast( + sparse_column->ValueAt(offset_in_binlog))); + } + return segcore::CreateVectorDataArrayFrom( + buf.data(), count, field_meta); + } else { + // assign to data array + auto row_bytes = field_meta.get_sizeof(); + auto buf = std::vector(count * row_bytes); + for (auto i = 0; i < count; ++i) { + AssertInfo(id_to_data_path.count(ids[i]) != 0, "id not found"); + const auto& [data_path, offset_in_binlog] = + id_to_data_path.at(ids[i]); + AssertInfo(path_to_column.count(data_path) != 0, + "column not found"); + const auto& column = path_to_column.at(data_path); + AssertInfo( + offset_in_binlog * row_bytes < column->DataByteSize(), + "column idx out of range, idx: {}, size: {}, data_path: {}", + offset_in_binlog * row_bytes, + column->DataByteSize(), + data_path); + auto vector = column->ValueAt(offset_in_binlog); + std::memcpy(buf.data() + i * row_bytes, vector, row_bytes); + } + return segcore::CreateVectorDataArrayFrom( + buf.data(), count, field_meta); + } +} + +void +ChunkedSegmentSealedImpl::DropFieldData(const FieldId field_id) { + if (SystemProperty::Instance().IsSystem(field_id)) { + auto system_field_type = + SystemProperty::Instance().GetSystemFieldType(field_id); + + std::unique_lock lck(mutex_); + --system_ready_count_; + if (system_field_type == SystemFieldType::Timestamp) { + insert_record_.timestamps_.clear(); + } + lck.unlock(); + } else { + auto& field_meta = schema_->operator[](field_id); + std::unique_lock lck(mutex_); + if (get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } + if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } + lck.unlock(); + } +} + +void +ChunkedSegmentSealedImpl::DropIndex(const FieldId field_id) { + AssertInfo(!SystemProperty::Instance().IsSystem(field_id), + "Field id:" + std::to_string(field_id.get()) + + " isn't one of system type when drop index"); + auto& field_meta = schema_->operator[](field_id); + AssertInfo(field_meta.is_vector(), + "Field meta of offset:" + std::to_string(field_id.get()) + + " is not vector type"); + + std::unique_lock lck(mutex_); + vector_indexings_.drop_field_indexing(field_id); + set_bit(index_ready_bitset_, field_id, false); +} + +void +ChunkedSegmentSealedImpl::check_search(const query::Plan* plan) const { + AssertInfo(plan, "Search plan is null"); + AssertInfo(plan->extra_info_opt_.has_value(), + "Extra info of search plan doesn't have value"); + + if (!is_system_field_ready()) { + PanicInfo( + FieldNotLoaded, + "failed to load row ID or timestamp, potential missing bin logs or " + "empty segments. Segment ID = " + + std::to_string(this->id_)); + } + + auto& request_fields = plan->extra_info_opt_.value().involved_fields_; + auto field_ready_bitset = + field_data_ready_bitset_ | index_ready_bitset_ | binlog_index_bitset_; + AssertInfo(request_fields.size() == field_ready_bitset.size(), + "Request fields size not equal to field ready bitset size when " + "check search"); + auto absent_fields = request_fields - field_ready_bitset; + + if (absent_fields.any()) { + // absent_fields.find_first() returns std::optional<> + auto field_id = + FieldId(absent_fields.find_first().value() + START_USER_FIELDID); + auto& field_meta = schema_->operator[](field_id); + PanicInfo( + FieldNotLoaded, + "User Field(" + field_meta.get_name().get() + ") is not loaded"); + } +} + +std::vector +ChunkedSegmentSealedImpl::search_pk(const PkType& pk, + Timestamp timestamp) const { + auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); + auto pk_column = fields_.at(pk_field_id); + std::vector pk_offsets; + switch (schema_->get_fields().at(pk_field_id).get_data_type()) { + case DataType::INT64: { + auto target = std::get(pk); + // get int64 pks + auto num_chunk = pk_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto src = reinterpret_cast(pk_column->Data(i)); + auto chunk_row_num = pk_column->chunk_row_nums(i); + auto it = std::lower_bound( + src, + src + chunk_row_num, + target, + [](const int64_t& elem, const int64_t& value) { + return elem < value; + }); + for (; it != src + chunk_row_num && *it == target; it++) { + auto offset = it - src; + if (insert_record_.timestamps_[offset] <= timestamp) { + pk_offsets.emplace_back(offset); + } + } + } + break; + } + case DataType::VARCHAR: { + auto target = std::get(pk); + // get varchar pks + auto var_column = + std::dynamic_pointer_cast>( + pk_column); + auto num_chunk = var_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto views = var_column->StringViews(i).first; + auto it = std::lower_bound(views.begin(), views.end(), target); + for (; it != views.end() && *it == target; it++) { + auto offset = std::distance(views.begin(), it); + if (insert_record_.timestamps_[offset] <= timestamp) { + pk_offsets.emplace_back(offset); + } + } + } + break; + } + default: { + PanicInfo( + DataTypeInvalid, + fmt::format( + "unsupported type {}", + schema_->get_fields().at(pk_field_id).get_data_type())); + } + } + + return pk_offsets; +} + +std::vector +ChunkedSegmentSealedImpl::search_pk(const PkType& pk, + int64_t insert_barrier) const { + auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); + auto pk_column = fields_.at(pk_field_id); + std::vector pk_offsets; + switch (schema_->get_fields().at(pk_field_id).get_data_type()) { + case DataType::INT64: { + auto target = std::get(pk); + // get int64 pks + + auto num_chunk = pk_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto src = reinterpret_cast(pk_column->Data(i)); + auto chunk_row_num = pk_column->chunk_row_nums(i); + auto it = std::lower_bound( + src, + src + chunk_row_num, + target, + [](const int64_t& elem, const int64_t& value) { + return elem < value; + }); + for (; it != src + chunk_row_num && *it == target; it++) { + auto offset = it - src; + if (offset < insert_barrier) { + pk_offsets.emplace_back(offset); + } + } + } + + break; + } + case DataType::VARCHAR: { + auto target = std::get(pk); + // get varchar pks + auto var_column = + std::dynamic_pointer_cast>( + pk_column); + + auto num_chunk = var_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto views = var_column->StringViews(i).first; + auto it = std::lower_bound(views.begin(), views.end(), target); + for (; it != views.end() && *it == target; it++) { + auto offset = std::distance(views.begin(), it); + if (offset < insert_barrier) { + pk_offsets.emplace_back(offset); + } + } + } + break; + } + default: { + PanicInfo( + DataTypeInvalid, + fmt::format( + "unsupported type {}", + schema_->get_fields().at(pk_field_id).get_data_type())); + } + } + + return pk_offsets; +} + +std::shared_ptr +ChunkedSegmentSealedImpl::get_deleted_bitmap_s( + int64_t del_barrier, + int64_t insert_barrier, + DeletedRecord& delete_record, + Timestamp query_timestamp) const { + // if insert_barrier and del_barrier have not changed, use cache data directly + bool hit_cache = false; + int64_t old_del_barrier = 0; + auto current = delete_record.clone_lru_entry( + insert_barrier, del_barrier, old_del_barrier, hit_cache); + if (hit_cache) { + return current; + } + + auto bitmap = current->bitmap_ptr; + + int64_t start, end; + if (del_barrier < old_del_barrier) { + // in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp + // so these deletion records do not take effect in query/search + // so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] will be reset to 0 + // for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0] + start = del_barrier; + end = old_del_barrier; + } else { + // the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier] + // for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0] + start = old_del_barrier; + end = del_barrier; + } + + // Avoid invalid calculations when there are a lot of repeated delete pks + std::unordered_map delete_timestamps; + for (auto del_index = start; del_index < end; ++del_index) { + auto pk = delete_record.pks()[del_index]; + auto timestamp = delete_record.timestamps()[del_index]; + + delete_timestamps[pk] = timestamp > delete_timestamps[pk] + ? timestamp + : delete_timestamps[pk]; + } + + for (auto& [pk, timestamp] : delete_timestamps) { + auto segOffsets = search_pk(pk, insert_barrier); + for (auto offset : segOffsets) { + int64_t insert_row_offset = offset.get(); + + // The deletion record do not take effect in search/query, + // and reset bitmap to 0 + if (timestamp > query_timestamp) { + bitmap->reset(insert_row_offset); + continue; + } + // Insert after delete with same pk, delete will not task effect on this insert record, + // and reset bitmap to 0 + if (insert_record_.timestamps_[offset.get()] >= timestamp) { + bitmap->reset(insert_row_offset); + continue; + } + // insert data corresponding to the insert_row_offset will be ignored in search/query + bitmap->set(insert_row_offset); + } + } + + delete_record.insert_lru_entry(current); + return current; +} + +std::pair, bool> +ChunkedSegmentSealedImpl::find_first(int64_t limit, + const BitsetType& bitset) const { + if (!is_sorted_by_pk_) { + return insert_record_.pk2offset_->find_first(limit, bitset); + } + if (limit == Unlimited || limit == NoLimit) { + limit = num_rows_.value(); + } + + int64_t hit_num = 0; // avoid counting the number everytime. + auto size = bitset.size(); + int64_t cnt = size - bitset.count(); + auto more_hit_than_limit = cnt > limit; + limit = std::min(limit, cnt); + std::vector seg_offsets; + seg_offsets.reserve(limit); + + int64_t offset = 0; + for (; hit_num < limit && offset < num_rows_.value(); offset++) { + if (offset >= size) { + // In fact, this case won't happen on sealed segments. + continue; + } + + if (!bitset[offset]) { + seg_offsets.push_back(offset); + hit_num++; + } + } + + return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()}; +} + +ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl( + SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id, + bool TEST_skip_index_for_retrieve, + bool is_sorted_by_pk) + : segcore_config_(segcore_config), + field_data_ready_bitset_(schema->size()), + index_ready_bitset_(schema->size()), + binlog_index_bitset_(schema->size()), + scalar_indexings_(schema->size()), + insert_record_(*schema, MAX_ROW_COUNT), + schema_(schema), + id_(segment_id), + col_index_meta_(index_meta), + TEST_skip_index_for_retrieve_(TEST_skip_index_for_retrieve), + is_sorted_by_pk_(is_sorted_by_pk) { + mmap_descriptor_ = std::shared_ptr( + new storage::MmapChunkDescriptor({segment_id, SegmentType::Sealed})); + auto mcm = storage::MmapManager::GetInstance().GetMmapChunkManager(); + mcm->Register(mmap_descriptor_); +} + +ChunkedSegmentSealedImpl::~ChunkedSegmentSealedImpl() { + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + if (cc == nullptr) { + return; + } + // munmap and remove binlog from chunk cache + for (const auto& iter : field_data_info_.field_infos) { + for (const auto& binlog : iter.second.insert_files) { + cc->Remove(binlog); + } + } + if (mmap_descriptor_ != nullptr) { + auto mm = storage::MmapManager::GetInstance().GetMmapChunkManager(); + mm->UnRegister(mmap_descriptor_); + } +} + +void +ChunkedSegmentSealedImpl::bulk_subscript(SystemFieldType system_type, + const int64_t* seg_offsets, + int64_t count, + void* output) const { + AssertInfo(is_system_field_ready(), + "System field isn't ready when do bulk_insert, segID:{}", + id_); + switch (system_type) { + case SystemFieldType::Timestamp: + AssertInfo( + insert_record_.timestamps_.num_chunk() == 1, + "num chunk of timestamp not equal to 1 for sealed segment"); + bulk_subscript_impl( + this->insert_record_.timestamps_.get_chunk_data(0), + seg_offsets, + count, + static_cast(output)); + break; + case SystemFieldType::RowId: + PanicInfo(ErrorCode::Unsupported, "RowId retrieve not supported"); + break; + default: + PanicInfo(DataTypeInvalid, + fmt::format("unknown subscript fields", system_type)); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(const void* src_raw, + const int64_t* seg_offsets, + int64_t count, + T* dst) { + static_assert(IsScalar); + auto src = static_cast(src_raw); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = src[offset]; + } +} +template +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + T* dst) { + static_assert(IsScalar); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = *static_cast( + static_cast(field->ValueAt(offset))); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw) { + auto field = reinterpret_cast*>(column); + auto dst = reinterpret_cast(dst_raw); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = std::move(T(field->RawAt(offset))); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_ptr_impl( + const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst) { + auto field = reinterpret_cast*>(column); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst->at(i) = std::move(T(field->RawAt(offset))); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_array_impl( + const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst) { + auto field = reinterpret_cast(column); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst->at(i) = std::move(field->RawAt(offset)); + } +} + +// for dense vector +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(int64_t element_sizeof, + const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw) { + auto dst_vec = reinterpret_cast(dst_raw); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + auto src = field->ValueAt(offset); + auto dst = dst_vec + i * element_sizeof; + memcpy(dst, src, element_sizeof); + } +} + +void +ChunkedSegmentSealedImpl::ClearData() { + { + std::unique_lock lck(mutex_); + field_data_ready_bitset_.reset(); + index_ready_bitset_.reset(); + binlog_index_bitset_.reset(); + system_ready_count_ = 0; + num_rows_ = std::nullopt; + scalar_indexings_.clear(); + vector_indexings_.clear(); + insert_record_.clear(); + fields_.clear(); + variable_fields_avg_size_.clear(); + stats_.mem_size = 0; + } + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + if (cc == nullptr) { + return; + } + // munmap and remove binlog from chunk cache + for (const auto& iter : field_data_info_.field_infos) { + for (const auto& binlog : iter.second.insert_files) { + cc->Remove(binlog); + } + } +} + +std::unique_ptr +ChunkedSegmentSealedImpl::fill_with_empty(FieldId field_id, + int64_t count) const { + auto& field_meta = schema_->operator[](field_id); + if (IsVectorDataType(field_meta.get_data_type())) { + return CreateVectorDataArray(count, field_meta); + } + return CreateScalarDataArray(count, field_meta); +} + +void +ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) { + std::unique_lock lck(mutex_); + + const auto& field_meta = schema_->operator[](field_id); + auto& cfg = storage::MmapManager::GetInstance().GetMmapConfig(); + std::unique_ptr index; + if (!cfg.GetScalarIndexEnableMmap()) { + // build text index in ram. + index = std::make_unique( + std::numeric_limits::max(), + "milvus_tokenizer", + field_meta.get_tokenizer_params()); + } else { + // build text index using mmap. + index = std::make_unique( + cfg.GetMmapPath(), + "milvus_tokenizer", + field_meta.get_tokenizer_params()); + } + + { + // build + auto iter = fields_.find(field_id); + if (iter != fields_.end()) { + auto column = + std::dynamic_pointer_cast>( + iter->second); + AssertInfo( + column != nullptr, + "failed to create text index, field is not of text type: {}", + field_id.get()); + auto n = column->NumRows(); + for (size_t i = 0; i < n; i++) { + index->AddText(std::string(column->RawAt(i)), i); + } + } else { // fetch raw data from index. + auto field_index_iter = scalar_indexings_.find(field_id); + AssertInfo(field_index_iter != scalar_indexings_.end(), + "failed to create text index, neither raw data nor " + "index are found"); + auto ptr = field_index_iter->second.get(); + AssertInfo(ptr->HasRawData(), + "text raw data not found, trying to create text index " + "from index, but this index don't contain raw data"); + auto impl = dynamic_cast*>(ptr); + AssertInfo(impl != nullptr, + "failed to create text index, field index cannot be " + "converted to string index"); + auto n = impl->Size(); + for (size_t i = 0; i < n; i++) { + index->AddText(impl->Reverse_Lookup(i), i); + } + } + } + + // create index reader. + index->CreateReader(); + // release index writer. + index->Finish(); + + index->Reload(); + + index->RegisterTokenizer("milvus_tokenizer", + field_meta.get_tokenizer_params()); + + text_indexes_[field_id] = std::move(index); +} + +void +ChunkedSegmentSealedImpl::LoadTextIndex( + FieldId field_id, std::unique_ptr index) { + std::unique_lock lck(mutex_); + const auto& field_meta = schema_->operator[](field_id); + index->RegisterTokenizer("milvus_tokenizer", + field_meta.get_tokenizer_params()); + text_indexes_[field_id] = std::move(index); +} + +std::unique_ptr +ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id, + const FieldMeta& field_meta, + const int64_t* seg_offsets, + int64_t count) const { + // DO NOT directly access the column by map like: `fields_.at(field_id)->Data()`, + // we have to clone the shared pointer, + // to make sure it won't get released if segment released + auto column = fields_.at(field_id); + auto ret = fill_with_empty(field_id, count); + if (column->IsNullable()) { + auto dst = ret->mutable_valid_data()->mutable_data(); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = column->IsValid(offset); + } + } + switch (field_meta.get_data_type()) { + case DataType::VARCHAR: + case DataType::STRING: { + bulk_subscript_ptr_impl( + column.get(), + seg_offsets, + count, + ret->mutable_scalars()->mutable_string_data()->mutable_data()); + break; + } + + case DataType::JSON: { + bulk_subscript_ptr_impl( + column.get(), + seg_offsets, + count, + ret->mutable_scalars()->mutable_json_data()->mutable_data()); + break; + } + + case DataType::ARRAY: { + bulk_subscript_array_impl( + column.get(), + seg_offsets, + count, + ret->mutable_scalars()->mutable_array_data()->mutable_data()); + break; + } + + case DataType::BOOL: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_bool_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT8: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_int_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT16: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_int_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT32: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_int_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT64: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_long_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::FLOAT: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_float_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::DOUBLE: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_double_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::VECTOR_FLOAT: { + bulk_subscript_impl(field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors() + ->mutable_float_vector() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::VECTOR_FLOAT16: { + bulk_subscript_impl( + field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors()->mutable_float16_vector()->data()); + break; + } + case DataType::VECTOR_BFLOAT16: { + bulk_subscript_impl( + field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors()->mutable_bfloat16_vector()->data()); + break; + } + case DataType::VECTOR_BINARY: { + bulk_subscript_impl( + field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors()->mutable_binary_vector()->data()); + break; + } + case DataType::VECTOR_SPARSE_FLOAT: { + auto dst = ret->mutable_vectors()->mutable_sparse_float_vector(); + SparseRowsToProto( + [&](size_t i) { + auto offset = seg_offsets[i]; + auto row = + static_cast*>( + static_cast(column->ValueAt(offset))); + return offset != INVALID_SEG_OFFSET ? row : nullptr; + }, + count, + dst); + ret->mutable_vectors()->set_dim(dst->dim()); + break; + } + + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", + field_meta.get_data_type())); + } + } + return ret; +} + +std::unique_ptr +ChunkedSegmentSealedImpl::bulk_subscript(FieldId field_id, + const int64_t* seg_offsets, + int64_t count) const { + auto& field_meta = schema_->operator[](field_id); + // if count == 0, return empty data array + if (count == 0) { + return fill_with_empty(field_id, count); + } + + if (HasIndex(field_id)) { + // if field has load scalar index, reverse raw data from index + if (!IsVectorDataType(field_meta.get_data_type())) { + // AssertInfo(num_chunk() == 1, + // "num chunk not equal to 1 for sealed segment"); + auto index = chunk_index_impl(field_id, 0); + if (index->HasRawData()) { + return ReverseDataFromIndex( + index, seg_offsets, count, field_meta); + } + return get_raw_data(field_id, field_meta, seg_offsets, count); + } + return get_vector(field_id, seg_offsets, count); + } + + Assert(get_bit(field_data_ready_bitset_, field_id)); + + return get_raw_data(field_id, field_meta, seg_offsets, count); +} + +std::unique_ptr +ChunkedSegmentSealedImpl::bulk_subscript( + FieldId field_id, + const int64_t* seg_offsets, + int64_t count, + const std::vector& dynamic_field_names) const { + Assert(!dynamic_field_names.empty()); + auto& field_meta = schema_->operator[](field_id); + if (count == 0) { + return fill_with_empty(field_id, 0); + } + + auto column = fields_.at(field_id); + auto ret = fill_with_empty(field_id, count); + if (column->IsNullable()) { + auto dst = ret->mutable_valid_data()->mutable_data(); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = column->IsValid(offset); + } + } + auto dst = ret->mutable_scalars()->mutable_json_data()->mutable_data(); + auto field = + reinterpret_cast*>(column.get()); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst->at(i) = ExtractSubJson(std::string(field->RawAt(offset)), + dynamic_field_names); + } + return ret; +} + +bool +ChunkedSegmentSealedImpl::HasIndex(FieldId field_id) const { + std::shared_lock lck(mutex_); + return get_bit(index_ready_bitset_, field_id) | + get_bit(binlog_index_bitset_, field_id); +} + +bool +ChunkedSegmentSealedImpl::HasFieldData(FieldId field_id) const { + std::shared_lock lck(mutex_); + if (SystemProperty::Instance().IsSystem(field_id)) { + return is_system_field_ready(); + } else { + return get_bit(field_data_ready_bitset_, field_id); + } +} + +bool +ChunkedSegmentSealedImpl::HasRawData(int64_t field_id) const { + std::shared_lock lck(mutex_); + auto fieldID = FieldId(field_id); + const auto& field_meta = schema_->operator[](fieldID); + if (IsVectorDataType(field_meta.get_data_type())) { + if (get_bit(index_ready_bitset_, fieldID) | + get_bit(binlog_index_bitset_, fieldID)) { + AssertInfo(vector_indexings_.is_ready(fieldID), + "vector index is not ready"); + auto field_indexing = vector_indexings_.get_field_indexing(fieldID); + auto vec_index = dynamic_cast( + field_indexing->indexing_.get()); + return vec_index->HasRawData(); + } + } else { + auto scalar_index = scalar_indexings_.find(fieldID); + if (scalar_index != scalar_indexings_.end()) { + return scalar_index->second->HasRawData(); + } + } + return true; +} + +DataType +ChunkedSegmentSealedImpl::GetFieldDataType(milvus::FieldId field_id) const { + auto& field_meta = schema_->operator[](field_id); + return field_meta.get_data_type(); +} + +std::pair, std::vector> +ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array, + Timestamp timestamp) const { + auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(field_id.get() != -1, "Primary key is -1"); + auto& field_meta = schema_->operator[](field_id); + auto data_type = field_meta.get_data_type(); + auto ids_size = GetSizeOfIdArray(id_array); + std::vector pks(ids_size); + ParsePksFromIDs(pks, data_type, id_array); + + auto res_id_arr = std::make_unique(); + std::vector res_offsets; + res_offsets.reserve(pks.size()); + for (auto& pk : pks) { + std::vector pk_offsets; + if (!is_sorted_by_pk_) { + pk_offsets = insert_record_.search_pk(pk, timestamp); + } else { + pk_offsets = search_pk(pk, timestamp); + } + for (auto offset : pk_offsets) { + switch (data_type) { + case DataType::INT64: { + res_id_arr->mutable_int_id()->add_data( + std::get(pk)); + break; + } + case DataType::VARCHAR: { + res_id_arr->mutable_str_id()->add_data( + std::get(std::move(pk))); + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported type {}", data_type)); + } + } + res_offsets.push_back(offset); + } + } + return {std::move(res_id_arr), std::move(res_offsets)}; +} + +SegcoreError +ChunkedSegmentSealedImpl::Delete(int64_t reserved_offset, // deprecated + int64_t size, + const IdArray* ids, + const Timestamp* timestamps_raw) { + auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(field_id.get() != -1, "Primary key is -1"); + auto& field_meta = schema_->operator[](field_id); + std::vector pks(size); + ParsePksFromIDs(pks, field_meta.get_data_type(), *ids); + + // filter out the deletions that the primary key not exists + std::vector> ordering(size); + for (int i = 0; i < size; i++) { + ordering[i] = std::make_tuple(timestamps_raw[i], pks[i]); + } + // if insert_record_ is empty (may be only-load meta but not data for lru-cache at go side), + // filtering may cause the deletion lost, skip the filtering to avoid it. + if (!insert_record_.empty_pks()) { + auto end = std::remove_if( + ordering.begin(), + ordering.end(), + [&](const std::tuple& record) { + return !insert_record_.contain(std::get<1>(record)); + }); + size = end - ordering.begin(); + ordering.resize(size); + } + if (size == 0) { + return SegcoreError::success(); + } + + // step 1: sort timestamp + std::sort(ordering.begin(), ordering.end()); + std::vector sort_pks(size); + std::vector sort_timestamps(size); + + for (int i = 0; i < size; i++) { + auto [t, pk] = ordering[i]; + sort_timestamps[i] = t; + sort_pks[i] = pk; + } + + deleted_record_.push(sort_pks, sort_timestamps.data()); + return SegcoreError::success(); +} + +std::string +ChunkedSegmentSealedImpl::debug() const { + std::string log_str; + log_str += "Sealed\n"; + log_str += "\n"; + return log_str; +} + +void +ChunkedSegmentSealedImpl::LoadSegmentMeta( + const proto::segcore::LoadSegmentMeta& segment_meta) { + std::unique_lock lck(mutex_); + std::vector slice_lengths; + for (auto& info : segment_meta.metas()) { + slice_lengths.push_back(info.row_count()); + } + insert_record_.timestamp_index_.set_length_meta(std::move(slice_lengths)); + PanicInfo(NotImplemented, "unimplemented"); +} + +int64_t +ChunkedSegmentSealedImpl::get_active_count(Timestamp ts) const { + // TODO optimize here to reduce expr search range + return this->get_row_count(); +} + +void +ChunkedSegmentSealedImpl::mask_with_timestamps(BitsetTypeView& bitset_chunk, + Timestamp timestamp) const { + // TODO change the + AssertInfo(insert_record_.timestamps_.num_chunk() == 1, + "num chunk not equal to 1 for sealed segment"); + auto timestamps_data = + (const milvus::Timestamp*)insert_record_.timestamps_.get_chunk_data(0); + auto timestamps_data_size = insert_record_.timestamps_.get_chunk_size(0); + + AssertInfo(timestamps_data_size == get_row_count(), + fmt::format("Timestamp size not equal to row count: {}, {}", + timestamps_data_size, + get_row_count())); + auto range = insert_record_.timestamp_index_.get_active_range(timestamp); + + // range == (size_, size_) and size_ is this->timestamps_.size(). + // it means these data are all useful, we don't need to update bitset_chunk. + // It can be thought of as an OR operation with another bitmask that is all 0s, but it is not necessary to do so. + if (range.first == range.second && range.first == timestamps_data_size) { + // just skip + return; + } + // range == (0, 0). it means these data can not be used, directly set bitset_chunk to all 1s. + // It can be thought of as an OR operation with another bitmask that is all 1s. + if (range.first == range.second && range.first == 0) { + bitset_chunk.set(); + return; + } + auto mask = TimestampIndex::GenerateBitset( + timestamp, range, timestamps_data, timestamps_data_size); + bitset_chunk |= mask; +} + +bool +ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id) { + if (col_index_meta_ == nullptr || !col_index_meta_->HasFiled(field_id)) { + return false; + } + auto& field_meta = schema_->operator[](field_id); + auto& field_index_meta = col_index_meta_->GetFieldIndexMeta(field_id); + auto& index_params = field_index_meta.GetIndexParams(); + + bool is_sparse = + field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT; + + auto enable_binlog_index = [&]() { + // checkout config + if (!segcore_config_.get_enable_interim_segment_index()) { + return false; + } + // check data type + if (field_meta.get_data_type() != DataType::VECTOR_FLOAT && + !is_sparse) { + return false; + } + // check index type + if (index_params.find(knowhere::meta::INDEX_TYPE) == + index_params.end() || + field_index_meta.IsFlatIndex()) { + return false; + } + // check index exist + if (vector_indexings_.is_ready(field_id)) { + return false; + } + return true; + }; + if (!enable_binlog_index()) { + return false; + } + try { + // get binlog data and meta + int64_t row_count; + { + std::shared_lock lck(mutex_); + row_count = num_rows_.value(); + } + + // generate index params + auto field_binlog_config = std::unique_ptr( + new VecIndexConfig(row_count, + field_index_meta, + segcore_config_, + SegmentType::Sealed, + is_sparse)); + if (row_count < field_binlog_config->GetBuildThreshold()) { + return false; + } + std::shared_ptr vec_data{}; + { + std::shared_lock lck(mutex_); + vec_data = fields_.at(field_id); + } + auto dim = + is_sparse + ? dynamic_cast(vec_data.get())->Dim() + : field_meta.get_dim(); + + auto build_config = field_binlog_config->GetBuildBaseParams(); + build_config[knowhere::meta::DIM] = std::to_string(dim); + build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1); + auto index_metric = field_binlog_config->GetMetricType(); + + auto vec_index = std::make_unique>( + field_binlog_config->GetIndexType(), + index_metric, + knowhere::Version::GetCurrentVersion().VersionNumber()); + auto num_chunk = fields_.at(field_id)->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto dataset = knowhere::GenDataSet( + vec_data->chunk_row_nums(i), dim, vec_data->Data(i)); + dataset->SetIsOwner(false); + dataset->SetIsSparse(is_sparse); + + if (i == 0) { + vec_index->BuildWithDataset(dataset, build_config); + } else { + vec_index->AddWithDataset(dataset, build_config); + } + } + + if (enable_binlog_index()) { + std::unique_lock lck(mutex_); + vector_indexings_.append_field_indexing( + field_id, index_metric, std::move(vec_index)); + + vec_binlog_config_[field_id] = std::move(field_binlog_config); + set_bit(binlog_index_bitset_, field_id, true); + LOG_INFO( + "replace binlog with binlog index in segment {}, field {}.", + this->get_segment_id(), + field_id.get()); + } + return true; + } catch (std::exception& e) { + LOG_WARN("fail to generate binlog index, because {}", e.what()); + return false; + } +} +void +ChunkedSegmentSealedImpl::RemoveFieldFile(const FieldId field_id) { + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + if (cc == nullptr) { + return; + } + for (const auto& iter : field_data_info_.field_infos) { + if (iter.second.field_id == field_id.get()) { + for (const auto& binlog : iter.second.insert_files) { + cc->Remove(binlog); + } + return; + } + } +} + +} // namespace milvus::segcore diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h new file mode 100644 index 0000000000000..fb07c1594b553 --- /dev/null +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -0,0 +1,392 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ConcurrentVector.h" +#include "DeletedRecord.h" +#include "SealedIndexingRecord.h" +#include "SegmentSealed.h" +#include "TimestampIndex.h" +#include "common/EasyAssert.h" +#include "google/protobuf/message_lite.h" +#include "mmap/ChunkedColumn.h" +#include "index/ScalarIndex.h" +#include "sys/mman.h" +#include "common/Types.h" +#include "common/IndexMeta.h" + +namespace milvus::segcore { + +class ChunkedSegmentSealedImpl : public SegmentSealed { + public: + explicit ChunkedSegmentSealedImpl(SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id, + bool TEST_skip_index_for_retrieve = false, + bool is_sorted_by_pk = false); + ~ChunkedSegmentSealedImpl() override; + void + LoadIndex(const LoadIndexInfo& info) override; + void + LoadFieldData(const LoadFieldDataInfo& info) override; + void + LoadDeletedRecord(const LoadDeletedRecordInfo& info) override; + void + LoadSegmentMeta( + const milvus::proto::segcore::LoadSegmentMeta& segment_meta) override; + void + DropIndex(const FieldId field_id) override; + void + DropFieldData(const FieldId field_id) override; + bool + HasIndex(FieldId field_id) const override; + bool + HasFieldData(FieldId field_id) const override; + + bool + Contain(const PkType& pk) const override { + return insert_record_.contain(pk); + } + + void + LoadFieldData(FieldId field_id, FieldDataInfo& data) override; + void + MapFieldData(const FieldId field_id, FieldDataInfo& data) override; + void + AddFieldDataInfoForSealed( + const LoadFieldDataInfo& field_data_info) override; + + int64_t + get_segment_id() const override { + return id_; + } + + bool + HasRawData(int64_t field_id) const override; + + DataType + GetFieldDataType(FieldId fieldId) const override; + + void + RemoveFieldFile(const FieldId field_id) override; + + void + CreateTextIndex(FieldId field_id) override; + + void + LoadTextIndex(FieldId field_id, + std::unique_ptr index) override; + + public: + size_t + GetMemoryUsageInBytes() const override { + return stats_.mem_size.load() + deleted_record_.mem_size(); + } + + int64_t + get_row_count() const override; + + int64_t + get_deleted_count() const override; + + const Schema& + get_schema() const override; + + std::vector + search_pk(const PkType& pk, Timestamp timestamp) const; + + std::vector + search_pk(const PkType& pk, int64_t insert_barrier) const; + + std::shared_ptr + get_deleted_bitmap_s(int64_t del_barrier, + int64_t insert_barrier, + DeletedRecord& delete_record, + Timestamp query_timestamp) const; + + std::unique_ptr + get_vector(FieldId field_id, + const int64_t* ids, + int64_t count) const override; + + bool + is_nullable(FieldId field_id) const override { + auto it = fields_.find(field_id); + AssertInfo(it != fields_.end(), + "Cannot find field with field_id: " + + std::to_string(field_id.get())); + return it->second->IsNullable(); + }; + + bool + is_chunked() const override { + return true; + } + + public: + int64_t + num_chunk_index(FieldId field_id) const override; + + // count of chunk that has raw data + int64_t + num_chunk_data(FieldId field_id) const override; + + int64_t + num_chunk(FieldId field_id) const override; + + // return size_per_chunk for each chunk, renaming against confusion + int64_t + size_per_chunk() const override; + + int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const override; + + std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const override; + + int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override; + + std::string + debug() const override; + + SegcoreError + Delete(int64_t reserved_offset, + int64_t size, + const IdArray* pks, + const Timestamp* timestamps) override; + + std::pair, bool> + find_first(int64_t limit, const BitsetType& bitset) const override; + + // Calculate: output[i] = Vec[seg_offset[i]] + // where Vec is determined from field_offset + std::unique_ptr + bulk_subscript(FieldId field_id, + const int64_t* seg_offsets, + int64_t count) const override; + + std::unique_ptr + bulk_subscript( + FieldId field_id, + const int64_t* seg_offsets, + int64_t count, + const std::vector& dynamic_field_names) const override; + + bool + is_mmap_field(FieldId id) const override; + + void + ClearData(); + + protected: + // blob and row_count + SpanBase + chunk_data_impl(FieldId field_id, int64_t chunk_id) const override; + + std::pair, FixedVector> + chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + + std::pair> + get_chunk_buffer(FieldId field_id, + int64_t chunk_id, + int64_t start_offset, + int64_t length) const override; + + const index::IndexBase* + chunk_index_impl(FieldId field_id, int64_t chunk_id) const override; + + // Calculate: output[i] = Vec[seg_offset[i]], + // where Vec is determined from field_offset + void + bulk_subscript(SystemFieldType system_type, + const int64_t* seg_offsets, + int64_t count, + void* output) const override; + + void + check_search(const query::Plan* plan) const override; + + int64_t + get_active_count(Timestamp ts) const override; + + const ConcurrentVector& + get_timestamps() const override { + return insert_record_.timestamps_; + } + + private: + template + static void + bulk_subscript_impl(const void* src_raw, + const int64_t* seg_offsets, + int64_t count, + T* dst_raw); + + template + static void + bulk_subscript_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + T* dst_raw); + + template + static void + bulk_subscript_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw); + + template + static void + bulk_subscript_ptr_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst_raw); + + template + static void + bulk_subscript_array_impl(const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst); + + static void + bulk_subscript_impl(int64_t element_sizeof, + const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw); + + std::unique_ptr + fill_with_empty(FieldId field_id, int64_t count) const; + + std::unique_ptr + get_raw_data(FieldId field_id, + const FieldMeta& field_meta, + const int64_t* seg_offsets, + int64_t count) const; + + void + update_row_count(int64_t row_count) { + // if (row_count_opt_.has_value()) { + // AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns"); + // } else { + num_rows_ = row_count; + // } + } + + void + mask_with_timestamps(BitsetTypeView& bitset_chunk, + Timestamp timestamp) const override; + + void + vector_search(SearchInfo& search_info, + const void* query_data, + int64_t query_count, + Timestamp timestamp, + const BitsetView& bitset, + SearchResult& output) const override; + + void + mask_with_delete(BitsetTypeView& bitset, + int64_t ins_barrier, + Timestamp timestamp) const override; + + bool + is_system_field_ready() const { + return system_ready_count_ == 2; + } + + const DeletedRecord& + get_deleted_record() const { + return deleted_record_; + } + + std::pair, std::vector> + search_ids(const IdArray& id_array, Timestamp timestamp) const override; + + std::tuple + GetFieldDataPath(FieldId field_id, int64_t offset) const; + + void + LoadVecIndex(const LoadIndexInfo& info); + + void + LoadScalarIndex(const LoadIndexInfo& info); + + virtual void + WarmupChunkCache(const FieldId field_id, bool mmap_enabled) override; + + bool + generate_interim_index(const FieldId field_id); + + private: + // mmap descriptor, used in chunk cache + storage::MmapChunkDescriptorPtr mmap_descriptor_ = nullptr; + // segment loading state + BitsetType field_data_ready_bitset_; + BitsetType index_ready_bitset_; + BitsetType binlog_index_bitset_; + std::atomic system_ready_count_ = 0; + // segment data + + // TODO: generate index for scalar + std::optional num_rows_; + + // scalar field index + std::unordered_map scalar_indexings_; + // vector field index + SealedIndexingRecord vector_indexings_; + + // inserted fields data and row_ids, timestamps + InsertRecord insert_record_; + + // deleted pks + mutable DeletedRecord deleted_record_; + + LoadFieldDataInfo field_data_info_; + + SchemaPtr schema_; + int64_t id_; + std::unordered_map> fields_; + std::unordered_set mmap_fields_; + + // only useful in binlog + IndexMetaPtr col_index_meta_; + SegcoreConfig segcore_config_; + std::unordered_map> + vec_binlog_config_; + + SegmentStats stats_{}; + + // for sparse vector unit test only! Once a type of sparse index that + // doesn't has raw data is added, this should be removed. + bool TEST_skip_index_for_retrieve_ = false; + + // whether the segment is sorted by the pk + bool is_sorted_by_pk_ = false; +}; + +} // namespace milvus::segcore diff --git a/internal/core/src/segcore/ConcurrentVector.h b/internal/core/src/segcore/ConcurrentVector.h index 52971063ad02e..484ff7d293c04 100644 --- a/internal/core/src/segcore/ConcurrentVector.h +++ b/internal/core/src/segcore/ConcurrentVector.h @@ -234,9 +234,11 @@ class ConcurrentVectorImpl : public VectorBase { if (element_count == 0) { return; } + auto size = + size_per_chunk_ == MAX_ROW_COUNT ? element_count : size_per_chunk_; chunks_ptr_->emplace_to_at_least( - upper_div(element_offset + element_count, size_per_chunk_), - elements_per_row_ * size_per_chunk_); + upper_div(element_offset + element_count, size), + elements_per_row_ * size); set_data( element_offset, static_cast(source), element_count); } diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index a731e84bab1f6..76a1dcf2cd33d 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -27,6 +27,7 @@ #include "common/Schema.h" #include "common/Types.h" #include "fmt/format.h" +#include "mmap/ChunkedColumn.h" #include "mmap/Column.h" #include "segcore/AckResponder.h" #include "segcore/ConcurrentVector.h" @@ -487,12 +488,52 @@ struct InsertRecord { void insert_pks(milvus::DataType data_type, - const std::shared_ptr& data) { + const std::shared_ptr& data) { std::lock_guard lck(shared_mutex_); int64_t offset = 0; switch (data_type) { case DataType::INT64: { - auto column = std::dynamic_pointer_cast(data); + auto column = std::dynamic_pointer_cast(data); + auto num_chunk = column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto pks = + reinterpret_cast(column->Data(i)); + for (int i = 0; i < column->NumRows(); ++i) { + pk2offset_->insert(pks[i], offset++); + } + } + break; + } + case DataType::VARCHAR: { + auto column = std::dynamic_pointer_cast< + ChunkedVariableColumn>(data); + + auto num_chunk = column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto pks = column->StringViews(i).first; + for (auto& pk : pks) { + pk2offset_->insert(std::string(pk), offset++); + } + } + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported primary key data type", + data_type)); + } + } + } + + void + insert_pks(milvus::DataType data_type, + const std::shared_ptr& data) { + std::lock_guard lck(shared_mutex_); + int64_t offset = 0; + switch (data_type) { + case DataType::INT64: { + auto column = + std::dynamic_pointer_cast(data); auto pks = reinterpret_cast(column->Data()); for (int i = 0; i < column->NumRows(); ++i) { pk2offset_->insert(pks[i], offset++); @@ -500,9 +541,8 @@ struct InsertRecord { break; } case DataType::VARCHAR: { - auto column = - std::dynamic_pointer_cast>( - data); + auto column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(data); auto pks = column->Views(); for (int i = 0; i < column->NumRows(); ++i) { diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 8dc1304e6143e..b90953c858066 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -399,7 +399,7 @@ SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { } int64_t -SegmentGrowingImpl::num_chunk() const { +SegmentGrowingImpl::num_chunk(FieldId field_id) const { auto size = get_insert_record().ack_responder_.GetAck(); return upper_div(size, segcore_config_.get_chunk_rows()); } diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 163b64da21097..f90bba0f5df1d 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -135,6 +135,22 @@ class SegmentGrowingImpl : public SegmentGrowing { return segcore_config_.get_chunk_rows(); } + virtual int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const final { + return segcore_config_.get_chunk_rows(); + } + + std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const override { + auto size_per_chunk = segcore_config_.get_chunk_rows(); + return {offset / size_per_chunk, offset % size_per_chunk}; + } + + int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override { + return chunk_id * segcore_config_.get_chunk_rows(); + } + void try_remove_chunks(FieldId fieldId); @@ -320,7 +336,7 @@ class SegmentGrowingImpl : public SegmentGrowing { protected: int64_t - num_chunk() const override; + num_chunk(FieldId field_id) const override; SpanBase chunk_data_impl(FieldId field_id, int64_t chunk_id) const override; diff --git a/internal/core/src/segcore/SegmentInterface.cpp b/internal/core/src/segcore/SegmentInterface.cpp index b7b8efbf2418d..ee31b16d5fab4 100644 --- a/internal/core/src/segcore/SegmentInterface.cpp +++ b/internal/core/src/segcore/SegmentInterface.cpp @@ -392,14 +392,6 @@ SegmentInternalInterface::LoadPrimitiveSkipIndex(milvus::FieldId field_id, field_id, chunk_id, data_type, chunk_data, valid_data, count); } -void -SegmentInternalInterface::LoadStringSkipIndex( - milvus::FieldId field_id, - int64_t chunk_id, - const milvus::VariableColumn& var_column) { - skip_index_.LoadString(field_id, chunk_id, var_column); -} - index::TextMatchIndex* SegmentInternalInterface::GetTextIndex(FieldId field_id) const { std::shared_lock lock(mutex_); diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 2d4e02d2f5fd4..fe09f7c3afb79 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -21,6 +21,7 @@ #include "DeletedRecord.h" #include "FieldIndexing.h" +#include "common/Common.h" #include "common/Schema.h" #include "common/Span.h" #include "common/SystemProperty.h" @@ -179,13 +180,24 @@ class SegmentInternalInterface : public SegmentInterface { BufferView buffer = chunk_info.first; std::vector res; res.reserve(length); - char* pos = buffer.data_; - for (size_t j = 0; j < length; j++) { - uint32_t size; - size = *reinterpret_cast(pos); - pos += sizeof(uint32_t); - res.emplace_back(ViewType(pos, size)); - pos += size; + if (buffer.data_.index() == 1) { + char* pos = std::get<1>(buffer.data_).first; + for (size_t j = 0; j < length; j++) { + uint32_t size; + size = *reinterpret_cast(pos); + pos += sizeof(uint32_t); + res.emplace_back(ViewType(pos, size)); + pos += size; + } + } else { + auto elements = std::get<0>(buffer.data_); + for (auto& element : elements) { + for (int i = element.start_; i < element.end_; i++) { + res.emplace_back(ViewType( + element.data_ + element.offsets_[i], + element.offsets_[i + 1] - element.offsets_[i])); + } + } } return std::make_pair(res, chunk_info.second); } @@ -246,6 +258,10 @@ class SegmentInternalInterface : public SegmentInterface { set_field_avg_size(FieldId field_id, int64_t num_rows, int64_t field_size) override; + virtual bool + is_chunked() const { + return false; + } const SkipIndex& GetSkipIndex() const; @@ -258,10 +274,13 @@ class SegmentInternalInterface : public SegmentInterface { const bool* valid_data, int64_t count); + template void LoadStringSkipIndex(FieldId field_id, int64_t chunk_id, - const milvus::VariableColumn& var_column); + const T& var_column) { + skip_index_.LoadString(field_id, chunk_id, var_column); + } virtual DataType GetFieldDataType(FieldId fieldId) const = 0; @@ -291,6 +310,9 @@ class SegmentInternalInterface : public SegmentInterface { virtual int64_t num_chunk_data(FieldId field_id) const = 0; + virtual int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const = 0; + // bitset 1 means not hit. 0 means hit. virtual void mask_with_timestamps(BitsetTypeView& bitset_chunk, @@ -298,7 +320,13 @@ class SegmentInternalInterface : public SegmentInterface { // count of chunks virtual int64_t - num_chunk() const = 0; + num_chunk(FieldId field_id) const = 0; + + virtual int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const = 0; + + virtual std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const = 0; // element size in each chunk virtual int64_t @@ -384,7 +412,13 @@ class SegmentInternalInterface : public SegmentInterface { // internal API: return chunk_index in span, support scalar index only virtual const index::IndexBase* chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0; + virtual void + check_search(const query::Plan* plan) const = 0; + + virtual const ConcurrentVector& + get_timestamps() const = 0; + public: // calculate output[i] = Vec[seg_offsets[i]}, where Vec binds to system_type virtual void bulk_subscript(SystemFieldType system_type, @@ -405,12 +439,6 @@ class SegmentInternalInterface : public SegmentInterface { int64_t count, const std::vector& dynamic_field_names) const = 0; - virtual void - check_search(const query::Plan* plan) const = 0; - - virtual const ConcurrentVector& - get_timestamps() const = 0; - protected: mutable std::shared_mutex mutex_; // fieldID -> std::pair diff --git a/internal/core/src/segcore/SegmentSealed.h b/internal/core/src/segcore/SegmentSealed.h index a3c8cf951a5db..b84b3b9b94d5c 100644 --- a/internal/core/src/segcore/SegmentSealed.h +++ b/internal/core/src/segcore/SegmentSealed.h @@ -19,7 +19,6 @@ #include "pb/segcore.pb.h" #include "segcore/SegmentInterface.h" #include "segcore/Types.h" -#include "mmap/Column.h" namespace milvus::segcore { @@ -42,6 +41,12 @@ class SegmentSealed : public SegmentInternalInterface { AddFieldDataInfoForSealed(const LoadFieldDataInfo& field_data_info) = 0; virtual void WarmupChunkCache(const FieldId field_id, bool mmap_enabled) = 0; + virtual void + RemoveFieldFile(const FieldId field_id) = 0; + virtual void + ClearData() = 0; + virtual std::unique_ptr + get_vector(FieldId field_id, const int64_t* ids, int64_t count) const = 0; virtual void LoadTextIndex(FieldId field_id, diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index de7643751f73b..9fff1a9d09410 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -345,15 +345,15 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { : DEFAULT_MEM_VRCOL_BLOCK_SIZE; }; - std::shared_ptr column{}; + std::shared_ptr column{}; if (IsVariableDataType(data_type)) { int64_t field_data_size = 0; switch (data_type) { case milvus::DataType::STRING: case milvus::DataType::VARCHAR: { - auto var_column = - std::make_shared>( - num_rows, field_meta, get_block_size()); + auto var_column = std::make_shared< + SingleChunkVariableColumn>( + num_rows, field_meta, get_block_size()); FieldDataPtr field_data; while (data.channel->pop(field_data)) { var_column->Append(std::move(field_data)); @@ -366,9 +366,9 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::JSON: { - auto var_column = - std::make_shared>( - num_rows, field_meta, get_block_size()); + auto var_column = std::make_shared< + SingleChunkVariableColumn>( + num_rows, field_meta, get_block_size()); FieldDataPtr field_data; while (data.channel->pop(field_data)) { var_column->Append(std::move(field_data)); @@ -380,8 +380,8 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::ARRAY: { - auto var_column = - std::make_shared(num_rows, field_meta); + auto var_column = std::make_shared( + num_rows, field_meta); FieldDataPtr field_data; while (data.channel->pop(field_data)) { for (auto i = 0; i < field_data->get_num_rows(); i++) { @@ -407,7 +407,8 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::VECTOR_SPARSE_FLOAT: { - auto col = std::make_shared(field_meta); + auto col = std::make_shared( + field_meta); FieldDataPtr field_data; while (data.channel->pop(field_data)) { stats_.mem_size += field_data->Size(); @@ -426,7 +427,7 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { SegmentInternalInterface::set_field_avg_size( field_id, num_rows, field_data_size); } else { - column = std::make_shared(num_rows, field_meta); + column = std::make_shared(num_rows, field_meta); FieldDataPtr field_data; while (data.channel->pop(field_data)) { column->AppendBatch(field_data); @@ -516,24 +517,25 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { valid_data); } WriteFieldPadding(file, data_type, total_written); - std::shared_ptr column{}; + std::shared_ptr column{}; auto num_rows = data.row_count; if (IsVariableDataType(data_type)) { switch (data_type) { case milvus::DataType::STRING: case milvus::DataType::VARCHAR: { - auto var_column = std::make_shared>( - file, - total_written, - field_meta, - DEFAULT_MMAP_VRCOL_BLOCK_SIZE); + auto var_column = + std::make_shared>( + file, + total_written, + field_meta, + DEFAULT_MMAP_VRCOL_BLOCK_SIZE); var_column->Seal(std::move(indices)); column = std::move(var_column); break; } case milvus::DataType::JSON: { auto var_column = - std::make_shared>( + std::make_shared>( file, total_written, field_meta, @@ -543,7 +545,7 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::ARRAY: { - auto arr_column = std::make_shared( + auto arr_column = std::make_shared( file, total_written, field_meta); arr_column->Seal(std::move(indices), std::move(element_indices)); @@ -551,8 +553,9 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::VECTOR_SPARSE_FLOAT: { - auto sparse_column = std::make_shared( - file, total_written, field_meta, std::move(indices)); + auto sparse_column = + std::make_shared( + file, total_written, field_meta, std::move(indices)); column = std::move(sparse_column); break; } @@ -562,7 +565,8 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { } } } else { - column = std::make_shared(file, total_written, field_meta); + column = std::make_shared( + file, total_written, field_meta); } column->SetValidData(std::move(valid_data)); @@ -664,7 +668,7 @@ SegmentSealedImpl::num_chunk_data(FieldId field_id) const { } int64_t -SegmentSealedImpl::num_chunk() const { +SegmentSealedImpl::num_chunk(FieldId field_id) const { return 1; } @@ -790,9 +794,8 @@ SegmentSealedImpl::search_pk(const PkType& pk, Timestamp timestamp) const { case DataType::VARCHAR: { auto target = std::get(pk); // get varchar pks - auto var_column = - std::dynamic_pointer_cast>( - pk_column); + auto var_column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(pk_column); auto views = var_column->Views(); auto it = std::lower_bound(views.begin(), views.end(), target); for (; it != views.end() && *it == target; it++) { @@ -843,9 +846,8 @@ SegmentSealedImpl::search_pk(const PkType& pk, int64_t insert_barrier) const { case DataType::VARCHAR: { auto target = std::get(pk); // get varchar pks - auto var_column = - std::dynamic_pointer_cast>( - pk_column); + auto var_column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(pk_column); auto views = var_column->Views(); auto it = std::lower_bound(views.begin(), views.end(), target); while (it != views.end() && *it == target) { @@ -1057,17 +1059,24 @@ SegmentSealedImpl::GetFieldDataPath(FieldId field_id, int64_t offset) const { return {data_path, offset_in_binlog}; } -std::tuple> static ReadFromChunkCache( - const storage::ChunkCachePtr& cc, - const std::string& data_path, - const storage::MmapChunkDescriptorPtr& descriptor) { +std::tuple< + std::string, + std::shared_ptr< + SingleChunkColumnBase>> static ReadFromChunkCache(const storage:: + ChunkCachePtr& cc, + const std::string& + data_path, + const storage:: + MmapChunkDescriptorPtr& + descriptor) { // For mmap mode, field_meta is unused, so just construct a fake field meta. auto fm = FieldMeta(FieldName(""), FieldId(0), milvus::DataType::NONE, false); // TODO: add Load() interface for chunk cache when support retrieve_enable, make Read() raise error if cache miss auto column = cc->Read(data_path, descriptor, fm, true); cc->Prefetch(data_path); - return {data_path, column}; + return {data_path, + std::dynamic_pointer_cast(column)}; } std::unique_ptr @@ -1115,7 +1124,8 @@ SegmentSealedImpl::get_vector(FieldId field_id, auto id_to_data_path = std::unordered_map>{}; auto path_to_column = - std::unordered_map>{}; + std::unordered_map>{}; for (auto i = 0; i < count; i++) { const auto& tuple = GetFieldDataPath(field_id, ids[i]); id_to_data_path.emplace(ids[i], tuple); @@ -1124,8 +1134,8 @@ SegmentSealedImpl::get_vector(FieldId field_id, // read and prefetch auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH); - std::vector< - std::future>>> + std::vector>>> futures; futures.reserve(path_to_column.size()); for (const auto& iter : path_to_column) { @@ -1152,7 +1162,7 @@ SegmentSealedImpl::get_vector(FieldId field_id, column->NumRows(), data_path); auto sparse_column = - std::dynamic_pointer_cast(column); + std::dynamic_pointer_cast(column); AssertInfo(sparse_column, "incorrect column created"); buf[i] = static_cast*>( static_cast( @@ -1344,11 +1354,11 @@ SegmentSealedImpl::bulk_subscript_impl(const void* src_raw, template void -SegmentSealedImpl::bulk_subscript_impl(const ColumnBase* column, +SegmentSealedImpl::bulk_subscript_impl(const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, void* dst_raw) { - auto field = reinterpret_cast*>(column); + auto field = reinterpret_cast*>(column); auto dst = reinterpret_cast(dst_raw); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; @@ -1359,11 +1369,11 @@ SegmentSealedImpl::bulk_subscript_impl(const ColumnBase* column, template void SegmentSealedImpl::bulk_subscript_ptr_impl( - const ColumnBase* column, + const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst) { - auto field = reinterpret_cast*>(column); + auto field = reinterpret_cast*>(column); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; dst->at(i) = std::move(T(field->RawAt(offset))); @@ -1373,11 +1383,11 @@ SegmentSealedImpl::bulk_subscript_ptr_impl( template void SegmentSealedImpl::bulk_subscript_array_impl( - const ColumnBase* column, + const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst) { - auto field = reinterpret_cast(column); + auto field = reinterpret_cast(column); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; dst->at(i) = std::move(field->RawAt(offset)); @@ -1630,7 +1640,7 @@ SegmentSealedImpl::bulk_subscript(FieldId field_id, if (HasIndex(field_id)) { // if field has load scalar index, reverse raw data from index if (!IsVectorDataType(field_meta.get_data_type())) { - AssertInfo(num_chunk() == 1, + AssertInfo(num_chunk(field_id) == 1, "num chunk not equal to 1 for sealed segment"); auto index = chunk_index_impl(field_id, 0); if (index->HasRawData()) { @@ -1669,7 +1679,8 @@ SegmentSealedImpl::bulk_subscript( } } auto dst = ret->mutable_scalars()->mutable_json_data()->mutable_data(); - auto field = reinterpret_cast*>(column.get()); + auto field = + reinterpret_cast*>(column.get()); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; dst->at(i) = ExtractSubJson(std::string(field->RawAt(offset)), @@ -1965,14 +1976,16 @@ SegmentSealedImpl::generate_interim_index(const FieldId field_id) { if (row_count < field_binlog_config->GetBuildThreshold()) { return false; } - std::shared_ptr vec_data{}; + std::shared_ptr vec_data{}; { std::shared_lock lck(mutex_); vec_data = fields_.at(field_id); } - auto dim = is_sparse - ? dynamic_cast(vec_data.get())->Dim() - : field_meta.get_dim(); + auto dim = + is_sparse + ? dynamic_cast(vec_data.get()) + ->Dim() + : field_meta.get_dim(); auto build_config = field_binlog_config->GetBuildBaseParams(); build_config[knowhere::meta::DIM] = std::to_string(dim); @@ -2049,9 +2062,8 @@ SegmentSealedImpl::CreateTextIndex(FieldId field_id) { // build auto iter = fields_.find(field_id); if (iter != fields_.end()) { - auto column = - std::dynamic_pointer_cast>( - iter->second); + auto column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(iter->second); AssertInfo( column != nullptr, "failed to create text index, field is not of text type: {}", diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 4e44a57e472f5..1c07c1047a7e1 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -31,6 +31,7 @@ #include "google/protobuf/message_lite.h" #include "mmap/Column.h" #include "index/ScalarIndex.h" +#include "segcore/ChunkedSegmentSealedImpl.h" #include "sys/mman.h" #include "common/Types.h" #include "common/IndexMeta.h" @@ -127,7 +128,9 @@ class SegmentSealedImpl : public SegmentSealed { Timestamp query_timestamp) const; std::unique_ptr - get_vector(FieldId field_id, const int64_t* ids, int64_t count) const; + get_vector(FieldId field_id, + const int64_t* ids, + int64_t count) const override; bool is_nullable(FieldId field_id) const override { @@ -147,12 +150,30 @@ class SegmentSealedImpl : public SegmentSealed { num_chunk_data(FieldId field_id) const override; int64_t - num_chunk() const override; + num_chunk(FieldId field_id) const override; // return size_per_chunk for each chunk, renaming against confusion int64_t size_per_chunk() const override; + int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const override { + PanicInfo(ErrorCode::Unsupported, "Not implemented"); + } + bool + is_chunked() const override { + return false; + } + + std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const override { + PanicInfo(ErrorCode::Unsupported, "Not implemented"); + } + + int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override { + PanicInfo(ErrorCode::Unsupported, "Not implemented"); + } std::string debug() const override; @@ -231,21 +252,21 @@ class SegmentSealedImpl : public SegmentSealed { template static void - bulk_subscript_impl(const ColumnBase* field, + bulk_subscript_impl(const SingleChunkColumnBase* field, const int64_t* seg_offsets, int64_t count, void* dst_raw); template static void - bulk_subscript_ptr_impl(const ColumnBase* field, + bulk_subscript_ptr_impl(const SingleChunkColumnBase* field, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst_raw); template static void - bulk_subscript_array_impl(const ColumnBase* column, + bulk_subscript_array_impl(const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst); @@ -348,7 +369,7 @@ class SegmentSealedImpl : public SegmentSealed { SchemaPtr schema_; int64_t id_; - std::unordered_map> fields_; + std::unordered_map> fields_; std::unordered_set mmap_fields_; // only useful in binlog @@ -374,13 +395,24 @@ CreateSealedSegment( int64_t segment_id = -1, const SegcoreConfig& segcore_config = SegcoreConfig::default_config(), bool TEST_skip_index_for_retrieve = false, - bool is_sorted_by_pk = false) { - return std::make_unique(schema, - index_meta, - segcore_config, - segment_id, - TEST_skip_index_for_retrieve, - is_sorted_by_pk); + bool is_sorted_by_pk = false, + bool is_multi_chunk = false) { + if (!is_multi_chunk) { + return std::make_unique(schema, + index_meta, + segcore_config, + segment_id, + TEST_skip_index_for_retrieve, + is_sorted_by_pk); + } else { + return std::make_unique( + schema, + index_meta, + segcore_config, + segment_id, + TEST_skip_index_for_retrieve, + is_sorted_by_pk); + } } -} // namespace milvus::segcore +} // namespace milvus::segcore \ No newline at end of file diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index a9ff746c2ae98..e0bd00007b461 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include "segcore/Utils.h" +#include #include #include @@ -22,6 +23,7 @@ #include "index/ScalarIndex.h" #include "mmap/Utils.h" #include "log/Log.h" +#include "storage/DataCodec.h" #include "storage/RemoteChunkManagerSingleton.h" #include "storage/ThreadPools.h" #include "storage/Util.h" @@ -783,6 +785,42 @@ ReverseDataFromIndex(const index::IndexBase* index, // init segcore storage config first, and create default remote chunk manager // segcore use default remote chunk manager to load data from minio/s3 +void +LoadArrowReaderFromRemote(const std::vector& remote_files, + std::shared_ptr channel) { + try { + auto rcm = storage::RemoteChunkManagerSingleton::GetInstance() + .GetRemoteChunkManager(); + auto& pool = ThreadPools::GetThreadPool(ThreadPoolPriority::HIGH); + + std::vector>> + futures; + futures.reserve(remote_files.size()); + for (const auto& file : remote_files) { + auto future = pool.Submit([&]() { + auto fileSize = rcm->Size(file); + auto buf = std::shared_ptr(new uint8_t[fileSize]); + rcm->Read(file, buf.get(), fileSize); + auto result = + storage::DeserializeFileData(buf, fileSize, false); + result->SetData(buf); + return result->GetReader(); + }); + futures.emplace_back(std::move(future)); + } + + for (auto& future : futures) { + auto field_data = future.get(); + channel->push(field_data); + } + + channel->close(); + } catch (std::exception& e) { + LOG_INFO("failed to load data from remote: {}", e.what()); + channel->close(std::current_exception()); + } +} + void LoadFieldDatasFromRemote(const std::vector& remote_files, FieldDataChannelPtr channel) { @@ -815,7 +853,6 @@ LoadFieldDatasFromRemote(const std::vector& remote_files, channel->close(std::current_exception()); } } - int64_t upper_bound(const ConcurrentVector& timestamps, int64_t first, diff --git a/internal/core/src/segcore/Utils.h b/internal/core/src/segcore/Utils.h index c32210d660dae..226e0da6441f0 100644 --- a/internal/core/src/segcore/Utils.h +++ b/internal/core/src/segcore/Utils.h @@ -184,10 +184,13 @@ ReverseDataFromIndex(const index::IndexBase* index, int64_t count, const FieldMeta& field_meta); +void +LoadArrowReaderFromRemote(const std::vector& remote_files, + std::shared_ptr channel); + void LoadFieldDatasFromRemote(const std::vector& remote_files, FieldDataChannelPtr channel); - /** * Returns an index pointing to the first element in the range [first, last) such that `value < element` is true * (i.e. that is strictly greater than value), or last if no such element is found. diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index 0baa75345dbd2..fd7180d1ef184 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -26,9 +26,11 @@ #include "log/Log.h" #include "mmap/Types.h" #include "segcore/Collection.h" +#include "segcore/SegcoreConfig.h" #include "segcore/SegmentGrowingImpl.h" #include "segcore/SegmentSealedImpl.h" #include "segcore/Utils.h" +#include "storage/Event.h" #include "storage/Util.h" #include "futures/Future.h" #include "futures/Executor.h" @@ -59,8 +61,20 @@ NewSegment(CCollection collection, segment_id, milvus::segcore::SegcoreConfig::default_config(), false, - is_sorted_by_pk); + is_sorted_by_pk, + false); break; + case ChunkedSealed: + segment = milvus::segcore::CreateSealedSegment( + col->get_schema(), + col->get_index_meta(), + segment_id, + milvus::segcore::SegcoreConfig::default_config(), + false, + is_sorted_by_pk, + true); + break; + default: PanicInfo(milvus::UnexpectedError, "invalid segment type: {}", @@ -82,7 +96,7 @@ DeleteSegment(CSegmentInterface c_segment) { void ClearSegmentData(CSegmentInterface c_segment) { - auto s = static_cast(c_segment); + auto s = static_cast(c_segment); s->ClearData(); } @@ -549,8 +563,7 @@ WarmupChunkCache(CSegmentInterface c_segment, void RemoveFieldFile(CSegmentInterface c_segment, int64_t field_id) { - auto segment = - reinterpret_cast(c_segment); + auto segment = reinterpret_cast(c_segment); segment->RemoveFieldFile(milvus::FieldId(field_id)); } diff --git a/internal/core/src/storage/ChunkCache.cpp b/internal/core/src/storage/ChunkCache.cpp index 4b85011a578a2..3fd68868d6ddb 100644 --- a/internal/core/src/storage/ChunkCache.cpp +++ b/internal/core/src/storage/ChunkCache.cpp @@ -18,9 +18,97 @@ #include #include "ChunkCache.h" +#include "common/ChunkWriter.h" +#include "common/FieldMeta.h" #include "common/Types.h" +#include "log/Log.h" namespace milvus::storage { +std::shared_ptr +ChunkCache::Read(const std::string& filepath, + const MmapChunkDescriptorPtr& descriptor, + const FieldMeta& field_meta) { + // use rlock to get future + { + std::shared_lock lck(mutex_); + auto it = columns_.find(filepath); + if (it != columns_.end()) { + lck.unlock(); + auto result = it->second.second.get(); + AssertInfo(result, "unexpected null column, file={}", filepath); + return result; + } + } + + // lock for mutation + std::unique_lock lck(mutex_); + // double check no-futurn + auto it = columns_.find(filepath); + if (it != columns_.end()) { + lck.unlock(); + auto result = it->second.second.get(); + AssertInfo(result, "unexpected null column, file={}", filepath); + return result; + } + + std::promise> p; + std::shared_future> f = p.get_future(); + columns_.emplace(filepath, std::make_pair(std::move(p), f)); + lck.unlock(); + + // release lock and perform download and decode + // other thread request same path shall get the future. + bool allocate_success = false; + ErrorCode err_code = Success; + std::string err_msg = ""; + std::shared_ptr column; + try { + auto field_data = + DownloadAndDecodeRemoteFile(cm_.get(), filepath, false); + + auto chunk = create_chunk( + field_meta, field_meta.get_dim(), field_data->GetReader()->reader); + + auto data_type = field_meta.get_data_type(); + if (IsSparseFloatVectorDataType(data_type)) { + auto sparse_column = + std::make_shared(field_meta); + sparse_column->AddChunk(chunk); + column = std::move(sparse_column); + } else if (IsVariableDataType(data_type)) { + AssertInfo(false, + "TODO: unimplemented for variable data type: {}", + data_type); + } else { + std::vector> chunks{chunk}; + column = std::make_shared(chunks); + } + } catch (const SegcoreError& e) { + err_code = e.get_error_code(); + err_msg = fmt::format("failed to read for chunkCache, seg_core_err:{}", + e.what()); + } + std::unique_lock mmap_lck(mutex_); + + it = columns_.find(filepath); + if (it != columns_.end()) { + // check pair exists then set value + it->second.first.set_value(column); + if (allocate_success) { + AssertInfo(column, "unexpected null column, file={}", filepath); + } + } else { + PanicInfo(UnexpectedError, + "Wrong code, the thread to download for cache should get the " + "target entry"); + } + if (err_code != Success) { + columns_.erase(filepath); + throw SegcoreError(err_code, err_msg); + } + return column; +} + std::shared_ptr ChunkCache::Read(const std::string& filepath, const MmapChunkDescriptorPtr& descriptor, @@ -98,7 +186,8 @@ ChunkCache::Read(const std::string& filepath, } } else { PanicInfo(UnexpectedError, - "Wrong code, the thread to download for cache should get the " + "Wrong code, the thread to download for " + "cache should get the " "target entry"); } if (err_code != Success) { @@ -148,23 +237,25 @@ ChunkCache::ConvertToColumn(const FieldDataPtr& field_data, if (IsSparseFloatVectorDataType(data_type)) { if (mmap_enabled) { - column = std::make_shared(mcm_, descriptor); + column = std::make_shared(mcm_, + descriptor); } else { - column = std::make_shared(field_meta); + column = std::make_shared(field_meta); } } else if (IsVariableDataType(data_type)) { AssertInfo( false, "TODO: unimplemented for variable data type: {}", data_type); } else { if (mmap_enabled) { - column = std::make_shared(field_data->Size(), - data_type, - mcm_, - descriptor, - field_data->IsNullable()); + column = + std::make_shared(field_data->Size(), + data_type, + mcm_, + descriptor, + field_data->IsNullable()); } else { - column = std::make_shared(field_data->get_num_rows(), - field_meta); + column = std::make_shared( + field_data->get_num_rows(), field_meta); } } column->AppendBatch(field_data); diff --git a/internal/core/src/storage/ChunkCache.h b/internal/core/src/storage/ChunkCache.h index 0c03dcac633de..fecb8e5bac58c 100644 --- a/internal/core/src/storage/ChunkCache.h +++ b/internal/core/src/storage/ChunkCache.h @@ -17,8 +17,9 @@ #pragma once #include #include +#include "common/FieldMeta.h" #include "storage/MmapChunkManager.h" -#include "mmap/Column.h" +#include "mmap/ChunkedColumn.h" namespace milvus::storage { @@ -44,6 +45,11 @@ class ChunkCache { ~ChunkCache() = default; public: + std::shared_ptr + Read(const std::string& filepath, + const MmapChunkDescriptorPtr& descriptor, + const FieldMeta& field_meta); + std::shared_ptr Read(const std::string& filepath, const MmapChunkDescriptorPtr& descriptor, @@ -58,6 +64,9 @@ class ChunkCache { Prefetch(const std::string& filepath); private: + std::string + CachePath(const std::string& filepath); + std::shared_ptr ConvertToColumn(const FieldDataPtr& field_data, const MmapChunkDescriptorPtr& descriptor, diff --git a/internal/core/src/storage/DataCodec.cpp b/internal/core/src/storage/DataCodec.cpp index 96f0aeac73570..5035a07cd9d14 100644 --- a/internal/core/src/storage/DataCodec.cpp +++ b/internal/core/src/storage/DataCodec.cpp @@ -27,7 +27,7 @@ namespace milvus::storage { // deserialize remote insert and index file std::unique_ptr -DeserializeRemoteFileData(BinlogReaderPtr reader) { +DeserializeRemoteFileData(BinlogReaderPtr reader, bool is_field_data) { DescriptorEvent descriptor_event(reader); DataType data_type = DataType(descriptor_event.event_data.fix_part.data_type); @@ -45,10 +45,17 @@ DeserializeRemoteFileData(BinlogReaderPtr reader) { case EventType::InsertEvent: { auto event_data_length = header.event_length_ - GetEventHeaderSize(header); - auto insert_event_data = - InsertEventData(reader, event_data_length, data_type, nullable); - auto insert_data = - std::make_unique(insert_event_data.field_data); + auto insert_event_data = InsertEventData( + reader, event_data_length, data_type, nullable, is_field_data); + + std::unique_ptr insert_data; + if (is_field_data) { + insert_data = + std::make_unique(insert_event_data.field_data); + } else { + insert_data = std::make_unique( + insert_event_data.payload_reader); + } insert_data->SetFieldDataMeta(data_meta); insert_data->SetTimestamps(insert_event_data.start_timestamp, insert_event_data.end_timestamp); @@ -105,13 +112,14 @@ DeserializeLocalFileData(BinlogReaderPtr reader) { std::unique_ptr DeserializeFileData(const std::shared_ptr input_data, - int64_t length) { + int64_t length, + bool is_field_data) { auto binlog_reader = std::make_shared(input_data, length); auto medium_type = ReadMediumType(binlog_reader); std::unique_ptr res; switch (medium_type) { case StorageType::Remote: { - res = DeserializeRemoteFileData(binlog_reader); + res = DeserializeRemoteFileData(binlog_reader, is_field_data); break; } case StorageType::LocalDisk: { diff --git a/internal/core/src/storage/DataCodec.h b/internal/core/src/storage/DataCodec.h index 74fe0a65c4c4c..51d11a9db5de6 100644 --- a/internal/core/src/storage/DataCodec.h +++ b/internal/core/src/storage/DataCodec.h @@ -16,11 +16,14 @@ #pragma once +#include +#include #include #include #include #include "common/FieldData.h" +#include "storage/PayloadReader.h" #include "storage/Types.h" #include "storage/PayloadStream.h" #include "storage/BinlogReader.h" @@ -33,6 +36,10 @@ class DataCodec { : field_data_(std::move(data)), codec_type_(type) { } + explicit DataCodec(std::shared_ptr reader, CodecType type) + : payload_reader_(reader), codec_type_(type) { + } + virtual ~DataCodec() = default; // Serialized data can be written directly to remote or local disk @@ -69,18 +76,36 @@ class DataCodec { return field_data_; } + virtual std::shared_ptr + GetReader() { + auto ret = std::make_shared(); + ret->reader = payload_reader_->get_reader(); + ret->arrow_reader = payload_reader_->get_file_reader(); + ret->file_data = data_; + return ret; + } + + void + SetData(std::shared_ptr data) { + data_ = data; + } + protected: CodecType codec_type_; std::pair time_range_; FieldDataPtr field_data_; + std::shared_ptr payload_reader_; + std::shared_ptr data_; }; // Deserialize the data stream of the file obtained from remote or local std::unique_ptr -DeserializeFileData(const std::shared_ptr input, int64_t length); +DeserializeFileData(const std::shared_ptr input, + int64_t length, + bool is_field_data = true); std::unique_ptr -DeserializeRemoteFileData(BinlogReaderPtr reader); +DeserializeRemoteFileData(BinlogReaderPtr reader, bool is_field_data); std::unique_ptr DeserializeLocalFileData(BinlogReaderPtr reader); diff --git a/internal/core/src/storage/Event.cpp b/internal/core/src/storage/Event.cpp index 607191ab010f0..b76657de3fd12 100644 --- a/internal/core/src/storage/Event.cpp +++ b/internal/core/src/storage/Event.cpp @@ -210,7 +210,8 @@ DescriptorEventData::Serialize() { BaseEventData::BaseEventData(BinlogReaderPtr reader, int event_length, DataType data_type, - bool nullable) { + bool nullable, + bool is_field_data) { auto ast = reader->Read(sizeof(start_timestamp), &start_timestamp); AssertInfo(ast.ok(), "read start timestamp failed"); ast = reader->Read(sizeof(end_timestamp), &end_timestamp); @@ -220,9 +221,11 @@ BaseEventData::BaseEventData(BinlogReaderPtr reader, event_length - sizeof(start_timestamp) - sizeof(end_timestamp); auto res = reader->Read(payload_length); AssertInfo(res.first.ok(), "read payload failed"); - auto payload_reader = std::make_shared( - res.second.get(), payload_length, data_type, nullable); - field_data = payload_reader->get_field_data(); + payload_reader = std::make_shared( + res.second.get(), payload_length, data_type, nullable, is_field_data); + if (is_field_data) { + field_data = payload_reader->get_field_data(); + } } std::vector diff --git a/internal/core/src/storage/Event.h b/internal/core/src/storage/Event.h index b974331394f9c..b87f8117f5b93 100644 --- a/internal/core/src/storage/Event.h +++ b/internal/core/src/storage/Event.h @@ -24,6 +24,7 @@ #include "common/FieldData.h" #include "common/Types.h" +#include "storage/PayloadReader.h" #include "storage/Types.h" #include "storage/BinlogReader.h" @@ -76,12 +77,14 @@ struct BaseEventData { Timestamp start_timestamp; Timestamp end_timestamp; FieldDataPtr field_data; + std::shared_ptr payload_reader; BaseEventData() = default; explicit BaseEventData(BinlogReaderPtr reader, int event_length, DataType data_type, - bool nullable); + bool nullable, + bool is_field_data = true); std::vector Serialize(); diff --git a/internal/core/src/storage/InsertData.h b/internal/core/src/storage/InsertData.h index eaccee1fe4802..92c906693b91d 100644 --- a/internal/core/src/storage/InsertData.h +++ b/internal/core/src/storage/InsertData.h @@ -20,6 +20,7 @@ #include #include "storage/DataCodec.h" +#include "storage/PayloadReader.h" namespace milvus::storage { @@ -29,6 +30,10 @@ class InsertData : public DataCodec { : DataCodec(data, CodecType::InsertDataType) { } + explicit InsertData(std::shared_ptr payload_reader) + : DataCodec(payload_reader, CodecType::InsertDataType) { + } + std::vector Serialize(StorageType medium) override; diff --git a/internal/core/src/storage/PayloadReader.cpp b/internal/core/src/storage/PayloadReader.cpp index b7fe5117edf8f..4d38ac69bfbe2 100644 --- a/internal/core/src/storage/PayloadReader.cpp +++ b/internal/core/src/storage/PayloadReader.cpp @@ -28,14 +28,16 @@ namespace milvus::storage { PayloadReader::PayloadReader(const uint8_t* data, int length, DataType data_type, - bool nullable) + bool nullable, + bool is_field_data) : column_type_(data_type), nullable_(nullable) { auto input = std::make_shared(data, length); - init(input); + init(input, is_field_data); } void -PayloadReader::init(std::shared_ptr input) { +PayloadReader::init(std::shared_ptr input, + bool is_field_data) { arrow::MemoryPool* pool = arrow::default_memory_pool(); // Configure general Parquet reader settings @@ -73,17 +75,21 @@ PayloadReader::init(std::shared_ptr input) { st = arrow_reader->GetRecordBatchReader(&rb_reader); AssertInfo(st.ok(), "get record batch reader"); - field_data_ = - CreateFieldData(column_type_, nullable_, dim_, total_num_rows); - for (arrow::Result> maybe_batch : - *rb_reader) { - AssertInfo(maybe_batch.ok(), "get batch record success"); - auto array = maybe_batch.ValueOrDie()->column(column_index); - // to read - field_data_->FillFieldData(array); + if (is_field_data) { + field_data_ = + CreateFieldData(column_type_, nullable_, dim_, total_num_rows); + for (arrow::Result> maybe_batch : + *rb_reader) { + AssertInfo(maybe_batch.ok(), "get batch record success"); + auto array = maybe_batch.ValueOrDie()->column(column_index); + // to read + field_data_->FillFieldData(array); + } + AssertInfo(field_data_->IsFull(), "field data hasn't been filled done"); + } else { + arrow_reader_ = std::move(arrow_reader); + record_batch_reader_ = std::move(rb_reader); } - AssertInfo(field_data_->IsFull(), "field data hasn't been filled done"); - // LOG_INFO("Peak arrow memory pool size {}", pool)->max_memory(); } } // namespace milvus::storage diff --git a/internal/core/src/storage/PayloadReader.h b/internal/core/src/storage/PayloadReader.h index 1e75dcd8cb2d2..214ac4c907d90 100644 --- a/internal/core/src/storage/PayloadReader.h +++ b/internal/core/src/storage/PayloadReader.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include "common/FieldData.h" @@ -29,23 +30,37 @@ class PayloadReader { explicit PayloadReader(const uint8_t* data, int length, DataType data_type, - bool nullable_); + bool nullable, + bool is_field_data = true); ~PayloadReader() = default; void - init(std::shared_ptr buffer); + init(std::shared_ptr buffer, bool is_field_data); const FieldDataPtr get_field_data() const { return field_data_; } + std::shared_ptr + get_reader() { + return record_batch_reader_; + } + + std::shared_ptr + get_file_reader() { + return arrow_reader_; + } + private: DataType column_type_; int dim_; bool nullable_; FieldDataPtr field_data_; + + std::shared_ptr arrow_reader_; + std::shared_ptr record_batch_reader_; }; } // namespace milvus::storage diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 4efdd45d8cc0e..0ccf13b45fe9d 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -539,12 +539,15 @@ GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id) { std::unique_ptr DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager, - const std::string& file) { + const std::string& file, + bool is_field_data) { auto fileSize = chunk_manager->Size(file); auto buf = std::shared_ptr(new uint8_t[fileSize]); chunk_manager->Read(file, buf.get(), fileSize); - return DeserializeFileData(buf, fileSize); + auto res = DeserializeFileData(buf, fileSize, is_field_data); + res->SetData(buf); + return res; } std::pair @@ -599,7 +602,7 @@ GetObjectData(ChunkManager* remote_chunk_manager, futures.reserve(remote_files.size()); for (auto& file : remote_files) { futures.emplace_back(pool.Submit( - DownloadAndDecodeRemoteFile, remote_chunk_manager, file)); + DownloadAndDecodeRemoteFile, remote_chunk_manager, file, true)); } return futures; } diff --git a/internal/core/src/storage/Util.h b/internal/core/src/storage/Util.h index 4a62096bb7370..b3a6a124fbe70 100644 --- a/internal/core/src/storage/Util.h +++ b/internal/core/src/storage/Util.h @@ -102,7 +102,8 @@ GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id); std::unique_ptr DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager, - const std::string& file); + const std::string& file, + bool is_field_data = true); std::pair EncodeAndUploadIndexSlice(ChunkManager* chunk_manager, diff --git a/internal/core/unittest/test_chunk.cpp b/internal/core/unittest/test_chunk.cpp index 543284d16b1ab..126f11cc4739e 100644 --- a/internal/core/unittest/test_chunk.cpp +++ b/internal/core/unittest/test_chunk.cpp @@ -9,17 +9,23 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License +#include #include #include #include #include +#include #include +#include +#include "boost/filesystem/operations.hpp" +#include "boost/filesystem/path.hpp" #include "common/Chunk.h" #include "common/ChunkWriter.h" #include "common/EasyAssert.h" #include "common/FieldDataInterface.h" #include "common/FieldMeta.h" +#include "common/File.h" #include "common/Types.h" #include "storage/Event.h" #include "storage/Util.h" @@ -53,8 +59,7 @@ TEST(chunk, test_int64_field) { FieldMeta field_meta( FieldName("a"), milvus::FieldId(1), DataType::INT64, false); auto chunk = create_chunk(field_meta, 1, rb_reader); - auto span = - std::dynamic_pointer_cast>(chunk)->Span(); + auto span = std::dynamic_pointer_cast(chunk)->Span(); EXPECT_EQ(span.row_count(), data.size()); for (size_t i = 0; i < data.size(); ++i) { auto n = *(int64_t*)((char*)span.data() + i * span.element_sizeof()); @@ -92,7 +97,7 @@ TEST(chunk, test_variable_field) { auto chunk = create_chunk(field_meta, 1, rb_reader); auto views = std::dynamic_pointer_cast(chunk)->StringViews(); for (size_t i = 0; i < data.size(); ++i) { - EXPECT_EQ(views[i], data[i]); + EXPECT_EQ(views.first[i], data[i]); } } @@ -183,4 +188,68 @@ TEST(chunk, test_sparse_float) { EXPECT_EQ(v1[j].val, v2[j].val); } } +} + +class TempDir { + public: + TempDir() { + auto path = boost::filesystem::unique_path("%%%%_%%%%"); + auto abs_path = boost::filesystem::temp_directory_path() / path; + boost::filesystem::create_directory(abs_path); + dir_ = abs_path; + } + + ~TempDir() { + boost::filesystem::remove_all(dir_); + } + + std::string + dir() { + return dir_.string(); + } + + private: + boost::filesystem::path dir_; +}; + +TEST(chunk, multiple_chunk_mmap) { + TempDir temp; + std::string temp_dir = temp.dir(); + auto file = File::Open(temp_dir + "/multi_chunk_mmap", O_CREAT | O_RDWR); + + FixedVector data = {1, 2, 3, 4, 5}; + auto field_data = + milvus::storage::CreateFieldData(storage::DataType::INT64); + field_data->FillFieldData(data.data(), data.size()); + storage::InsertEventData event_data; + event_data.field_data = field_data; + auto ser_data = event_data.Serialize(); + auto buffer = std::make_shared( + ser_data.data() + 2 * sizeof(milvus::Timestamp), + ser_data.size() - 2 * sizeof(milvus::Timestamp)); + + parquet::arrow::FileReaderBuilder reader_builder; + auto s = reader_builder.Open(buffer); + EXPECT_TRUE(s.ok()); + std::unique_ptr arrow_reader; + s = reader_builder.Build(&arrow_reader); + EXPECT_TRUE(s.ok()); + + std::shared_ptr<::arrow::RecordBatchReader> rb_reader; + s = arrow_reader->GetRecordBatchReader(&rb_reader); + EXPECT_TRUE(s.ok()); + + FieldMeta field_meta( + FieldName("a"), milvus::FieldId(1), DataType::INT64, false); + int file_offset = 0; + auto page_size = sysconf(_SC_PAGESIZE); + auto chunk = create_chunk(field_meta, 1, file, file_offset, rb_reader); + EXPECT_TRUE(chunk->Size() % page_size == 0); + file_offset += chunk->Size(); + + std::shared_ptr<::arrow::RecordBatchReader> rb_reader2; + s = arrow_reader->GetRecordBatchReader(&rb_reader2); + EXPECT_TRUE(s.ok()); + auto chunk2 = create_chunk(field_meta, 1, file, file_offset, rb_reader2); + EXPECT_TRUE(chunk->Size() % page_size == 0); } \ No newline at end of file diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index 5372847808345..68aec85c752a3 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -508,7 +508,7 @@ TEST(Sealed, LoadFieldData) { vec_info.index_params["metric_type"] = knowhere::metric::L2; segment->LoadIndex(vec_info); - ASSERT_EQ(segment->num_chunk(), 1); + ASSERT_EQ(segment->num_chunk(FieldId(0)), 1); ASSERT_EQ(segment->num_chunk_index(double_id), 0); ASSERT_EQ(segment->num_chunk_index(str_id), 0); auto chunk_span1 = segment->chunk_data(counter_id, 0); @@ -671,7 +671,7 @@ TEST(Sealed, ClearData) { vec_info.index_params["metric_type"] = knowhere::metric::L2; segment->LoadIndex(vec_info); - ASSERT_EQ(segment->num_chunk(), 1); + ASSERT_EQ(segment->num_chunk(FieldId(0)), 1); ASSERT_EQ(segment->num_chunk_index(double_id), 0); ASSERT_EQ(segment->num_chunk_index(str_id), 0); auto chunk_span1 = segment->chunk_data(counter_id, 0); @@ -775,7 +775,7 @@ TEST(Sealed, LoadFieldDataMmap) { vec_info.index_params["metric_type"] = knowhere::metric::L2; segment->LoadIndex(vec_info); - ASSERT_EQ(segment->num_chunk(), 1); + ASSERT_EQ(segment->num_chunk(FieldId(0)), 1); ASSERT_EQ(segment->num_chunk_index(double_id), 0); ASSERT_EQ(segment->num_chunk_index(str_id), 0); auto chunk_span1 = segment->chunk_data(counter_id, 0); diff --git a/internal/core/unittest/test_span.cpp b/internal/core/unittest/test_span.cpp index f0cca40d0b858..0fbc8f566f8e7 100644 --- a/internal/core/unittest/test_span.cpp +++ b/internal/core/unittest/test_span.cpp @@ -46,7 +46,7 @@ TEST(Span, Naive) { auto float_ptr = dataset.get_col(float_vec_fid); auto nullable_data_ptr = dataset.get_col(nullable_fid); auto nullable_valid_data_ptr = dataset.get_col_valid(nullable_fid); - auto num_chunk = segment->num_chunk(); + auto num_chunk = segment->num_chunk(FieldId(0)); ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk)); auto row_count = segment->get_row_count(); ASSERT_EQ(N, row_count); diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 50a9a51ddcc93..78b4624353f46 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -298,11 +298,16 @@ func NewSegment(ctx context.Context, return nil, err } + multipleChunkEnable := paramtable.Get().QueryNodeCfg.MultipleChunkedEnable.GetAsBool() var cSegType C.SegmentType var locker *state.LoadStateLock switch segmentType { case SegmentTypeSealed: - cSegType = C.Sealed + if multipleChunkEnable { + cSegType = C.ChunkedSealed + } else { + cSegType = C.Sealed + } locker = state.NewLoadStateLock(state.LoadStateOnlyMeta) case SegmentTypeGrowing: locker = state.NewLoadStateLock(state.LoadStateDataLoaded) diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 371f242d6785a..d122694753577 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2336,6 +2336,7 @@ type queryNodeConfig struct { InterimIndexNProbe ParamItem `refreshable:"false"` InterimIndexMemExpandRate ParamItem `refreshable:"false"` InterimIndexBuildParallelRate ParamItem `refreshable:"false"` + MultipleChunkedEnable ParamItem `refreshable:"false"` KnowhereScoreConsistency ParamItem `refreshable:"false"` @@ -2546,6 +2547,15 @@ This defaults to true, indicating that Milvus creates temporary index for growin } p.InterimIndexBuildParallelRate.Init(base.mgr) + p.MultipleChunkedEnable = ParamItem{ + Key: "queryNode.segcore.multipleChunkedEnable", + Version: "2.0.0", + DefaultValue: "false", + Doc: "Enable multiple chunked search", + Export: true, + } + p.MultipleChunkedEnable.Init(base.mgr) + p.InterimIndexNProbe = ParamItem{ Key: "queryNode.segcore.interimIndex.nprobe", Version: "2.0.0",