diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h
index 7cfaa7fad466e..11d60def58800 100644
--- a/internal/core/src/common/Chunk.h
+++ b/internal/core/src/common/Chunk.h
@@ -31,6 +31,8 @@
 #include "common/Types.h"
 namespace milvus {
 constexpr uint64_t MMAP_STRING_PADDING = 1;
+// Number of padding bytes appended after the payload of a geometry chunk.
+constexpr uint64_t MMAP_GEOMETRY_PADDING = 1;
 constexpr uint64_t MMAP_ARRAY_PADDING = 1;
 class Chunk {
  public:
@@ -185,6 +187,8 @@ class StringChunk : public Chunk {
 };
 
 using JSONChunk = StringChunk;
+// Geometry values are stored as WKB byte strings, so they share StringChunk.
+using GeometryChunk = StringChunk;
 
 class ArrayChunk : public Chunk {
  public:
diff --git a/internal/core/src/common/ChunkWriter.cpp b/internal/core/src/common/ChunkWriter.cpp
index 5542556abc067..083021ac3f139 100644
--- a/internal/core/src/common/ChunkWriter.cpp
+++ b/internal/core/src/common/ChunkWriter.cpp
@@ -19,6 +19,7 @@
 #include "common/Chunk.h"
 #include "common/EasyAssert.h"
 #include "common/FieldDataInterface.h"
+#include "common/Geometry.h"
 #include "common/Types.h"
 #include "common/VectorTrait.h"
 #include "simdjson/common_defs.h"
@@ -156,6 +157,90 @@ JSONChunkWriter::finish() {
     return std::make_shared<JSONChunk>(row_nums_, data, size, nullable_);
 }
 
+// Copies every value of column 0 of each record batch in `data` into the
+// chunk target, together with the batches' null bitmaps and an offset table.
+// Values are handled as opaque byte strings; nothing is parsed or validated.
+//
+// chunk layout: null bitmap, offset1, offset2, ..., offsetn, wkb1, wkb2,
+// ..., wkbn, padding (the padding itself is appended by finish())
+void
+GeometryChunkWriter::write(std::shared_ptr<arrow::RecordBatchReader> data) {
+    // 64-bit on purpose: `auto size = 0` deduces int, which overflows once
+    // a chunk reaches 2 GiB.
+    uint64_t size = 0;
+    std::vector<std::string> wkb_strs;
+    std::vector<std::pair<const uint8_t*, int64_t>> null_bitmaps;
+    for (auto batch : *data) {
+        auto data = batch.ValueOrDie()->column(0);
+        auto array = std::dynamic_pointer_cast<arrow::BinaryArray>(data);
+        for (int64_t i = 0; i < array->length(); i++) {
+            auto str = array->GetView(i);
+            wkb_strs.emplace_back(str);
+            size += str.size();
+        }
+        // One bit per row, rounded up to whole bytes for this batch.
+        auto null_bitmap_n = (data->length() + 7) / 8;
+        // NOTE(review): only the raw bitmap pointer is kept; this assumes the
+        // reader keeps each batch's buffers alive until they are written
+        // below - confirm.
+        null_bitmaps.emplace_back(data->null_bitmap_data(), null_bitmap_n);
+        size += null_bitmap_n;
+        row_nums_ += array->length();
+    }
+    size += sizeof(uint64_t) * (row_nums_ + 1) + MMAP_GEOMETRY_PADDING;
+    if (file_) {
+        target_ = std::make_shared<MmapChunkTarget>(*file_, file_offset_);
+    } else {
+        target_ = std::make_shared<InMemChunkTarget>(size);
+    }
+
+    // write null bitmaps
+    for (auto [bitmap, bitmap_bytes] : null_bitmaps) {
+        if (bitmap == nullptr) {
+            // The batch carries no bitmap: emit all-ones bytes instead.
+            std::vector<uint8_t> null_bitmap(bitmap_bytes, 0xff);
+            target_->write(null_bitmap.data(), bitmap_bytes);
+        } else {
+            target_->write(bitmap, bitmap_bytes);
+        }
+    }
+
+    // The first offset is the current write position plus the size of the
+    // offset table; one extra entry marks the end of the last value.
+    // Kept as uint64_t to match the width of the stored entries instead of
+    // truncating through int.
+    const uint64_t offset_num = row_nums_ + 1;
+    uint64_t offset_start_pos =
+        target_->tell() + sizeof(uint64_t) * offset_num;
+    std::vector<uint64_t> offsets;
+    offsets.reserve(offset_num);
+
+    for (const auto& str : wkb_strs) {
+        offsets.push_back(offset_start_pos);
+        offset_start_pos += str.size();
+    }
+    offsets.push_back(offset_start_pos);
+
+    target_->write(offsets.data(), offsets.size() * sizeof(uint64_t));
+
+    for (const auto& str : wkb_strs) {
+        target_->write(str.data(), str.size());
+    }
+}
+
+// Appends the trailing padding and wraps the written buffer in a
+// GeometryChunk.
+std::shared_ptr<Chunk>
+GeometryChunkWriter::finish() {
+    // write padding, maybe not needed anymore
+    // FIXME
+    // Zero-initialized so no indeterminate stack bytes end up in the chunk.
+    char padding[MMAP_GEOMETRY_PADDING] = {};
+    target_->write(padding, MMAP_GEOMETRY_PADDING);
+    auto [data, size] = target_->get();
+    return std::make_shared<GeometryChunk>(row_nums_, data, size, nullable_);
+}
+
 void
 ArrayChunkWriter::write(std::shared_ptr<arrow::RecordBatchReader> data) {
     auto size = 0;
@@ -383,6 +468,12 @@ create_chunk(const FieldMeta& field_meta,
             w = std::make_shared<JSONChunkWriter>(nullable);
             break;
         }
+        case milvus::DataType::GEOMETRY: {
+            w = std::make_shared<GeometryChunkWriter>(nullable);
+            // Without this break the case falls through to ARRAY and the
+            // writer is replaced by an ArrayChunkWriter.
+            break;
+        }
         case milvus::DataType::ARRAY: {
             w = std::make_shared<ArrayChunkWriter>(
                 field_meta.get_element_type(), nullable);
@@ -479,6 +570,11 @@ create_chunk(const FieldMeta& field_meta,
             w = std::make_shared<JSONChunkWriter>(file, file_offset, nullable);
             break;
         }
+        case milvus::DataType::GEOMETRY: {
+            w = std::make_shared<GeometryChunkWriter>(
+                file, file_offset, nullable);
+            break;
+        }
         case milvus::DataType::ARRAY: {
             w = std::make_shared<ArrayChunkWriter>(
                 field_meta.get_element_type(), file, file_offset, nullable);
diff --git a/internal/core/src/common/ChunkWriter.h b/internal/core/src/common/ChunkWriter.h
index c389b0e799096..04fc0079c7204 100644
--- a/internal/core/src/common/ChunkWriter.h
+++ b/internal/core/src/common/ChunkWriter.h
@@ -21,6 +21,7 @@
 #include "common/Chunk.h"
 #include "common/EasyAssert.h"
 #include "common/FieldDataInterface.h"
+#include "common/Geometry.h"
 namespace milvus {
 
 class ChunkWriterBase {
@@ -180,6 +181,19 @@ class JSONChunkWriter : public ChunkWriterBase {
     finish() override;
 };
 
+// Writes geometry (WKB) column data into a GeometryChunk.
+class GeometryChunkWriter : public ChunkWriterBase {
+ public:
+    using ChunkWriterBase::ChunkWriterBase;
+    // Writes all rows from `data` into the chunk target.
+    void
+    write(std::shared_ptr<arrow::RecordBatchReader> data) override;
+
+    // Appends the trailing padding and returns the finished chunk.
+    std::shared_ptr<Chunk>
+    finish() override;
+};
+
 class ArrayChunkWriter : public ChunkWriterBase {
  public:
     ArrayChunkWriter(const milvus::DataType element_type, bool nullable)