From 552b10e2797e11312b16955df0795cf237a2ff35 Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Wed, 30 Oct 2024 10:26:19 +0800 Subject: [PATCH] fix: add SearchOnSealed unit test and fix a bug (#37241) issue: https://github.com/milvus-io/milvus/issues/37244 --------- Signed-off-by: sunby --- internal/core/src/mmap/ChunkedColumn.h | 1 + internal/core/src/query/SearchOnSealed.cpp | 11 +- internal/core/unittest/CMakeLists.txt | 1 + .../core/unittest/test_chunked_segment.cpp | 110 ++++++++++++++++++ 4 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 internal/core/unittest/test_chunked_segment.cpp diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h index 09dbd2597cbb4..91a7bf230b3ca 100644 --- a/internal/core/src/mmap/ChunkedColumn.h +++ b/internal/core/src/mmap/ChunkedColumn.h @@ -211,6 +211,7 @@ class ChunkedColumnBase : public ColumnBase { class ChunkedColumn : public ChunkedColumnBase { public: + ChunkedColumn() = default; // memory mode ctor ChunkedColumn(const FieldMeta& field_meta) : ChunkedColumnBase(field_meta) { } diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index c519f480ca696..9b3a4df287599 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -130,13 +130,12 @@ SearchOnSealed(const Schema& schema, chunk_size); bitset_ptr = reinterpret_cast(bitset_data); } - offset += chunk_size; BitsetView bitset_view(bitset_ptr, chunk_size); if (search_info.group_by_field_id_.has_value()) { auto sub_qr = BruteForceSearchIterators(dataset, vec_data, - row_count, + chunk_size, search_info, bitset_view, data_type); @@ -144,16 +143,22 @@ SearchOnSealed(const Schema& schema, } else { auto sub_qr = BruteForceSearch(dataset, vec_data, - row_count, + chunk_size, search_info, bitset_view, data_type); + for (auto& o : sub_qr.mutable_seg_offsets()) { + if (o != -1) { + o += offset; + } + } final_qr.merge(sub_qr); } if (!aligned) { delete[] bitset_ptr; } + offset += chunk_size; } if (search_info.group_by_field_id_.has_value()) { result.AssembleChunkVectorIterators( diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 3b4109b6c652a..67d97b83c3fff 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -85,6 +85,7 @@ set(MILVUS_TEST_FILES test_timestamp_index.cpp test_tracer.cpp test_utils.cpp + test_chunked_segment.cpp ) if ( INDEX_ENGINE STREQUAL "cardinal" ) diff --git a/internal/core/unittest/test_chunked_segment.cpp b/internal/core/unittest/test_chunked_segment.cpp new file mode 100644 index 0000000000000..b0b624b68a793 --- /dev/null +++ b/internal/core/unittest/test_chunked_segment.cpp @@ -0,0 +1,110 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include +#include "common/BitsetView.h" +#include "common/QueryInfo.h" +#include "common/Schema.h" +#include "knowhere/comp/index_param.h" +#include "mmap/ChunkedColumn.h" +#include "query/SearchOnSealed.h" +#include "test_utils/DataGen.h" +#include + +struct DeferRelease { + using functype = std::function; + void + AddDefer(const functype& closure) { + closures.push_back(closure); + } + + ~DeferRelease() { + for (auto& closure : closures) { + closure(); + } + } + + std::vector closures; +}; + +using namespace milvus; +TEST(test_chunk_segment, TestSearchOnSealed) { + DeferRelease defer; + + int dim = 16; + int chunk_num = 3; + int chunk_size = 100; + int total_row_count = chunk_num * chunk_size; + int bitset_size = (total_row_count + 7) / 8; + int chunk_bitset_size = (chunk_size + 7) / 8; + + auto column = std::make_shared(); + auto schema = std::make_shared(); + auto fakevec_id = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::COSINE); + + for (int i = 0; i < chunk_num; i++) { + auto dataset = segcore::DataGen(schema, chunk_size); + auto data = dataset.get_col(fakevec_id); + auto buf_size = chunk_bitset_size + 4 * data.size(); + + char* buf = new char[buf_size]; + defer.AddDefer([buf]() { delete[] buf; }); + memcpy(buf + chunk_bitset_size, data.data(), 4 * data.size()); + + auto chunk = std::make_shared( + chunk_size, dim, buf, buf_size, 4, false); + column->AddChunk(chunk); + } + + SearchInfo search_info; + auto search_conf = knowhere::Json{ + {knowhere::meta::METRIC_TYPE, knowhere::metric::COSINE}, + }; + search_info.search_params_ = search_conf; + search_info.field_id_ = fakevec_id; + search_info.metric_type_ = knowhere::metric::COSINE; + // expect to return all rows + search_info.topk_ = total_row_count; + + uint8_t* bitset_data = new uint8_t[bitset_size]; + defer.AddDefer([bitset_data]() { delete[] bitset_data; }); + std::fill(bitset_data, bitset_data + bitset_size, 0); + BitsetView bv(bitset_data, total_row_count); + + auto query_ds = segcore::DataGen(schema, 1); + auto col_query_data = query_ds.get_col(fakevec_id); + auto query_data = col_query_data.data(); + SearchResult search_result; + + query::SearchOnSealed(*schema, + column, + search_info, + query_data, + 1, + chunk_size * chunk_num, + bv, + search_result); + + std::set offsets; + for (auto& offset : search_result.seg_offsets_) { + if (offset != -1) { + offsets.insert(offset); + } + } + // check all rows are returned + ASSERT_EQ(total_row_count, offsets.size()); + for (int i = 0; i < total_row_count; i++) { + ASSERT_TRUE(offsets.find(i) != offsets.end()); + } +}