Skip to content

Commit

Permalink
fix: add SearchOnSealed unit test and fix a bug (#37241)
Browse files: browse the repository at this point in the history
issue: #37244

---------

Signed-off-by: sunby <[email protected]>
  • Loading branch information
sunby committed Oct 30, 2024
1 parent ee6ce8c commit 552b10e
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 3 deletions.
1 change: 1 addition & 0 deletions internal/core/src/mmap/ChunkedColumn.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ class ChunkedColumnBase : public ColumnBase {

class ChunkedColumn : public ChunkedColumnBase {
public:
ChunkedColumn() = default;
// memory mode ctor
ChunkedColumn(const FieldMeta& field_meta) : ChunkedColumnBase(field_meta) {
}
Expand Down
11 changes: 8 additions & 3 deletions internal/core/src/query/SearchOnSealed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,30 +130,35 @@ SearchOnSealed(const Schema& schema,
chunk_size);
bitset_ptr = reinterpret_cast<const uint8_t*>(bitset_data);
}
offset += chunk_size;
BitsetView bitset_view(bitset_ptr, chunk_size);

if (search_info.group_by_field_id_.has_value()) {
auto sub_qr = BruteForceSearchIterators(dataset,
vec_data,
row_count,
chunk_size,
search_info,
bitset_view,
data_type);
final_qr.merge(sub_qr);
} else {
auto sub_qr = BruteForceSearch(dataset,
vec_data,
row_count,
chunk_size,
search_info,
bitset_view,
data_type);
for (auto& o : sub_qr.mutable_seg_offsets()) {
if (o != -1) {
o += offset;
}
}
final_qr.merge(sub_qr);
}

if (!aligned) {
delete[] bitset_ptr;
}
offset += chunk_size;
}
if (search_info.group_by_field_id_.has_value()) {
result.AssembleChunkVectorIterators(
Expand Down
1 change: 1 addition & 0 deletions internal/core/unittest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ set(MILVUS_TEST_FILES
test_timestamp_index.cpp
test_tracer.cpp
test_utils.cpp
test_chunked_segment.cpp
)

if ( INDEX_ENGINE STREQUAL "cardinal" )
Expand Down
110 changes: 110 additions & 0 deletions internal/core/unittest/test_chunked_segment.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#include <gtest/gtest.h>
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <functional>
#include <set>
#include <vector>
#include "common/BitsetView.h"
#include "common/QueryInfo.h"
#include "common/Schema.h"
#include "knowhere/comp/index_param.h"
#include "mmap/ChunkedColumn.h"
#include "query/SearchOnSealed.h"
#include "test_utils/DataGen.h"

// Minimal scope guard: collects cleanup callbacks and invokes them when the
// guard is destroyed.
//
// Callbacks run in reverse registration order (last added, first run), which
// mirrors the order in which C++ destroys local objects, so a later cleanup
// may still rely on a resource whose release was registered earlier.
//
// The guard is non-copyable: a copy would invoke every callback a second
// time from its own destructor (e.g. a double delete[] of the same buffer).
struct DeferRelease {
    using functype = std::function<void()>;

    DeferRelease() = default;
    DeferRelease(const DeferRelease&) = delete;
    DeferRelease&
    operator=(const DeferRelease&) = delete;

    // Registers `closure` to be invoked once from the destructor.
    void
    AddDefer(const functype& closure) {
        closures.push_back(closure);
    }

    // Runs all registered callbacks, most recently added first.
    ~DeferRelease() {
        for (auto it = closures.rbegin(); it != closures.rend(); ++it) {
            (*it)();
        }
    }

    // Pending callbacks in registration order.
    std::vector<functype> closures;
};

using namespace milvus;
// Checks that query::SearchOnSealed over a multi-chunk ChunkedColumn reports
// segment-wide row offsets: with topk equal to the total row count and an
// all-zero bitset, every offset in [0, total_row_count) must appear in the
// result.
TEST(test_chunk_segment, TestSearchOnSealed) {
    // Declared first so it is destroyed last: the raw buffers it frees stay
    // alive until `column` (declared below) has been destroyed.
    DeferRelease defer;

    int dim = 16;
    int chunk_num = 3;
    int chunk_size = 100;
    int total_row_count = chunk_num * chunk_size;
    int bitset_size = (total_row_count + 7) / 8;
    int chunk_bitset_size = (chunk_size + 7) / 8;
    // Width in bytes of one vector component (the column holds floats).
    constexpr int elem_size = sizeof(float);

    auto column = std::make_shared<ChunkedColumn>();
    auto schema = std::make_shared<Schema>();
    auto fakevec_id = schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::COSINE);

    for (int i = 0; i < chunk_num; i++) {
        auto dataset = segcore::DataGen(schema, chunk_size);
        auto data = dataset.get_col<float>(fakevec_id);
        auto buf_size = chunk_bitset_size + elem_size * data.size();

        // Value-initialize the buffer: the memcpy below only fills bytes from
        // chunk_bitset_size onwards, so without this the leading bytes would
        // be handed to the chunk with indeterminate contents.
        // NOTE(review): the prefix presumably holds the chunk's null bitmap -
        // confirm against FixedWidthChunk's layout.
        char* buf = new char[buf_size]();
        defer.AddDefer([buf]() { delete[] buf; });
        memcpy(buf + chunk_bitset_size, data.data(), elem_size * data.size());

        auto chunk = std::make_shared<FixedWidthChunk>(
            chunk_size, dim, buf, buf_size, elem_size, false);
        column->AddChunk(chunk);
    }

    SearchInfo search_info;
    auto search_conf = knowhere::Json{
        {knowhere::meta::METRIC_TYPE, knowhere::metric::COSINE},
    };
    search_info.search_params_ = search_conf;
    search_info.field_id_ = fakevec_id;
    search_info.metric_type_ = knowhere::metric::COSINE;
    // expect to return all rows
    search_info.topk_ = total_row_count;

    // Segment-wide bitset with every bit cleared.
    uint8_t* bitset_data = new uint8_t[bitset_size];
    defer.AddDefer([bitset_data]() { delete[] bitset_data; });
    std::fill(bitset_data, bitset_data + bitset_size, 0);
    BitsetView bv(bitset_data, total_row_count);

    // A single generated query vector.
    auto query_ds = segcore::DataGen(schema, 1);
    auto col_query_data = query_ds.get_col<float>(fakevec_id);
    auto query_data = col_query_data.data();
    SearchResult search_result;

    query::SearchOnSealed(*schema,
                          column,
                          search_info,
                          query_data,
                          1,
                          total_row_count,
                          bv,
                          search_result);

    // Collect the distinct valid offsets; -1 entries are skipped.
    std::set<int64_t> offsets;
    for (auto& offset : search_result.seg_offsets_) {
        if (offset != -1) {
            offsets.insert(offset);
        }
    }
    // check all rows are returned
    ASSERT_EQ(static_cast<size_t>(total_row_count), offsets.size());
    for (int i = 0; i < total_row_count; i++) {
        ASSERT_TRUE(offsets.find(i) != offsets.end())
            << "missing segment offset " << i;
    }
}

0 comments on commit 552b10e

Please sign in to comment.