Skip to content

Commit

Permalink
fix: use the correct offset in span (#37780)
Browse files Browse the repository at this point in the history
#37734

Signed-off-by: lixinguo <[email protected]>
Co-authored-by: lixinguo <[email protected]>
  • Loading branch information
smellthemoon and lixinguo authored Nov 18, 2024
1 parent 97b7ebb commit 3d28d99
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 28 deletions.
2 changes: 1 addition & 1 deletion internal/core/src/mmap/ChunkVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class ThreadSafeChunkVector : public ChunkVectorBase<Type> {
get_element_offset(int64_t index) override {
std::shared_lock<std::shared_mutex> lck(mutex_);
int64_t offset = 0;
for (int i = 0; i < index - 1; i++) {
for (int i = 0; i < index; i++) {
offset += vec_[i].size();
}
return offset;
Expand Down
2 changes: 1 addition & 1 deletion internal/core/unittest/test_span.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ TEST(Span, Naive) {
schema->AddDebugField("nullable", DataType::INT64, true);
schema->set_primary_field_id(i64_fid);

auto dataset = DataGen(schema, N);
auto dataset = DataGen(schema, N, 42, 0, 1, 10, false, true, true);
auto segment = CreateGrowingSegment(schema, empty_index_meta, -1);
segment->PreInsert(N);
segment->Insert(0,
Expand Down
58 changes: 32 additions & 26 deletions internal/core/unittest/test_utils/DataGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cmath>
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <cstdlib>

#include "Constants.h"
#include "common/EasyAssert.h"
Expand Down Expand Up @@ -324,25 +325,30 @@ inline GeneratedData DataGen(SchemaPtr schema,
int repeat_count = 1,
int array_len = 10,
bool random_pk = false,
bool random_val = true) {
bool random_val = true,
bool random_valid = false) {
using std::vector;
std::default_random_engine random(seed);
std::normal_distribution<> distr(0, 1);
int offset = 0;

auto insert_data = std::make_unique<InsertRecordProto>();
auto insert_cols = [&insert_data](
auto& data, int64_t count, auto& field_meta) {
FixedVector<bool> valid_data(count);
if (field_meta.is_nullable()) {
for (int i = 0; i < count; ++i) {
valid_data[i] = i % 2 == 0 ? true : false;
auto insert_cols =
[&insert_data](
auto& data, int64_t count, auto& field_meta, bool random_valid) {
FixedVector<bool> valid_data(count);
if (field_meta.is_nullable()) {
for (int i = 0; i < count; ++i) {
int x = i;
if (random_valid)
x = rand();
valid_data[i] = x % 2 == 0 ? true : false;
}
}
}
auto array = milvus::segcore::CreateDataArrayFrom(
data.data(), valid_data.data(), count, field_meta);
insert_data->mutable_fields_data()->AddAllocated(array.release());
};
auto array = milvus::segcore::CreateDataArrayFrom(
data.data(), valid_data.data(), count, field_meta);
insert_data->mutable_fields_data()->AddAllocated(array.release());
};

for (auto field_id : schema->get_field_ids()) {
auto field_meta = schema->operator[](field_id);
Expand Down Expand Up @@ -373,7 +379,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
std::copy(
data.begin(), data.end(), final.begin() + dim * n);
}
insert_cols(final, N, field_meta);
insert_cols(final, N, field_meta, random_valid);
break;
}
case DataType::VECTOR_BINARY: {
Expand All @@ -383,7 +389,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
for (auto& x : data) {
x = random();
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::VECTOR_FLOAT16: {
Expand All @@ -392,7 +398,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
for (auto& x : final) {
x = float16(distr(random) + offset);
}
insert_cols(final, N, field_meta);
insert_cols(final, N, field_meta, random_valid);
break;
}
case DataType::VECTOR_SPARSE_FLOAT: {
Expand All @@ -411,15 +417,15 @@ inline GeneratedData DataGen(SchemaPtr schema,
for (auto& x : final) {
x = bfloat16(distr(random) + offset);
}
insert_cols(final, N, field_meta);
insert_cols(final, N, field_meta, random_valid);
break;
}
case DataType::BOOL: {
FixedVector<bool> data(N);
for (int i = 0; i < N; ++i) {
data[i] = i % 2 == 0 ? true : false;
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::INT64: {
Expand All @@ -432,7 +438,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
data[i] = i / repeat_count;
}
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::INT32: {
Expand All @@ -445,7 +451,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
x = i / repeat_count;
data[i] = x;
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::INT16: {
Expand All @@ -458,7 +464,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
x = i / repeat_count;
data[i] = x;
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::INT8: {
Expand All @@ -471,23 +477,23 @@ inline GeneratedData DataGen(SchemaPtr schema,
x = i / repeat_count;
data[i] = x;
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::FLOAT: {
vector<float> data(N);
for (auto& x : data) {
x = distr(random);
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::DOUBLE: {
vector<double> data(N);
for (auto& x : data) {
x = distr(random);
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::VARCHAR: {
Expand All @@ -499,7 +505,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
}
}
std::sort(data.begin(), data.end());
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::JSON: {
Expand All @@ -513,7 +519,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
"}";
data[i] = str;
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
case DataType::ARRAY: {
Expand Down Expand Up @@ -619,7 +625,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
throw std::runtime_error("unsupported data type");
}
}
insert_cols(data, N, field_meta);
insert_cols(data, N, field_meta, random_valid);
break;
}
default: {
Expand Down

0 comments on commit 3d28d99

Please sign in to comment.