Skip to content

Commit

Permalink
fix UT and format
Browse files Browse the repository at this point in the history
Signed-off-by: Patrick Weizhi Xu <[email protected]>
  • Loading branch information
PwzXxm committed Jun 21, 2024
1 parent 3aa1e3f commit 3049d07
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 62 deletions.
26 changes: 16 additions & 10 deletions internal/core/src/storage/DiskFileManagerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -746,11 +746,11 @@ DiskFileManagerImpl::CacheOptFieldToDisk(
field_datas.emplace_back(field_data);
}
if (WriteOptFieldIvfData(field_type,
field_id,
local_chunk_manager,
local_data_path,
field_datas,
write_offset)) {
field_id,
local_chunk_manager,
local_data_path,
field_datas,
write_offset)) {
actual_field_ids.insert(field_id);
}
}
Expand All @@ -761,6 +761,9 @@ DiskFileManagerImpl::CacheOptFieldToDisk(
local_data_path,
actual_field_ids.size(),
write_offset);
if (actual_field_ids.empty()) {
return "";
}
}
return local_data_path;
}
Expand Down Expand Up @@ -824,11 +827,11 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
FetchRawData();
}
if (WriteOptFieldIvfData(field_type,
field_id,
local_chunk_manager,
local_data_path,
field_datas,
write_offset)) {
field_id,
local_chunk_manager,
local_data_path,
field_datas,
write_offset)) {
actual_field_ids.insert(field_id);
}
}
Expand All @@ -839,6 +842,9 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
local_data_path,
actual_field_ids.size(),
write_offset);
if (actual_field_ids.empty()) {
return "";
}
}

return local_data_path;
Expand Down
172 changes: 121 additions & 51 deletions internal/core/unittest/test_disk_file_manager_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
#include <arrow/type_fwd.h>
#include <gtest/gtest.h>
#include <cstdint>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <fstream>
#include <vector>
Expand Down Expand Up @@ -202,14 +204,12 @@ TEST_F(DiskAnnFileManagerTest, TestThreadPoolException) {
}
}

namespace {
const int64_t kOptFieldId = 123456;
const std::string kOptFieldName = "opt_field_name";
const DataType kOptFiledType = DataType::INT64;
const int64_t kOptFieldDataRange = 10000;
const std::string kOptFieldPath = "/tmp/diskann/opt_field/123123";
// const std::string kOptFieldPath = "/tmp/diskann/index_files/1000/index";
const size_t kEntityCnt = 1000 * 1000;
const DataType kOptFieldDataType = DataType::INT64;
const int64_t kOptFieldDataRange = 1000;
const std::string kOptFieldPath = "/tmp/diskann/opt_field/";
const size_t kEntityCnt = 1000 * 10;
const FieldDataMeta kOptVecFieldDataMeta = {1, 2, 3, 100};
using OffsetT = uint32_t;

Expand All @@ -225,25 +225,50 @@ CreateFileManager(const ChunkManagerPtr& cm)
storage::FileManagerContext(kOptVecFieldDataMeta, index_meta, cm));
}

template <typename T>
auto
PrepareRawFieldData() -> std::vector<int64_t> {
std::vector<int64_t> data(kEntityCnt);
int64_t field_val = 0;
PrepareRawFieldData(const int64_t opt_field_data_range) -> std::vector<T> {
if (opt_field_data_range > std::numeric_limits<T>::max()) {
throw std::runtime_error("field data range is too large: " +
std::to_string(opt_field_data_range));
}
std::vector<T> data(kEntityCnt);
T field_val = 0;
for (size_t i = 0; i < kEntityCnt; ++i) {
data[i] = field_val++;
if (field_val >= kOptFieldDataRange) {
if (field_val >= opt_field_data_range) {
field_val = 0;
}
}
return data;
}

template <>
auto
PrepareInsertData() -> std::string {
size_t sz = sizeof(int64_t) * kEntityCnt;
std::vector<int64_t> data = PrepareRawFieldData();
auto field_data =
storage::CreateFieldData(kOptFieldDataType, 1, kEntityCnt);
PrepareRawFieldData<std::string>(const int64_t opt_field_data_range)
-> std::vector<std::string> {
if (opt_field_data_range > std::numeric_limits<char>::max()) {
throw std::runtime_error("field data range is too large: " +
std::to_string(opt_field_data_range));
}
std::vector<std::string> data(kEntityCnt);
char field_val = 0;
for (size_t i = 0; i < kEntityCnt; ++i) {
data[i] = std::to_string(field_val);
field_val++;
if (field_val >= opt_field_data_range) {
field_val = 0;
}
}
return data;
}

template <DataType DT, typename NativeType>
auto
PrepareInsertData(const int64_t opt_field_data_range) -> std::string {
std::vector<NativeType> data =
PrepareRawFieldData<NativeType>(opt_field_data_range);
auto field_data = storage::CreateFieldData(DT, 1, kEntityCnt);
field_data->FillFieldData(data.data(), kEntityCnt);
storage::InsertData insert_data(field_data);
insert_data.SetFieldDataMeta(kOptVecFieldDataMeta);
Expand All @@ -253,16 +278,16 @@ PrepareInsertData() -> std::string {
auto chunk_manager =
storage::CreateChunkManager(get_default_local_storage_config());

std::string path = kOptFieldPath + "0";
std::string path = kOptFieldPath + std::to_string(kOptFieldId);
boost::filesystem::remove_all(path);
chunk_manager->Write(path, serialized_data.data(), serialized_data.size());
return path;
}

auto
PrepareInsertDataSpace()
PrepareInsertDataSpace(const int64_t opt_field_data_range)
-> std::pair<std::string, std::shared_ptr<milvus_storage::Space>> {
std::string path = kOptFieldPath + "1";
std::string path = kOptFieldPath + "space/" + std::to_string(kOptFieldId);
arrow::FieldVector arrow_fields{
arrow::field("pk", arrow::int64()),
arrow::field("ts", arrow::int64()),
Expand All @@ -281,7 +306,7 @@ PrepareInsertDataSpace()
milvus_storage::Options{schema});
EXPECT_TRUE(opt_space.has_value());
auto space = std::move(opt_space.value());
const auto data = PrepareRawFieldData();
const auto data = PrepareRawFieldData<int64_t>(opt_field_data_range);
arrow::Int64Builder pk_builder;
arrow::Int64Builder ts_builder;
arrow::NumericBuilder<arrow::Int64Type> scalar_builder;
Expand Down Expand Up @@ -315,18 +340,21 @@ PrepareInsertDataSpace()
return {path, std::move(space)};
}

template <DataType DT>
auto
PrepareOptionalField(const std::shared_ptr<DiskFileManagerImpl>& file_manager,
const std::string& insert_file_path) -> OptFieldT {
OptFieldT opt_field;
std::vector<std::string> insert_files;
insert_files.emplace_back(insert_file_path);
opt_field[kOptFieldId] = {kOptFieldName, kOptFiledType, insert_files};
opt_field[kOptFieldId] = {kOptFieldName, DT, insert_files};
return opt_field;
}

void
CheckOptFieldCorrectness(const std::string& local_file_path) {
CheckOptFieldCorrectness(
const std::string& local_file_path,
const int64_t opt_field_data_range = kOptFieldDataRange) {
std::ifstream ifs(local_file_path);
if (!ifs.is_open()) {
FAIL() << "open file failed: " << local_file_path << std::endl;
Expand All @@ -344,10 +372,10 @@ CheckOptFieldCorrectness(const std::string& local_file_path) {
EXPECT_EQ(field_id, kOptFieldId);
ifs.read(reinterpret_cast<char*>(&num_of_unique_field_data),
sizeof(num_of_unique_field_data));
EXPECT_EQ(num_of_unique_field_data, kOptFieldDataRange);
EXPECT_EQ(num_of_unique_field_data, opt_field_data_range);

uint32_t expected_single_category_offset_cnt =
kEntityCnt / kOptFieldDataRange;
kEntityCnt / opt_field_data_range;
uint32_t read_single_category_offset_cnt;
std::vector<OffsetT> single_category_offsets(
expected_single_category_offset_cnt);
Expand All @@ -364,54 +392,96 @@ CheckOptFieldCorrectness(const std::string& local_file_path) {
first_offset = single_category_offsets[0];
}
for (size_t j = 1; j < read_single_category_offset_cnt; ++j) {
ASSERT_EQ(single_category_offsets[j] % kOptFieldDataRange,
first_offset % kOptFieldDataRange);
ASSERT_EQ(single_category_offsets[j] % opt_field_data_range,
first_offset % opt_field_data_range);
}
}
}
} // namespace

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskFieldEmpty) {
auto file_manager = CreateFileManager(cm_);
const auto& [insert_file_space_path, space] = PrepareInsertDataSpace();
OptFieldT opt_fields;
EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty());
EXPECT_TRUE(file_manager->CacheOptFieldToDisk(space, opt_fields).empty());
}
{
const auto& [insert_file_space_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
OptFieldT opt_fields;
EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty());
EXPECT_TRUE(
file_manager->CacheOptFieldToDisk(space, opt_fields).empty());
}

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceEmpty) {
auto file_manager = CreateFileManager(cm_);
auto opt_fileds = PrepareOptionalField(file_manager, "");
auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds);
EXPECT_TRUE(res.empty());
{
auto opt_fileds =
PrepareOptionalField<DataType::INT64>(file_manager, "");
auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds);
EXPECT_TRUE(res.empty());
}
}

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) {
auto file_manager = CreateFileManager(cm_);
const auto insert_file_path = PrepareInsertData();
const auto& [insert_file_space_path, space] = PrepareInsertDataSpace();

OptFieldT opt_fields = PrepareOptionalField(file_manager, insert_file_path);
const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
const auto& [insert_file_space_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
OptFieldT opt_fields =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
opt_fields[kOptFieldId + 1] = {
kOptFieldName + "second", kOptFiledType, {insert_file_space_path}};
kOptFieldName + "second", DataType::INT64, {insert_file_space_path}};
EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError);
EXPECT_THROW(file_manager->CacheOptFieldToDisk(space, opt_fields),
SegcoreError);
}

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskCorrect) {
auto file_manager = CreateFileManager(cm_);
const auto insert_file_path = PrepareInsertData();
auto opt_fileds = PrepareOptionalField(file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
ASSERT_FALSE(res.empty());
CheckOptFieldCorrectness(res);
}

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
auto file_manager = CreateFileManager(cm_);
const auto& [insert_file_path, space] = PrepareInsertDataSpace();
auto opt_fileds = PrepareOptionalField(file_manager, insert_file_path);
const auto& [insert_file_path, space] =
PrepareInsertDataSpace(kOptFieldDataRange);
auto opt_fileds =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
ASSERT_FALSE(res.empty());
CheckOptFieldCorrectness(res);
}

#define TEST_TYPE(NAME, TYPE, NATIVE_TYPE, RANGE) \
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskCorrect##NAME) { \
auto file_manager = CreateFileManager(cm_); \
auto insert_file_path = PrepareInsertData<TYPE, NATIVE_TYPE>(RANGE); \
auto opt_fields = \
PrepareOptionalField<TYPE>(file_manager, insert_file_path); \
auto res = file_manager->CacheOptFieldToDisk(opt_fields); \
ASSERT_FALSE(res.empty()); \
CheckOptFieldCorrectness(res, RANGE); \
};

TEST_TYPE(INT8, DataType::INT8, int8_t, 100);
TEST_TYPE(INT16, DataType::INT16, int16_t, kOptFieldDataRange);
TEST_TYPE(INT32, DataType::INT32, int32_t, kOptFieldDataRange);
TEST_TYPE(INT64, DataType::INT64, int64_t, kOptFieldDataRange);
TEST_TYPE(FLOAT, DataType::FLOAT, float, kOptFieldDataRange);
TEST_TYPE(DOUBLE, DataType::DOUBLE, double, kOptFieldDataRange);
TEST_TYPE(STRING, DataType::STRING, std::string, 100);
TEST_TYPE(VARCHAR, DataType::VARCHAR, std::string, 100);

#undef TEST_TYPE

TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
auto file_manager = CreateFileManager(cm_);
{
const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(1);
auto opt_fileds = PrepareOptionalField<DataType::INT64>(
file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
ASSERT_TRUE(res.empty());
}

{
const auto& [insert_file_path, space] = PrepareInsertDataSpace(1);
auto opt_fileds = PrepareOptionalField<DataType::INT64>(
file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
ASSERT_TRUE(res.empty());
}
}
2 changes: 1 addition & 1 deletion internal/proxy/task_search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2692,7 +2692,7 @@ func (s *MaterializedViewTestSuite) TestMvEnabledPartitionKeyOnInt64() {
err := task.PreExecute(s.ctx)
s.NoError(err)
s.NotZero(len(task.queryInfos))
s.Equal(false, task.queryInfos[0].MaterializedViewInvolved)
s.Equal(true, task.queryInfos[0].MaterializedViewInvolved)
}

func (s *MaterializedViewTestSuite) TestMvEnabledPartitionKeyOnVarChar() {
Expand Down

0 comments on commit 3049d07

Please sign in to comment.