From 3049d078cd3ef54aaab4fdb5cbf3d24b9b4389d1 Mon Sep 17 00:00:00 2001 From: Patrick Weizhi Xu Date: Mon, 20 May 2024 15:31:27 +0800 Subject: [PATCH] fix UT and format Signed-off-by: Patrick Weizhi Xu --- .../core/src/storage/DiskFileManagerImpl.cpp | 26 ++- .../unittest/test_disk_file_manager_test.cpp | 172 ++++++++++++------ internal/proxy/task_search_test.go | 2 +- 3 files changed, 138 insertions(+), 62 deletions(-) diff --git a/internal/core/src/storage/DiskFileManagerImpl.cpp b/internal/core/src/storage/DiskFileManagerImpl.cpp index 4f646b11b1e2a..844495ceb0bba 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.cpp +++ b/internal/core/src/storage/DiskFileManagerImpl.cpp @@ -746,11 +746,11 @@ DiskFileManagerImpl::CacheOptFieldToDisk( field_datas.emplace_back(field_data); } if (WriteOptFieldIvfData(field_type, - field_id, - local_chunk_manager, - local_data_path, - field_datas, - write_offset)) { + field_id, + local_chunk_manager, + local_data_path, + field_datas, + write_offset)) { actual_field_ids.insert(field_id); } } @@ -761,6 +761,9 @@ DiskFileManagerImpl::CacheOptFieldToDisk( local_data_path, actual_field_ids.size(), write_offset); + if (actual_field_ids.empty()) { + return ""; + } } return local_data_path; } @@ -824,11 +827,11 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) { FetchRawData(); } if (WriteOptFieldIvfData(field_type, - field_id, - local_chunk_manager, - local_data_path, - field_datas, - write_offset)) { + field_id, + local_chunk_manager, + local_data_path, + field_datas, + write_offset)) { actual_field_ids.insert(field_id); } } @@ -839,6 +842,9 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) { local_data_path, actual_field_ids.size(), write_offset); + if (actual_field_ids.empty()) { + return ""; + } } return local_data_path; diff --git a/internal/core/unittest/test_disk_file_manager_test.cpp b/internal/core/unittest/test_disk_file_manager_test.cpp index 90fa4a7887bae..4c5b75001106c 100644 --- a/internal/core/unittest/test_disk_file_manager_test.cpp +++ b/internal/core/unittest/test_disk_file_manager_test.cpp @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -202,14 +204,12 @@ TEST_F(DiskAnnFileManagerTest, TestThreadPoolException) { } } +namespace { const int64_t kOptFieldId = 123456; const std::string kOptFieldName = "opt_field_name"; -const DataType kOptFiledType = DataType::INT64; -const int64_t kOptFieldDataRange = 10000; -const std::string kOptFieldPath = "/tmp/diskann/opt_field/123123"; -// const std::string kOptFieldPath = "/tmp/diskann/index_files/1000/index"; -const size_t kEntityCnt = 1000 * 1000; -const DataType kOptFieldDataType = DataType::INT64; +const int64_t kOptFieldDataRange = 1000; +const std::string kOptFieldPath = "/tmp/diskann/opt_field/"; +const size_t kEntityCnt = 1000 * 10; const FieldDataMeta kOptVecFieldDataMeta = {1, 2, 3, 100}; using OffsetT = uint32_t; @@ -225,25 +225,50 @@ CreateFileManager(const ChunkManagerPtr& cm) storage::FileManagerContext(kOptVecFieldDataMeta, index_meta, cm)); } +template auto -PrepareRawFieldData() -> std::vector { - std::vector data(kEntityCnt); - int64_t field_val = 0; +PrepareRawFieldData(const int64_t opt_field_data_range) -> std::vector { + if (opt_field_data_range > std::numeric_limits::max()) { + throw std::runtime_error("field data range is too large: " + + std::to_string(opt_field_data_range)); + } + std::vector data(kEntityCnt); + T field_val = 0; for (size_t i = 0; i < kEntityCnt; ++i) { data[i] = field_val++; - if (field_val >= kOptFieldDataRange) { + if (field_val >= opt_field_data_range) { field_val = 0; } } return data; } +template <> auto -PrepareInsertData() -> std::string { - size_t sz = sizeof(int64_t) * kEntityCnt; - std::vector data = PrepareRawFieldData(); - auto field_data = - storage::CreateFieldData(kOptFieldDataType, 1, kEntityCnt); +PrepareRawFieldData(const int64_t opt_field_data_range) + -> std::vector { + if (opt_field_data_range > std::numeric_limits::max()) { + throw std::runtime_error("field data range is too large: " + + std::to_string(opt_field_data_range)); + } + std::vector data(kEntityCnt); + char field_val = 0; + for (size_t i = 0; i < kEntityCnt; ++i) { + data[i] = std::to_string(field_val); + field_val++; + if (field_val >= opt_field_data_range) { + field_val = 0; + } + } + return data; +} + +template +auto +PrepareInsertData(const int64_t opt_field_data_range) -> std::string { + std::vector data = + PrepareRawFieldData(opt_field_data_range); + auto field_data = storage::CreateFieldData(DT, 1, kEntityCnt); field_data->FillFieldData(data.data(), kEntityCnt); storage::InsertData insert_data(field_data); insert_data.SetFieldDataMeta(kOptVecFieldDataMeta); @@ -253,16 +278,16 @@ PrepareInsertData() -> std::string { auto chunk_manager = storage::CreateChunkManager(get_default_local_storage_config()); - std::string path = kOptFieldPath + "0"; + std::string path = kOptFieldPath + std::to_string(kOptFieldId); boost::filesystem::remove_all(path); chunk_manager->Write(path, serialized_data.data(), serialized_data.size()); return path; } auto -PrepareInsertDataSpace() +PrepareInsertDataSpace(const int64_t opt_field_data_range) -> std::pair> { - std::string path = kOptFieldPath + "1"; + std::string path = kOptFieldPath + "space/" + std::to_string(kOptFieldId); arrow::FieldVector arrow_fields{ arrow::field("pk", arrow::int64()), arrow::field("ts", arrow::int64()), @@ -281,7 +306,7 @@ PrepareInsertDataSpace() milvus_storage::Options{schema}); EXPECT_TRUE(opt_space.has_value()); auto space = std::move(opt_space.value()); - const auto data = PrepareRawFieldData(); + const auto data = PrepareRawFieldData(opt_field_data_range); arrow::Int64Builder pk_builder; arrow::Int64Builder ts_builder; arrow::NumericBuilder scalar_builder; @@ -315,18 +340,21 @@ PrepareInsertDataSpace() return {path, std::move(space)}; } +template auto PrepareOptionalField(const std::shared_ptr& file_manager, const std::string& insert_file_path) -> OptFieldT { OptFieldT opt_field; std::vector insert_files; insert_files.emplace_back(insert_file_path); - opt_field[kOptFieldId] = {kOptFieldName, kOptFiledType, insert_files}; + opt_field[kOptFieldId] = {kOptFieldName, DT, insert_files}; return opt_field; } void -CheckOptFieldCorrectness(const std::string& local_file_path) { +CheckOptFieldCorrectness( + const std::string& local_file_path, + const int64_t opt_field_data_range = kOptFieldDataRange) { std::ifstream ifs(local_file_path); if (!ifs.is_open()) { FAIL() << "open file failed: " << local_file_path << std::endl; @@ -344,10 +372,10 @@ CheckOptFieldCorrectness(const std::string& local_file_path) { EXPECT_EQ(field_id, kOptFieldId); ifs.read(reinterpret_cast(&num_of_unique_field_data), sizeof(num_of_unique_field_data)); - EXPECT_EQ(num_of_unique_field_data, kOptFieldDataRange); + EXPECT_EQ(num_of_unique_field_data, opt_field_data_range); uint32_t expected_single_category_offset_cnt = - kEntityCnt / kOptFieldDataRange; + kEntityCnt / opt_field_data_range; uint32_t read_single_category_offset_cnt; std::vector single_category_offsets( expected_single_category_offset_cnt); @@ -364,54 +392,96 @@ CheckOptFieldCorrectness(const std::string& local_file_path) { first_offset = single_category_offsets[0]; } for (size_t j = 1; j < read_single_category_offset_cnt; ++j) { - ASSERT_EQ(single_category_offsets[j] % kOptFieldDataRange, - first_offset % kOptFieldDataRange); + ASSERT_EQ(single_category_offsets[j] % opt_field_data_range, + first_offset % opt_field_data_range); } } } +} // namespace TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskFieldEmpty) { auto file_manager = CreateFileManager(cm_); - const auto& [insert_file_space_path, space] = PrepareInsertDataSpace(); - OptFieldT opt_fields; - EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty()); - EXPECT_TRUE(file_manager->CacheOptFieldToDisk(space, opt_fields).empty()); -} + { + const auto& [insert_file_space_path, space] = + PrepareInsertDataSpace(kOptFieldDataRange); + OptFieldT opt_fields; + EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty()); + EXPECT_TRUE( + file_manager->CacheOptFieldToDisk(space, opt_fields).empty()); + } -TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceEmpty) { - auto file_manager = CreateFileManager(cm_); - auto opt_fileds = PrepareOptionalField(file_manager, ""); - auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds); - EXPECT_TRUE(res.empty()); + { + auto opt_fileds = + PrepareOptionalField(file_manager, ""); + auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds); + EXPECT_TRUE(res.empty()); + } } TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) { auto file_manager = CreateFileManager(cm_); - const auto insert_file_path = PrepareInsertData(); - const auto& [insert_file_space_path, space] = PrepareInsertDataSpace(); - - OptFieldT opt_fields = PrepareOptionalField(file_manager, insert_file_path); + const auto insert_file_path = + PrepareInsertData(kOptFieldDataRange); + const auto& [insert_file_space_path, space] = + PrepareInsertDataSpace(kOptFieldDataRange); + OptFieldT opt_fields = + PrepareOptionalField(file_manager, insert_file_path); opt_fields[kOptFieldId + 1] = { - kOptFieldName + "second", kOptFiledType, {insert_file_space_path}}; + kOptFieldName + "second", DataType::INT64, {insert_file_space_path}}; EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError); EXPECT_THROW(file_manager->CacheOptFieldToDisk(space, opt_fields), SegcoreError); } -TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskCorrect) { - auto file_manager = CreateFileManager(cm_); - const auto insert_file_path = PrepareInsertData(); - auto opt_fileds = PrepareOptionalField(file_manager, insert_file_path); - auto res = file_manager->CacheOptFieldToDisk(opt_fileds); - ASSERT_FALSE(res.empty()); - CheckOptFieldCorrectness(res); -} - TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) { auto file_manager = CreateFileManager(cm_); - const auto& [insert_file_path, space] = PrepareInsertDataSpace(); - auto opt_fileds = PrepareOptionalField(file_manager, insert_file_path); + const auto& [insert_file_path, space] = + PrepareInsertDataSpace(kOptFieldDataRange); + auto opt_fileds = + PrepareOptionalField(file_manager, insert_file_path); auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds); ASSERT_FALSE(res.empty()); CheckOptFieldCorrectness(res); } + +#define TEST_TYPE(NAME, TYPE, NATIVE_TYPE, RANGE) \ + TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskCorrect##NAME) { \ + auto file_manager = CreateFileManager(cm_); \ + auto insert_file_path = PrepareInsertData(RANGE); \ + auto opt_fields = \ + PrepareOptionalField(file_manager, insert_file_path); \ + auto res = file_manager->CacheOptFieldToDisk(opt_fields); \ + ASSERT_FALSE(res.empty()); \ + CheckOptFieldCorrectness(res, RANGE); \ + }; + +TEST_TYPE(INT8, DataType::INT8, int8_t, 100); +TEST_TYPE(INT16, DataType::INT16, int16_t, kOptFieldDataRange); +TEST_TYPE(INT32, DataType::INT32, int32_t, kOptFieldDataRange); +TEST_TYPE(INT64, DataType::INT64, int64_t, kOptFieldDataRange); +TEST_TYPE(FLOAT, DataType::FLOAT, float, kOptFieldDataRange); +TEST_TYPE(DOUBLE, DataType::DOUBLE, double, kOptFieldDataRange); +TEST_TYPE(STRING, DataType::STRING, std::string, 100); +TEST_TYPE(VARCHAR, DataType::VARCHAR, std::string, 100); + +#undef TEST_TYPE + +TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) { + auto file_manager = CreateFileManager(cm_); + { + const auto insert_file_path = + PrepareInsertData(1); + auto opt_fileds = PrepareOptionalField( + file_manager, insert_file_path); + auto res = file_manager->CacheOptFieldToDisk(opt_fileds); + ASSERT_TRUE(res.empty()); + } + + { + const auto& [insert_file_path, space] = PrepareInsertDataSpace(1); + auto opt_fileds = PrepareOptionalField( + file_manager, insert_file_path); + auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds); + ASSERT_TRUE(res.empty()); + } +} \ No newline at end of file diff --git a/internal/proxy/task_search_test.go b/internal/proxy/task_search_test.go index 286873df4af42..ffb95290f0f29 100644 --- a/internal/proxy/task_search_test.go +++ b/internal/proxy/task_search_test.go @@ -2692,7 +2692,7 @@ func (s *MaterializedViewTestSuite) TestMvEnabledPartitionKeyOnInt64() { err := task.PreExecute(s.ctx) s.NoError(err) s.NotZero(len(task.queryInfos)) - s.Equal(false, task.queryInfos[0].MaterializedViewInvolved) + s.Equal(true, task.queryInfos[0].MaterializedViewInvolved) } func (s *MaterializedViewTestSuite) TestMvEnabledPartitionKeyOnVarChar() {