From 9afcc5bc5cd51a46b2ebf55e7d1ae3afaceb313c Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Tue, 17 Dec 2024 20:06:45 +0800 Subject: [PATCH] fix:fix incorrect dir operations when create or load inverted index (#38359) #37944 Signed-off-by: luzhang Co-authored-by: luzhang --- .../core/src/index/InvertedIndexTantivy.cpp | 35 ++++++++++++------- .../core/src/index/InvertedIndexTantivy.h | 2 ++ internal/core/src/segcore/load_index_c.cpp | 4 +++ internal/core/src/storage/FileManager.h | 6 ++++ .../core/unittest/test_array_bitmap_index.cpp | 15 +++++--- internal/core/unittest/test_bitmap_index.cpp | 16 ++++++--- internal/core/unittest/test_hybrid_index.cpp | 15 +++++--- .../core/unittest/test_inverted_index.cpp | 10 +++--- 8 files changed, 75 insertions(+), 28 deletions(-) diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 663564dd95b23..0072c3de5d0c8 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -70,12 +70,8 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) { } template -InvertedIndexTantivy::InvertedIndexTantivy( - const storage::FileManagerContext& ctx) - : ScalarIndex(INVERTED_INDEX_TYPE), - schema_(ctx.fieldDataMeta.field_schema) { - mem_file_manager_ = std::make_shared(ctx); - disk_file_manager_ = std::make_shared(ctx); +void +InvertedIndexTantivy::InitForBuildIndex() { auto field = std::to_string(disk_file_manager_->GetFieldDataMeta().field_id); auto prefix = disk_file_manager_->GetIndexIdentifier(); @@ -83,13 +79,26 @@ InvertedIndexTantivy::InvertedIndexTantivy( boost::filesystem::create_directories(path_); d_type_ = get_tantivy_data_type(schema_); if (tantivy_index_exist(path_.c_str())) { - LOG_INFO( - "index {} already exists, which should happen in loading progress", - path_); - } else { - wrapper_ = std::make_shared( - field.c_str(), d_type_, path_.c_str()); + PanicInfo(IndexBuildError, + "build inverted index temp dir:{} not empty", + path_); + } + wrapper_ = std::make_shared( + field.c_str(), d_type_, path_.c_str()); +} + +template +InvertedIndexTantivy::InvertedIndexTantivy( + const storage::FileManagerContext& ctx) + : ScalarIndex(INVERTED_INDEX_TYPE), + schema_(ctx.fieldDataMeta.field_schema) { + mem_file_manager_ = std::make_shared(ctx); + disk_file_manager_ = std::make_shared(ctx); + // push init wrapper to load process + if (ctx.for_loading_index) { + return; } + InitForBuildIndex(); } template @@ -97,6 +106,7 @@ InvertedIndexTantivy::~InvertedIndexTantivy() { auto local_chunk_manager = storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager(); auto prefix = path_; + LOG_INFO("inverted index remove path:{}", path_); local_chunk_manager->RemoveDir(prefix); } @@ -214,6 +224,7 @@ InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, (size_t)index_valid_data->size); } disk_file_manager_->CacheIndexToDisk(files_value); + path_ = prefix; wrapper_ = std::make_shared(prefix.c_str()); } diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index 62ecff9f29470..e4bcccb0c5b72 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -42,6 +42,8 @@ class InvertedIndexTantivy : public ScalarIndex { ~InvertedIndexTantivy(); + void + InitForBuildIndex(); /* * deprecated. * TODO: why not remove this? diff --git a/internal/core/src/segcore/load_index_c.cpp b/internal/core/src/segcore/load_index_c.cpp index b63771392f5f7..16d7bcdd89344 100644 --- a/internal/core/src/segcore/load_index_c.cpp +++ b/internal/core/src/segcore/load_index_c.cpp @@ -157,6 +157,8 @@ appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) { milvus::storage::FileManagerContext fileManagerContext( field_meta, index_meta, remote_chunk_manager); + fileManagerContext.set_for_loading_index(true); + load_index_info->index = milvus::index::IndexFactory::GetInstance().CreateIndex( index_info, fileManagerContext); @@ -305,6 +307,8 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) { milvus::storage::FileManagerContext fileManagerContext( field_meta, index_meta, remote_chunk_manager); + fileManagerContext.set_for_loading_index(true); + load_index_info->index = milvus::index::IndexFactory::GetInstance().CreateIndex( index_info, fileManagerContext); diff --git a/internal/core/src/storage/FileManager.h b/internal/core/src/storage/FileManager.h index d2e71a39ee903..6c3e448984d2c 100644 --- a/internal/core/src/storage/FileManager.h +++ b/internal/core/src/storage/FileManager.h @@ -48,9 +48,15 @@ struct FileManagerContext { return chunkManagerPtr != nullptr; } + void + set_for_loading_index(bool value) { + for_loading_index = value; + } + FieldDataMeta fieldDataMeta; IndexMeta indexMeta; ChunkManagerPtr chunkManagerPtr; + bool for_loading_index{false}; }; #define FILEMANAGER_TRY try { diff --git a/internal/core/unittest/test_array_bitmap_index.cpp b/internal/core/unittest/test_array_bitmap_index.cpp index c40652c39b2e9..b6cad45ce8bdc 100644 --- a/internal/core/unittest/test_array_bitmap_index.cpp +++ b/internal/core/unittest/test_array_bitmap_index.cpp @@ -248,6 +248,7 @@ class ArrayBitmapIndexTest : public testing::Test { config["index_files"] = index_files; + ctx.set_for_loading_index(true); index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); index_->Load(milvus::tracer::TraceContext{}, config); @@ -258,6 +259,8 @@ class ArrayBitmapIndexTest : public testing::Test { nb_ = 10000; cardinality_ = 30; nullable_ = false; + index_build_id_ = 2001; + index_version_ = 2001; } void @@ -278,8 +281,6 @@ class ArrayBitmapIndexTest : public testing::Test { int64_t partition_id = 2; int64_t segment_id = 3; int64_t field_id = 101; - int64_t index_build_id = 1000; - int64_t index_version = 10000; std::string root_path = "/tmp/test-bitmap-index/"; storage::StorageConfig storage_config; @@ -291,8 +292,8 @@ class ArrayBitmapIndexTest : public testing::Test { partition_id, segment_id, field_id, - index_build_id, - index_version); + index_build_id_, + index_version_); } virtual ~ArrayBitmapIndexTest() override { @@ -340,6 +341,8 @@ class ArrayBitmapIndexTest : public testing::Test { bool nullable_; std::vector data_; FixedVector valid_data_; + int index_version_; + int index_build_id_; }; TYPED_TEST_SUITE_P(ArrayBitmapIndexTest); @@ -377,6 +380,8 @@ class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest { this->nb_ = 10000; this->cardinality_ = 200; this->nullable_ = false; + this->index_build_id_ = 2002; + this->index_version_ = 2002; } virtual ~ArrayBitmapIndexTestV1() { @@ -398,6 +403,8 @@ class ArrayBitmapIndexTestNullable : public ArrayBitmapIndexTest { this->nb_ = 10000; this->cardinality_ = 30; this->nullable_ = true; + this->index_version_ = 2003; + this->index_build_id_ = 2003; } virtual ~ArrayBitmapIndexTestNullable() { diff --git a/internal/core/unittest/test_bitmap_index.cpp b/internal/core/unittest/test_bitmap_index.cpp index 9a8227482b35e..50bc13aceec83 100644 --- a/internal/core/unittest/test_bitmap_index.cpp +++ b/internal/core/unittest/test_bitmap_index.cpp @@ -176,6 +176,8 @@ class BitmapIndexTest : public testing::Test { nb_ = 10000; cardinality_ = 30; nullable_ = false; + index_version_ = 3000; + index_build_id_ = 3000; } void SetUp() override { @@ -196,8 +198,6 @@ class BitmapIndexTest : public testing::Test { int64_t partition_id = 2; int64_t segment_id = 3; int64_t field_id = 101; - int64_t index_build_id = 1000; - int64_t index_version = 10000; std::string root_path = "/tmp/test-bitmap-index/"; storage::StorageConfig storage_config; @@ -209,8 +209,8 @@ class BitmapIndexTest : public testing::Test { partition_id, segment_id, field_id, - index_build_id, - index_version); + index_build_id_, + index_version_); } virtual ~BitmapIndexTest() override { @@ -400,6 +400,8 @@ class BitmapIndexTest : public testing::Test { bool nullable_; FixedVector valid_data_; std::shared_ptr chunk_manager_; + int index_version_; + int index_build_id_; }; TYPED_TEST_SUITE_P(BitmapIndexTest); @@ -450,6 +452,8 @@ class BitmapIndexTestV2 : public BitmapIndexTest { this->nb_ = 10000; this->cardinality_ = 2000; this->nullable_ = false; + this->index_version_ = 3001; + this->index_build_id_ = 3001; } virtual ~BitmapIndexTestV2() { @@ -512,6 +516,8 @@ class BitmapIndexTestV3 : public BitmapIndexTest { this->cardinality_ = 2000; this->is_mmap_ = true; this->nullable_ = false; + this->index_version_ = 3002; + this->index_build_id_ = 3002; } virtual ~BitmapIndexTestV3() { @@ -574,6 +580,8 @@ class BitmapIndexTestV4 : public BitmapIndexTest { this->cardinality_ = 2000; this->is_mmap_ = true; this->nullable_ = true; + this->index_version_ = 3003; + this->index_build_id_ = 3003; } virtual ~BitmapIndexTestV4() { diff --git a/internal/core/unittest/test_hybrid_index.cpp b/internal/core/unittest/test_hybrid_index.cpp index 7d9f47942c0b5..a4a6661c20914 100644 --- a/internal/core/unittest/test_hybrid_index.cpp +++ b/internal/core/unittest/test_hybrid_index.cpp @@ -161,6 +161,7 @@ class HybridIndexTestV1 : public testing::Test { config["index_files"] = index_files; + ctx.set_for_loading_index(true); index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); index_->Load(milvus::tracer::TraceContext{}, config); @@ -171,6 +172,8 @@ class HybridIndexTestV1 : public testing::Test { nb_ = 10000; cardinality_ = 30; nullable_ = false; + index_version_ = 1001; + index_build_id_ = 1001; } void SetUp() override { @@ -191,8 +194,6 @@ class HybridIndexTestV1 : public testing::Test { int64_t partition_id = 2; int64_t segment_id = 3; int64_t field_id = 101; - int64_t index_build_id = 1000; - int64_t index_version = 10000; std::string root_path = "/tmp/test-bitmap-index"; storage::StorageConfig storage_config; @@ -204,8 +205,8 @@ class HybridIndexTestV1 : public testing::Test { partition_id, segment_id, field_id, - index_build_id, - index_version); + index_build_id_, + index_version_); } virtual ~HybridIndexTestV1() override { @@ -398,6 +399,8 @@ class HybridIndexTestV1 : public testing::Test { std::shared_ptr chunk_manager_; bool nullable_; FixedVector valid_data_; + int index_build_id_; + int index_version_; }; TYPED_TEST_SUITE_P(HybridIndexTestV1); @@ -455,6 +458,8 @@ class HybridIndexTestV2 : public HybridIndexTestV1 { this->nb_ = 10000; this->cardinality_ = 2000; this->nullable_ = false; + this->index_version_ = 1002; + this->index_build_id_ = 1002; } virtual ~HybridIndexTestV2() { @@ -500,6 +505,8 @@ class HybridIndexTestNullable : public HybridIndexTestV1 { this->nb_ = 10000; this->cardinality_ = 2000; this->nullable_ = true; + this->index_version_ = 1003; + this->index_build_id_ = 1003; } virtual ~HybridIndexTestNullable() { diff --git a/internal/core/unittest/test_inverted_index.cpp b/internal/core/unittest/test_inverted_index.cpp index 3ffc9128f0f37..6574174dc2da7 100644 --- a/internal/core/unittest/test_inverted_index.cpp +++ b/internal/core/unittest/test_inverted_index.cpp @@ -103,8 +103,8 @@ test_run() { int64_t partition_id = 2; int64_t segment_id = 3; int64_t field_id = 101; - int64_t index_build_id = 1000; - int64_t index_version = 10000; + int64_t index_build_id = 4000; + int64_t index_version = 4000; auto field_meta = test::gen_field_meta(collection_id, partition_id, @@ -207,6 +207,7 @@ test_run() { Config config; config["index_files"] = index_files; + ctx.set_for_loading_index(true); auto index = index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); index->Load(milvus::tracer::TraceContext{}, config); @@ -384,8 +385,8 @@ test_string() { int64_t partition_id = 2; int64_t segment_id = 3; int64_t field_id = 101; - int64_t index_build_id = 1000; - int64_t index_version = 10000; + int64_t index_build_id = 4001; + int64_t index_version = 4001; auto field_meta = test::gen_field_meta(collection_id, partition_id, @@ -479,6 +480,7 @@ test_string() { Config config; config["index_files"] = index_files; + ctx.set_for_loading_index(true); auto index = index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); index->Load(milvus::tracer::TraceContext{}, config);