Skip to content

Commit

Permalink
fix:fix incorrect dir operations when create or load inverted index (#…
Browse files Browse the repository at this point in the history
…38359)

#37944

Signed-off-by: luzhang <[email protected]>
Co-authored-by: luzhang <[email protected]>
  • Loading branch information
zhagnlu and luzhang authored Dec 17, 2024
1 parent 93fba1d commit 9afcc5b
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 28 deletions.
35 changes: 23 additions & 12 deletions internal/core/src/index/InvertedIndexTantivy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,33 +70,43 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
}

template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
const storage::FileManagerContext& ctx)
: ScalarIndex<T>(INVERTED_INDEX_TYPE),
schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
void
InvertedIndexTantivy<T>::InitForBuildIndex() {
auto field =
std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
auto prefix = disk_file_manager_->GetIndexIdentifier();
path_ = std::string(TMP_INVERTED_INDEX_PREFIX) + prefix;
boost::filesystem::create_directories(path_);
d_type_ = get_tantivy_data_type(schema_);
if (tantivy_index_exist(path_.c_str())) {
LOG_INFO(
"index {} already exists, which should happen in loading progress",
path_);
} else {
wrapper_ = std::make_shared<TantivyIndexWrapper>(
field.c_str(), d_type_, path_.c_str());
PanicInfo(IndexBuildError,
"build inverted index temp dir:{} not empty",
path_);
}
wrapper_ = std::make_shared<TantivyIndexWrapper>(
field.c_str(), d_type_, path_.c_str());
}

// Constructs the index from a file-manager context: records the field schema
// and creates the memory and disk file managers. Build-side initialisation
// (InitForBuildIndex) runs only when the context is not marked as
// for_loading_index.
template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
    const storage::FileManagerContext& ctx)
    : ScalarIndex<T>(INVERTED_INDEX_TYPE),
      schema_(ctx.fieldDataMeta.field_schema) {
    mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
    disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);

    // When the index is being loaded, wrapper setup is deferred to the load
    // path, so nothing more is done here.
    const bool building = !ctx.for_loading_index;
    if (building) {
        InitForBuildIndex();
    }
}

// Logs and removes the local directory recorded in path_ through the local
// chunk manager.
// NOTE(review): path_ is only assigned by InitForBuildIndex() and Load(); if
// neither ran, it is presumably empty here. Confirm RemoveDir tolerates that.
template <typename T>
InvertedIndexTantivy<T>::~InvertedIndexTantivy() {
    const auto chunk_manager =
        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
    LOG_INFO("inverted index remove path:{}", path_);
    chunk_manager->RemoveDir(path_);
}

Expand Down Expand Up @@ -214,6 +224,7 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
(size_t)index_valid_data->size);
}
disk_file_manager_->CacheIndexToDisk(files_value);
path_ = prefix;
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
}

Expand Down
2 changes: 2 additions & 0 deletions internal/core/src/index/InvertedIndexTantivy.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class InvertedIndexTantivy : public ScalarIndex<T> {

~InvertedIndexTantivy();

// Build-side initialisation: sets path_ to the temporary index directory,
// creates it, and constructs the tantivy wrapper on it. Called from the
// constructor unless the FileManagerContext is marked for_loading_index.
void
InitForBuildIndex();
/*
* deprecated.
* TODO: why not remove this?
Expand Down
4 changes: 4 additions & 0 deletions internal/core/src/segcore/load_index_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {

milvus::storage::FileManagerContext fileManagerContext(
field_meta, index_meta, remote_chunk_manager);
fileManagerContext.set_for_loading_index(true);

load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, fileManagerContext);
Expand Down Expand Up @@ -305,6 +307,8 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) {

milvus::storage::FileManagerContext fileManagerContext(
field_meta, index_meta, remote_chunk_manager);
fileManagerContext.set_for_loading_index(true);

load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, fileManagerContext);
Expand Down
6 changes: 6 additions & 0 deletions internal/core/src/storage/FileManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,15 @@ struct FileManagerContext {
return chunkManagerPtr != nullptr;
}

// Marks (or unmarks) this context as being used to load an existing index
// rather than to build a new one.
void
set_for_loading_index(bool value) {
for_loading_index = value;
}

FieldDataMeta fieldDataMeta;
IndexMeta indexMeta;
ChunkManagerPtr chunkManagerPtr;
// True when the index created from this context will be loaded, not built.
// Defaults to false (build); set through set_for_loading_index().
bool for_loading_index{false};
};

#define FILEMANAGER_TRY try {
Expand Down
15 changes: 11 additions & 4 deletions internal/core/unittest/test_array_bitmap_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ class ArrayBitmapIndexTest : public testing::Test {

config["index_files"] = index_files;

ctx.set_for_loading_index(true);
index_ =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index_->Load(milvus::tracer::TraceContext{}, config);
Expand All @@ -258,6 +259,8 @@ class ArrayBitmapIndexTest : public testing::Test {
nb_ = 10000;
cardinality_ = 30;
nullable_ = false;
index_build_id_ = 2001;
index_version_ = 2001;
}

void
Expand All @@ -278,8 +281,6 @@ class ArrayBitmapIndexTest : public testing::Test {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
std::string root_path = "/tmp/test-bitmap-index/";

storage::StorageConfig storage_config;
Expand All @@ -291,8 +292,8 @@ class ArrayBitmapIndexTest : public testing::Test {
partition_id,
segment_id,
field_id,
index_build_id,
index_version);
index_build_id_,
index_version_);
}

virtual ~ArrayBitmapIndexTest() override {
Expand Down Expand Up @@ -340,6 +341,8 @@ class ArrayBitmapIndexTest : public testing::Test {
bool nullable_;
std::vector<milvus::Array> data_;
FixedVector<bool> valid_data_;
// Index version / build id handed to the index meta in SetUp(). Typed as
// int64_t to match the other int64_t ids of this fixture; each fixture
// subclass assigns its own values in its constructor.
int64_t index_version_{0};
int64_t index_build_id_{0};
};

TYPED_TEST_SUITE_P(ArrayBitmapIndexTest);
Expand Down Expand Up @@ -377,6 +380,8 @@ class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest<T> {
this->nb_ = 10000;
this->cardinality_ = 200;
this->nullable_ = false;
this->index_build_id_ = 2002;
this->index_version_ = 2002;
}

virtual ~ArrayBitmapIndexTestV1() {
Expand All @@ -398,6 +403,8 @@ class ArrayBitmapIndexTestNullable : public ArrayBitmapIndexTest<T> {
this->nb_ = 10000;
this->cardinality_ = 30;
this->nullable_ = true;
this->index_version_ = 2003;
this->index_build_id_ = 2003;
}

virtual ~ArrayBitmapIndexTestNullable() {
Expand Down
16 changes: 12 additions & 4 deletions internal/core/unittest/test_bitmap_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ class BitmapIndexTest : public testing::Test {
nb_ = 10000;
cardinality_ = 30;
nullable_ = false;
index_version_ = 3000;
index_build_id_ = 3000;
}
void
SetUp() override {
Expand All @@ -196,8 +198,6 @@ class BitmapIndexTest : public testing::Test {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
std::string root_path = "/tmp/test-bitmap-index/";

storage::StorageConfig storage_config;
Expand All @@ -209,8 +209,8 @@ class BitmapIndexTest : public testing::Test {
partition_id,
segment_id,
field_id,
index_build_id,
index_version);
index_build_id_,
index_version_);
}

virtual ~BitmapIndexTest() override {
Expand Down Expand Up @@ -400,6 +400,8 @@ class BitmapIndexTest : public testing::Test {
bool nullable_;
FixedVector<bool> valid_data_;
std::shared_ptr<storage::ChunkManager> chunk_manager_;
// Index version / build id handed to the index meta in SetUp(). Typed as
// int64_t to match the other int64_t ids of this fixture; each fixture
// subclass assigns its own values in its constructor.
int64_t index_version_{0};
int64_t index_build_id_{0};
};

TYPED_TEST_SUITE_P(BitmapIndexTest);
Expand Down Expand Up @@ -450,6 +452,8 @@ class BitmapIndexTestV2 : public BitmapIndexTest<T> {
this->nb_ = 10000;
this->cardinality_ = 2000;
this->nullable_ = false;
this->index_version_ = 3001;
this->index_build_id_ = 3001;
}

virtual ~BitmapIndexTestV2() {
Expand Down Expand Up @@ -512,6 +516,8 @@ class BitmapIndexTestV3 : public BitmapIndexTest<T> {
this->cardinality_ = 2000;
this->is_mmap_ = true;
this->nullable_ = false;
this->index_version_ = 3002;
this->index_build_id_ = 3002;
}

virtual ~BitmapIndexTestV3() {
Expand Down Expand Up @@ -574,6 +580,8 @@ class BitmapIndexTestV4 : public BitmapIndexTest<T> {
this->cardinality_ = 2000;
this->is_mmap_ = true;
this->nullable_ = true;
this->index_version_ = 3003;
this->index_build_id_ = 3003;
}

virtual ~BitmapIndexTestV4() {
Expand Down
15 changes: 11 additions & 4 deletions internal/core/unittest/test_hybrid_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class HybridIndexTestV1 : public testing::Test {

config["index_files"] = index_files;

ctx.set_for_loading_index(true);
index_ =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index_->Load(milvus::tracer::TraceContext{}, config);
Expand All @@ -171,6 +172,8 @@ class HybridIndexTestV1 : public testing::Test {
nb_ = 10000;
cardinality_ = 30;
nullable_ = false;
index_version_ = 1001;
index_build_id_ = 1001;
}
void
SetUp() override {
Expand All @@ -191,8 +194,6 @@ class HybridIndexTestV1 : public testing::Test {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
std::string root_path = "/tmp/test-bitmap-index";

storage::StorageConfig storage_config;
Expand All @@ -204,8 +205,8 @@ class HybridIndexTestV1 : public testing::Test {
partition_id,
segment_id,
field_id,
index_build_id,
index_version);
index_build_id_,
index_version_);
}

virtual ~HybridIndexTestV1() override {
Expand Down Expand Up @@ -398,6 +399,8 @@ class HybridIndexTestV1 : public testing::Test {
std::shared_ptr<storage::ChunkManager> chunk_manager_;
bool nullable_;
FixedVector<bool> valid_data_;
// Index build id / version handed to the index meta in SetUp(). Typed as
// int64_t to match the other int64_t ids of this fixture; each fixture
// subclass assigns its own values in its constructor.
int64_t index_build_id_{0};
int64_t index_version_{0};
};

TYPED_TEST_SUITE_P(HybridIndexTestV1);
Expand Down Expand Up @@ -455,6 +458,8 @@ class HybridIndexTestV2 : public HybridIndexTestV1<T> {
this->nb_ = 10000;
this->cardinality_ = 2000;
this->nullable_ = false;
this->index_version_ = 1002;
this->index_build_id_ = 1002;
}

virtual ~HybridIndexTestV2() {
Expand Down Expand Up @@ -500,6 +505,8 @@ class HybridIndexTestNullable : public HybridIndexTestV1<T> {
this->nb_ = 10000;
this->cardinality_ = 2000;
this->nullable_ = true;
this->index_version_ = 1003;
this->index_build_id_ = 1003;
}

virtual ~HybridIndexTestNullable() {
Expand Down
10 changes: 6 additions & 4 deletions internal/core/unittest/test_inverted_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ test_run() {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
int64_t index_build_id = 4000;
int64_t index_version = 4000;

auto field_meta = test::gen_field_meta(collection_id,
partition_id,
Expand Down Expand Up @@ -207,6 +207,7 @@ test_run() {
Config config;
config["index_files"] = index_files;

ctx.set_for_loading_index(true);
auto index =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index->Load(milvus::tracer::TraceContext{}, config);
Expand Down Expand Up @@ -384,8 +385,8 @@ test_string() {
int64_t partition_id = 2;
int64_t segment_id = 3;
int64_t field_id = 101;
int64_t index_build_id = 1000;
int64_t index_version = 10000;
int64_t index_build_id = 4001;
int64_t index_version = 4001;

auto field_meta = test::gen_field_meta(collection_id,
partition_id,
Expand Down Expand Up @@ -479,6 +480,7 @@ test_string() {
Config config;
config["index_files"] = index_files;

ctx.set_for_loading_index(true);
auto index =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index->Load(milvus::tracer::TraceContext{}, config);
Expand Down

0 comments on commit 9afcc5b

Please sign in to comment.