diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index 0dfc2506cbc28..ec30103548895 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -15,10 +15,14 @@ // limitations under the License. #include +#include +#include +#include #include #include "index/BitmapIndex.h" +#include "common/File.h" #include "common/Slice.h" #include "common/Common.h" #include "index/Meta.h" @@ -33,8 +37,10 @@ namespace index { template BitmapIndex::BitmapIndex( const storage::FileManagerContext& file_manager_context) - : is_built_(false), - schema_(file_manager_context.fieldDataMeta.field_schema) { + : ScalarIndex(BITMAP_INDEX_TYPE), + is_built_(false), + schema_(file_manager_context.fieldDataMeta.field_schema), + is_mmap_(false) { if (file_manager_context.Valid()) { file_manager_ = std::make_shared(file_manager_context); @@ -42,6 +48,19 @@ BitmapIndex::BitmapIndex( } } +template +void +BitmapIndex::UnmapIndexData() { + if (mmap_data_ != nullptr && mmap_data_ != MAP_FAILED) { + if (munmap(mmap_data_, mmap_size_) != 0) { + AssertInfo( + true, "failed to unmap bitmap index, err={}", strerror(errno)); + } + mmap_data_ = nullptr; + mmap_size_ = 0; + } +} + template void BitmapIndex::Build(const Config& config) { @@ -373,6 +392,83 @@ BitmapIndex::DeserializeIndexData(const uint8_t* data_ptr, } } +template +void +BitmapIndex::DeserializeIndexDataForMmap(const char* data_ptr, + size_t index_length) { + for (size_t i = 0; i < index_length; ++i) { + T key; + memcpy(&key, data_ptr, sizeof(T)); + data_ptr += sizeof(T); + + roaring::Roaring value; + value = roaring::Roaring::read(reinterpret_cast(data_ptr)); + auto size = value.getSizeInBytes(); + + bitmap_info_map_[key] = {static_cast(data_ptr - mmap_data_), + size}; + data_ptr += size; + } +} + +template <> +void +BitmapIndex::DeserializeIndexDataForMmap(const char* data_ptr, + size_t index_length) { + for (size_t i = 0; i < index_length; ++i) { + size_t key_size; + memcpy(&key_size, data_ptr, sizeof(size_t)); + data_ptr += sizeof(size_t); + + std::string key(reinterpret_cast(data_ptr), key_size); + data_ptr += key_size; + + roaring::Roaring value; + value = roaring::Roaring::read(reinterpret_cast(data_ptr)); + auto size = value.getSizeInBytes(); + + bitmap_info_map_[key] = {static_cast(data_ptr - mmap_data_), + size}; + data_ptr += size; + } +} + +template +void +BitmapIndex::MMapIndexData(const std::string& file_name, + const uint8_t* data_ptr, + size_t data_size, + size_t index_length) { + std::filesystem::create_directories( + std::filesystem::path(file_name).parent_path()); + + auto file = File::Open(file_name, O_RDWR | O_CREAT | O_TRUNC); + auto written = file.Write(data_ptr, data_size); + if (written != data_size) { + file.Close(); + remove(file_name.c_str()); + PanicInfo(ErrorCode::UnistdError, + fmt::format("write index to fd error: {}", strerror(errno))); + } + + file.Seek(0, SEEK_SET); + mmap_data_ = static_cast( + mmap(NULL, data_size, PROT_READ, MAP_PRIVATE, file.Descriptor(), 0)); + if (mmap_data_ == MAP_FAILED) { + file.Close(); + remove(file_name.c_str()); + PanicInfo( + ErrorCode::UnexpectedError, "failed to mmap: {}", strerror(errno)); + } + + mmap_size_ = data_size; + unlink(file_name.c_str()); + + char* ptr = mmap_data_; + DeserializeIndexDataForMmap(ptr, index_length); + is_mmap_ = true; +} + template void BitmapIndex::LoadWithoutAssemble(const BinarySet& binary_set, @@ -385,11 +481,33 @@ BitmapIndex::LoadWithoutAssemble(const BinarySet& binary_set, valid_bitset = TargetBitmap(total_num_rows_, false); auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA); - DeserializeIndexData(index_data_buffer->data.get(), index_length); - LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}", - Cardinality(), - total_num_rows_); + ChooseIndexLoadMode(index_length); + + // only using mmap when build mode is raw roaring bitmap + if (config.contains(kMmapFilepath) && + build_mode_ == BitmapIndexBuildMode::ROARING) { + auto mmap_filepath = + GetValueFromConfig(config, kMmapFilepath); + AssertInfo(mmap_filepath.has_value(), + "mmap filepath is empty when load index"); + MMapIndexData(mmap_filepath.value(), + index_data_buffer->data.get(), + index_data_buffer->size, + index_length); + } else { + DeserializeIndexData(index_data_buffer->data.get(), index_length); + } + + auto file_index_meta = file_manager_->GetIndexMeta(); + LOG_INFO( + "load bitmap index with cardinality = {}, num_rows = {} for segment_id " + "= {}, field_id = {}, mmap = {}", + Cardinality(), + total_num_rows_, + file_index_meta.segment_id, + file_index_meta.field_id, + is_mmap_); is_built_ = true; } @@ -397,6 +515,7 @@ BitmapIndex::LoadWithoutAssemble(const BinarySet& binary_set, template void BitmapIndex::Load(milvus::tracer::TraceContext ctx, const Config& config) { + LOG_DEBUG("load bitmap index with config {}", config.dump()); auto index_files = GetValueFromConfig>(config, "index_files"); AssertInfo(index_files.has_value(), @@ -421,6 +540,18 @@ BitmapIndex::In(const size_t n, const T* values) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); + if (is_mmap_) { + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + auto it = bitmap_info_map_.find(val); + if (it != bitmap_info_map_.end()) { + for (const auto& v : AccessBitmap(it->second)) { + res.set(v); + } + } + } + return res; + } if (build_mode_ == BitmapIndexBuildMode::ROARING) { for (size_t i = 0; i < n; ++i) { auto val = values[i]; @@ -447,6 +578,19 @@ const TargetBitmap BitmapIndex::NotIn(const size_t n, const T* values) { AssertInfo(is_built_, "index has not been built"); + if (is_mmap_) { + TargetBitmap res(total_num_rows_, true); + for (int i = 0; i < n; ++i) { + auto val = values[i]; + auto it = bitmap_info_map_.find(val); + if (it != bitmap_info_map_.end()) { + for (const auto& v : AccessBitmap(it->second)) { + res.reset(v); + } + } + } + return res; + } if (build_mode_ == BitmapIndexBuildMode::ROARING) { TargetBitmap res(total_num_rows_, true); for (int i = 0; i < n; ++i) { @@ -558,12 +702,76 @@ BitmapIndex::RangeForBitset(const T value, const OpType op) { template const TargetBitmap BitmapIndex::Range(const T value, OpType op) { + if (is_mmap_) { + return std::move(RangeForMmap(value, op)); + } if (build_mode_ == BitmapIndexBuildMode::ROARING) { return std::move(RangeForRoaring(value, op)); } else { return std::move(RangeForBitset(value, op)); } } +template +TargetBitmap +BitmapIndex::RangeForMmap(const T value, const OpType op) { + AssertInfo(is_built_, "index has not been built"); + TargetBitmap res(total_num_rows_, false); + if (ShouldSkip(value, value, op)) { + return res; + } + auto lb = bitmap_info_map_.begin(); + auto ub = bitmap_info_map_.end(); + + switch (op) { + case OpType::LessThan: { + ub = std::lower_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::LessEqual: { + ub = std::upper_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::GreaterThan: { + lb = std::upper_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::GreaterEqual: { + lb = std::lower_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + default: { + PanicInfo(OpTypeInvalid, + fmt::format("Invalid OperatorType: {}", op)); + } + } + + for (; lb != ub; lb++) { + for (const auto& v : AccessBitmap(lb->second)) { + res.set(v); + } + } + return res; +} template TargetBitmap @@ -690,6 +898,10 @@ BitmapIndex::Range(const T lower_value, bool lb_inclusive, const T upper_value, bool ub_inclusive) { + if (is_mmap_) { + return RangeForMmap( + lower_value, lb_inclusive, upper_value, ub_inclusive); + } if (build_mode_ == BitmapIndexBuildMode::ROARING) { return RangeForRoaring( lower_value, lb_inclusive, upper_value, ub_inclusive); @@ -699,6 +911,65 @@ BitmapIndex::Range(const T lower_value, } } +template +TargetBitmap +BitmapIndex::RangeForMmap(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { + AssertInfo(is_built_, "index has not been built"); + TargetBitmap res(total_num_rows_, false); + if (lower_value > upper_value || + (lower_value == upper_value && !(lb_inclusive && ub_inclusive))) { + return res; + } + if (ShouldSkip(lower_value, upper_value, OpType::Range)) { + return res; + } + + auto lb = bitmap_info_map_.begin(); + auto ub = bitmap_info_map_.end(); + + if (lb_inclusive) { + lb = std::lower_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(lower_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } else { + lb = std::upper_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(lower_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } + + if (ub_inclusive) { + ub = std::upper_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(upper_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } else { + ub = std::lower_bound(bitmap_info_map_.begin(), + bitmap_info_map_.end(), + std::make_pair(upper_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } + + for (; lb != ub; lb++) { + for (const auto& v : AccessBitmap(lb->second)) { + res.set(v); + } + } + return res; +} + template TargetBitmap BitmapIndex::RangeForRoaring(const T lower_value, @@ -764,18 +1035,29 @@ BitmapIndex::Reverse_Lookup(size_t idx) const { AssertInfo(is_built_, "index has not been built"); AssertInfo(idx < total_num_rows_, "out of range of total coun"); - if (build_mode_ == BitmapIndexBuildMode::ROARING) { - for (auto it = data_.begin(); it != data_.end(); it++) { - for (const auto& v : it->second) { + if (is_mmap_) { + for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end(); + it++) { + for (const auto& v : AccessBitmap(it->second)) { if (v == idx) { return it->first; } } } } else { - for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) { - if (it->second[idx]) { - return it->first; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + for (auto it = data_.begin(); it != data_.end(); it++) { + for (const auto& v : it->second) { + if (v == idx) { + return it->first; + } + } + } + } else { + for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) { + if (it->second[idx]) { + return it->first; + } } } } @@ -828,6 +1110,15 @@ BitmapIndex::ShouldSkip(const T lower_value, return should_skip; }; + if (is_mmap_) { + if (!bitmap_info_map_.empty()) { + auto lower_bound = bitmap_info_map_.begin()->first; + auto upper_bound = bitmap_info_map_.rbegin()->first; + bool should_skip = skip(op, lower_bound, upper_bound); + return should_skip; + } + } + if (build_mode_ == BitmapIndexBuildMode::ROARING) { if (!data_.empty()) { auto lower_bound = data_.begin()->first; @@ -861,6 +1152,19 @@ BitmapIndex::Query(const DatasetPtr& dataset) { if (op == OpType::PrefixMatch) { auto prefix = dataset->Get(PREFIX_VALUE); TargetBitmap res(total_num_rows_, false); + if (is_mmap_) { + for (auto it = bitmap_info_map_.begin(); + it != bitmap_info_map_.end(); + ++it) { + const auto& key = it->first; + if (milvus::query::Match(key, prefix, op)) { + for (const auto& v : AccessBitmap(it->second)) { + res.set(v); + } + } + } + return res; + } if (build_mode_ == BitmapIndexBuildMode::ROARING) { for (auto it = data_.begin(); it != data_.end(); ++it) { const auto& key = it->first; @@ -898,6 +1202,18 @@ BitmapIndex::RegexQuery(const std::string& regex_pattern) { AssertInfo(is_built_, "index has not been built"); RegexMatcher matcher(regex_pattern); TargetBitmap res(total_num_rows_, false); + if (is_mmap_) { + for (auto it = bitmap_info_map_.begin(); it != bitmap_info_map_.end(); + ++it) { + const auto& key = it->first; + if (matcher(key)) { + for (const auto& v : AccessBitmap(it->second)) { + res.set(v); + } + } + } + return res; + } if (build_mode_ == BitmapIndexBuildMode::ROARING) { for (auto it = data_.begin(); it != data_.end(); ++it) { const auto& key = it->first; diff --git a/internal/core/src/index/BitmapIndex.h b/internal/core/src/index/BitmapIndex.h index 3bf279cf8b75b..a8acb99cf76af 100644 --- a/internal/core/src/index/BitmapIndex.h +++ b/internal/core/src/index/BitmapIndex.h @@ -30,6 +30,11 @@ namespace milvus { namespace index { +struct BitmapInfo { + size_t offset_; + size_t size_; +}; + enum class BitmapIndexBuildMode { ROARING, BITSET, @@ -46,7 +51,11 @@ class BitmapIndex : public ScalarIndex { const storage::FileManagerContext& file_manager_context = storage::FileManagerContext()); - ~BitmapIndex() override = default; + ~BitmapIndex() { + if (is_mmap_) { + UnmapIndexData(); + } + } BinarySet Serialize(const Config& config) override; @@ -146,6 +155,10 @@ class BitmapIndex : public ScalarIndex { public: int64_t Cardinality() { + if (is_mmap_) { + return bitmap_info_map_.size(); + } + if (build_mode_ == BitmapIndexBuildMode::ROARING) { return data_.size(); } else { @@ -172,6 +185,9 @@ class BitmapIndex : public ScalarIndex { std::pair DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size); + void + DeserializeIndexDataForMmap(const char* data_ptr, size_t index_length); + void DeserializeIndexData(const uint8_t* data_ptr, size_t index_length); @@ -190,6 +206,9 @@ class BitmapIndex : public ScalarIndex { TargetBitmap RangeForBitset(T value, OpType op); + TargetBitmap + RangeForMmap(T value, OpType op); + TargetBitmap RangeForRoaring(T lower_bound_value, bool lb_inclusive, @@ -202,12 +221,35 @@ class BitmapIndex : public ScalarIndex { T upper_bound_value, bool ub_inclusive); + TargetBitmap + RangeForMmap(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive); + + void + MMapIndexData(const std::string& filepath, + const uint8_t* data, + size_t data_size, + size_t index_length); + + roaring::Roaring + AccessBitmap(const BitmapInfo& info) const { + return roaring::Roaring::read(mmap_data_ + info.offset_, info.size_); + } + + void + UnmapIndexData(); + public: bool is_built_{false}; - Config config_; BitmapIndexBuildMode build_mode_; std::map data_; std::map bitsets_; + bool is_mmap_{false}; + char* mmap_data_; + int64_t mmap_size_; + std::map bitmap_info_map_; size_t total_num_rows_{0}; proto::schema::FieldSchema schema_; std::shared_ptr file_manager_; diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp index 84870bc09d118..d9ab3c13a1dcc 100644 --- a/internal/core/src/index/HybridScalarIndex.cpp +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -30,7 +30,8 @@ namespace index { template HybridScalarIndex::HybridScalarIndex( const storage::FileManagerContext& file_manager_context) - : is_built_(false), + : ScalarIndex(HYBRID_INDEX_TYPE), + is_built_(false), bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND), file_manager_context_(file_manager_context) { if (file_manager_context.Valid()) { diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 0061253de620f..438b678e7f10e 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -24,6 +24,7 @@ #include "knowhere/dataset.h" #include "common/Tracer.h" #include "common/Types.h" +#include "index/Meta.h" const std::string kMmapFilepath = "mmap_filepath"; const std::string kEnableMmap = "enable_mmap"; @@ -76,7 +77,10 @@ class IndexBase { index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP || index_type_ == knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX || - index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND; + index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND || + // support mmap for bitmap/hybrid index + index_type_ == milvus::index::BITMAP_INDEX_TYPE || + index_type_ == milvus::index::HYBRID_INDEX_TYPE; } const IndexType& diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index adcfdab0c33e9..994c8b0c05648 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -71,7 +71,8 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) { template InvertedIndexTantivy::InvertedIndexTantivy( const storage::FileManagerContext& ctx) - : schema_(ctx.fieldDataMeta.field_schema) { + : ScalarIndex(INVERTED_INDEX_TYPE), + schema_(ctx.fieldDataMeta.field_schema) { mem_file_manager_ = std::make_shared(ctx); disk_file_manager_ = std::make_shared(ctx); auto field = diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index 7c3ade38b3747..8dac3291a358b 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -35,7 +35,9 @@ class InvertedIndexTantivy : public ScalarIndex { using DiskFileManager = storage::DiskFileManagerImpl; using DiskFileManagerPtr = std::shared_ptr; - InvertedIndexTantivy() = default; + InvertedIndexTantivy() : ScalarIndex(INVERTED_INDEX_TYPE) { + } + explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx); ~InvertedIndexTantivy(); diff --git a/internal/core/src/index/ScalarIndex.h b/internal/core/src/index/ScalarIndex.h index 1a3067256adce..6105ce4afb980 100644 --- a/internal/core/src/index/ScalarIndex.h +++ b/internal/core/src/index/ScalarIndex.h @@ -60,6 +60,9 @@ ToString(ScalarIndexType type) { template class ScalarIndex : public IndexBase { public: + ScalarIndex(const std::string& index_type) : IndexBase(index_type) { + } + void BuildWithRawData(size_t n, const void* values, diff --git a/internal/core/src/index/ScalarIndexSort.cpp b/internal/core/src/index/ScalarIndexSort.cpp index 3d206a697f738..56396d7d192f3 100644 --- a/internal/core/src/index/ScalarIndexSort.cpp +++ b/internal/core/src/index/ScalarIndexSort.cpp @@ -36,7 +36,7 @@ namespace milvus::index { template ScalarIndexSort::ScalarIndexSort( const storage::FileManagerContext& file_manager_context) - : is_built_(false), data_() { + : ScalarIndex(ASCENDING_SORT), is_built_(false), data_() { if (file_manager_context.Valid()) { file_manager_ = std::make_shared(file_manager_context); diff --git a/internal/core/src/index/StringIndex.h b/internal/core/src/index/StringIndex.h index 07e19c54f34b9..3aa84927b1b90 100644 --- a/internal/core/src/index/StringIndex.h +++ b/internal/core/src/index/StringIndex.h @@ -29,6 +29,10 @@ namespace milvus::index { class StringIndex : public ScalarIndex { public: + StringIndex(const std::string& index_type) + : ScalarIndex(index_type) { + } + const TargetBitmap Query(const DatasetPtr& dataset) override { auto op = dataset->Get(OPERATOR_TYPE); diff --git a/internal/core/src/index/StringIndexMarisa.cpp b/internal/core/src/index/StringIndexMarisa.cpp index 6052532fc0a87..20b2b9dd99690 100644 --- a/internal/core/src/index/StringIndexMarisa.cpp +++ b/internal/core/src/index/StringIndexMarisa.cpp @@ -40,7 +40,8 @@ namespace milvus::index { StringIndexMarisa::StringIndexMarisa( - const storage::FileManagerContext& file_manager_context) { + const storage::FileManagerContext& file_manager_context) + : StringIndex(MARISA_TRIE) { if (file_manager_context.Valid()) { file_manager_ = std::make_shared(file_manager_context); diff --git a/internal/core/src/storage/FileManager.h b/internal/core/src/storage/FileManager.h index 87f94a1b4075d..5d22b79fde29c 100644 --- a/internal/core/src/storage/FileManager.h +++ b/internal/core/src/storage/FileManager.h @@ -31,6 +31,9 @@ namespace milvus::storage { struct FileManagerContext { FileManagerContext() : chunkManagerPtr(nullptr) { } + FileManagerContext(const ChunkManagerPtr& chunkManagerPtr) + : chunkManagerPtr(chunkManagerPtr) { + } FileManagerContext(const FieldDataMeta& fieldDataMeta, const IndexMeta& indexMeta, const ChunkManagerPtr& chunkManagerPtr) diff --git a/internal/core/unittest/test_bitmap_index.cpp b/internal/core/unittest/test_bitmap_index.cpp index 3ea8811965ab2..ed7833ff3002c 100644 --- a/internal/core/unittest/test_bitmap_index.cpp +++ b/internal/core/unittest/test_bitmap_index.cpp @@ -105,7 +105,7 @@ class BitmapIndexTest : public testing::Test { auto serialized_bytes = insert_data.Serialize(storage::Remote); auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}", - "/tmp/test_bitmap/", + "/tmp/test-bitmap-index/", collection_id, partition_id, segment_id, @@ -137,6 +137,16 @@ class BitmapIndexTest : public testing::Test { config["index_files"] = index_files; + if (is_mmap_) { + config["enable_mmap"] = "true"; + config["mmap_filepath"] = fmt::format("/{}/{}/{}/{}/{}", + "/tmp/test-bitmap-index/", + collection_id, + 1, + segment_id, + field_id); + ; + } index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); index_->Load(milvus::tracer::TraceContext{}, config); @@ -247,7 +257,7 @@ class BitmapIndexTest : public testing::Test { auto should = ref(i); ASSERT_EQ(ans, should) << "op: " << op << ", @" << i << ", ans: " << ans - << ", ref: " << should; + << ", ref: " << should << "|" << data_[i]; } } } @@ -318,6 +328,7 @@ class BitmapIndexTest : public testing::Test { DataType type_; size_t nb_; size_t cardinality_; + bool is_mmap_ = false; boost::container::vector data_; std::shared_ptr chunk_manager_; }; @@ -400,4 +411,55 @@ REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV2, INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_HighCardinality, BitmapIndexTestV2, + BitmapType); + +template +class BitmapIndexTestV3 : public BitmapIndexTest { + public: + virtual void + SetParam() override { + this->nb_ = 10000; + this->cardinality_ = 2000; + this->is_mmap_ = true; + } + + virtual ~BitmapIndexTestV3() { + } +}; + +TYPED_TEST_SUITE_P(BitmapIndexTestV3); + +TYPED_TEST_P(BitmapIndexTestV3, CountFuncTest) { + auto count = this->index_->Count(); + EXPECT_EQ(count, this->nb_); +} + +TYPED_TEST_P(BitmapIndexTestV3, INFuncTest) { + this->TestInFunc(); +} + +TYPED_TEST_P(BitmapIndexTestV3, NotINFuncTest) { + this->TestNotInFunc(); +} + +TYPED_TEST_P(BitmapIndexTestV3, CompareValFuncTest) { + this->TestCompareValueFunc(); +} + +TYPED_TEST_P(BitmapIndexTestV3, TestRangeCompareFuncTest) { + this->TestRangeCompareFunc(); +} + +using BitmapType = + testing::Types; + +REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV3, + CountFuncTest, + INFuncTest, + NotINFuncTest, + CompareValFuncTest, + TestRangeCompareFuncTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_Mmap, + BitmapIndexTestV3, BitmapType); \ No newline at end of file diff --git a/internal/core/unittest/test_scalar_index.cpp b/internal/core/unittest/test_scalar_index.cpp index d6206285410a4..8ca5067d007ef 100644 --- a/internal/core/unittest/test_scalar_index.cpp +++ b/internal/core/unittest/test_scalar_index.cpp @@ -27,6 +27,7 @@ #include #include "test_utils/storage_test_utils.h" #include "test_utils/TmpPath.h" +#include "storage/Util.h" constexpr int64_t nb = 100; namespace indexcgo = milvus::proto::indexcgo; @@ -55,7 +56,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) { auto GetTempFileManagerCtx(CDataType data_type) { - auto ctx = milvus::storage::FileManagerContext(); + milvus::storage::StorageConfig storage_config; + storage_config.storage_type = "local"; + storage_config.root_path = "/tmp/local/"; + auto chunk_manager = milvus::storage::CreateChunkManager(storage_config); + auto ctx = milvus::storage::FileManagerContext(chunk_manager); ctx.fieldDataMeta.field_schema.set_data_type( static_cast(data_type)); return ctx; diff --git a/pkg/util/indexparamcheck/bitmap_index_checker.go b/pkg/util/indexparamcheck/bitmap_index_checker.go index f19b472baa7e9..f19943a50ea93 100644 --- a/pkg/util/indexparamcheck/bitmap_index_checker.go +++ b/pkg/util/indexparamcheck/bitmap_index_checker.go @@ -16,6 +16,9 @@ func (c *BITMAPChecker) CheckTrain(params map[string]string) error { } func (c *BITMAPChecker) CheckValidDataType(field *schemapb.FieldSchema) error { + if field.IsPrimaryKey { + return fmt.Errorf("create bitmap index on primary key not supported") + } mainType := field.GetDataType() elemType := field.GetElementType() if !typeutil.IsBoolType(mainType) && !typeutil.IsIntegerType(mainType) && diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index efe4bbbc7375a..430d719f5fca0 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -70,7 +70,9 @@ func IsVectorMmapIndex(indexType IndexType) bool { indexType == IndexHNSW || indexType == IndexScaNN || indexType == IndexSparseInverted || - indexType == IndexSparseWand + indexType == IndexSparseWand || + indexType == IndexBitmap || + indexType == IndexHybrid } func IsDiskIndex(indexType IndexType) bool {