Skip to content

Commit

Permalink
enhance: support bitmap index mmap
Browse files Browse the repository at this point in the history
Signed-off-by: luzhang <[email protected]>
  • Loading branch information
luzhang committed Aug 22, 2024
1 parent 3107701 commit 510cb9f
Show file tree
Hide file tree
Showing 15 changed files with 473 additions and 24 deletions.
340 changes: 328 additions & 12 deletions internal/core/src/index/BitmapIndex.cpp

Large diffs are not rendered by default.

46 changes: 44 additions & 2 deletions internal/core/src/index/BitmapIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
namespace milvus {
namespace index {

// Locates one serialized bitmap inside the mmap'ed index data.
// AccessBitmap() reads the bitmap found at `mmap_data_ + offset_` and is
// handed `size_` alongside it.
// Members are zero-initialized so a default-constructed BitmapInfo never
// carries indeterminate values; aggregate initialization ({offset, size})
// keeps working.
struct BitmapInfo {
    // Byte offset from the start of the mapped region (mmap_data_ is char*).
    size_t offset_{0};
    // Size of the serialized bitmap; presumably in bytes — confirm against
    // the code that fills bitmap_info_map_.
    size_t size_{0};
};

enum class BitmapIndexBuildMode {
ROARING,
BITSET,
Expand All @@ -46,7 +51,11 @@ class BitmapIndex : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context =
storage::FileManagerContext());

~BitmapIndex() override = default;
// Tears down mmap-backed state on destruction: UnmapIndexData() is invoked
// only when the index is in mmap mode (is_mmap_ is true).
// Declared `override` so the compiler keeps checking that the base-class
// destructor is virtual, as the defaulted destructor did before.
~BitmapIndex() override {
    if (is_mmap_) {
        UnmapIndexData();
    }
}

BinarySet
Serialize(const Config& config) override;
Expand Down Expand Up @@ -146,6 +155,10 @@ class BitmapIndex : public ScalarIndex<T> {
public:
int64_t
Cardinality() {
if (is_mmap_) {
return bitmap_info_map_.size();
}

if (build_mode_ == BitmapIndexBuildMode::ROARING) {
return data_.size();
} else {
Expand All @@ -172,6 +185,9 @@ class BitmapIndex : public ScalarIndex<T> {
std::pair<size_t, size_t>
DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size);

void
DeserializeIndexDataForMmap(const char* data_ptr, size_t index_length);

void
DeserializeIndexData(const uint8_t* data_ptr, size_t index_length);

Expand All @@ -190,6 +206,9 @@ class BitmapIndex : public ScalarIndex<T> {
TargetBitmap
RangeForBitset(T value, OpType op);

TargetBitmap
RangeForMmap(T value, OpType op);

TargetBitmap
RangeForRoaring(T lower_bound_value,
bool lb_inclusive,
Expand All @@ -202,12 +221,35 @@ class BitmapIndex : public ScalarIndex<T> {
T upper_bound_value,
bool ub_inclusive);

TargetBitmap
RangeForMmap(T lower_bound_value,
bool lb_inclusive,
T upper_bound_value,
bool ub_inclusive);

void
MMapIndexData(const std::string& filepath,
const uint8_t* data,
size_t data_size,
size_t index_length);

// Materializes the roaring bitmap described by `info` from the mapped region.
//
// Uses Roaring::readSafe(buf, maxbytes) so that info.size_ actually bounds the
// read. Roaring::read's second parameter is `bool portable`, so passing a size
// there merely selected the portable format (for any non-zero size) and gave
// no protection against reading past the bitmap's bytes. readSafe reads that
// same portable format.
// NOTE(review): readSafe throws std::runtime_error on truncated or malformed
// input — confirm callers on the query path tolerate that.
roaring::Roaring
AccessBitmap(const BitmapInfo& info) const {
    return roaring::Roaring::readSafe(mmap_data_ + info.offset_, info.size_);
}

void
UnmapIndexData();

public:
bool is_built_{false};
Config config_;
BitmapIndexBuildMode build_mode_;
std::map<T, roaring::Roaring> data_;
std::map<T, TargetBitmap> bitsets_;
// --- State for the mmap-backed load path ---
// True when the index data is accessed through a memory mapping.
bool is_mmap_{false};
// Base address used by AccessBitmap(); null while nothing is mapped.
char* mmap_data_{nullptr};
// Size associated with the mapping (presumably its length in bytes — confirm
// in MMapIndexData); zero while nothing is mapped.
int64_t mmap_size_{0};
// For each indexed value, where its serialized bitmap lives in mmap_data_.
std::map<T, BitmapInfo> bitmap_info_map_;
size_t total_num_rows_{0};
proto::schema::FieldSchema schema_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
Expand Down
3 changes: 2 additions & 1 deletion internal/core/src/index/HybridScalarIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ namespace index {
template <typename T>
HybridScalarIndex<T>::HybridScalarIndex(
const storage::FileManagerContext& file_manager_context)
: is_built_(false),
: ScalarIndex<T>(HYBRID_INDEX_TYPE),
is_built_(false),
bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
file_manager_context_(file_manager_context) {
if (file_manager_context.Valid()) {
Expand Down
6 changes: 5 additions & 1 deletion internal/core/src/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "knowhere/dataset.h"
#include "common/Tracer.h"
#include "common/Types.h"
#include "index/Meta.h"

const std::string kMmapFilepath = "mmap_filepath";
const std::string kEnableMmap = "enable_mmap";
Expand Down Expand Up @@ -76,7 +77,10 @@ class IndexBase {
index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP ||
index_type_ ==
knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND;
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND ||
// support mmap for bitmap/hybrid index
index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
index_type_ == milvus::index::HYBRID_INDEX_TYPE;
}

const IndexType&
Expand Down
3 changes: 2 additions & 1 deletion internal/core/src/index/InvertedIndexTantivy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
template <typename T>
InvertedIndexTantivy<T>::InvertedIndexTantivy(
const storage::FileManagerContext& ctx)
: schema_(ctx.fieldDataMeta.field_schema) {
: ScalarIndex<T>(INVERTED_INDEX_TYPE),
schema_(ctx.fieldDataMeta.field_schema) {
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
auto field =
Expand Down
4 changes: 3 additions & 1 deletion internal/core/src/index/InvertedIndexTantivy.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
using DiskFileManager = storage::DiskFileManagerImpl;
using DiskFileManagerPtr = std::shared_ptr<DiskFileManager>;

InvertedIndexTantivy() = default;
// Default constructor: forwards INVERTED_INDEX_TYPE to the ScalarIndex base
// and does no other work in its body.
InvertedIndexTantivy()
    : ScalarIndex<T>(INVERTED_INDEX_TYPE) {}

explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx);

~InvertedIndexTantivy();
Expand Down
3 changes: 3 additions & 0 deletions internal/core/src/index/ScalarIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ ToString(ScalarIndexType type) {
template <typename T>
class ScalarIndex : public IndexBase {
public:
ScalarIndex(const std::string& index_type) : IndexBase(index_type) {
}

void
BuildWithRawData(size_t n,
const void* values,
Expand Down
2 changes: 1 addition & 1 deletion internal/core/src/index/ScalarIndexSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace milvus::index {
template <typename T>
ScalarIndexSort<T>::ScalarIndexSort(
const storage::FileManagerContext& file_manager_context)
: is_built_(false), data_() {
: ScalarIndex<T>(ASCENDING_SORT), is_built_(false), data_() {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
Expand Down
4 changes: 4 additions & 0 deletions internal/core/src/index/StringIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ namespace milvus::index {

class StringIndex : public ScalarIndex<std::string> {
public:
StringIndex(const std::string& index_type)
: ScalarIndex<std::string>(index_type) {
}

const TargetBitmap
Query(const DatasetPtr& dataset) override {
auto op = dataset->Get<OpType>(OPERATOR_TYPE);
Expand Down
3 changes: 2 additions & 1 deletion internal/core/src/index/StringIndexMarisa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
namespace milvus::index {

StringIndexMarisa::StringIndexMarisa(
const storage::FileManagerContext& file_manager_context) {
const storage::FileManagerContext& file_manager_context)
: StringIndex(MARISA_TRIE) {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
Expand Down
3 changes: 3 additions & 0 deletions internal/core/src/storage/FileManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ namespace milvus::storage {
struct FileManagerContext {
// Default context: carries no chunk manager (chunkManagerPtr is null).
FileManagerContext()
    : chunkManagerPtr{nullptr} {}
// Context that only carries a chunk manager; the other members are not set
// by this constructor. The parameter intentionally shares the member's name:
// inside the initializer the outer name is the member, the inner one the
// argument.
FileManagerContext(const ChunkManagerPtr& chunkManagerPtr)
    : chunkManagerPtr{chunkManagerPtr} {}
FileManagerContext(const FieldDataMeta& fieldDataMeta,
const IndexMeta& indexMeta,
const ChunkManagerPtr& chunkManagerPtr)
Expand Down
66 changes: 64 additions & 2 deletions internal/core/unittest/test_bitmap_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class BitmapIndexTest : public testing::Test {
auto serialized_bytes = insert_data.Serialize(storage::Remote);

auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}",
"/tmp/test_bitmap/",
"/tmp/test-bitmap-index/",
collection_id,
partition_id,
segment_id,
Expand Down Expand Up @@ -137,6 +137,16 @@ class BitmapIndexTest : public testing::Test {

config["index_files"] = index_files;

if (is_mmap_) {
config["enable_mmap"] = "true";
config["mmap_filepath"] = fmt::format("/{}/{}/{}/{}/{}",
"/tmp/test-bitmap-index/",
collection_id,
1,
segment_id,
field_id);
;
}
index_ =
index::IndexFactory::GetInstance().CreateIndex(index_info, ctx);
index_->Load(milvus::tracer::TraceContext{}, config);
Expand Down Expand Up @@ -247,7 +257,7 @@ class BitmapIndexTest : public testing::Test {
auto should = ref(i);
ASSERT_EQ(ans, should)
<< "op: " << op << ", @" << i << ", ans: " << ans
<< ", ref: " << should;
<< ", ref: " << should << "|" << data_[i];
}
}
}
Expand Down Expand Up @@ -318,6 +328,7 @@ class BitmapIndexTest : public testing::Test {
DataType type_;
size_t nb_;
size_t cardinality_;
bool is_mmap_ = false;
boost::container::vector<T> data_;
std::shared_ptr<storage::ChunkManager> chunk_manager_;
};
Expand Down Expand Up @@ -400,4 +411,55 @@ REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV2,

INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_HighCardinality,
BitmapIndexTestV2,
BitmapType);

// Fixture variant that runs the shared bitmap-index checks with the mmap
// flag set: 10000 rows, cardinality 2000, is_mmap_ = true.
template <typename T>
class BitmapIndexTestV3 : public BitmapIndexTest<T> {
 public:
    ~BitmapIndexTestV3() override = default;

    // Overrides the base fixture's parameters; is_mmap_ is what makes the
    // base fixture add the mmap settings to the load config.
    void
    SetParam() override {
        this->is_mmap_ = true;
        this->cardinality_ = 2000;
        this->nb_ = 10000;
    }
};

// Type-parameterized suite for the BitmapIndexTestV3 fixture. Apart from the
// count check, every case simply forwards to a helper on the fixture, so the
// checks run against whatever index the fixture prepared.
TYPED_TEST_SUITE_P(BitmapIndexTestV3);

// The index must report exactly nb_ rows.
TYPED_TEST_P(BitmapIndexTestV3, CountFuncTest) {
auto count = this->index_->Count();
EXPECT_EQ(count, this->nb_);
}

// Delegates to the fixture's TestInFunc helper.
TYPED_TEST_P(BitmapIndexTestV3, INFuncTest) {
this->TestInFunc();
}

// Delegates to the fixture's TestNotInFunc helper.
TYPED_TEST_P(BitmapIndexTestV3, NotINFuncTest) {
this->TestNotInFunc();
}

// Delegates to the fixture's TestCompareValueFunc helper.
TYPED_TEST_P(BitmapIndexTestV3, CompareValFuncTest) {
this->TestCompareValueFunc();
}

// Delegates to the fixture's TestRangeCompareFunc helper.
TYPED_TEST_P(BitmapIndexTestV3, TestRangeCompareFuncTest) {
this->TestRangeCompareFunc();
}

// Element types the suite is instantiated for.
using BitmapType =
testing::Types<int8_t, int16_t, int32_t, int64_t, std::string>;

// Every TYPED_TEST_P case above has to be listed here to be registered.
REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTestV3,
CountFuncTest,
INFuncTest,
NotINFuncTest,
CompareValFuncTest,
TestRangeCompareFuncTest);

// Instantiates the suite once per type in BitmapType under the
// BitmapIndexE2ECheck_Mmap prefix.
INSTANTIATE_TYPED_TEST_SUITE_P(BitmapIndexE2ECheck_Mmap,
BitmapIndexTestV3,
BitmapType);
7 changes: 6 additions & 1 deletion internal/core/unittest/test_scalar_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <boost/filesystem.hpp>
#include "test_utils/storage_test_utils.h"
#include "test_utils/TmpPath.h"
#include "storage/Util.h"

constexpr int64_t nb = 100;
namespace indexcgo = milvus::proto::indexcgo;
Expand Down Expand Up @@ -55,7 +56,11 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) {

auto
GetTempFileManagerCtx(CDataType data_type) {
auto ctx = milvus::storage::FileManagerContext();
milvus::storage::StorageConfig storage_config;
storage_config.storage_type = "local";
storage_config.root_path = "/tmp/local/";
auto chunk_manager = milvus::storage::CreateChunkManager(storage_config);
auto ctx = milvus::storage::FileManagerContext(chunk_manager);
ctx.fieldDataMeta.field_schema.set_data_type(
static_cast<milvus::proto::schema::DataType>(data_type));
return ctx;
Expand Down
3 changes: 3 additions & 0 deletions pkg/util/indexparamcheck/bitmap_index_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ func (c *BITMAPChecker) CheckTrain(params map[string]string) error {
}

func (c *BITMAPChecker) CheckValidDataType(field *schemapb.FieldSchema) error {
if field.IsPrimaryKey {
return fmt.Errorf("create bitmap index on primary key not supported")

Check warning on line 20 in pkg/util/indexparamcheck/bitmap_index_checker.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/indexparamcheck/bitmap_index_checker.go#L20

Added line #L20 was not covered by tests
}
mainType := field.GetDataType()
elemType := field.GetElementType()
if !typeutil.IsBoolType(mainType) && !typeutil.IsIntegerType(mainType) &&
Expand Down
4 changes: 3 additions & 1 deletion pkg/util/indexparamcheck/index_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ func IsVectorMmapIndex(indexType IndexType) bool {
indexType == IndexHNSW ||
indexType == IndexScaNN ||
indexType == IndexSparseInverted ||
indexType == IndexSparseWand
indexType == IndexSparseWand ||
indexType == IndexBitmap ||
indexType == IndexHybrid
}

func IsDiskIndex(indexType IndexType) bool {
Expand Down

0 comments on commit 510cb9f

Please sign in to comment.