Skip to content

Commit

Permalink
enhance: rename bitmap index to hybrid index
Browse files Browse the repository at this point in the history
Signed-off-by: luzhang <[email protected]>
  • Loading branch information
luzhang committed Jul 9, 2024
1 parent 233b848 commit 7e682fe
Show file tree
Hide file tree
Showing 15 changed files with 97 additions and 43 deletions.
10 changes: 5 additions & 5 deletions internal/core/src/index/HybridScalarIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ void
HybridScalarIndex<T>::BuildInternal(
const std::vector<FieldDataPtr>& field_datas) {
auto index = GetInternalIndex();
LOG_INFO("build bitmap index with internal index:{}",
LOG_INFO("build hybrid index with internal index:{}",
ToString(internal_index_type_));
index->BuildWithFieldData(field_datas);
}
Expand Down Expand Up @@ -406,7 +406,7 @@ HybridScalarIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
DeserializeIndexType(binary_set);

auto index = GetInternalIndex();
LOG_INFO("load bitmap index with internal index:{}",
LOG_INFO("load hybrid index with internal index:{}",
ToString(internal_index_type_));
index->Load(binary_set, config);

Expand All @@ -420,7 +420,7 @@ HybridScalarIndex<T>::Load(milvus::tracer::TraceContext ctx,
auto index_files =
GetValueFromConfig<std::vector<std::string>>(config, "index_files");
AssertInfo(index_files.has_value(),
"index file paths is empty when load bitmap index");
"index file paths is empty when load hybrid index");

auto index_type_file = GetRemoteIndexTypeFile(index_files.value());

Expand All @@ -439,7 +439,7 @@ HybridScalarIndex<T>::Load(milvus::tracer::TraceContext ctx,
DeserializeIndexType(binary_set);

auto index = GetInternalIndex();
LOG_INFO("load bitmap index with internal index:{}",
LOG_INFO("load hybrid index with internal index:{}",
ToString(internal_index_type_));
index->Load(ctx, config);

Expand All @@ -456,4 +456,4 @@ template class HybridScalarIndex<double>;
template class HybridScalarIndex<std::string>;

} // namespace index
} // namespace milvus
} // namespace milvus
15 changes: 10 additions & 5 deletions internal/core/src/index/IndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ IndexFactory::CreatePrimitiveScalarIndex(
if (index_type == INVERTED_INDEX_TYPE) {
return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context);
}
if (index_type == BITMAP_INDEX_TYPE) {
if (index_type == HYBRID_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<T>>(file_manager_context);
}
return CreateScalarIndexSort<T>(file_manager_context);
Expand All @@ -62,7 +62,7 @@ IndexFactory::CreatePrimitiveScalarIndex<std::string>(
return std::make_unique<InvertedIndexTantivy<std::string>>(
file_manager_context);
}
if (index_type == BITMAP_INDEX_TYPE) {
if (index_type == HYBRID_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<std::string>>(
file_manager_context);
}
Expand All @@ -82,7 +82,7 @@ IndexFactory::CreatePrimitiveScalarIndex(
return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context,
space);
}
if (index_type == BITMAP_INDEX_TYPE) {
if (index_type == HYBRID_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<T>>(file_manager_context,
space);
}
Expand All @@ -100,7 +100,7 @@ IndexFactory::CreatePrimitiveScalarIndex<std::string>(
return std::make_unique<InvertedIndexTantivy<std::string>>(
file_manager_context, space);
}
if (index_type == BITMAP_INDEX_TYPE) {
if (index_type == HYBRID_INDEX_TYPE) {
return std::make_unique<HybridScalarIndex<std::string>>(
file_manager_context, space);
}
Expand Down Expand Up @@ -179,7 +179,7 @@ IndexBasePtr
IndexFactory::CreateCompositeScalarIndex(
IndexType index_type,
const storage::FileManagerContext& file_manager_context) {
if (index_type == BITMAP_INDEX_TYPE) {
if (index_type == HYBRID_INDEX_TYPE) {
auto element_type = static_cast<DataType>(
file_manager_context.fieldDataMeta.field_schema.element_type());
return CreatePrimitiveScalarIndex(
Expand All @@ -189,6 +189,11 @@ IndexFactory::CreateCompositeScalarIndex(
file_manager_context.fieldDataMeta.field_schema.element_type());
return CreatePrimitiveScalarIndex(
element_type, index_type, file_manager_context);
} else {
PanicInfo(
Unsupported,
fmt::format("index type: {} for composite scalar not supported now",
index_type));
}
}

Expand Down
21 changes: 16 additions & 5 deletions internal/core/src/index/InvertedIndexTantivy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ get_tantivy_data_type(proto::schema::DataType data_type) {
return TantivyDataType::F64;
}

case proto::schema::DataType::String:
case proto::schema::DataType::VarChar: {
return TantivyDataType::Keyword;
}
Expand Down Expand Up @@ -152,7 +153,7 @@ InvertedIndexTantivy<T>::Build(const Config& config) {
AssertInfo(insert_files.has_value(), "insert_files were empty");
auto field_datas =
mem_file_manager_->CacheRawDataToMemory(insert_files.value());
build_index(field_datas);
BuildWithFieldData(field_datas);
}

template <typename T>
Expand All @@ -173,7 +174,7 @@ InvertedIndexTantivy<T>::BuildV2(const Config& config) {
field_data->FillFieldData(col_data);
field_datas.push_back(field_data);
}
build_index(field_datas);
BuildWithFieldData(field_datas);
}

template <typename T>
Expand All @@ -185,7 +186,17 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
AssertInfo(index_files.has_value(),
"index file paths is empty when load disk ann index data");
auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
disk_file_manager_->CacheIndexToDisk(index_files.value());
auto files_value = index_files.value();
// need to erase the index type file, which has already been read
auto index_type_file =
disk_file_manager_->GetRemoteIndexPrefix() + std::string("/index_type");
files_value.erase(std::remove_if(files_value.begin(),
files_value.end(),
[&](const std::string& file) {
return file == index_type_file;
}),
files_value.end());
disk_file_manager_->CacheIndexToDisk(files_value);
wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
}

Expand Down Expand Up @@ -398,7 +409,7 @@ InvertedIndexTantivy<T>::BuildWithRawData(size_t n,

template <typename T>
void
InvertedIndexTantivy<T>::build_index(
InvertedIndexTantivy<T>::BuildWithFieldData(
const std::vector<std::shared_ptr<FieldDataBase>>& field_datas) {
switch (schema_.data_type()) {
case proto::schema::DataType::Bool:
Expand Down Expand Up @@ -454,7 +465,7 @@ InvertedIndexTantivy<std::string>::build_index_for_array(
auto n = data->get_num_rows();
auto array_column = static_cast<const Array*>(data->Data());
for (int64_t i = 0; i < n; i++) {
assert(array_column[i].get_element_type() ==
Assert(array_column[i].get_element_type() ==
static_cast<DataType>(schema_.element_type()));
std::vector<std::string> output;
for (int64_t j = 0; j < array_column[i].length(); j++) {
Expand Down
8 changes: 4 additions & 4 deletions internal/core/src/index/InvertedIndexTantivy.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
GetIndexType() const override {
return ScalarIndexType::INVERTED;
}

void
Build(const Config& config = {}) override;

Expand Down Expand Up @@ -170,12 +170,12 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
const TargetBitmap
RegexQuery(const std::string& pattern) override;

private:
void
finish();
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;

private:
void
build_index(const std::vector<std::shared_ptr<FieldDataBase>>& field_datas);
finish();

void
build_index_for_array(
Expand Down
2 changes: 1 addition & 1 deletion internal/core/src/index/Meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ constexpr const char* METRIC_TYPE = "metric_type";
constexpr const char* ASCENDING_SORT = "STL_SORT";
constexpr const char* MARISA_TRIE = "Trie";
constexpr const char* INVERTED_INDEX_TYPE = "INVERTED";
constexpr const char* BITMAP_INDEX_TYPE = "BITMAP";
constexpr const char* HYBRID_INDEX_TYPE = "HYBRID";

// index meta
constexpr const char* COLLECTION_ID = "collection_id";
Expand Down
6 changes: 6 additions & 0 deletions internal/core/src/storage/DiskFileManagerImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ class DiskFileManagerImpl : public FileManagerImpl {
const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes);

// Returns the remote index object prefix to use for this file manager:
// GetRemoteIndexObjectPrefix() when space_ is null, otherwise the V2 variant
// GetRemoteIndexObjectPrefixV2().
std::string
GetRemoteIndexPrefix() const {
    if (space_ == nullptr) {
        return GetRemoteIndexObjectPrefix();
    }
    return GetRemoteIndexObjectPrefixV2();
}

private:
int64_t
GetIndexBuildId() {
Expand Down
42 changes: 37 additions & 5 deletions internal/core/unittest/test_array_bitmap_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,9 @@ class ArrayBitmapIndexTest : public testing::Test {
std::vector<std::string> index_files;

Config config;
config["index_type"] = milvus::index::BITMAP_INDEX_TYPE;
config["index_type"] = milvus::index::HYBRID_INDEX_TYPE;
config["insert_files"] = std::vector<std::string>{log_path};
config["bitmap_cardinality_limit"] = "1000";
config["bitmap_cardinality_limit"] = "100";

auto build_index =
indexbuilder::IndexFactory::GetInstance().CreateIndex(
Expand All @@ -223,7 +223,7 @@ class ArrayBitmapIndexTest : public testing::Test {
}

index::CreateIndexInfo index_info{};
index_info.index_type = milvus::index::BITMAP_INDEX_TYPE;
index_info.index_type = milvus::index::HYBRID_INDEX_TYPE;
index_info.field_type = DataType::ARRAY;

config["index_files"] = index_files;
Expand All @@ -233,11 +233,15 @@ class ArrayBitmapIndexTest : public testing::Test {
index_->Load(milvus::tracer::TraceContext{}, config);
}

void
SetUp() override {
virtual void
SetParam() {
nb_ = 10000;
cardinality_ = 30;
}

void
SetUp() override {
SetParam();
// if constexpr (std::is_same_v<T, int8_t>) {
// type_ = DataType::INT8;
// } else if constexpr (std::is_same_v<T, int16_t>) {
Expand Down Expand Up @@ -338,3 +342,31 @@ REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTest,
INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheck,
ArrayBitmapIndexTest,
BitmapType);

// Fixture variant of ArrayBitmapIndexTest<T> that only changes the data
// parameters: 10000 rows with a cardinality of 200.
// NOTE(review): presumably chosen to exceed the bitmap_cardinality_limit set
// by the base fixture's config - confirm against the base class.
template <typename T>
class ArrayBitmapIndexTestV1 : public ArrayBitmapIndexTest<T> {
 public:
    ~ArrayBitmapIndexTestV1() override = default;

    // Overrides the base fixture's parameter hook. Members are reached
    // through this-> because the base class is a dependent template type.
    void
    SetParam() override {
        this->cardinality_ = 200;
        this->nb_ = 10000;
    }
};

TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1);

// Checks that the fixture's index_ reports a Count() equal to nb_, the row
// count configured by the fixture's SetParam().
TYPED_TEST_P(ArrayBitmapIndexTestV1, CountFuncTest) {
auto count = this->index_->Count();
EXPECT_EQ(count, this->nb_);
}

using BitmapTypeV1 = testing::Types<int32_t, int64_t, std::string>;

REGISTER_TYPED_TEST_SUITE_P(ArrayBitmapIndexTestV1, CountFuncTest);

INSTANTIATE_TYPED_TEST_SUITE_P(ArrayBitmapE2ECheckV1,
ArrayBitmapIndexTestV1,
BitmapTypeV1);
4 changes: 2 additions & 2 deletions internal/core/unittest/test_hybrid_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class HybridIndexTestV1 : public testing::Test {
std::vector<std::string> index_files;

Config config;
config["index_type"] = milvus::index::BITMAP_INDEX_TYPE;
config["index_type"] = milvus::index::HYBRID_INDEX_TYPE;
config["insert_files"] = std::vector<std::string>{log_path};
config["bitmap_cardinality_limit"] = "1000";

Expand All @@ -135,7 +135,7 @@ class HybridIndexTestV1 : public testing::Test {
}

index::CreateIndexInfo index_info{};
index_info.index_type = milvus::index::BITMAP_INDEX_TYPE;
index_info.index_type = milvus::index::HYBRID_INDEX_TYPE;
index_info.field_type = type_;

config["index_files"] = index_files;
Expand Down
4 changes: 2 additions & 2 deletions internal/core/unittest/test_utils/indexbuilder_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,14 +478,14 @@ GenDsFromPB(const google::protobuf::Message& msg) {
template <typename T>
inline std::vector<std::string>
GetIndexTypes() {
return std::vector<std::string>{"sort", milvus::index::BITMAP_INDEX_TYPE};
return std::vector<std::string>{"sort", milvus::index::HYBRID_INDEX_TYPE};
}

template <>
inline std::vector<std::string>
GetIndexTypes<std::string>() {
return std::vector<std::string>{
"sort", "marisa", milvus::index::BITMAP_INDEX_TYPE};
"sort", "marisa", milvus::index::HYBRID_INDEX_TYPE};
}

template <typename T>
Expand Down
2 changes: 1 addition & 1 deletion internal/proxy/task_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e
func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error {
indexType := indexParams[common.IndexTypeKey]

if indexType == indexparamcheck.IndexBitmap {
if indexType == indexparamcheck.IndexHybrid {
_, exist := indexParams[common.BitmapCardinalityLimitKey]
if !exist {
indexParams[common.BitmapCardinalityLimitKey] = paramtable.Get().CommonCfg.BitmapIndexCardinalityBound.GetValue()
Expand Down
2 changes: 1 addition & 1 deletion pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ const (

DropRatioBuildKey = "drop_ratio_build"

BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
IsSparseKey = "is_sparse"
BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
)

// Collection properties key
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/indexparamcheck/conf_adapter_mgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (mgr *indexCheckerMgrImpl) registerIndexChecker() {
mgr.checkers["Asceneding"] = newSTLSORTChecker()
mgr.checkers[IndexTRIE] = newTRIEChecker()
mgr.checkers[IndexTrie] = newTRIEChecker()
mgr.checkers[IndexBitmap] = newBITMAPChecker()
mgr.checkers[IndexHybrid] = newHYBRIDChecker()
mgr.checkers["marisa-trie"] = newTRIEChecker()
mgr.checkers[AutoIndex] = newAUTOINDEXChecker()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

func Test_BitmapIndexChecker(t *testing.T) {
c := newBITMAPChecker()
func Test_HybridIndexChecker(t *testing.T) {
c := newHYBRIDChecker()

assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"}))
assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limitt": "100"}))

assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Bool}))
assert.NoError(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Int8}))
Expand All @@ -32,5 +32,5 @@ func Test_BitmapIndexChecker(t *testing.T) {
assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Float}))
assert.Error(t, c.CheckValidDataType(&schemapb.FieldSchema{DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Double}))
assert.Error(t, c.CheckTrain(map[string]string{}))
assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"}))
assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limitt": "0"}))
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@ import (
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

type BITMAPChecker struct {
type HYBRIDhecker struct {
scalarIndexChecker
}

func (c *BITMAPChecker) CheckTrain(params map[string]string) error {
func (c *HYBRIDhecker) CheckTrain(params map[string]string) error {
if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) {
return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt")
return fmt.Errorf("failed to check hybrid cardinality limit, should be larger than 0 and smaller than math.MaxInt")
}
return c.scalarIndexChecker.CheckTrain(params)
}

func (c *BITMAPChecker) CheckValidDataType(field *schemapb.FieldSchema) error {
func (c *HYBRIDhecker) CheckValidDataType(field *schemapb.FieldSchema) error {
main_type := field.GetDataType()
elem_type := field.GetElementType()
if !typeutil.IsBoolType(main_type) && !typeutil.IsIntegerType(main_type) &&
Expand All @@ -36,6 +36,6 @@ func (c *BITMAPChecker) CheckValidDataType(field *schemapb.FieldSchema) error {
return nil
}

func newBITMAPChecker() *BITMAPChecker {
return &BITMAPChecker{}
func newHYBRIDChecker() *HYBRIDhecker {
return &HYBRIDhecker{}
}
Loading

0 comments on commit 7e682fe

Please sign in to comment.