From 208c8a2328319c703a869706454e55fd5d8afde9 Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Sun, 8 Sep 2024 18:23:05 +0800 Subject: [PATCH] fix:support config index offsetcache and fix create same index again (#35985) #35971 Signed-off-by: luzhang Co-authored-by: luzhang --- configs/milvus.yaml | 1 + internal/datacoord/index_meta.go | 11 +++--- internal/datacoord/index_service.go | 3 +- internal/querynodev2/segments/segment.go | 5 +++ pkg/util/indexparams/index_params.go | 8 +++++ pkg/util/paramtable/component_param.go | 12 +++++++ tests/python_client/testcases/test_query.py | 38 +++++++++++++++++++-- 7 files changed, 70 insertions(+), 8 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index fcc086544731c..7788ec5181528 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -425,6 +425,7 @@ queryNode: requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default maxRetryTimes: 1 # max retry times for lazy load, 1 by default maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default + indexOffsetCacheEnabled: false # enable index offset cache for some scalar indexes, now is just for bitmap index, enable this param can improve performance for retrieving raw data from index grouping: enabled: true maxNQ: 1000 diff --git a/internal/datacoord/index_meta.go b/internal/datacoord/index_meta.go index 3ff4fe1fae5cf..260953e074bc7 100644 --- a/internal/datacoord/index_meta.go +++ b/internal/datacoord/index_meta.go @@ -38,6 +38,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/util/indexparamcheck" + "github.com/milvus-io/milvus/pkg/util/indexparams" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -191,21 +192,21 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool } useAutoIndex := false - userIndexParamsWithoutMmapKey := make([]*commonpb.KeyValuePair, 0) + userIndexParamsWithoutConfigableKey := make([]*commonpb.KeyValuePair, 0) for _, param := range fieldIndex.UserIndexParams { - if param.Key == common.MmapEnabledKey { + if indexparams.IsConfigableIndexParam(param.Key) { continue } if param.Key == common.IndexTypeKey && param.Value == common.AutoIndexName { useAutoIndex = true } - userIndexParamsWithoutMmapKey = append(userIndexParamsWithoutMmapKey, param) + userIndexParamsWithoutConfigableKey = append(userIndexParamsWithoutConfigableKey, param) } - if len(userIndexParamsWithoutMmapKey) != len(req.GetUserIndexParams()) { + if len(userIndexParamsWithoutConfigableKey) != len(req.GetUserIndexParams()) { return false } - for _, param1 := range userIndexParamsWithoutMmapKey { + for _, param1 := range userIndexParamsWithoutConfigableKey { exist := false for i, param2 := range req.GetUserIndexParams() { if param2.Key == param1.Key && param2.Value == param1.Value { diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go index 977ef2462cd2a..92dcf056230cf 100644 --- a/internal/datacoord/index_service.go +++ b/internal/datacoord/index_service.go @@ -183,6 +183,7 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques zap.String("IndexName", req.GetIndexName()), zap.Int64("fieldID", req.GetFieldID()), zap.Any("TypeParams", req.GetTypeParams()), zap.Any("IndexParams", req.GetIndexParams()), + zap.Any("UserIndexParams", req.GetUserIndexParams()), ) if err := merr.CheckHealthy(s.GetStateCode()); err != nil { @@ -343,7 +344,7 @@ func (s *Server) AlterIndex(ctx context.Context, req *indexpb.AlterIndexRequest) // update index params newIndexParams := UpdateParams(index, index.IndexParams, req.GetParams()) - log.Info("alter index user index params", + log.Info("alter index index params", zap.String("indexName", index.IndexName), zap.Any("params", newIndexParams), ) diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 066043cb4d95e..7b5148fecede5 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -1116,6 +1116,11 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn } } + // set whether enable offset cache for bitmap index + if indexParams["index_type"] == indexparamcheck.IndexBitmap { + indexparams.SetBitmapIndexLoadParams(paramtable.Get(), indexParams) + } + if err := indexparams.AppendPrepareLoadParams(paramtable.Get(), indexParams); err != nil { return err } diff --git a/pkg/util/indexparams/index_params.go b/pkg/util/indexparams/index_params.go index b4876c9c7c24f..62a87984bcd36 100644 --- a/pkg/util/indexparams/index_params.go +++ b/pkg/util/indexparams/index_params.go @@ -299,6 +299,14 @@ func SetDiskIndexBuildParams(indexParams map[string]string, fieldDataSize int64) return nil } +func SetBitmapIndexLoadParams(params *paramtable.ComponentParam, indexParams map[string]string) { + _, exist := indexParams[common.IndexOffsetCacheEnabledKey] + if exist { + return + } + indexParams[common.IndexOffsetCacheEnabledKey] = params.QueryNodeCfg.IndexOffsetCacheEnabled.GetValue() +} + // SetDiskIndexLoadParams set disk index load params with ratio params on queryNode // QueryNode cal load params with ratio params ans cpu count... func SetDiskIndexLoadParams(params *paramtable.ComponentParam, indexParams map[string]string, numRows int64) error { diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index a16565246b252..b0de4c42e73ec 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2358,6 +2358,8 @@ type queryNodeConfig struct { LazyLoadMaxRetryTimes ParamItem `refreshable:"true"` LazyLoadMaxEvictPerRetry ParamItem `refreshable:"true"` + IndexOffsetCacheEnabled ParamItem `refreshable:"true"` + // chunk cache ReadAheadPolicy ParamItem `refreshable:"false"` ChunkCacheWarmingUp ParamItem `refreshable:"true"` @@ -2864,6 +2866,16 @@ Max read concurrency must greater than or equal to 1, and less than or equal to } p.EnableDisk.Init(base.mgr) + p.IndexOffsetCacheEnabled = ParamItem{ + Key: "queryNode.indexOffsetCacheEnabled", + Version: "2.5.0", + DefaultValue: "false", + Doc: "enable index offset cache for some scalar indexes, now is just for bitmap index," + + " enable this param can improve performance for retrieving raw data from index", + Export: true, + } + p.IndexOffsetCacheEnabled.Init(base.mgr) + p.DiskCapacityLimit = ParamItem{ Key: "LOCAL_STORAGE_SIZE", Version: "2.2.0", diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index 41f15b08b402b..4b740f5c08286 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -2703,6 +2703,40 @@ def test_query_string_expr_with_prefixes(self): collection_w.query(expression, output_fields=output_fields, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + @pytest.mark.tags(CaseLabel.L1) + def test_bitmap_alter_offset_cache_param(self): + """ + target: test bitmap index with enable offset cache. + expected: verify create index and load successfully + """ + collection_w, vectors = self.init_collection_general(prefix, insert_data=True,is_index=False, + primary_field=default_int_field_name)[0:2] + + collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="test_vec") + collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"}) + time.sleep(1) + collection_w.load() + expression = 'varchar like "0%"' + result , _ = collection_w.query(expression, output_fields=['varchar']) + res_len = len(result) + collection_w.release() + collection_w.alter_index("bitmap_offset_cache", {'indexoffsetcache.enabled': True}) + collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"}) + collection_w.load() + expression = 'varchar like "0%"' + result , _ = collection_w.query(expression, output_fields=['varchar']) + res_len_new = len(result) + assert res_len_new == res_len + collection_w.release() + collection_w.alter_index("bitmap_offset_cache", {'indexoffsetcache.enabled': False}) + collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"}) + collection_w.load() + expression = 'varchar like "0%"' + result , _ = collection_w.query(expression, output_fields=['varchar']) + res_len_new = len(result) + assert res_len_new == res_len + collection_w.release() + @pytest.mark.tags(CaseLabel.L1) def test_query_string_expr_with_prefixes_auto_index(self): """ @@ -2736,7 +2770,7 @@ def test_query_string_expr_with_prefixes_bitmap(self): primary_field=default_int_field_name)[0:2] collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index") - collection_w.create_index("varchar", index_name="bitmap_auto_index") + collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"}) time.sleep(1) collection_w.load() expression = 'varchar like "0%"' @@ -2782,7 +2816,7 @@ def test_query_string_expr_with_match_bitmap(self): primary_field=default_int_field_name)[0:2] collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index") - collection_w.create_index("varchar", index_name="bitmap_auto_index") + collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"}) time.sleep(1) collection_w.load() expression = 'varchar like "%0%"'