From ca76c95ca364173e0db1380d870f75d88eecf74a Mon Sep 17 00:00:00 2001 From: chyezh Date: Tue, 24 Dec 2024 20:11:17 +0800 Subject: [PATCH] enhance: add multiply factor when loading index Signed-off-by: chyezh --- configs/milvus.yaml | 2 ++ internal/querynodev2/segments/segment.go | 3 ++- pkg/util/paramtable/component_param.go | 12 ++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index a8d10251b48f2..5b50aa7a7efb9 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -589,6 +589,8 @@ dataCoord: maxClusterSizeRatio: 10 # maximum cluster size / avg size in Kmeans train maxClusterSize: 5g # maximum cluster size in Kmeans train syncSegmentsInterval: 300 # The time interval for regularly syncing segments + index: + memSizeEstimateMultiplier: 2 # When the memory size is not setup by index procedure, multiplier to estimate the memory size of index data enableGarbageCollection: true # Switch value to control if to enable garbage collection to clear the discarded data in MinIO or S3 service. gc: interval: 3600 # The interval at which data coord performs garbage collection, unit: second. diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 00509135cb8d2..46509437f498c 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -1346,7 +1346,8 @@ func GetCLoadInfoWithFunc(ctx context.Context, IndexFiles: indexInfo.GetIndexFilePaths(), IndexEngineVersion: indexInfo.GetCurrentIndexVersion(), IndexStoreVersion: indexInfo.GetIndexStoreVersion(), - IndexFileSize: indexInfo.GetIndexSize(), + // TODO: As a quick fix, the multiplier is applied here; this logic should move to the datacoord after we add the mem size for each index. + IndexFileSize: int64(paramtable.Get().DataCoordCfg.IndexMemSizeEstimateMultiplier.GetAsFloat() * float64(indexInfo.GetIndexSize())), } // 2. 
diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index f2732e2083c87..5631ff20d0daf 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -3273,6 +3273,9 @@ type dataCoordConfig struct { ChannelCheckpointMaxLag ParamItem `refreshable:"true"` SyncSegmentsInterval ParamItem `refreshable:"false"` + // Index related configuration + IndexMemSizeEstimateMultiplier ParamItem `refreshable:"true"` + // Clustering Compaction ClusteringCompactionEnable ParamItem `refreshable:"true"` ClusteringCompactionAutoEnable ParamItem `refreshable:"true"` @@ -3749,6 +3752,15 @@ During compaction, the size of segment # of rows is able to exceed segment max # } p.LevelZeroCompactionTriggerDeltalogMaxNum.Init(base.mgr) + p.IndexMemSizeEstimateMultiplier = ParamItem{ + Key: "dataCoord.index.memSizeEstimateMultiplier", + Version: "2.4.19", + DefaultValue: "2", + Doc: "When the memory size is not setup by index procedure, multiplier to estimate the memory size of index data", + Export: true, + } + p.IndexMemSizeEstimateMultiplier.Init(base.mgr) + p.ClusteringCompactionEnable = ParamItem{ Key: "dataCoord.compaction.clustering.enable", Version: "2.4.7",