From 46fb47206bf260027bdb370cf985a494006953ec Mon Sep 17 00:00:00 2001 From: wayblink Date: Wed, 17 Jul 2024 18:14:24 +0800 Subject: [PATCH] fix: compaction task not be cleaned correctly 1.fix compaction task not be cleaned correctly 2.add a new parameter to control compaction gc loop interval 3.remove some useless configs of clustering compaction Signed-off-by: wayblink --- configs/milvus.yaml | 5 ++- internal/datacoord/compaction.go | 9 +++-- internal/datacoord/compaction_test.go | 40 +++++++++++++++++++++ pkg/util/paramtable/component_param.go | 36 ++++++------------- pkg/util/paramtable/component_param_test.go | 7 ++-- 5 files changed, 63 insertions(+), 34 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 52b4e525282ea..e39508975ae9f 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -465,18 +465,17 @@ dataCoord: rpcTimeout: 10 maxParallelTaskNum: 10 workerMaxParallelTaskNum: 2 + gcInterval: 1800 # The time interval in seconds for compaction gc + dropTolerance: 86400 # Compaction task will be cleaned after finish longer than this time(in seconds) clustering: enable: true autoEnable: false triggerInterval: 600 - stateCheckInterval: 10 - gcInterval: 600 minInterval: 3600 maxInterval: 259200 newDataRatioThreshold: 0.2 newDataSizeThreshold: 512m timeout: 7200 - dropTolerance: 86400 # clustering compaction will try best to distribute data into segments with size range in [preferSegmentSize, maxSegmentSize]. # data will be clustered by preferSegmentSize, if a cluster is larger than maxSegmentSize, will spilt it into multi segment # buffer between (preferSegmentSize, maxSegmentSize) is left for new data in the same cluster(range), to avoid globally redistribute too often diff --git a/internal/datacoord/compaction.go b/internal/datacoord/compaction.go index 69a0357e6c6f7..118222253f40d 100644 --- a/internal/datacoord/compaction.go +++ b/internal/datacoord/compaction.go @@ -375,8 +375,10 @@ func (c *compactionPlanHandler) loopCheck() { } func (c *compactionPlanHandler) loopClean() { + interval := Params.DataCoordCfg.CompactionGCIntervalInSeconds.GetAsDuration(time.Second) + log.Info("compactionPlanHandler start clean check loop", zap.Any("gc interval", interval)) defer c.stopWg.Done() - cleanTicker := time.NewTicker(30 * time.Minute) + cleanTicker := time.NewTicker(interval) defer cleanTicker.Stop() for { select { @@ -400,10 +402,11 @@ func (c *compactionPlanHandler) cleanCompactionTaskMeta() { for _, tasks := range triggers { for _, task := range tasks { if task.State == datapb.CompactionTaskState_completed || task.State == datapb.CompactionTaskState_cleaned { - duration := time.Since(time.Unix(task.StartTime, 0)).Seconds() - if duration > float64(Params.DataCoordCfg.CompactionDropToleranceInSeconds.GetAsDuration(time.Second)) { + duration := time.Since(time.UnixMilli(task.StartTime)).Seconds() + if duration > float64(Params.DataCoordCfg.CompactionDropToleranceInSeconds.GetAsDuration(time.Second).Seconds()) { // try best to delete meta err := c.meta.DropCompactionTask(task) + log.Debug("drop compaction task meta", zap.Int64("planID", task.PlanID)) if err != nil { log.Warn("fail to drop task", zap.Int64("planID", task.PlanID), zap.Error(err)) } diff --git a/internal/datacoord/compaction_test.go b/internal/datacoord/compaction_test.go index c2d2da70d2122..00829953f7d72 100644 --- a/internal/datacoord/compaction_test.go +++ b/internal/datacoord/compaction_test.go @@ -17,7 +17,9 @@ package datacoord import ( + "context" "testing" + "time" "github.com/cockroachdb/errors" "github.com/samber/lo" @@ -25,6 +27,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus/internal/metastore/kv/binlog" + "github.com/milvus-io/milvus/internal/metastore/kv/datacoord" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -759,6 +762,43 @@ func (s *CompactionPlanHandlerSuite) TestCheckCompaction() { s.Equal(datapb.CompactionTaskState_executing, t.GetState()) } +func (s *CompactionPlanHandlerSuite) TestCompactionGC() { + s.SetupTest() + inTasks := []*datapb.CompactionTask{ + { + PlanID: 1, + Type: datapb.CompactionType_MixCompaction, + State: datapb.CompactionTaskState_completed, + StartTime: time.Now().UnixMilli() - (time.Second * 100000).Milliseconds(), + }, + { + PlanID: 2, + Type: datapb.CompactionType_MixCompaction, + State: datapb.CompactionTaskState_cleaned, + StartTime: time.Now().UnixMilli() - (time.Second * 100000).Milliseconds(), + }, + { + PlanID: 3, + Type: datapb.CompactionType_MixCompaction, + State: datapb.CompactionTaskState_cleaned, + StartTime: time.Now().UnixMilli(), + }, + } + + catalog := &datacoord.Catalog{MetaKv: NewMetaMemoryKV()} + compactionTaskMeta, err := newCompactionTaskMeta(context.TODO(), catalog) + s.NoError(err) + s.handler.meta = &meta{compactionTaskMeta: compactionTaskMeta} + for _, t := range inTasks { + s.handler.meta.SaveCompactionTask(t) + } + + s.handler.cleanCompactionTaskMeta() + // two task should be cleaned, one remains + tasks := s.handler.meta.GetCompactionTaskMeta().GetCompactionTasks() + s.Equal(1, len(tasks)) +} + func (s *CompactionPlanHandlerSuite) TestProcessCompleteCompaction() { s.SetupTest() diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 845d0d718fd7d..625c151ef2cf9 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2904,6 +2904,7 @@ type dataCoordConfig struct { SegmentExpansionRate ParamItem `refreshable:"true"` CompactionTimeoutInSeconds ParamItem `refreshable:"true"` CompactionDropToleranceInSeconds ParamItem `refreshable:"true"` + CompactionGCIntervalInSeconds ParamItem `refreshable:"true"` CompactionCheckIntervalInSeconds ParamItem `refreshable:"false"` SingleCompactionRatioThreshold ParamItem `refreshable:"true"` SingleCompactionDeltaLogMaxSize ParamItem `refreshable:"true"` @@ -2917,12 +2918,9 @@ type dataCoordConfig struct { ClusteringCompactionEnable ParamItem `refreshable:"true"` ClusteringCompactionAutoEnable ParamItem `refreshable:"true"` ClusteringCompactionTriggerInterval ParamItem `refreshable:"true"` - ClusteringCompactionStateCheckInterval ParamItem `refreshable:"true"` - ClusteringCompactionGCInterval ParamItem `refreshable:"true"` ClusteringCompactionMinInterval ParamItem `refreshable:"true"` ClusteringCompactionMaxInterval ParamItem `refreshable:"true"` ClusteringCompactionNewDataSizeThreshold ParamItem `refreshable:"true"` - ClusteringCompactionDropTolerance ParamItem `refreshable:"true"` ClusteringCompactionPreferSegmentSize ParamItem `refreshable:"true"` ClusteringCompactionMaxSegmentSize ParamItem `refreshable:"true"` ClusteringCompactionMaxTrainSizeRatio ParamItem `refreshable:"true"` @@ -3239,11 +3237,19 @@ During compaction, the size of segment # of rows is able to exceed segment max # p.CompactionDropToleranceInSeconds = ParamItem{ Key: "dataCoord.compaction.dropTolerance", Version: "2.4.2", - Doc: "If compaction job is finished for a long time, gc it", + Doc: "Compaction task will be cleaned after finish longer than this time(in seconds)", DefaultValue: "86400", } p.CompactionDropToleranceInSeconds.Init(base.mgr) + p.CompactionGCIntervalInSeconds = ParamItem{ + Key: "dataCoord.compaction.gcInterval", + Version: "2.4.7", + Doc: "The time interval in seconds for compaction gc", + DefaultValue: "1800", + } + p.CompactionGCIntervalInSeconds.Init(base.mgr) + p.CompactionCheckIntervalInSeconds = ParamItem{ Key: "dataCoord.compaction.check.interval", Version: "2.0.0", @@ -3372,20 +3378,6 @@ During compaction, the size of segment # of rows is able to exceed segment max # } p.ClusteringCompactionTriggerInterval.Init(base.mgr) - p.ClusteringCompactionStateCheckInterval = ParamItem{ - Key: "dataCoord.compaction.clustering.stateCheckInterval", - Version: "2.4.6", - DefaultValue: "10", - } - p.ClusteringCompactionStateCheckInterval.Init(base.mgr) - - p.ClusteringCompactionGCInterval = ParamItem{ - Key: "dataCoord.compaction.clustering.gcInterval", - Version: "2.4.6", - DefaultValue: "600", - } - p.ClusteringCompactionGCInterval.Init(base.mgr) - p.ClusteringCompactionMinInterval = ParamItem{ Key: "dataCoord.compaction.clustering.minInterval", Version: "2.4.6", @@ -3417,14 +3409,6 @@ During compaction, the size of segment # of rows is able to exceed segment max # } p.ClusteringCompactionTimeoutInSeconds.Init(base.mgr) - p.ClusteringCompactionDropTolerance = ParamItem{ - Key: "dataCoord.compaction.clustering.dropTolerance", - Version: "2.4.6", - Doc: "If clustering compaction job is finished for a long time, gc it", - DefaultValue: "259200", - } - p.ClusteringCompactionDropTolerance.Init(base.mgr) - p.ClusteringCompactionPreferSegmentSize = ParamItem{ Key: "dataCoord.compaction.clustering.preferSegmentSize", Version: "2.4.6", diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index 57710aaa8f35b..33a768962147e 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -456,6 +456,11 @@ func TestComponentParam(t *testing.T) { params.Save("datacoord.gracefulStopTimeout", "100") assert.Equal(t, 100*time.Second, Params.GracefulStopTimeout.GetAsDuration(time.Second)) + params.Save("dataCoord.compaction.gcInterval", "100") + assert.Equal(t, float64(100), Params.CompactionGCIntervalInSeconds.GetAsDuration(time.Second).Seconds()) + params.Save("dataCoord.compaction.dropTolerance", "100") + assert.Equal(t, float64(100), Params.CompactionDropToleranceInSeconds.GetAsDuration(time.Second).Seconds()) + params.Save("dataCoord.compaction.clustering.enable", "true") assert.Equal(t, true, Params.ClusteringCompactionEnable.GetAsBool()) params.Save("dataCoord.compaction.clustering.newDataSizeThreshold", "10") @@ -466,8 +471,6 @@ func TestComponentParam(t *testing.T) { assert.Equal(t, int64(10*1024*1024), Params.ClusteringCompactionNewDataSizeThreshold.GetAsSize()) params.Save("dataCoord.compaction.clustering.newDataSizeThreshold", "10g") assert.Equal(t, int64(10*1024*1024*1024), Params.ClusteringCompactionNewDataSizeThreshold.GetAsSize()) - params.Save("dataCoord.compaction.clustering.dropTolerance", "86400") - assert.Equal(t, int64(86400), Params.ClusteringCompactionDropTolerance.GetAsInt64()) params.Save("dataCoord.compaction.clustering.maxSegmentSize", "100m") assert.Equal(t, int64(100*1024*1024), Params.ClusteringCompactionMaxSegmentSize.GetAsSize()) params.Save("dataCoord.compaction.clustering.preferSegmentSize", "10m")