From 5b4f16e6e61c6df499e78f82049337c7a93bcd5f Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 13 Nov 2024 12:51:27 +0800 Subject: [PATCH] fix: Balance channel may get stuck when increasing replica number. Balance channel waits until the new delegator becomes serviceable, but the new delegator must sync the target version before it becomes serviceable, and syncing the target version has to wait until all replicas have finished loading. So if increasing the replica number and balance channel happen at the same time, a logical deadlock occurs. Signed-off-by: Wei Liu --- internal/querycoordv2/observers/target_observer.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/querycoordv2/observers/target_observer.go b/internal/querycoordv2/observers/target_observer.go index c29625c6b56f6..cf63aa99076f6 100644 --- a/internal/querycoordv2/observers/target_observer.go +++ b/internal/querycoordv2/observers/target_observer.go @@ -388,9 +388,8 @@ func (ob *TargetObserver) shouldUpdateCurrentTarget(ctx context.Context, collect }) collectionReadyLeaders = append(collectionReadyLeaders, channelReadyLeaders...) - nodes := lo.Map(channelReadyLeaders, func(view *meta.LeaderView, _ int) int64 { return view.ID }) - group := utils.GroupNodesByReplica(ob.meta.ReplicaManager, collectionID, nodes) - if int32(len(group)) < replicaNum { + // to avoid stuck here in dynamic increase replica case, we just check available delegator number + if int32(len(collectionReadyLeaders)) < replicaNum { log.RatedInfo(10, "channel not ready", zap.Int("readyReplicaNum", len(channelReadyLeaders)), zap.String("channelName", channel),