From 50ec3dd4db96820f151582ca32f6909ba6de2ffe Mon Sep 17 00:00:00 2001
From: wei liu
Date: Thu, 29 Aug 2024 12:33:01 +0800
Subject: [PATCH] enhance: avoid the coexistence of the old coordinator and the new node/proxy (#35760)

issue: #35719
pr: #35720

In standalone mode, block the start process until the new coordinator is active to avoid the coexistence of the old coordinator and the new node/proxy

1. In the start/restart process, the new coordinator will become active immediately and will not be blocked
2. In the rolling upgrade process, the new coordinator will not be active until the old coordinator is down, and it will be blocked

Signed-off-by: Wei Liu
---
Reviewer notes (commentary only; this region between the "---" separator
and the diffstat is neither commit message nor diff content):

  * waitCoordBecomeHealthy is a closure declared inside
    (*MilvusRoles).Run(). It loops until either ctx is done or
    rootCoord, dataCoord, indexCoord and queryCoord all return
    commonpb.StateCode_Healthy from Health(ctx). After every poll that
    is not fully healthy it logs the four states at Info level and
    sleeps for one second.

  * The closure is invoked only when mr.Local is true. The call is
    placed after the QueryCoord entry is stored in componentMap and
    before the "if mr.EnableQueryNode" block, so Run() does not reach
    that block until the closure returns.

  * NOTE(review): Health(ctx) is called on all four coordinator
    variables unconditionally. This hunk does not show whether each of
    them is guaranteed to be set whenever mr.Local is true; confirm
    against the rest of Run() before merging.

  * NOTE(review): ctx.Done() is only checked at the top of each
    iteration. time.Sleep(time.Second) is not interrupted by
    cancellation, so the "loop quit" path can lag a cancel by up to
    one second.

  * NOTE(review): there is no upper bound on the wait. The commit
    message describes the rolling-upgrade case as intentionally
    blocking, so this looks deliberate; the Info log repeats once per
    second for as long as the wait lasts.

  * NOTE(review): the From: and Signed-off-by: headers carry no e-mail
    address in this copy of the patch; verify the headers before
    feeding it to git am.

 cmd/roles/roles.go | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go
index bdb43c7da6414..e886b0c82affd 100644
--- a/cmd/roles/roles.go
+++ b/cmd/roles/roles.go
@@ -415,6 +415,35 @@ func (mr *MilvusRoles) Run() {
 		componentMap[typeutil.QueryCoordRole] = queryCoord
 	}
 
+	waitCoordBecomeHealthy := func() {
+		for {
+			select {
+			case <-ctx.Done():
+				log.Info("wait all coord become healthy loop quit")
+				return
+			default:
+				rcState := rootCoord.Health(ctx)
+				dcState := dataCoord.Health(ctx)
+				icState := indexCoord.Health(ctx)
+				qcState := queryCoord.Health(ctx)
+
+				if rcState == commonpb.StateCode_Healthy && dcState == commonpb.StateCode_Healthy && icState == commonpb.StateCode_Healthy && qcState == commonpb.StateCode_Healthy {
+					log.Info("all coord become healthy")
+					return
+				}
+				log.Info("wait all coord become healthy", zap.String("rootCoord", rcState.String()), zap.String("dataCoord", dcState.String()), zap.String("indexCoord", icState.String()), zap.String("queryCoord", qcState.String()))
+				time.Sleep(time.Second)
+			}
+		}
+	}
+
+	// In standalone mode, block the start process until the new coordinator is active to avoid the coexistence of the old coordinator and the new node/proxy
+	// 1. In the start/restart process, the new coordinator will become active immediately and will not be blocked
+	// 2. In the rolling upgrade process, the new coordinator will not be active until the old coordinator is down, and it will be blocked
+	if mr.Local {
+		waitCoordBecomeHealthy()
+	}
+
 	if mr.EnableQueryNode {
 		queryNode = mr.runQueryNode(ctx, local, &wg)
 		componentMap[typeutil.QueryNodeRole] = queryNode