diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go
index bdb43c7da6414..e886b0c82affd 100644
--- a/cmd/roles/roles.go
+++ b/cmd/roles/roles.go
@@ -415,6 +415,53 @@ func (mr *MilvusRoles) Run() {
 		componentMap[typeutil.QueryCoordRole] = queryCoord
 	}
 
+	// waitCoordBecomeHealthy polls the Health of rootCoord, dataCoord,
+	// indexCoord and queryCoord roughly once per second and returns once all
+	// four report StateCode_Healthy, or as soon as ctx is cancelled.
+	// NOTE(review): calls Health on all four coordinators unconditionally;
+	// confirm none of them can be nil when mr.Local is set.
+	waitCoordBecomeHealthy := func() {
+		for {
+			if ctx.Err() != nil {
+				log.Info("wait all coord become healthy loop quit")
+				return
+			}
+
+			rcState := rootCoord.Health(ctx)
+			dcState := dataCoord.Health(ctx)
+			icState := indexCoord.Health(ctx)
+			qcState := queryCoord.Health(ctx)
+
+			if rcState == commonpb.StateCode_Healthy &&
+				dcState == commonpb.StateCode_Healthy &&
+				icState == commonpb.StateCode_Healthy &&
+				qcState == commonpb.StateCode_Healthy {
+				log.Info("all coord become healthy")
+				return
+			}
+			log.Info("wait all coord become healthy",
+				zap.String("rootCoord", rcState.String()),
+				zap.String("dataCoord", dcState.String()),
+				zap.String("indexCoord", icState.String()),
+				zap.String("queryCoord", qcState.String()))
+
+			// Wait before the next poll, but wake immediately on cancellation
+			// instead of sleeping through it; the check at the top of the loop
+			// then logs and returns.
+			select {
+			case <-ctx.Done():
+			case <-time.After(time.Second):
+			}
+		}
+	}
+
+	// In standalone mode, block the start process until the new coordinator is active to avoid the coexistence of the old coordinator and the new node/proxy
+	// 1. In the start/restart process, the new coordinator will become active immediately and will not be blocked
+	// 2. In the rolling upgrade process, the new coordinator will not be active until the old coordinator is down, and it will be blocked
+	if mr.Local {
+		waitCoordBecomeHealthy()
+	}
+
 	if mr.EnableQueryNode {
 		queryNode = mr.runQueryNode(ctx, local, &wg)
 		componentMap[typeutil.QueryNodeRole] = queryNode