Skip to content

Commit

Permalink
enhance: avoid the coexistence of the old coordinator and the new nod…
Browse files Browse the repository at this point in the history
…e/proxy (#35760)

issue: #35719
pr: #35720 
In standalone mode, block the start process until the new coordinator is
active, to avoid the coexistence of the old coordinator and the new
node/proxy:
1. In the start/restart process, the new coordinator becomes active
immediately, so startup is not blocked.
2. In the rolling upgrade process, the new coordinator does not become
active until the old coordinator is down, so startup is blocked until then.

Signed-off-by: Wei Liu <[email protected]>
  • Loading branch information
weiliu1031 authored Aug 29, 2024
1 parent 8928c9d commit 50ec3dd
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions cmd/roles/roles.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,35 @@ func (mr *MilvusRoles) Run() {
componentMap[typeutil.QueryCoordRole] = queryCoord
}

// waitCoordBecomeHealthy blocks until rootCoord, dataCoord, indexCoord and
// queryCoord all report commonpb.StateCode_Healthy, or until ctx is cancelled.
// The coordinators are polled roughly once per second, and the states observed
// on every unsuccessful poll are logged.
// NOTE(review): assumes all four coordinators have been started and are
// non-nil whenever this is called — confirm for every configuration in which
// mr.Local is set.
waitCoordBecomeHealthy := func() {
	for {
		// Do not poll at all once the run context has been cancelled.
		if ctx.Err() != nil {
			log.Info("wait all coord become healthy loop quit")
			return
		}

		rcState := rootCoord.Health(ctx)
		dcState := dataCoord.Health(ctx)
		icState := indexCoord.Health(ctx)
		qcState := queryCoord.Health(ctx)

		if rcState == commonpb.StateCode_Healthy && dcState == commonpb.StateCode_Healthy && icState == commonpb.StateCode_Healthy && qcState == commonpb.StateCode_Healthy {
			log.Info("all coord become healthy")
			return
		}
		log.Info("wait all coord become healthy", zap.String("rootCoord", rcState.String()), zap.String("dataCoord", dcState.String()), zap.String("indexCoord", icState.String()), zap.String("queryCoord", qcState.String()))

		// Wait before the next poll, but wake up immediately on cancellation
		// instead of sleeping through it as a bare time.Sleep would.
		retry := time.NewTimer(time.Second)
		select {
		case <-ctx.Done():
			retry.Stop()
			log.Info("wait all coord become healthy loop quit")
			return
		case <-retry.C:
		}
	}
}

// In standalone mode (mr.Local), block the start process here until waitCoordBecomeHealthy returns,
// to avoid the coexistence of the old coordinator and the new node/proxy.
// 1. In the start/restart process, the new coordinator becomes active immediately, so startup is not blocked.
// 2. In the rolling upgrade process, the new coordinator does not become active until the old coordinator is down,
//    so startup is blocked until then.
if mr.Local {
waitCoordBecomeHealthy()
}

if mr.EnableQueryNode {
queryNode = mr.runQueryNode(ctx, local, &wg)
componentMap[typeutil.QueryNodeRole] = queryNode
Expand Down

0 comments on commit 50ec3dd

Please sign in to comment.