Skip to content

Commit

Permalink
fix: Skip unnecessary query node health check in proxy (#36491)
Browse files Browse the repository at this point in the history
issue: #36490
After a query node changes from a delegator to a worker, the proxy should
skip that query node's health check.

Signed-off-by: Wei Liu <[email protected]>
  • Loading branch information
weiliu1031 authored Sep 26, 2024
1 parent 55be814 commit c056620
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
1 change: 1 addition & 0 deletions internal/proxy/look_aside_balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ func (b *LookAsideBalancer) checkQueryNodeHealthLoop(ctx context.Context) {
qn, err := b.clientMgr.GetClient(ctx, node)
if err != nil {
// get client from clientMgr failed, which means this qn isn't a shard leader anymore, skip its health check
b.trySetQueryNodeUnReachable(node, err)
log.RatedInfo(10, "get client failed", zap.Int64("node", node), zap.Error(err))
return struct{}{}, nil
}
Expand Down
11 changes: 4 additions & 7 deletions internal/proxy/look_aside_balancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"go.uber.org/atomic"

"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
Expand Down Expand Up @@ -344,12 +343,10 @@ func (suite *LookAsideBalancerSuite) TestGetClientFailed() {
// test get shard client from client mgr return nil
suite.clientMgr.ExpectedCalls = nil
suite.clientMgr.EXPECT().GetClient(mock.Anything, int64(2)).Return(nil, errors.New("shard client not found"))
failCounter := atomic.NewInt64(0)
suite.balancer.failedHeartBeatCounter.Insert(2, failCounter)

// sleep 10s, wait for checkNodeHealth to execute for more than one round
time.Sleep(10 * time.Second)
suite.True(failCounter.Load() == 0)
// expect the health check to stop after the failure count reaches the limit
suite.Eventually(func() bool {
return !suite.balancer.metricsMap.Contain(2)
}, 30*time.Second, 1*time.Second)
}

func (suite *LookAsideBalancerSuite) TestNodeRecover() {
Expand Down

0 comments on commit c056620

Please sign in to comment.