From d3bf7a2d2702ce812f036812d0ca0ea60cb37766 Mon Sep 17 00:00:00 2001 From: SimFG Date: Mon, 9 Sep 2024 10:11:06 +0800 Subject: [PATCH] fix: [2.4] delay to start the metric server port (#36085) - issue: #36083 - pr: #36080 /kind improvement Signed-off-by: SimFG --- cmd/roles/roles.go | 16 ++++++++++++++++ internal/http/healthz/healthz_handler.go | 17 ++++++++++++++--- internal/http/server_test.go | 7 ++++++- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go index 02e8138b5fa48..8445971f3be42 100644 --- a/cmd/roles/roles.go +++ b/cmd/roles/roles.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/samber/lo" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -371,6 +372,21 @@ func (mr *MilvusRoles) Run() { paramtable.SetRole(mr.ServerType) } + enableComponents := []bool{ + mr.EnableRootCoord, + mr.EnableProxy, + mr.EnableQueryCoord, + mr.EnableQueryNode, + mr.EnableDataCoord, + mr.EnableDataNode, + mr.EnableIndexCoord, + mr.EnableIndexNode, + } + enableComponents = lo.Filter(enableComponents, func(v bool, _ int) bool { + return v + }) + healthz.SetComponentNum(len(enableComponents)) + expr.Init() expr.Register("param", paramtable.Get()) mr.setupLogger() diff --git a/internal/http/healthz/healthz_handler.go b/internal/http/healthz/healthz_handler.go index 3848eb4c4402a..35679623116c8 100644 --- a/internal/http/healthz/healthz_handler.go +++ b/internal/http/healthz/healthz_handler.go @@ -52,7 +52,8 @@ type HealthResponse struct { } type HealthHandler struct { - indicators []Indicator + indicators []Indicator + indicatorNum int // unregister role when call stop by restful api unregisterLock sync.RWMutex @@ -67,6 +68,10 @@ func Register(indicator Indicator) { defaultHandler.indicators = append(defaultHandler.indicators, indicator) } +func SetComponentNum(num int) { + defaultHandler.indicatorNum = num +} + func UnRegister(role string) { defaultHandler.unregisterLock.Lock() defer defaultHandler.unregisterLock.Unlock() @@ -86,11 +91,13 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) State: "OK", } ctx := context.Background() + healthNum := 0 for _, in := range handler.indicators { handler.unregisterLock.RLock() _, unregistered := handler.unregisteredRoles[in.GetName()] handler.unregisterLock.RUnlock() if unregistered { + healthNum++ continue } code := in.Health(ctx) @@ -98,11 +105,15 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) Name: in.GetName(), Code: code, }) - if code != commonpb.StateCode_Healthy && code != commonpb.StateCode_StandBy { - resp.State = fmt.Sprintf("component %s state is %s", in.GetName(), code.String()) + if code == commonpb.StateCode_Healthy || code == commonpb.StateCode_StandBy { + healthNum++ } } + if healthNum != handler.indicatorNum { + resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", healthNum, handler.indicatorNum) + } + if resp.State == "OK" { w.WriteHeader(http.StatusOK) } else { diff --git a/internal/http/server_test.go b/internal/http/server_test.go index d68a38d2d4b1f..d243bf8ac9abf 100644 --- a/internal/http/server_test.go +++ b/internal/http/server_test.go @@ -101,6 +101,7 @@ func (suite *HTTPServerTestSuite) TestHealthzHandler() { url := "http://localhost:" + DefaultListenPort + "/healthz" client := http.Client{} + healthz.SetComponentNum(1) healthz.Register(&MockIndicator{"m1", commonpb.StateCode_Healthy}) req, _ := http.NewRequest(http.MethodGet, url, nil) @@ -118,6 +119,7 @@ func (suite *HTTPServerTestSuite) TestHealthzHandler() { body, _ = io.ReadAll(resp.Body) suite.Equal("{\"state\":\"OK\",\"detail\":[{\"name\":\"m1\",\"code\":1}]}", string(body)) + healthz.SetComponentNum(2) healthz.Register(&MockIndicator{"m2", commonpb.StateCode_Abnormal}) req, _ = http.NewRequest(http.MethodGet, url, nil) req.Header.Set("Content-Type", "application/json") @@ -125,7 +127,10 @@ func (suite *HTTPServerTestSuite) TestHealthzHandler() { suite.Nil(err) defer resp.Body.Close() body, _ = io.ReadAll(resp.Body) - suite.Equal("{\"state\":\"component m2 state is Abnormal\",\"detail\":[{\"name\":\"m1\",\"code\":1},{\"name\":\"m2\",\"code\":2}]}", string(body)) + respObj := &healthz.HealthResponse{} + err = json.Unmarshal(body, respObj) + suite.NoError(err) + suite.NotEqual("OK", respObj.State) } func (suite *HTTPServerTestSuite) TestEventlogHandler() {