From cfa1f1f141aa2f1e7ae57882711becc0bce4864e Mon Sep 17 00:00:00 2001 From: congqixia Date: Fri, 22 Nov 2024 10:20:33 +0800 Subject: [PATCH] enhance: Add thread watcher to provide actual thread num (#37905) Related to #37904 --------- Signed-off-by: Congqi Xia --- cmd/roles/roles.go | 5 ++ go.mod | 10 ++-- go.sum | 19 +++++-- internal/util/metrics/thread.go | 87 +++++++++++++++++++++++++++++++++ pkg/metrics/info_metrics.go | 8 +++ pkg/metrics/metrics.go | 1 + 6 files changed, 121 insertions(+), 9 deletions(-) create mode 100644 internal/util/metrics/thread.go diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go index ff0c498c0369f..4698ede3259ad 100644 --- a/cmd/roles/roles.go +++ b/cmd/roles/roles.go @@ -347,6 +347,11 @@ func (mr *MilvusRoles) Run() { mr.printLDPreLoad() + // start milvus thread watcher to update actual thread number metrics + thw := internalmetrics.NewThreadWatcher() + thw.Start() + defer thw.Stop() + // only standalone enable localMsg if mr.Local { if err := os.Setenv(metricsinfo.DeployModeEnvKey, metricsinfo.StandaloneDeployMode); err != nil { diff --git a/go.mod b/go.mod index 63da99beea9a6..b53b281c0bcc8 100644 --- a/go.mod +++ b/go.mod @@ -70,6 +70,7 @@ require ( github.com/milvus-io/milvus/pkg v0.0.2-0.20240801085213-a642a26ed4c6 github.com/pkg/errors v0.9.1 github.com/remeh/sizedwaitgroup v1.0.0 + github.com/shirou/gopsutil/v4 v4.24.10 github.com/tidwall/gjson v1.17.1 github.com/valyala/fastjson v1.6.4 github.com/zeebo/xxh3 v1.0.2 @@ -119,6 +120,7 @@ require ( github.com/docker/go-units v0.4.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/dvsekhvalnov/jose2go v1.6.0 // indirect + github.com/ebitengine/purego v0.8.1 // indirect github.com/expr-lang/expr v1.15.7 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect @@ -216,8 +218,8 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect 
github.com/tikv/pd/client v0.0.0-20221031025758-80f0d8ca4d07 // indirect - github.com/tklauser/go-sysconf v0.3.10 // indirect - github.com/tklauser/numcpus v0.4.0 // indirect + github.com/tklauser/go-sysconf v0.3.12 // indirect + github.com/tklauser/numcpus v0.6.1 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/twmb/murmur3 v1.1.3 // indirect @@ -225,7 +227,7 @@ require ( github.com/ugorji/go/codec v1.2.11 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect - github.com/yusufpapurcu/wmi v1.2.2 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect go.etcd.io/bbolt v1.3.6 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect go.etcd.io/etcd/client/v2 v2.305.5 // indirect @@ -244,7 +246,7 @@ require ( go.uber.org/automaxprocs v1.5.3 // indirect golang.org/x/arch v0.3.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/term v0.22.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect diff --git a/go.sum b/go.sum index 8542171ec5aaa..5ce59aedb4722 100644 --- a/go.sum +++ b/go.sum @@ -231,6 +231,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/dvsekhvalnov/jose2go v1.6.0 h1:Y9gnSnP4qEI0+/uQkHvFXeD2PLPJeXEL+ySMEA2EjTY= github.com/dvsekhvalnov/jose2go v1.6.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= +github.com/ebitengine/purego v0.8.1 h1:sdRKd6plj7KYW33EH5As6YKfe8m9zbN9JMrOjNVF/BE= +github.com/ebitengine/purego v0.8.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod 
h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -806,6 +808,8 @@ github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAm github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shirou/gopsutil/v3 v3.22.9 h1:yibtJhIVEMcdw+tCTbOPiF1VcsuDeTE4utJ8Dm4c5eA= github.com/shirou/gopsutil/v3 v3.22.9/go.mod h1:bBYl1kjgEJpWpxeHmLI+dVHWtyAwfcmSBLDsp2TNT8A= +github.com/shirou/gopsutil/v4 v4.24.10 h1:7VOzPtfw/5YDU+jLEoBwXwxJbQetULywoSV4RYY7HkM= +github.com/shirou/gopsutil/v4 v4.24.10/go.mod h1:s4D/wg+ag4rG0WO7AiTj2BeYCRhym0vM7DHbZRxnIT8= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= @@ -883,10 +887,12 @@ github.com/tikv/client-go/v2 v2.0.4 h1:cPtMXTExqjzk8L40qhrgB/mXiBXKP5LRU0vwjtI2X github.com/tikv/client-go/v2 v2.0.4/go.mod h1:v52O5zDtv2BBus4lm5yrSQhxGW4Z4RaXWfg0U1Kuyqo= github.com/tikv/pd/client v0.0.0-20221031025758-80f0d8ca4d07 h1:ckPpxKcl75mO2N6a4cJXiZH43hvcHPpqc9dh1TmH1nc= github.com/tikv/pd/client v0.0.0-20221031025758-80f0d8ca4d07/go.mod h1:CipBxPfxPUME+BImx9MUYXCnAVLS3VJUr3mnSJwh40A= -github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw= github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk= -github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o= +github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= +github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= 
github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ= +github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= +github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA= github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= @@ -929,8 +935,9 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= @@ -1241,8 +1248,10 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 
h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= diff --git a/internal/util/metrics/thread.go b/internal/util/metrics/thread.go new file mode 100644 index 0000000000000..9e3489fa078c8 --- /dev/null +++ b/internal/util/metrics/thread.go @@ -0,0 +1,87 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package metrics
+
+import (
+	"os"
+	"sync"
+	"time"
+
+	"github.com/shirou/gopsutil/v4/process"
+	"go.uber.org/zap"
+
+	"github.com/milvus-io/milvus/pkg/log"
+	"github.com/milvus-io/milvus/pkg/metrics"
+)
+
+// threadWatcher is the utility to update the milvus process thread number metrics.
+// The os thread number metrics is not accurate since it only returns the thread number
+// used by the golang "normal" runtime, and the crucial threads on the cpp side are not
+// included.
+type threadWatcher struct {
+	// startOnce makes Start spawn the watcher goroutine at most once.
+	startOnce sync.Once
+	// stopOnce makes Stop close ch at most once.
+	stopOnce sync.Once
+	// wg tracks the watcher goroutine so Stop can wait for it to finish.
+	wg sync.WaitGroup
+	// ch is closed by Stop to signal the watcher goroutine to exit.
+	ch chan struct{}
+}
+
+// NewThreadWatcher creates a threadWatcher that is not started yet.
+func NewThreadWatcher() *threadWatcher {
+	return &threadWatcher{
+		ch: make(chan struct{}),
+	}
+}
+
+// Start launches the background goroutine that watches the thread number.
+// Calling Start more than once has no additional effect.
+func (thw *threadWatcher) Start() {
+	thw.startOnce.Do(func() {
+		thw.wg.Add(1)
+		go func() {
+			defer thw.wg.Done()
+			thw.watchThreadNum()
+		}()
+	})
+}
+
+// watchThreadNum reads the thread number of the current process every 30 seconds
+// and sets it on the metrics.ThreadNum gauge. It returns when the process handle
+// cannot be created or when thw.ch is closed by Stop.
+func (thw *threadWatcher) watchThreadNum() {
+	ticker := time.NewTicker(time.Second * 30)
+	defer ticker.Stop()
+	pid := os.Getpid()
+	p, err := process.NewProcess(int32(pid))
+	if err != nil {
+		log.Warn("thread watcher failed to get milvus process info, quit", zap.Int("pid", pid), zap.Error(err))
+		return
+	}
+	for {
+		select {
+		case <-ticker.C:
+			threadNum, err := p.NumThreads()
+			if err != nil {
+				// a failed read is not fatal, try again on the next tick
+				log.Warn("thread watcher failed to get thread num", zap.Int("pid", pid), zap.Error(err))
+				continue
+			}
+			log.Debug("thread watcher observe thread num", zap.Int32("threadNum", threadNum))
+			metrics.ThreadNum.Set(float64(threadNum))
+		case <-thw.ch:
+			log.Info("thread watcher exit")
+			// The return is required: a receive on a closed channel is always ready,
+			// so staying in the loop would spin forever and Stop would never get
+			// past wg.Wait.
+			return
+		}
+	}
+}
+
+// Stop signals the watcher goroutine to exit and waits until it has finished.
+// Calling Stop more than once has no additional effect.
+func (thw *threadWatcher) Stop() {
+	thw.stopOnce.Do(func() {
+		close(thw.ch)
+		thw.wg.Wait()
+	})
+}
diff --git a/pkg/metrics/info_metrics.go b/pkg/metrics/info_metrics.go
index 9fbd5790928fb..efe84e96f0d5e 100644
--- a/pkg/metrics/info_metrics.go
+++ b/pkg/metrics/info_metrics.go
@@ -35,6 +35,14 @@ var (
 			"meta",
 		},
 	)
+
+	ThreadNum = prometheus.NewGauge(
+		prometheus.GaugeOpts{
+			Namespace: milvusNamespace,
+			Name:      "thread_num",
+			Help:      "the actual thread number of milvus process",
+		},
+	)
 )
 
 // RegisterMQType registers the type of mq
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index 1f7a1ae64841c..0c1a1e6da4e14 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -181,5 +181,6 @@ func Register(r prometheus.Registerer) {
 	r.MustRegister(LockCosts)
 	r.MustRegister(BuildInfo)
 	r.MustRegister(RuntimeInfo)
+	r.MustRegister(ThreadNum)
 	metricRegisterer = r
 }