From ed73de8c7c7cb28296b88074356274c67c550c9e Mon Sep 17 00:00:00 2001 From: skosito Date: Wed, 13 Mar 2024 11:57:05 +0000 Subject: [PATCH] refactor: change important metrics on port 8123 to be prometheus compatible (#1885) * Refactor telemetry metrics * Fix last start time metrics name * Fix help description * Changelog * Fix help desc * Fix desc * PR comment --- changelog.md | 1 + cmd/zetaclientd/start.go | 7 +- zetaclient/bitcoin/bitcoin_client.go | 5 +- zetaclient/evm/evm_client.go | 2 +- zetaclient/metrics/metrics.go | 42 ++++++++-- zetaclient/metrics/telemetry.go | 115 ++------------------------- zetaclient/zetacore_observer.go | 2 +- 7 files changed, 55 insertions(+), 119 deletions(-) diff --git a/changelog.md b/changelog.md index 1ddeb6c4ee..e94fe5602a 100644 --- a/changelog.md +++ b/changelog.md @@ -15,6 +15,7 @@ * [1783](https://github.com/zeta-chain/node/pull/1783) - refactor zetaclient metrics naming and structure * [1774](https://github.com/zeta-chain/node/pull/1774) - split params and config in zetaclient * [1831](https://github.com/zeta-chain/node/pull/1831) - removing unnecessary pointers in context structure +* [1885](https://github.com/zeta-chain/node/pull/1885) - change important metrics on port 8123 to be prometheus compatible ### Features diff --git a/cmd/zetaclientd/start.go b/cmd/zetaclientd/start.go index 3816699c78..f402b26b4d 100644 --- a/cmd/zetaclientd/start.go +++ b/cmd/zetaclientd/start.go @@ -162,12 +162,15 @@ func start(_ *cobra.Command, _ []string) error { } } - metrics, err := metrics.NewMetrics() + m, err := metrics.NewMetrics() if err != nil { log.Error().Err(err).Msg("NewMetrics") return err } - metrics.Start() + m.Start() + + metrics.Info.WithLabelValues(common.Version).Set(1) + metrics.LastStartTime.SetToCurrentTime() var tssHistoricalList []observerTypes.TSS tssHistoricalList, err = zetaBridge.GetTssHistory() diff --git a/zetaclient/bitcoin/bitcoin_client.go b/zetaclient/bitcoin/bitcoin_client.go index bb47a6eaa4..34ddc580ab 100644 --- a/zetaclient/bitcoin/bitcoin_client.go +++ b/zetaclient/bitcoin/bitcoin_client.go @@ -292,8 +292,7 @@ func (ob *BTCChainClient) SetLastBlockHeightScanned(height int64) { panic("lastBlockScanned is negative") } atomic.StoreInt64(&ob.lastBlockScanned, height) - // #nosec G701 checked as positive - ob.ts.SetLastScannedBlockNumber((ob.chain.ChainId), uint64(height)) + metrics.LastScannedBlockNumber.WithLabelValues(ob.chain.ChainName.String()).Set(float64(height)) } func (ob *BTCChainClient) GetLastBlockHeightScanned() int64 { @@ -888,7 +887,7 @@ func (ob *BTCChainClient) FetchUTXOS() error { } ob.Mu.Lock() - ob.ts.SetNumberOfUTXOs(len(utxosFiltered)) + metrics.NumberOfUTXO.Set(float64(len(utxosFiltered))) ob.utxos = utxosFiltered ob.Mu.Unlock() return nil diff --git a/zetaclient/evm/evm_client.go b/zetaclient/evm/evm_client.go index 55e3850a44..736f35254f 100644 --- a/zetaclient/evm/evm_client.go +++ b/zetaclient/evm/evm_client.go @@ -807,7 +807,7 @@ func (ob *ChainClient) CheckTxInclusion(tx *ethtypes.Transaction, receipt *ethty // SetLastBlockHeightScanned set last block height scanned (not necessarily caught up with external block; could be slow/paused) func (ob *ChainClient) SetLastBlockHeightScanned(height uint64) { atomic.StoreUint64(&ob.lastBlockScanned, height) - ob.ts.SetLastScannedBlockNumber(ob.chain.ChainId, height) + metrics.LastScannedBlockNumber.WithLabelValues(ob.chain.ChainName.String()).Set(float64(height)) } // GetLastBlockHeightScanned get last block height scanned (not necessarily caught up with external block; could be slow/paused) diff --git a/zetaclient/metrics/metrics.go b/zetaclient/metrics/metrics.go index a99db9f760..3325d21156 100644 --- a/zetaclient/metrics/metrics.go +++ b/zetaclient/metrics/metrics.go @@ -16,36 +16,68 @@ type Metrics struct { s *http.Server } +const ZetaClientNamespace = "zetaclient" + var ( PendingTxsPerChain = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "zetaclient", + Namespace: ZetaClientNamespace, Name: "pending_txs_total", Help: "Number of pending transactions per chain", }, []string{"chain"}) GetFilterLogsPerChain = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "zetaclient", + Namespace: ZetaClientNamespace, Name: "rpc_getFilterLogs_count", Help: "Count of getLogs per chain", }, []string{"chain"}) GetBlockByNumberPerChain = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "zetaclient", + Namespace: ZetaClientNamespace, Name: "rpc_getBlockByNumber_count", Help: "Count of getLogs per chain", }, []string{"chain"}) TssNodeBlamePerPubKey = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "zetaclient", + Namespace: ZetaClientNamespace, Name: "tss_node_blame_count", Help: "Tss node blame counter per pubkey", }, []string{"pubkey"}) HotKeyBurnRate = promauto.NewGauge(prometheus.GaugeOpts{ - Namespace: "zetaclient", + Namespace: ZetaClientNamespace, Name: "hotkey_burn_rate", Help: "Fee burn rate of the hotkey", }) + + NumberOfUTXO = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: ZetaClientNamespace, + Name: "utxo_number", + Help: "Number of UTXOs", + }) + + LastScannedBlockNumber = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: ZetaClientNamespace, + Name: "last_scanned_block_number", + Help: "Last scanned block number per chain", + }, []string{"chain"}) + + LastCoreBlockNumber = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: ZetaClientNamespace, + Name: "last_core_block_number", + Help: "Last core block number", + }) + + Info = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: ZetaClientNamespace, + Name: "info", + Help: "Information about Zetaclient environment", + }, []string{"version"}) + + LastStartTime = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: ZetaClientNamespace, + Name: "last_start_timestamp_seconds", + Help: "Start time in Unix time", + }) ) func NewMetrics() (*Metrics, error) { diff --git a/zetaclient/metrics/telemetry.go b/zetaclient/metrics/telemetry.go index 07ca32ae35..209bf61fed 100644 --- a/zetaclient/metrics/telemetry.go +++ b/zetaclient/metrics/telemetry.go @@ -2,7 +2,6 @@ package metrics import ( "context" - "encoding/json" "errors" "fmt" "net/http" @@ -12,31 +11,23 @@ import ( "github.com/gorilla/mux" "github.com/rs/zerolog" "github.com/rs/zerolog/log" - "github.com/zeta-chain/zetacore/common" - "github.com/zeta-chain/zetacore/zetaclient/types" ) // TelemetryServer provide http endpoint for Tss server type TelemetryServer struct { - logger zerolog.Logger - s *http.Server - p2pid string - lastScannedBlockNumber map[int64]uint64 // chainid => block number - lastCoreBlockNumber int64 - mu sync.Mutex - lastStartTimestamp time.Time - status types.Status - ipAddress string - HotKeyBurnRate *BurnRate + logger zerolog.Logger + s *http.Server + p2pid string + mu sync.Mutex + ipAddress string + HotKeyBurnRate *BurnRate } // NewTelemetryServer should only listen to the loopback func NewTelemetryServer() *TelemetryServer { hs := &TelemetryServer{ - logger: log.With().Str("module", "http").Logger(), - lastScannedBlockNumber: make(map[int64]uint64), - lastStartTimestamp: time.Now(), - HotKeyBurnRate: NewBurnRate(100), + logger: log.With().Str("module", "http").Logger(), + HotKeyBurnRate: NewBurnRate(100), } s := &http.Server{ Addr: ":8123", @@ -48,12 +39,6 @@ func NewTelemetryServer() *TelemetryServer { return hs } -func (t *TelemetryServer) GetLastStartTimestamp() time.Time { - t.mu.Lock() - defer t.mu.Unlock() - return t.lastStartTimestamp -} - // setter/getter for p2pid func (t *TelemetryServer) SetP2PID(p2pid string) { t.mu.Lock() @@ -80,37 +65,6 @@ func (t *TelemetryServer) GetIPAddress() string { return t.ipAddress } -// setter for lastScanned block number -func (t *TelemetryServer) SetLastScannedBlockNumber(chainID int64, blockNumber uint64) { - t.mu.Lock() - t.lastScannedBlockNumber[chainID] = blockNumber - t.mu.Unlock() -} - -func (t *TelemetryServer) GetLastScannedBlockNumber(chainID int64) uint64 { - t.mu.Lock() - defer t.mu.Unlock() - return t.lastScannedBlockNumber[chainID] -} - -func (t *TelemetryServer) SetCoreBlockNumber(blockNumber int64) { - t.mu.Lock() - t.lastCoreBlockNumber = blockNumber - t.mu.Unlock() -} - -func (t *TelemetryServer) GetCoreBlockNumber() int64 { - t.mu.Lock() - defer t.mu.Unlock() - return t.lastCoreBlockNumber -} - -func (t *TelemetryServer) SetNumberOfUTXOs(numberOfUTXOs int) { - t.mu.Lock() - t.status.BTCNumberOfUTXOs = numberOfUTXOs - t.mu.Unlock() -} - func (t *TelemetryServer) AddFeeEntry(block int64, amount int64) { t.mu.Lock() err := t.HotKeyBurnRate.AddFee(amount, block) @@ -125,11 +79,6 @@ func (t *TelemetryServer) Handlers() http.Handler { router := mux.NewRouter() router.Handle("/ping", http.HandlerFunc(t.pingHandler)).Methods(http.MethodGet) router.Handle("/p2p", http.HandlerFunc(t.p2pHandler)).Methods(http.MethodGet) - router.Handle("/version", http.HandlerFunc(t.versionHandler)).Methods(http.MethodGet) - router.Handle("/lastscannedblock", http.HandlerFunc(t.lastScannedBlockHandler)).Methods(http.MethodGet) - router.Handle("/laststarttimestamp", http.HandlerFunc(t.lastStartTimestampHandler)).Methods(http.MethodGet) - router.Handle("/lastcoreblock", http.HandlerFunc(t.lastCoreBlockHandler)).Methods(http.MethodGet) - router.Handle("/status", http.HandlerFunc(t.statusHandler)).Methods(http.MethodGet) router.Handle("/ip", http.HandlerFunc(t.ipHandler)).Methods(http.MethodGet) router.Handle("/hotkeyburnrate", http.HandlerFunc(t.hotKeyFeeBurnRate)).Methods(http.MethodGet) @@ -198,54 +147,6 @@ func (t *TelemetryServer) ipHandler(w http.ResponseWriter, _ *http.Request) { fmt.Fprintf(w, "%s", t.ipAddress) } -func (t *TelemetryServer) lastScannedBlockHandler(w http.ResponseWriter, _ *http.Request) { - //w.WriteHeader(http.StatusOK) - w.Header().Set("Content-Type", "application/json") - - t.mu.Lock() - defer t.mu.Unlock() - // Convert map to JSON - jsonBytes, err := json.Marshal(t.lastScannedBlockNumber) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - _, err = w.Write(jsonBytes) - if err != nil { - t.logger.Error().Err(err).Msg("Failed to write response") - } -} - -func (t *TelemetryServer) lastCoreBlockHandler(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - t.mu.Lock() - defer t.mu.Unlock() - fmt.Fprintf(w, "%d", t.lastCoreBlockNumber) -} - -func (t *TelemetryServer) statusHandler(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - t.mu.Lock() - defer t.mu.Unlock() - s, err := json.MarshalIndent(t.status, "", "\t") - if err != nil { - t.logger.Error().Err(err).Msg("Failed to marshal status") - } - fmt.Fprintf(w, "%s", s) -} - -func (t *TelemetryServer) versionHandler(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - fmt.Fprintf(w, "%s", common.Version) -} - -func (t *TelemetryServer) lastStartTimestampHandler(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - t.mu.Lock() - defer t.mu.Unlock() - fmt.Fprintf(w, "%s", t.lastStartTimestamp.Format(time.RFC3339)) -} - func (t *TelemetryServer) hotKeyFeeBurnRate(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) t.mu.Lock() diff --git a/zetaclient/zetacore_observer.go b/zetaclient/zetacore_observer.go index 6d3320c746..429c209287 100644 --- a/zetaclient/zetacore_observer.go +++ b/zetaclient/zetacore_observer.go @@ -168,7 +168,7 @@ func (co *CoreObserver) startCctxScheduler(appContext *appcontext.AppContext) { } // update last processed block number lastBlockNum = bn - co.ts.SetCoreBlockNumber(lastBlockNum) + metrics.LastCoreBlockNumber.Set(float64(lastBlockNum)) } } }