diff --git a/zetaclient/metrics/telemetry.go b/zetaclient/metrics/telemetry.go index 506945859c..7c50e9f248 100644 --- a/zetaclient/metrics/telemetry.go +++ b/zetaclient/metrics/telemetry.go @@ -31,6 +31,7 @@ type TelemetryServer struct { status types.Status ipAddress string HotKeyBurnRate *BurnRate + knownPeers []peer.AddrInfo connectedPeers []peer.AddrInfo rtt map[peer.ID]int64 } @@ -42,6 +43,7 @@ func NewTelemetryServer() *TelemetryServer { lastScannedBlockNumber: make(map[int64]uint64), lastStartTimestamp: time.Now(), HotKeyBurnRate: NewBurnRate(100), + knownPeers: make([]peer.AddrInfo, 0), connectedPeers: make([]peer.AddrInfo, 0), rtt: make(map[peer.ID]int64), } @@ -67,6 +69,20 @@ func (t *TelemetryServer) GetPingRTT() map[peer.ID]int64 { return t.rtt } +// SetKnownPeers stores peers as the known-peers list while holding t.mu. +func (t *TelemetryServer) SetKnownPeers(peers []peer.AddrInfo) { + t.mu.Lock() + defer t.mu.Unlock() + t.knownPeers = peers +} + +// GetKnownPeers returns the stored known-peers slice (not a copy) while holding t.mu. +func (t *TelemetryServer) GetKnownPeers() []peer.AddrInfo { + t.mu.Lock() + defer t.mu.Unlock() + return t.knownPeers +} + func (t *TelemetryServer) SetConnectedPeers(peers []peer.AddrInfo) { t.mu.Lock() defer t.mu.Unlock() @@ -175,6 +191,7 @@ func (t *TelemetryServer) Handlers() http.Handler { router.Handle("/ip", http.HandlerFunc(t.ipHandler)).Methods(http.MethodGet) router.Handle("/hotkeyburnrate", http.HandlerFunc(t.hotKeyFeeBurnRate)).Methods(http.MethodGet) router.Handle("/connectedpeers", http.HandlerFunc(t.connectedPeersHandler)).Methods(http.MethodGet) + router.Handle("/knownpeers", http.HandlerFunc(t.knownPeersHandler)).Methods(http.MethodGet) router.Handle("/pingrtt", http.HandlerFunc(t.pingRTTHandler)).Methods(http.MethodGet) router.Use(logMiddleware()) @@ -283,7 +300,20 @@ func (t *TelemetryServer) connectedPeersHandler(w http.ResponseWriter, _ *http.R peers := t.GetConnectedPeers() data, err := json.Marshal(peers) if err != nil { t.logger.Error().Err(err).Msg("Failed to marshal connected peers") +
http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + fmt.Fprintf(w, "%s", string(data)) +} + +// knownPeersHandler serves the known-peers list as JSON; it is registered for GET /knownpeers. +func (t *TelemetryServer) knownPeersHandler(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + peers := t.GetKnownPeers() + data, err := json.Marshal(peers) + if err != nil { + t.logger.Error().Err(err).Msg("Failed to marshal known peers") http.Error(w, err.Error(), http.StatusInternalServerError) return } diff --git a/zetaclient/tss/healthcheck.go b/zetaclient/tss/healthcheck.go index ea23b17de3..36a5b02c72 100644 --- a/zetaclient/tss/healthcheck.go +++ b/zetaclient/tss/healthcheck.go @@ -11,8 +11,11 @@ import ( "github.com/libp2p/go-libp2p/p2p/protocol/ping" "github.com/prometheus/client_golang/prometheus" "github.com/rs/zerolog" + "github.com/samber/lo" "gitlab.com/thorchain/tss/go-tss/tss" + libp2p_network "github.com/libp2p/go-libp2p/core/network" + maddr "github.com/multiformats/go-multiaddr" "github.com/zeta-chain/node/pkg/bg" "github.com/zeta-chain/node/pkg/ticker" "github.com/zeta-chain/node/zetaclient/logs" @@ -24,6 +27,7 @@ type HealthcheckProps struct { Interval time.Duration WhitelistPeers []peer.ID NumConnectedPeersMetric prometheus.Gauge + NumKnownPeersMetric prometheus.Gauge } // HealthcheckWorker checks the health of the TSS server and its peers. 
@@ -31,6 +35,9 @@ func HealthcheckWorker(ctx context.Context, server *tss.TssServer, p Healthcheck if p.NumConnectedPeersMetric == nil { return errors.New("missing NumConnectedPeersMetric") } + if p.NumKnownPeersMetric == nil { + return errors.New("missing NumKnownPeersMetric") + } if p.Interval == 0 { p.Interval = 30 * time.Second @@ -89,16 +96,32 @@ func HealthcheckWorker(ctx context.Context, server *tss.TssServer, p Healthcheck return nil } - peersCounter := func(_ context.Context, _ *ticker.Ticker) error { + // knownPeersCounter reports server.GetKnownPeers() to the known-peers gauge and to telemetry. + knownPeersCounter := func(_ context.Context, _ *ticker.Ticker) error { peers := server.GetKnownPeers() - p.NumConnectedPeersMetric.Set(float64(len(peers))) - p.Telemetry.SetConnectedPeers(peers) + p.NumKnownPeersMetric.Set(float64(len(peers))) + p.Telemetry.SetKnownPeers(peers) return nil } + // connectedPeersCounter reports one AddrInfo per entry of p2pHost.Network().Conns() to telemetry and the connected-peers gauge. + connectedPeersCounter := func(_ context.Context, _ *ticker.Ticker) error { + p2pHost := server.GetP2PHost() + connectedPeers := lo.Map(p2pHost.Network().Conns(), func(conn libp2p_network.Conn, _ int) peer.AddrInfo { + return peer.AddrInfo{ + ID: conn.RemotePeer(), + Addrs: []maddr.Multiaddr{conn.RemoteMultiaddr()}, + } + }) + p.Telemetry.SetConnectedPeers(connectedPeers) + p.NumConnectedPeersMetric.Set(float64(len(connectedPeers))) + return nil + } + runBackgroundTicker(ctx, pinger, p.Interval, "TSSHealthcheckPeersPing", logger) - runBackgroundTicker(ctx, peersCounter, p.Interval, "TSSHealthcheckPeersCounter", logger) + runBackgroundTicker(ctx, knownPeersCounter, p.Interval, "TSSHealthcheckKnownPeersCounter", logger) + runBackgroundTicker(ctx, connectedPeersCounter, p.Interval, "TSSHealthcheckConnectedPeersCounter", logger) return nil } diff --git a/zetaclient/tss/service.go b/zetaclient/tss/service.go index 7a8391ff89..ed8a034eee 100644 --- a/zetaclient/tss/service.go +++ b/zetaclient/tss/service.go @@ -48,6 +48,7 @@ type Zetacore interface { type Telemetry interface { SetP2PID(id string) SetConnectedPeers(peers []peer.AddrInfo) + SetKnownPeers(peers []peer.AddrInfo) SetPingRTT(peers map[peer.ID]int64) }