Skip to content

Commit

Permalink
fix(zetaclient): distinguish between known and connected peers
Browse files Browse the repository at this point in the history
  • Loading branch information
gartnera committed Nov 25, 2024
1 parent cfcf706 commit c7bbd4a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
29 changes: 28 additions & 1 deletion zetaclient/metrics/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type TelemetryServer struct {
status types.Status
ipAddress string
HotKeyBurnRate *BurnRate
knownPeers []peer.AddrInfo
connectedPeers []peer.AddrInfo
rtt map[peer.ID]int64
}
Expand All @@ -42,6 +43,7 @@ func NewTelemetryServer() *TelemetryServer {
lastScannedBlockNumber: make(map[int64]uint64),
lastStartTimestamp: time.Now(),
HotKeyBurnRate: NewBurnRate(100),
knownPeers: make([]peer.AddrInfo, 0),

Check warning on line 46 in zetaclient/metrics/telemetry.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/metrics/telemetry.go#L46

Added line #L46 was not covered by tests
connectedPeers: make([]peer.AddrInfo, 0),
rtt: make(map[peer.ID]int64),
}
Expand All @@ -67,6 +69,18 @@ func (t *TelemetryServer) GetPingRTT() map[peer.ID]int64 {
return t.rtt
}

func (t *TelemetryServer) SetKnownPeers(peers []peer.AddrInfo) {
t.mu.Lock()
defer t.mu.Unlock()
t.knownPeers = peers

Check warning on line 75 in zetaclient/metrics/telemetry.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/metrics/telemetry.go#L72-L75

Added lines #L72 - L75 were not covered by tests
}

func (t *TelemetryServer) GetKnownPeers() []peer.AddrInfo {
t.mu.Lock()
defer t.mu.Unlock()
return t.knownPeers

Check warning on line 81 in zetaclient/metrics/telemetry.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/metrics/telemetry.go#L78-L81

Added lines #L78 - L81 were not covered by tests
}

func (t *TelemetryServer) SetConnectedPeers(peers []peer.AddrInfo) {
t.mu.Lock()
defer t.mu.Unlock()
Expand Down Expand Up @@ -175,6 +189,7 @@ func (t *TelemetryServer) Handlers() http.Handler {
router.Handle("/ip", http.HandlerFunc(t.ipHandler)).Methods(http.MethodGet)
router.Handle("/hotkeyburnrate", http.HandlerFunc(t.hotKeyFeeBurnRate)).Methods(http.MethodGet)
router.Handle("/connectedpeers", http.HandlerFunc(t.connectedPeersHandler)).Methods(http.MethodGet)
router.Handle("/knownpeers", http.HandlerFunc(t.knownPeersHandler)).Methods(http.MethodGet)

Check warning on line 192 in zetaclient/metrics/telemetry.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/metrics/telemetry.go#L192

Added line #L192 was not covered by tests
router.Handle("/pingrtt", http.HandlerFunc(t.pingRTTHandler)).Methods(http.MethodGet)
router.Use(logMiddleware())

Expand Down Expand Up @@ -283,7 +298,19 @@ func (t *TelemetryServer) connectedPeersHandler(w http.ResponseWriter, _ *http.R
peers := t.GetConnectedPeers()
data, err := json.Marshal(peers)
if err != nil {
t.logger.Error().Err(err).Msg("Failed to marshal connected peers")
t.logger.Error().Err(err).Msg("Failed to marshal known peers")
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
fmt.Fprintf(w, "%s", string(data))

Check warning on line 305 in zetaclient/metrics/telemetry.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/metrics/telemetry.go#L301-L305

Added lines #L301 - L305 were not covered by tests
}

func (t *TelemetryServer) knownPeersHandler(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
peers := t.GetKnownPeers()
data, err := json.Marshal(peers)
if err != nil {
t.logger.Error().Err(err).Msg("Failed to marshal known peers")

Check warning on line 313 in zetaclient/metrics/telemetry.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/metrics/telemetry.go#L308-L313

Added lines #L308 - L313 were not covered by tests
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
Expand Down
27 changes: 24 additions & 3 deletions zetaclient/tss/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ import (
"github.com/libp2p/go-libp2p/p2p/protocol/ping"
"github.com/prometheus/client_golang/prometheus"
"github.com/rs/zerolog"
"github.com/samber/lo"
"gitlab.com/thorchain/tss/go-tss/tss"

libp2p_network "github.com/libp2p/go-libp2p/core/network"
maddr "github.com/multiformats/go-multiaddr"
"github.com/zeta-chain/node/pkg/bg"
"github.com/zeta-chain/node/pkg/ticker"
"github.com/zeta-chain/node/zetaclient/logs"
Expand All @@ -24,13 +27,17 @@ type HealthcheckProps struct {
Interval time.Duration
WhitelistPeers []peer.ID
NumConnectedPeersMetric prometheus.Gauge
NumKnownPeersMetric prometheus.Gauge
}

// HealthcheckWorker checks the health of the TSS server and its peers.
func HealthcheckWorker(ctx context.Context, server *tss.TssServer, p HealthcheckProps, logger zerolog.Logger) error {
if p.NumConnectedPeersMetric == nil {
return errors.New("missing NumConnectedPeersMetric")
}
if p.NumKnownPeersMetric == nil {
return errors.New("missing NumKnownPeersMetric")
}

Check warning on line 40 in zetaclient/tss/healthcheck.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/tss/healthcheck.go#L38-L40

Added lines #L38 - L40 were not covered by tests

if p.Interval == 0 {
p.Interval = 30 * time.Second
Expand Down Expand Up @@ -89,16 +96,30 @@ func HealthcheckWorker(ctx context.Context, server *tss.TssServer, p Healthcheck
return nil
}

peersCounter := func(_ context.Context, _ *ticker.Ticker) error {
knownPeersCounter := func(_ context.Context, _ *ticker.Ticker) error {

Check warning on line 99 in zetaclient/tss/healthcheck.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/tss/healthcheck.go#L99

Added line #L99 was not covered by tests
peers := server.GetKnownPeers()
p.NumConnectedPeersMetric.Set(float64(len(peers)))
p.Telemetry.SetConnectedPeers(peers)
p.Telemetry.SetKnownPeers(peers)

Check warning on line 102 in zetaclient/tss/healthcheck.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/tss/healthcheck.go#L102

Added line #L102 was not covered by tests

return nil
}

connectedPeersCounter := func(_ context.Context, _ *ticker.Ticker) error {
p2pHost := server.GetP2PHost()
connectedPeers := lo.Map(p2pHost.Network().Conns(), func(conn libp2p_network.Conn, _ int) peer.AddrInfo {
return peer.AddrInfo{
ID: conn.RemotePeer(),
Addrs: []maddr.Multiaddr{conn.RemoteMultiaddr()},
}
})
p.Telemetry.SetConnectedPeers(connectedPeers)
p.NumConnectedPeersMetric.Set(float64(len(connectedPeers)))
return nil

Check warning on line 117 in zetaclient/tss/healthcheck.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/tss/healthcheck.go#L107-L117

Added lines #L107 - L117 were not covered by tests
}

runBackgroundTicker(ctx, pinger, p.Interval, "TSSHealthcheckPeersPing", logger)
runBackgroundTicker(ctx, peersCounter, p.Interval, "TSSHealthcheckPeersCounter", logger)
runBackgroundTicker(ctx, knownPeersCounter, p.Interval, "TSSHealthcheckKnownPeersCounter", logger)
runBackgroundTicker(ctx, connectedPeersCounter, p.Interval, "TSSHealthcheckConnectedPeersCounter", logger)

Check warning on line 122 in zetaclient/tss/healthcheck.go

View check run for this annotation

Codecov / codecov/patch

zetaclient/tss/healthcheck.go#L121-L122

Added lines #L121 - L122 were not covered by tests

return nil
}
Expand Down
1 change: 1 addition & 0 deletions zetaclient/tss/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type Zetacore interface {
type Telemetry interface {
SetP2PID(id string)
SetConnectedPeers(peers []peer.AddrInfo)
SetKnownPeers(peers []peer.AddrInfo)
SetPingRTT(peers map[peer.ID]int64)
}

Expand Down

0 comments on commit c7bbd4a

Please sign in to comment.