Skip to content

Commit

Permalink
Merge pull request #23 from Layr-Labs/fix-tx-revert-logs-metrics-alerts
Browse files Browse the repository at this point in the history
Add metrics to track txs reverting
  • Loading branch information
samlaf authored Mar 30, 2024
2 parents be163e1 + 1ac0c1e commit 3cc4743
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 20 deletions.
6 changes: 4 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# Mandatory
AVS_SYNC_REGISTRY_COORDINATOR_ADDR=0xd19d750531c5e95CcC5C9610bF119dDe3008A16D
AVS_SYNC_OPERATOR_STATE_RETRIEVER_ADDR=0x1b41CA79b86295e77Dd49f28DbB000286c022dfd
AVS_SYNC_REGISTRY_COORDINATOR_ADDR=0x53012C69A189cfA2D9d29eb6F19B32e0A2EA3490
AVS_SYNC_OPERATOR_STATE_RETRIEVER_ADDR=0xB4baAfee917fb4449f5ec64804217bccE9f46C67
AVS_SYNC_ETH_HTTP_URL=http://localhost:8545
AVS_SYNC_SYNC_INTERVAL=24h

# Optional
AVS_SYNC_FIRST_SYNC_TIME=00:00:00 # this will make it run at midnight
AVS_SYNC_LOG_LEVEL=DEBUG
AVS_SYNC_LOG_FORMAT=text
AVS_SYNC_READER_TIMEOUT_DURATION=
AVS_SYNC_WRITER_TIMEOUT_DURATION=

# Either AVS_SYNC_ECDSA_PRIVATE_KEY or Fireblocks credentials are required
# If AVS_SYNC_ECDSA_PRIVATE_KEY is specified, the Fireblocks credentials are not required and will be ignored
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ help:
# include .env
# export

start-anvil-goerli-fork: ##
anvil --fork-url https://goerli.gateway.tenderly.co
start-anvil-holesky-fork: ##
anvil --fork-url https://ethereum-holesky-rpc.publicnode.com

run-avs-sync: ##
@# we export the env vars from .env file and then run the go program
Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ The test can be run via:
make test
```

#### Against a goerli fork
#### Against a holesky fork

The most recent eigenDA m2 deployment is accessible [here](https://docs.google.com/spreadsheets/d/1UgXnn_9U5mQ6jvj_y1oKjxTG5q0IoYdFvM1704inPGg/edit#gid=0). We can test avssync against this deployment by running a goerli fork and running the tests against it.
First create a .env file by copying the example .env.example file, and adjust the variables as needed. You should only need to enter a private key that has goerli eth.
The most recent eigenDA holesky deployment is accessible [here](https://github.com/Layr-Labs/eigenda/blob/master/contracts/script/deploy/holesky/output/holesky_testnet_deployment_data.json). We can test avssync against this deployment by running a holesky fork and running the tests against it.

First create a .env file by copying the example .env.example file, and adjust the variables as needed. You will need to enter a private key that has holesky eth, and you will most likely also need to set `AVS_SYNC_READER_TIMEOUT_DURATION` to at least 1m, since retrieving the operator state can take a while the first time anvil is querying the holesky fork and filling its local cache.

Then run
```
make start-anvil-goerli-fork
make start-anvil-holesky-fork
```
and in a separate terminal
```
Expand Down
39 changes: 32 additions & 7 deletions avssync.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@ package main

import (
"context"
"errors"
"sort"
"strconv"
"time"

"github.com/Layr-Labs/eigensdk-go/chainio/clients/avsregistry"
sdklogging "github.com/Layr-Labs/eigensdk-go/logging"
"github.com/Layr-Labs/eigensdk-go/types"
"github.com/ethereum/go-ethereum/accounts/abi/bind"
"github.com/ethereum/go-ethereum/common"
gethtypes "github.com/ethereum/go-ethereum/core/types"
"github.com/prometheus/client_golang/prometheus"
)

type AvsSync struct {
Expand All @@ -26,6 +30,7 @@ type AvsSync struct {

readerTimeoutDuration time.Duration
writerTimeoutDuration time.Duration
prometheusServerAddr string
}

// NewAvsSync creates a new AvsSync object
Expand All @@ -38,8 +43,8 @@ func NewAvsSync(
avsReader avsregistry.AvsRegistryReader, avsWriter avsregistry.AvsRegistryWriter,
sleepBeforeFirstSyncDuration time.Duration, syncInterval time.Duration, operators []common.Address,
quorums []byte, fetchQuorumsDynamically bool, retrySyncNTimes int,
readerTimeoutDuration time.Duration,
writerTimeoutDuration time.Duration,
readerTimeoutDuration time.Duration, writerTimeoutDuration time.Duration,
prometheusServerAddr string,
) *AvsSync {
return &AvsSync{
logger: logger,
Expand All @@ -53,6 +58,7 @@ func NewAvsSync(
retrySyncNTimes: retrySyncNTimes,
readerTimeoutDuration: readerTimeoutDuration,
writerTimeoutDuration: writerTimeoutDuration,
prometheusServerAddr: prometheusServerAddr,
}
}

Expand All @@ -67,8 +73,15 @@ func (a *AvsSync) Start() {
"fetchQuorumsDynamically", a.fetchQuorumsDynamically,
"readerTimeoutDuration", a.readerTimeoutDuration,
"writerTimeoutDuration", a.writerTimeoutDuration,
"prometheusServerAddr", a.prometheusServerAddr,
)

if a.prometheusServerAddr != "" {
StartMetricsServer(a.prometheusServerAddr)
} else {
a.logger.Info("Prometheus server address not set, not starting metrics server")
}

// ticker doesn't tick immediately, so we send a first updateStakes here
// see https://github.com/golang/go/issues/17601
// we first sleep some amount of time before the first sync, which allows the syncs to happen at some preferred time
Expand Down Expand Up @@ -115,11 +128,16 @@ func (a *AvsSync) updateStakes() error {
timeoutCtx, cancel := context.WithTimeout(context.Background(), a.writerTimeoutDuration)
defer cancel()
// this one we update all quorums at once, since we're only updating a subset of operators (which should be a small number)
_, err := a.avsWriter.UpdateStakesOfOperatorSubsetForAllQuorums(timeoutCtx, a.operators)
if err == nil {
a.logger.Info("Completed stake update successfully")
receipt, err := a.avsWriter.UpdateStakesOfOperatorSubsetForAllQuorums(timeoutCtx, a.operators)
if err != nil {
erroredTxs.Inc()
return err
} else if receipt.Status == gethtypes.ReceiptStatusFailed {
revertedTxs.Inc()
return errors.New("Update stakes of operator subset for all quorums reverted")
}
return err
a.logger.Info("Completed stake update successfully")
return nil
}
}

Expand Down Expand Up @@ -158,17 +176,24 @@ func (a *AvsSync) tryNTimesUpdateStakesOfEntireOperatorSetForQuorum(quorum byte,
}
var operators []common.Address
operators = append(operators, operatorAddrsPerQuorum[0]...)
operatorsUpdated.With(prometheus.Labels{"quorum": strconv.Itoa(int(quorum))}).Set(float64(len(operators)))
sort.Slice(operators, func(i, j int) bool {
return operators[i].Big().Cmp(operators[j].Big()) < 0
})
a.logger.Infof("Updating stakes of operators in quorum %d: %v", int(quorum), operators)
timeoutCtx, cancel = context.WithTimeout(context.Background(), a.writerTimeoutDuration)
defer cancel()
_, err = a.avsWriter.UpdateStakesOfEntireOperatorSetForQuorums(timeoutCtx, [][]common.Address{operators}, types.QuorumNums{types.QuorumNum(quorum)})
receipt, err := a.avsWriter.UpdateStakesOfEntireOperatorSetForQuorums(timeoutCtx, [][]common.Address{operators}, types.QuorumNums{types.QuorumNum(quorum)})
if err != nil {
erroredTxs.Inc()
a.logger.Error("Error updating stakes of entire operator set for quorum", "err", err, "quorum", int(quorum))
continue
}
if receipt.Status == gethtypes.ReceiptStatusFailed {
revertedTxs.Inc()
a.logger.Infof("Successfully updated stakes of operators in quorum %d", int(quorum))
continue
}
return
}
a.logger.Error("Giving up after retrying", "retryNTimes", retryNTimes)
Expand Down
7 changes: 7 additions & 0 deletions flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ var (
EnvVar: envVarPrefix + "SYNC_INTERVAL",
}
/* Optional Flags */
PrometheusServerAddrFlag = cli.StringFlag{
Name: "prometheus-server-addr",
Usage: "Prometheus server address",
Value: ":9090",
EnvVar: envVarPrefix + "PROMETHEUS_SERVER_ADDR",
}
FirstSyncTimeFlag = cli.StringFlag{
Name: "first-sync-time",
Required: false,
Expand Down Expand Up @@ -112,6 +118,7 @@ var RequiredFlags = []cli.Flag{
}

var OptionalFlags = []cli.Flag{
PrometheusServerAddrFlag,
FirstSyncTimeFlag,
OperatorListFlag,
QuorumListFlag,
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ module github.com/Layr-Labs/avs-sync
go 1.21.2

require (
github.com/Layr-Labs/eigensdk-go v0.1.3
github.com/Layr-Labs/eigensdk-go v0.1.4-0.20240329184527-d5c0a153a1c4
github.com/ethereum/go-ethereum v1.13.14
github.com/prometheus/client_golang v1.19.0
github.com/testcontainers/testcontainers-go v0.29.1
github.com/urfave/cli v1.22.14
go.uber.org/mock v0.4.0
Expand Down Expand Up @@ -59,7 +60,6 @@ require (
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/prometheus/client_golang v1.19.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.48.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg6
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ=
github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/Layr-Labs/eigensdk-go v0.1.3 h1:GYRd3CYtPFkLVP07Ml284j9m4igKvPxeAhgSCpRCtpc=
github.com/Layr-Labs/eigensdk-go v0.1.3/go.mod h1:J+d9zxN4VyMtchmsPzGASFcCjpnh1eT4aE2ggiqOz/g=
github.com/Layr-Labs/eigensdk-go v0.1.4-0.20240329184527-d5c0a153a1c4 h1:D58ekIT4cQlumjetYfwpzLeNHEOAYg/YjaB3LR2YxRY=
github.com/Layr-Labs/eigensdk-go v0.1.4-0.20240329184527-d5c0a153a1c4/go.mod h1:HOSNuZcwaKbP4cnNk9c1hK2B2RitcMQ36Xj2msBBBpE=
github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8=
Expand Down
4 changes: 3 additions & 1 deletion integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/ethereum/go-ethereum/accounts/abi/bind"
"github.com/ethereum/go-ethereum/common"
gethtypes "github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/testcontainers/testcontainers-go"
"github.com/testcontainers/testcontainers-go/wait"
Expand Down Expand Up @@ -190,7 +191,7 @@ func TestIntegrationFullOperatorSetWithRetry(t *testing.T) {
mockAvsRegistryWriter := chainiomocks.NewMockAvsRegistryWriter(mockCtrl)
// this is the test. we just make sure this is called 3 times
mockAvsRegistryWriter.EXPECT().UpdateStakesOfEntireOperatorSetForQuorums(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("error")).Times(2)
mockAvsRegistryWriter.EXPECT().UpdateStakesOfEntireOperatorSetForQuorums(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil)
mockAvsRegistryWriter.EXPECT().UpdateStakesOfEntireOperatorSetForQuorums(gomock.Any(), gomock.Any(), gomock.Any()).Return(&gethtypes.Receipt{Status: gethtypes.ReceiptStatusSuccessful}, nil)
avsSync.avsWriter = mockAvsRegistryWriter
avsSync.retrySyncNTimes = 3

Expand Down Expand Up @@ -327,6 +328,7 @@ func NewAvsSyncComponents(t *testing.T, anvilHttpEndpoint string, contractAddres
1, // 1 retry
5*time.Second,
5*time.Second,
"", // no metrics server (otherwise parallel tests all try to start server at same endpoint and error out)
)
return &AvsSyncComponents{
avsSync: avsSync,
Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ func avsSyncMain(cliCtx *cli.Context) error {
cliCtx.Int(retrySyncNTimes.Name),
readerTimeout,
writerTimeout,
cliCtx.String(PrometheusServerAddrFlag.Name),
)

avsSync.Start()
Expand Down
32 changes: 32 additions & 0 deletions metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package main

import (
	"log"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Package-level Prometheus collectors used by avs-sync. They are created via
// promauto and additionally registered on a dedicated registry by
// StartMetricsServer, which is what the /metrics endpoint serves.
var (
// erroredTxs is incremented each time a stake-update call on the AVS
// registry writer returns a non-nil error.
erroredTxs = promauto.NewCounter(prometheus.CounterOpts{
Name: "errored_txs_total",
Help: "The total number of transactions that errored (failed to get processed by chain)",
})
// revertedTxs is incremented each time a stake-update transaction comes
// back with a receipt whose status is ReceiptStatusFailed.
revertedTxs = promauto.NewCounter(prometheus.CounterOpts{
Name: "reverted_txs_total",
Help: "The total number of transactions that reverted (processed by chain but reverted)",
})
// operatorsUpdated is a gauge labelled by quorum number; it is set to the
// number of operators fetched for that quorum right before the update
// transaction is sent.
operatorsUpdated = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "operators_updated",
Help: "The total number of operators updated (during the last quorum sync)",
}, []string{"quorum"})
)

// StartMetricsServer serves the avs-sync Prometheus metrics over HTTP at
// metricsAddr (for example ":9090") under the /metrics path.
//
// The call returns immediately; the HTTP server runs in a background
// goroutine for the remaining lifetime of the process.
//
// The handler is mounted on a private ServeMux rather than on
// http.DefaultServeMux, so calling StartMetricsServer more than once does not
// panic with a duplicate-pattern registration. A failure to bind or serve is
// logged instead of being silently dropped; it is deliberately not fatal,
// because metrics are best-effort and must not stop stake syncing.
func StartMetricsServer(metricsAddr string) {
	// Expose only our own collectors, not the Go runtime/process defaults.
	registry := prometheus.NewRegistry()
	registry.MustRegister(erroredTxs, revertedTxs, operatorsUpdated)

	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))

	server := &http.Server{
		Addr:    metricsAddr,
		Handler: mux,
		// Bound how long a client may take to send request headers so that
		// slow or stalled scrapers cannot pin connections forever.
		ReadHeaderTimeout: 10 * time.Second,
	}

	go func() {
		// ListenAndServe always returns a non-nil error when it stops.
		if err := server.ListenAndServe(); err != nil {
			log.Printf("metrics server on %q stopped: %v", metricsAddr, err)
		}
	}()
}

0 comments on commit 3cc4743

Please sign in to comment.