Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics, cmd/geth: change init-process of metrics #30814

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d2a3d9a
metrics: remove Counter interfaces
holiman Nov 26, 2024
bea9235
metrics: remove interfaces for float64-counter
holiman Nov 26, 2024
c1c47ee
metrics: fix ewma
holiman Nov 26, 2024
afaca63
metrics: fix gauge
holiman Nov 26, 2024
f27df5c
metrics: fix float64 gauge
holiman Nov 26, 2024
3c3623d
metrics: simplify float64 counter
holiman Nov 26, 2024
0d706d6
metrics: fix info-gauge
holiman Nov 26, 2024
05d2af6
metrics: fix healthcheck
holiman Nov 26, 2024
754c0c8
metrics: fix meter
holiman Nov 26, 2024
6bf0361
metrics: docs
holiman Nov 27, 2024
abd76ee
metrics: remove snapshot interface and nil-impl of sample
holiman Nov 27, 2024
01cfef2
metrics: remove nil histograms
holiman Nov 27, 2024
a6038c6
metrics: fix timer, remove interfaces and nil-implementations
holiman Nov 27, 2024
9814769
metrics: test updates, remove deprecated 'graphite' reporter
holiman Nov 27, 2024
653c557
metrics: some minor nits in the registry
holiman Nov 27, 2024
87048fc
p2p: remove pkg-init check on metrics.Enabled
holiman Nov 27, 2024
3ade355
all: different metrics-api, enable from flags
holiman Nov 27, 2024
d803afd
metrics: follow-up fixes to initialization
holiman Nov 27, 2024
6ef3c5b
metrics: make meter ticker respect enabled-flag
holiman Nov 27, 2024
b1dc19d
metrics: make sample respect enabled-flag
holiman Nov 27, 2024
4111636
metrics: unexport ewma.Tick method
holiman Nov 27, 2024
9e22de7
metrics: fix resetting timer
holiman Nov 27, 2024
df9fdbc
metrics: Init -> Enable
holiman Nov 27, 2024
4121cb4
cmd/geth: fix flawed configuration sanity-check
holiman Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions cmd/geth/chaincmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,15 @@ func importChain(ctx *cli.Context) error {
if ctx.Args().Len() < 1 {
utils.Fatalf("This command requires an argument.")
}
stack, cfg := makeConfigNode(ctx)
defer stack.Close()

// Start metrics export if enabled
utils.SetupMetrics(ctx)
utils.SetupMetrics(&cfg.Metrics)

// Start system runtime metrics collection
go metrics.CollectProcessMetrics(3 * time.Second)

stack, _ := makeConfigNode(ctx)
defer stack.Close()

chain, db := utils.MakeChain(ctx, stack, false)
defer db.Close()

Expand Down
26 changes: 26 additions & 0 deletions cmd/geth/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"runtime"
"slices"
"strings"
"time"
"unicode"

"github.com/ethereum/go-ethereum/accounts"
Expand Down Expand Up @@ -192,6 +193,10 @@ func makeFullNode(ctx *cli.Context) *node.Node {
cfg.Eth.OverrideVerkle = &v
}

// Start metrics export if enabled
utils.SetupMetrics(&cfg.Metrics)
go metrics.CollectProcessMetrics(3 * time.Second)

backend, eth := utils.RegisterEthService(stack, &cfg.Eth)

// Create gauge with geth system and build information
Expand Down Expand Up @@ -325,6 +330,27 @@ func applyMetricConfig(ctx *cli.Context, cfg *gethConfig) {
if ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) {
cfg.Metrics.InfluxDBOrganization = ctx.String(utils.MetricsInfluxDBOrganizationFlag.Name)
}
// Sanity-check the commandline flags. It is fine if some unused fields are part
// of the toml-config, but we expect the commandline to only contain relevant
// arguments, otherwise it indicates an error.
var (
enableExport = ctx.Bool(utils.MetricsEnableInfluxDBFlag.Name)
enableExportV2 = ctx.Bool(utils.MetricsEnableInfluxDBV2Flag.Name)
)
if enableExport || enableExportV2 {
v1FlagIsSet := ctx.IsSet(utils.MetricsInfluxDBUsernameFlag.Name) ||
ctx.IsSet(utils.MetricsInfluxDBPasswordFlag.Name)

v2FlagIsSet := ctx.IsSet(utils.MetricsInfluxDBTokenFlag.Name) ||
ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) ||
ctx.IsSet(utils.MetricsInfluxDBBucketFlag.Name)

if enableExport && v2FlagIsSet {
utils.Fatalf("Flags --influxdb.metrics.organization, --influxdb.metrics.token, --influxdb.metrics.bucket are only available for influxdb-v2")
} else if enableExportV2 && v1FlagIsSet {
utils.Fatalf("Flags --influxdb.metrics.username, --influxdb.metrics.password are only available for influxdb-v1")
}
}
}

func setAccountManagerBackends(conf *node.Config, am *accounts.Manager, keydir string) error {
Expand Down
7 changes: 0 additions & 7 deletions cmd/geth/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"github.com/ethereum/go-ethereum/internal/debug"
"github.com/ethereum/go-ethereum/internal/flags"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/node"
"go.uber.org/automaxprocs/maxprocs"

Expand Down Expand Up @@ -325,12 +324,6 @@ func prepare(ctx *cli.Context) {
ctx.Set(utils.CacheFlag.Name, strconv.Itoa(4096))
}
}

// Start metrics export if enabled
utils.SetupMetrics(ctx)

// Start system runtime metrics collection
go metrics.CollectProcessMetrics(3 * time.Second)
}

// geth is the main entry point into the system if no special subcommand is run.
Expand Down
90 changes: 38 additions & 52 deletions cmd/utils/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -1969,64 +1969,50 @@ func RegisterFullSyncTester(stack *node.Node, eth *eth.Ethereum, target common.H
log.Info("Registered full-sync tester", "hash", target)
}

func SetupMetrics(ctx *cli.Context) {
if metrics.Enabled {
log.Info("Enabling metrics collection")

var (
enableExport = ctx.Bool(MetricsEnableInfluxDBFlag.Name)
enableExportV2 = ctx.Bool(MetricsEnableInfluxDBV2Flag.Name)
)

if enableExport || enableExportV2 {
CheckExclusive(ctx, MetricsEnableInfluxDBFlag, MetricsEnableInfluxDBV2Flag)

v1FlagIsSet := ctx.IsSet(MetricsInfluxDBUsernameFlag.Name) ||
ctx.IsSet(MetricsInfluxDBPasswordFlag.Name)

v2FlagIsSet := ctx.IsSet(MetricsInfluxDBTokenFlag.Name) ||
ctx.IsSet(MetricsInfluxDBOrganizationFlag.Name) ||
ctx.IsSet(MetricsInfluxDBBucketFlag.Name)

if enableExport && v2FlagIsSet {
Fatalf("Flags --influxdb.metrics.organization, --influxdb.metrics.token, --influxdb.metrics.bucket are only available for influxdb-v2")
} else if enableExportV2 && v1FlagIsSet {
Fatalf("Flags --influxdb.metrics.username, --influxdb.metrics.password are only available for influxdb-v1")
}
}

var (
endpoint = ctx.String(MetricsInfluxDBEndpointFlag.Name)
database = ctx.String(MetricsInfluxDBDatabaseFlag.Name)
username = ctx.String(MetricsInfluxDBUsernameFlag.Name)
password = ctx.String(MetricsInfluxDBPasswordFlag.Name)

token = ctx.String(MetricsInfluxDBTokenFlag.Name)
bucket = ctx.String(MetricsInfluxDBBucketFlag.Name)
organization = ctx.String(MetricsInfluxDBOrganizationFlag.Name)
)
func SetupMetrics(cfg *metrics.Config) {
if !cfg.Enabled {
return
}
metrics.Enable()
log.Info("Enabling metrics collection")
var (
enableExport = cfg.EnableInfluxDB
enableExportV2 = cfg.EnableInfluxDBV2
)
if cfg.EnableInfluxDB && cfg.EnableInfluxDBV2 {
Fatalf("Flags %v can't be used at the same time", strings.Join([]string{MetricsEnableInfluxDBFlag.Name, MetricsEnableInfluxDBV2Flag.Name}, ", "))
}
var (
endpoint = cfg.InfluxDBEndpoint
database = cfg.InfluxDBDatabase
username = cfg.InfluxDBUsername
password = cfg.InfluxDBPassword

token = cfg.InfluxDBToken
bucket = cfg.InfluxDBBucket
organization = cfg.InfluxDBOrganization
)

if enableExport {
tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name))
if enableExport {
tagsMap := SplitTagsFlag(cfg.InfluxDBTags)

log.Info("Enabling metrics export to InfluxDB")
log.Info("Enabling metrics export to InfluxDB")

go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "geth.", tagsMap)
} else if enableExportV2 {
tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name))
go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "geth.", tagsMap)
} else if enableExportV2 {
tagsMap := SplitTagsFlag(cfg.InfluxDBTags)

log.Info("Enabling metrics export to InfluxDB (v2)")
log.Info("Enabling metrics export to InfluxDB (v2)")

go influxdb.InfluxDBV2WithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, token, bucket, organization, "geth.", tagsMap)
}
go influxdb.InfluxDBV2WithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, token, bucket, organization, "geth.", tagsMap)
}

if ctx.IsSet(MetricsHTTPFlag.Name) {
address := net.JoinHostPort(ctx.String(MetricsHTTPFlag.Name), fmt.Sprintf("%d", ctx.Int(MetricsPortFlag.Name)))
log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address)
exp.Setup(address)
} else if ctx.IsSet(MetricsPortFlag.Name) {
log.Warn(fmt.Sprintf("--%s specified without --%s, metrics server will not start.", MetricsPortFlag.Name, MetricsHTTPFlag.Name))
}
if cfg.HTTP != "" {
address := net.JoinHostPort(cfg.HTTP, fmt.Sprintf("%d", cfg.Port))
log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address)
exp.Setup(address)
} else if cfg.HTTP == "" && cfg.Port != 0 {
log.Warn(fmt.Sprintf("--%s specified without --%s, metrics server will not start.", MetricsPortFlag.Name, MetricsHTTPFlag.Name))
}
}

Expand Down
12 changes: 6 additions & 6 deletions core/rawdb/freezer_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,24 +113,24 @@ type freezerTable struct {
headId uint32 // number of the currently active head file
tailId uint32 // number of the earliest file

headBytes int64 // Number of bytes written to the head file
readMeter metrics.Meter // Meter for measuring the effective amount of data read
writeMeter metrics.Meter // Meter for measuring the effective amount of data written
sizeGauge metrics.Gauge // Gauge for tracking the combined size of all freezer tables
headBytes int64 // Number of bytes written to the head file
readMeter *metrics.Meter // Meter for measuring the effective amount of data read
writeMeter *metrics.Meter // Meter for measuring the effective amount of data written
sizeGauge *metrics.Gauge // Gauge for tracking the combined size of all freezer tables

logger log.Logger // Logger with database path and table name embedded
lock sync.RWMutex // Mutex protecting the data file descriptors
}

// newFreezerTable opens the given path as a freezer table.
func newFreezerTable(path, name string, disableSnappy, readonly bool) (*freezerTable, error) {
return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly)
return newTable(path, name, metrics.NewInactiveMeter(), metrics.NewInactiveMeter(), metrics.NewGauge(), freezerTableSize, disableSnappy, readonly)
}

// newTable opens a freezer table, creating the data and index files if they are
// non-existent. Both files are truncated to the shortest common length to ensure
// they don't go out of sync.
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) {
func newTable(path string, name string, readMeter, writeMeter *metrics.Meter, sizeGauge *metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) {
// Ensure the containing directory exists and open the indexEntry file
if err := os.MkdirAll(path, 0755); err != nil {
return nil, err
Expand Down
32 changes: 16 additions & 16 deletions core/state/trie_prefetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,21 @@ type triePrefetcher struct {
term chan struct{} // Channel to signal interruption
noreads bool // Whether to ignore state-read-only prefetch requests

deliveryMissMeter metrics.Meter

accountLoadReadMeter metrics.Meter
accountLoadWriteMeter metrics.Meter
accountDupReadMeter metrics.Meter
accountDupWriteMeter metrics.Meter
accountDupCrossMeter metrics.Meter
accountWasteMeter metrics.Meter

storageLoadReadMeter metrics.Meter
storageLoadWriteMeter metrics.Meter
storageDupReadMeter metrics.Meter
storageDupWriteMeter metrics.Meter
storageDupCrossMeter metrics.Meter
storageWasteMeter metrics.Meter
deliveryMissMeter *metrics.Meter

accountLoadReadMeter *metrics.Meter
accountLoadWriteMeter *metrics.Meter
accountDupReadMeter *metrics.Meter
accountDupWriteMeter *metrics.Meter
accountDupCrossMeter *metrics.Meter
accountWasteMeter *metrics.Meter

storageLoadReadMeter *metrics.Meter
storageLoadWriteMeter *metrics.Meter
storageDupReadMeter *metrics.Meter
storageDupWriteMeter *metrics.Meter
storageDupCrossMeter *metrics.Meter
storageWasteMeter *metrics.Meter
}

func newTriePrefetcher(db Database, root common.Hash, namespace string, noreads bool) *triePrefetcher {
Expand Down Expand Up @@ -111,7 +111,7 @@ func (p *triePrefetcher) terminate(async bool) {

// report aggregates the pre-fetching and usage metrics and reports them.
func (p *triePrefetcher) report() {
if !metrics.Enabled {
if !metrics.Enabled() {
return
}
for _, fetcher := range p.fetchers {
Expand Down
4 changes: 2 additions & 2 deletions core/txpool/txpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func (p *TxPool) reserver(id int, subpool SubPool) AddressReserver {
return ErrAlreadyReserved
}
p.reservations[addr] = subpool
if metrics.Enabled {
if metrics.Enabled() {
m := fmt.Sprintf("%s/%d", reservationsGaugeName, id)
metrics.GetOrRegisterGauge(m, nil).Inc(1)
}
Expand All @@ -143,7 +143,7 @@ func (p *TxPool) reserver(id int, subpool SubPool) AddressReserver {
return errors.New("address not owned")
}
delete(p.reservations, addr)
if metrics.Enabled {
if metrics.Enabled() {
m := fmt.Sprintf("%s/%d", reservationsGaugeName, id)
metrics.GetOrRegisterGauge(m, nil).Dec(1)
}
Expand Down
2 changes: 1 addition & 1 deletion eth/downloader/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ func (q *queue) DeliverReceipts(id string, receiptList [][]*types.Receipt, recei
// to access the queue, so they already need a lock anyway.
func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header,
taskQueue *prque.Prque[int64, *types.Header], pendPool map[string]*fetchRequest,
reqTimer metrics.Timer, resInMeter metrics.Meter, resDropMeter metrics.Meter,
reqTimer *metrics.Timer, resInMeter, resDropMeter *metrics.Meter,
results int, validate func(index int, header *types.Header) error,
reconstruct func(index int, result *fetchResult)) (int, error) {
// Short circuit if the data was never requested
Expand Down
2 changes: 1 addition & 1 deletion eth/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func (h *handler) runSnapExtension(peer *snap.Peer, handler snap.Handler) error
defer h.decHandlers()

if err := h.peers.registerSnapExtension(peer); err != nil {
if metrics.Enabled {
if metrics.Enabled() {
if peer.Inbound() {
snap.IngressRegistrationErrorMeter.Mark(1)
} else {
Expand Down
2 changes: 1 addition & 1 deletion eth/protocols/eth/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func handleMessage(backend Backend, peer *Peer) error {
var handlers = eth68

// Track the amount of time it takes to serve the request and run the handler
if metrics.Enabled {
if metrics.Enabled() {
h := fmt.Sprintf("%s/%s/%d/%#02x", p2p.HandleHistName, ProtocolName, peer.Version(), msg.Code)
defer func(start time.Time) {
sampler := func() metrics.Sample {
Expand Down
2 changes: 1 addition & 1 deletion eth/protocols/eth/handshake.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (p *Peer) readStatus(network uint64, status *StatusPacket, genesis common.H

// markError registers the error with the corresponding metric.
func markError(p *Peer, err error) {
if !metrics.Enabled {
if !metrics.Enabled() {
return
}
m := meters.get(p.Inbound())
Expand Down
12 changes: 6 additions & 6 deletions eth/protocols/eth/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,23 @@ func (h *bidirectionalMeters) get(ingress bool) *hsMeters {
type hsMeters struct {
// peerError measures the number of errors related to incorrect peer
// behaviour, such as invalid message code, size, encoding, etc.
peerError metrics.Meter
peerError *metrics.Meter

// timeoutError measures the number of timeouts.
timeoutError metrics.Meter
timeoutError *metrics.Meter

// networkIDMismatch measures the number of network id mismatch errors.
networkIDMismatch metrics.Meter
networkIDMismatch *metrics.Meter

// protocolVersionMismatch measures the number of differing protocol
// versions.
protocolVersionMismatch metrics.Meter
protocolVersionMismatch *metrics.Meter

// genesisMismatch measures the number of differing genesises.
genesisMismatch metrics.Meter
genesisMismatch *metrics.Meter

// forkidRejected measures the number of differing forkids.
forkidRejected metrics.Meter
forkidRejected *metrics.Meter
}

// newHandshakeMeters registers and returns handshake meters for the given
Expand Down
2 changes: 1 addition & 1 deletion eth/protocols/snap/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func HandleMessage(backend Backend, peer *Peer) error {
defer msg.Discard()
start := time.Now()
// Track the amount of time it takes to serve the request and run the handler
if metrics.Enabled {
if metrics.Enabled() {
h := fmt.Sprintf("%s/%s/%d/%#02x", p2p.HandleHistName, ProtocolName, peer.Version(), msg.Code)
defer func(start time.Time) {
sampler := func() metrics.Sample {
Expand Down
30 changes: 15 additions & 15 deletions ethdb/leveldb/leveldb.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,21 @@ type Database struct {
fn string // filename for reporting
db *leveldb.DB // LevelDB instance

compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
compReadMeter metrics.Meter // Meter for measuring the data read during compaction
compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction
diskSizeGauge metrics.Gauge // Gauge for tracking the size of all the levels in the database
diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
memCompGauge metrics.Gauge // Gauge for tracking the number of memory compaction
level0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in level0
nonlevel0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
manualMemAllocGauge metrics.Gauge // Gauge to track the amount of memory that has been manually allocated (not a part of runtime/GC)

levelsGauge []metrics.Gauge // Gauge for tracking the number of tables in levels
compTimeMeter *metrics.Meter // Meter for measuring the total time spent in database compaction
compReadMeter *metrics.Meter // Meter for measuring the data read during compaction
compWriteMeter *metrics.Meter // Meter for measuring the data written during compaction
writeDelayNMeter *metrics.Meter // Meter for measuring the write delay number due to database compaction
writeDelayMeter *metrics.Meter // Meter for measuring the write delay duration due to database compaction
diskSizeGauge *metrics.Gauge // Gauge for tracking the size of all the levels in the database
diskReadMeter *metrics.Meter // Meter for measuring the effective amount of data read
diskWriteMeter *metrics.Meter // Meter for measuring the effective amount of data written
memCompGauge *metrics.Gauge // Gauge for tracking the number of memory compaction
level0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in level0
nonlevel0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
seekCompGauge *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
manualMemAllocGauge *metrics.Gauge // Gauge to track the amount of memory that has been manually allocated (not a part of runtime/GC)

levelsGauge []*metrics.Gauge // Gauge for tracking the number of tables in levels

quitLock sync.Mutex // Mutex protecting the quit channel access
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
Expand Down
Loading