From 4feb960086a473905691945c9d4449d6c78e715e Mon Sep 17 00:00:00 2001
From: Arran Schlosberg <519948+ARR4N@users.noreply.github.com>
Date: Tue, 26 Nov 2024 16:01:47 +0000
Subject: [PATCH] feat(core/state): async trie prefetching (#76)
## Why this should be merged
Performs trie prefetching concurrently, required for equivalent
performance with `coreth` / `subnet-evm` implementations.
## How this works
`StateDB.StartPrefetcher()` accepts variadic options (for backwards
compatibility of function signatures). An option to specify a
`WorkerPool` is provided which, if present, is used to call
`Trie.Get{Account,Storage}()`; the pool is responsible for concurrency
but does not need to be able to wait on the work as that is handled by
this change.
## How this was tested
Unit test demonstrating hand-off of work to a `WorkerPool` as well as
API-guaranteed ordering of events.
---
core/state/statedb.go | 4 +-
core/state/trie_prefetcher.go | 62 ++++++++---
core/state/trie_prefetcher.libevm.go | 126 ++++++++++++++++++++++
core/state/trie_prefetcher.libevm_test.go | 80 ++++++++++++++
libevm/sync/sync.go | 52 +++++++++
5 files changed, 307 insertions(+), 17 deletions(-)
create mode 100644 core/state/trie_prefetcher.libevm.go
create mode 100644 core/state/trie_prefetcher.libevm_test.go
create mode 100644 libevm/sync/sync.go
diff --git a/core/state/statedb.go b/core/state/statedb.go
index 3b706002e765..d641fb3b0425 100644
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -175,13 +175,13 @@ func New(root common.Hash, db Database, snaps *snapshot.Tree) (*StateDB, error)
// StartPrefetcher initializes a new trie prefetcher to pull in nodes from the
// state trie concurrently while the state is mutated so that when we reach the
// commit phase, most of the needed data is already hot.
-func (s *StateDB) StartPrefetcher(namespace string) {
+func (s *StateDB) StartPrefetcher(namespace string, opts ...PrefetcherOption) {
if s.prefetcher != nil {
s.prefetcher.close()
s.prefetcher = nil
}
if s.snap != nil {
- s.prefetcher = newTriePrefetcher(s.db, s.originalRoot, namespace)
+ s.prefetcher = newTriePrefetcher(s.db, s.originalRoot, namespace, opts...)
}
}
diff --git a/core/state/trie_prefetcher.go b/core/state/trie_prefetcher.go
index 45fac913dd0f..275f20b94b69 100644
--- a/core/state/trie_prefetcher.go
+++ b/core/state/trie_prefetcher.go
@@ -20,6 +20,7 @@ import (
"sync"
"github.com/ava-labs/libevm/common"
+ "github.com/ava-labs/libevm/libevm/options"
"github.com/ava-labs/libevm/log"
"github.com/ava-labs/libevm/metrics"
)
@@ -49,9 +50,11 @@ type triePrefetcher struct {
storageDupMeter metrics.Meter
storageSkipMeter metrics.Meter
storageWasteMeter metrics.Meter
+
+ options []PrefetcherOption
}
-func newTriePrefetcher(db Database, root common.Hash, namespace string) *triePrefetcher {
+func newTriePrefetcher(db Database, root common.Hash, namespace string, opts ...PrefetcherOption) *triePrefetcher {
prefix := triePrefetchMetricsPrefix + namespace
p := &triePrefetcher{
db: db,
@@ -67,6 +70,8 @@ func newTriePrefetcher(db Database, root common.Hash, namespace string) *triePre
storageDupMeter: metrics.GetOrRegisterMeter(prefix+"/storage/dup", nil),
storageSkipMeter: metrics.GetOrRegisterMeter(prefix+"/storage/skip", nil),
storageWasteMeter: metrics.GetOrRegisterMeter(prefix+"/storage/waste", nil),
+
+ options: opts,
}
return p
}
@@ -99,6 +104,7 @@ func (p *triePrefetcher) close() {
}
}
}
+ p.releaseWorkerPools()
// Clear out all fetchers (will crash on a second call, deliberate)
p.fetchers = nil
}
@@ -122,6 +128,8 @@ func (p *triePrefetcher) copy() *triePrefetcher {
storageDupMeter: p.storageDupMeter,
storageSkipMeter: p.storageSkipMeter,
storageWasteMeter: p.storageWasteMeter,
+
+ options: p.options,
}
// If the prefetcher is already a copy, duplicate the data
if p.fetches != nil {
@@ -150,7 +158,7 @@ func (p *triePrefetcher) prefetch(owner common.Hash, root common.Hash, addr comm
id := p.trieID(owner, root)
fetcher := p.fetchers[id]
if fetcher == nil {
- fetcher = newSubfetcher(p.db, p.root, owner, root, addr)
+ fetcher = newSubfetcher(p.db, p.root, owner, root, addr, p.options...)
p.fetchers[id] = fetcher
}
fetcher.schedule(keys)
@@ -226,11 +234,13 @@ type subfetcher struct {
seen map[string]struct{} // Tracks the entries already loaded
dups int // Number of duplicate preload tasks
used [][]byte // Tracks the entries used in the end
+
+ pool *subfetcherPool
}
// newSubfetcher creates a goroutine to prefetch state items belonging to a
// particular root hash.
-func newSubfetcher(db Database, state common.Hash, owner common.Hash, root common.Hash, addr common.Address) *subfetcher {
+func newSubfetcher(db Database, state common.Hash, owner common.Hash, root common.Hash, addr common.Address, opts ...PrefetcherOption) *subfetcher {
sf := &subfetcher{
db: db,
state: state,
@@ -243,6 +253,7 @@ func newSubfetcher(db Database, state common.Hash, owner common.Hash, root commo
copy: make(chan chan Trie),
seen: make(map[string]struct{}),
}
+ options.As[prefetcherConfig](opts...).applyTo(sf)
go sf.loop()
return sf
}
@@ -294,7 +305,10 @@ func (sf *subfetcher) abort() {
// out of tasks or its underlying trie is retrieved for committing.
func (sf *subfetcher) loop() {
// No matter how the loop stops, signal anyone waiting that it's terminated
- defer close(sf.term)
+ defer func() {
+ sf.pool.wait()
+ close(sf.term)
+ }()
// Start by opening the trie and stop processing if it fails
if sf.owner == (common.Hash{}) {
@@ -325,14 +339,14 @@ func (sf *subfetcher) loop() {
sf.lock.Unlock()
// Prefetch any tasks until the loop is interrupted
- for i, task := range tasks {
+ for _, task := range tasks {
select {
- case <-sf.stop:
- // If termination is requested, add any leftover back and return
- sf.lock.Lock()
- sf.tasks = append(sf.tasks, tasks[i:]...)
- sf.lock.Unlock()
- return
+ //libevm:start
+ //
+ // The <-sf.stop case has been removed, in keeping with the equivalent change below. Future geth
+ // versions also remove it so our modification here can be undone when merging upstream.
+ //
+ //libevm:end
case ch := <-sf.copy:
// Somebody wants a copy of the current trie, grant them
@@ -344,9 +358,9 @@ func (sf *subfetcher) loop() {
sf.dups++
} else {
if len(task) == common.AddressLength {
- sf.trie.GetAccount(common.BytesToAddress(task))
+ sf.pool.GetAccount(common.BytesToAddress(task))
} else {
- sf.trie.GetStorage(sf.addr, task)
+ sf.pool.GetStorage(sf.addr, task)
}
sf.seen[string(task)] = struct{}{}
}
@@ -358,8 +372,26 @@ func (sf *subfetcher) loop() {
ch <- sf.db.CopyTrie(sf.trie)
case <-sf.stop:
- // Termination is requested, abort and leave remaining tasks
- return
+ //libevm:start
+ //
+ // This is copied, with alteration, from ethereum/go-ethereum#29519
+ // and can be deleted once we update to include that change.
+
+ // Termination is requested, abort if no more tasks are pending. If
+ // there are some, exhaust them first.
+ sf.lock.Lock()
+ done := len(sf.tasks) == 0
+ sf.lock.Unlock()
+
+ if done {
+ return
+ }
+
+ select {
+ case sf.wake <- struct{}{}:
+ default:
+ }
+ //libevm:end
}
}
}
diff --git a/core/state/trie_prefetcher.libevm.go b/core/state/trie_prefetcher.libevm.go
new file mode 100644
index 000000000000..abee575be31a
--- /dev/null
+++ b/core/state/trie_prefetcher.libevm.go
@@ -0,0 +1,126 @@
+// Copyright 2024 the libevm authors.
+//
+// The libevm additions to go-ethereum are free software: you can redistribute
+// them and/or modify them under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation, either version 3 of the License,
+// or (at your option) any later version.
+//
+// The libevm additions are distributed in the hope that they will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+// General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see
+// .
+
+package state
+
+import (
+ "github.com/ava-labs/libevm/common"
+ "github.com/ava-labs/libevm/libevm/options"
+ "github.com/ava-labs/libevm/libevm/sync"
+ "github.com/ava-labs/libevm/log"
+)
+
+// A PrefetcherOption configures behaviour of trie prefetching.
+type PrefetcherOption = options.Option[prefetcherConfig]
+
+type prefetcherConfig struct {
+ newWorkers func() WorkerPool
+}
+
+// A WorkerPool executes functions asynchronously. Done() is called to signal
+// that the pool is no longer needed and that Execute() is guaranteed to not be
+// called again.
+type WorkerPool interface {
+ Execute(func())
+ Done()
+}
+
+// WithWorkerPools configures trie prefetching to execute asynchronously. The
+// provided constructor is called once for each trie being fetched but it MAY
+// return the same pool.
+func WithWorkerPools(ctor func() WorkerPool) PrefetcherOption {
+ return options.Func[prefetcherConfig](func(c *prefetcherConfig) {
+ c.newWorkers = ctor
+ })
+}
+
+type subfetcherPool struct {
+ workers WorkerPool
+ tries sync.Pool[Trie]
+ wg sync.WaitGroup
+}
+
+// applyTo configures the [subfetcher] to use a [WorkerPool] if one was provided
+// with a [PrefetcherOption].
+func (c *prefetcherConfig) applyTo(sf *subfetcher) {
+ sf.pool = &subfetcherPool{
+ tries: sync.Pool[Trie]{
+ // Although the workers may be shared between all subfetchers, each
+ // MUST have its own Trie pool.
+ New: func() Trie {
+ return sf.db.CopyTrie(sf.trie)
+ },
+ },
+ }
+ if c.newWorkers != nil {
+ sf.pool.workers = c.newWorkers()
+ }
+}
+
+// releaseWorkerPools calls Done() on all [WorkerPool]s. This MUST only be
+// called after [subfetcher.abort] returns on ALL fetchers as a pool is allowed
+// to be shared between them. This is because we guarantee in the public API
+// that no further calls will be made to Execute() after a call to Done().
+func (p *triePrefetcher) releaseWorkerPools() {
+ for _, f := range p.fetchers {
+ if w := f.pool.workers; w != nil {
+ w.Done()
+ }
+ }
+}
+
+func (p *subfetcherPool) wait() {
+ p.wg.Wait()
+}
+
+// execute runs the provided function with a copy of the subfetcher's Trie.
+// Copies are stored in a [sync.Pool] to reduce creation overhead. If p was
+// configured with a [WorkerPool] then it is used for function execution,
+// otherwise `fn` is just called directly.
+func (p *subfetcherPool) execute(fn func(Trie)) {
+ p.wg.Add(1)
+ do := func() {
+ t := p.tries.Get()
+ fn(t)
+ p.tries.Put(t)
+ p.wg.Done()
+ }
+
+ if w := p.workers; w != nil {
+ w.Execute(do)
+ } else {
+ do()
+ }
+}
+
+// GetAccount optimistically pre-fetches an account, dropping the returned value
+// and logging errors. See [subfetcherPool.execute] re worker pools.
+func (p *subfetcherPool) GetAccount(addr common.Address) {
+ p.execute(func(t Trie) {
+ if _, err := t.GetAccount(addr); err != nil {
+ log.Error("account prefetching failed", "address", addr, "err", err)
+ }
+ })
+}
+
+// GetStorage is the storage equivalent of [subfetcherPool.GetAccount].
+func (p *subfetcherPool) GetStorage(addr common.Address, key []byte) {
+ p.execute(func(t Trie) {
+ if _, err := t.GetStorage(addr, key); err != nil {
+ log.Error("storage prefetching failed", "address", addr, "key", key, "err", err)
+ }
+ })
+}
diff --git a/core/state/trie_prefetcher.libevm_test.go b/core/state/trie_prefetcher.libevm_test.go
new file mode 100644
index 000000000000..884bfba56770
--- /dev/null
+++ b/core/state/trie_prefetcher.libevm_test.go
@@ -0,0 +1,80 @@
+// Copyright 2024 the libevm authors.
+//
+// The libevm additions to go-ethereum are free software: you can redistribute
+// them and/or modify them under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation, either version 3 of the License,
+// or (at your option) any later version.
+//
+// The libevm additions are distributed in the hope that they will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+// General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see
+// .
+
+package state
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+
+ "github.com/ava-labs/libevm/common"
+)
+
+type synchronisingWorkerPool struct {
+ t *testing.T
+ executed, unblock chan struct{}
+ done bool
+ preconditionsToStopPrefetcher int
+}
+
+var _ WorkerPool = (*synchronisingWorkerPool)(nil)
+
+func (p *synchronisingWorkerPool) Execute(fn func()) {
+ fn()
+ select {
+ case <-p.executed:
+ default:
+ close(p.executed)
+ }
+
+ <-p.unblock
+ assert.False(p.t, p.done, "Done() called before Execute() returns")
+ p.preconditionsToStopPrefetcher++
+}
+
+func (p *synchronisingWorkerPool) Done() {
+ p.done = true
+ p.preconditionsToStopPrefetcher++
+}
+
+func TestStopPrefetcherWaitsOnWorkers(t *testing.T) {
+ pool := &synchronisingWorkerPool{
+ t: t,
+ executed: make(chan struct{}),
+ unblock: make(chan struct{}),
+ }
+ opt := WithWorkerPools(func() WorkerPool { return pool })
+
+ db := filledStateDB()
+ db.prefetcher = newTriePrefetcher(db.db, db.originalRoot, "", opt)
+ db.prefetcher.prefetch(common.Hash{}, common.Hash{}, common.Address{}, [][]byte{{}})
+
+ go func() {
+ <-pool.executed
+ // Sleep otherwise there is a small chance that we close pool.unblock
+ // between db.StopPrefetcher() returning and the assertion.
+ time.Sleep(time.Second)
+ close(pool.unblock)
+ }()
+
+ <-pool.executed
+ db.StopPrefetcher()
+ // If this errors then either Execute() hadn't returned or Done() wasn't
+ // called.
+ assert.Equalf(t, 2, pool.preconditionsToStopPrefetcher, "%T.StopPrefetcher() returned early", db)
+}
diff --git a/libevm/sync/sync.go b/libevm/sync/sync.go
new file mode 100644
index 000000000000..991a3a875ee7
--- /dev/null
+++ b/libevm/sync/sync.go
@@ -0,0 +1,52 @@
+// Copyright 2024 the libevm authors.
+//
+// The libevm additions to go-ethereum are free software: you can redistribute
+// them and/or modify them under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation, either version 3 of the License,
+// or (at your option) any later version.
+//
+// The libevm additions are distributed in the hope that they will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+// General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see
+// .
+
+// Package sync extends the standard library's sync package.
+package sync
+
+import "sync"
+
+// Aliases of stdlib sync's types to avoid having to import it alongside this
+// package.
+type (
+ Cond = sync.Cond
+ Locker = sync.Locker
+ Map = sync.Map
+ Mutex = sync.Mutex
+ Once = sync.Once
+ RWMutex = sync.RWMutex
+ WaitGroup = sync.WaitGroup
+)
+
+// A Pool is a type-safe wrapper around [sync.Pool].
+type Pool[T any] struct {
+ New func() T
+ pool sync.Pool
+ once Once
+}
+
+// Get is equivalent to [sync.Pool.Get].
+func (p *Pool[T]) Get() T {
+ p.once.Do(func() { // Do() guarantees at least once, not just only once
+ p.pool.New = func() any { return p.New() }
+ })
+ return p.pool.Get().(T) //nolint:forcetypeassert
+}
+
+// Put is equivalent to [sync.Pool.Put].
+func (p *Pool[T]) Put(t T) {
+ p.pool.Put(t)
+}