From e62084281ac0b92c56b7b1697be82ad5107488c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Fri, 31 May 2024 09:18:30 +0200 Subject: [PATCH 01/21] basic integration --- db/migrations/0002_census3.sql | 8 ++++++ db/queries/tokens.sql | 15 ++++++++++- go.mod | 1 + go.sum | 2 ++ scanner/providers/web3/erc20_provider.go | 32 ++++++++++++++++++++++++ scanner/providers/web3/errors.go | 1 + scanner/providers/web3/web3_provider.go | 2 ++ scanner/scanner.go | 3 +++ 8 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 db/migrations/0002_census3.sql diff --git a/db/migrations/0002_census3.sql b/db/migrations/0002_census3.sql new file mode 100644 index 00000000..55e86c5e --- /dev/null +++ b/db/migrations/0002_census3.sql @@ -0,0 +1,8 @@ +-- +goose Up +CREATE TABLE token_updates ( + id BLOB NOT NULL, + chain_id INTEGER NOT NULL DEFAULT 0, + filter_gob BLOB NOT NULL DEFAULT '', + last_block BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id, chain_id) +); \ No newline at end of file diff --git a/db/queries/tokens.sql b/db/queries/tokens.sql index f8ea29b4..72bc8be7 100644 --- a/db/queries/tokens.sql +++ b/db/queries/tokens.sql @@ -112,4 +112,17 @@ FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ? HAVING num_of_tokens = 1; -- name: DeleteToken :execresult -DELETE FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ?; \ No newline at end of file +DELETE FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ?; + +-- name: SetTokenUpdate :execresult +INSERT INTO token_updates (id, chain_id, filter_gob, last_block) VALUES (?, ?, ?, ?); + +-- name: UpdateTokenUpdate :execresult +UPDATE token_updates +SET last_block = sqlc.arg(last_block), + filter_gob = sqlc.arg(filter_gob) +WHERE id = sqlc.arg(id) + AND chain_id = sqlc.arg(chain_id); + +-- name: GetTokenUpdate :one +SELECT * FROM token_updates WHERE id = ? AND chain_id = ?;
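The filter_gob column above is meant to hold a gob-encoded bloom filter next to the last scanned block. A minimal sketch of the intended round-trip, assuming the tylertreat/BoomFilters API this patch adds as a dependency (the helper names are illustrative, not part of the patch):

import boom "github.com/tylertreat/BoomFilters"

// encodeFilter serializes a scalable bloom filter for the filter_gob BLOB
// column; GobEncode is the same call the later filter abstraction uses.
func encodeFilter(f *boom.ScalableBloomFilter) ([]byte, error) {
	return f.GobEncode()
}

// decodeFilter restores a filter previously stored with SetTokenUpdate.
func decodeFilter(blob []byte) (*boom.ScalableBloomFilter, error) {
	f := boom.NewDefaultScalableBloomFilter(0.01) // 1% target false-positive rate
	if err := f.GobDecode(blob); err != nil {
		return nil, err
	}
	return f, nil
}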
diff --git a/go.mod b/go.mod index 52af3278..cd9d7600 100644 --- a/go.mod +++ b/go.mod @@ -249,6 +249,7 @@ require ( github.com/syndtr/goleveldb v1.0.1-0.20220614013038-64ee5596c38a // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect + github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 // indirect github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb // indirect github.com/valyala/fastrand v1.1.0 // indirect github.com/valyala/histogram v1.2.0 // indirect diff --git a/go.sum b/go.sum index d988743f..121843f4 100644 --- a/go.sum +++ b/go.sum @@ -1141,6 +1141,8 @@ github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDH github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM= github.com/tyler-smith/go-bip39 v1.1.0 h1:5eUemwrMargf3BSLRRCalXT93Ns6pQJIjYQN2nyfOP8= github.com/tyler-smith/go-bip39 v1.1.0/go.mod h1:gUYDtqQw1JS3ZJ8UWVcGTGqqr6YIN3CWg+kkNaLt55U= +github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 h1:QEePdg0ty2r0t1+qwfZmQ4OOl/MB2UXIeJSpIZv56lg= +github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43/go.mod h1:OYRfF6eb5wY9VRFkXJH8FFBi3plw2v+giaIu7P054pM= github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb h1:Ywfo8sUltxogBpFuMOFRrrSifO788kAFxmvVw31PtQQ= github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb/go.mod h1:ikPs9bRWicNw3S7XpJ8sK/smGwU9WcSVU3dy9qahYBM= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 16ceabbe..51ffb7c3 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -2,6 +2,7 @@ package web3 import ( "context" + "crypto/sha256" "errors" "fmt" "math/big" @@ -9,6 +10,8 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + boom "github.com/tylertreat/BoomFilters" erc20 "github.com/vocdoni/census3/contracts/erc/erc20" "github.com/vocdoni/census3/helpers/web3" "github.com/vocdoni/census3/scanner/providers" @@ -29,6 +32,7 @@ type ERC20HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool + filter boom.Filter } func (p *ERC20HolderProvider) Init(_ context.Context, iconf any) error { @@ -38,6 +42,7 @@ func (p *ERC20HolderProvider) Init(_ context.Context, iconf any) error { return errors.New("invalid config type, it must be Web3ProviderConfig") } p.endpoints = conf.Endpoints + p.filter = conf.filter p.synced.Store(false) // set the reference if the address and chainID are defined in the config if conf.HexAddress != "" && conf.ChainID > 0 { @@ -155,6 +160,15 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { + // check if the log has been already processed + processed, err := p.isLogAlreadyProcessed(currentLog) + if err != nil { + return nil, newTransfers, lastBlock, false, big.NewInt(0), + errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + } + if processed { + continue + } logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, newTransfers, lastBlock, false, big.NewInt(0), @@ -339,3 +353,21 @@ func (p *ERC20HolderProvider) IconURI(_ []byte) (string, error) { func 
(p *ERC20HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[common.Address]*big.Int, error) { return data, nil } + +// isLogAlreadyProcessed returns true if the log with the given block number and +// log index has been already processed. It uses a filter to check if the log +// has been processed. To identify the log, it creates a hash with the log +// data, block number and log index. It returns true if the log has been already processed +// or false if it has not been processed yet. If some error occurs, it returns +// false and the error. +func (p *ERC20HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { + // get an identifier of each transfer: + // data-blockNumber-logIndex + transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + hashFn := sha256.New() + if _, err := hashFn.Write([]byte(transferID)); err != nil { + return false, err + } + hID := hashFn.Sum(nil) + return p.filter.TestAndAdd(hID), nil +} diff --git a/scanner/providers/web3/errors.go b/scanner/providers/web3/errors.go index aa64f591..b08122d3 100644 --- a/scanner/providers/web3/errors.go +++ b/scanner/providers/web3/errors.go @@ -9,5 +9,6 @@ var ( ErrScanningTokenLogs = fmt.Errorf("error scanning token logs") ErrTooManyRequests = fmt.Errorf("web3 endpoint returns too many requests") ErrParsingTokenLogs = fmt.Errorf("error parsing token logs") + ErrCheckingProcessedLogs = fmt.Errorf("error checking processed logs") ErrGettingTotalSupply = fmt.Errorf("error getting total supply") ) diff --git a/scanner/providers/web3/web3_provider.go b/scanner/providers/web3/web3_provider.go index 67111255..29350898 100644 --- a/scanner/providers/web3/web3_provider.go +++ b/scanner/providers/web3/web3_provider.go @@ -11,6 +11,7 @@ import ( "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" + boom "github.com/tylertreat/BoomFilters" "github.com/vocdoni/census3/helpers/web3" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/db" @@ -27,6 +28,7 @@ type Web3ProviderConfig struct { Web3ProviderRef Endpoints *web3.Web3Pool DB *db.Database + filter boom.Filter } // creationBlock function returns the block number of the creation of a contract diff --git a/scanner/scanner.go b/scanner/scanner.go index 415c5215..082968f2 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -316,6 +316,9 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( qtx := s.db.QueriesRW.WithTx(tx) // if the provider is not an external one, instance the current token if !provider.IsExternal() { + // load filter of the token from the database + + // set the token reference in the provider if err := provider.SetRef(web3provider.Web3ProviderRef{ HexAddress: token.Address.Hex(), ChainID: token.ChainID,
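Patch 01 deduplicates transfer logs through the provider's bloom filter: each log gets a stable identifier hashed from its data, block number and log index, and TestAndAdd reports whether that identifier was already present while inserting it. A condensed, standalone sketch of the same technique (mirroring isLogAlreadyProcessed above):

import (
	"crypto/sha256"
	"fmt"

	"github.com/ethereum/go-ethereum/core/types"
	boom "github.com/tylertreat/BoomFilters"
)

// seenBefore derives the transfer ID the same way as isLogAlreadyProcessed
// and tests-and-inserts it in one call: true means the log was already seen.
func seenBefore(filter boom.Filter, l types.Log) bool {
	id := fmt.Sprintf("%x-%d-%d", l.Data, l.BlockNumber, l.Index)
	sum := sha256.Sum256([]byte(id))
	return filter.TestAndAdd(sum[:])
}

The trade-off is the usual bloom-filter one: false positives are possible, so a small fraction of genuine transfers can be skipped as already processed, while a recorded log is never reprocessed once the filter is persisted.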
From 5805ce6ce43ff7bee94a712d79b26c33496ea991 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Mon, 3 Jun 2024 16:44:49 +0200 Subject: [PATCH 02/21] initial updater, not heavily tested and no comments yet --- db/migrations/0002_census3.sql | 8 - db/queries/tokens.sql | 13 -- go.mod | 3 +- go.sum | 2 + scanner/helpers.go | 134 ++++++++++++++++ scanner/providers/web3/erc20_provider.go | 3 +- scanner/scanner.go | 147 +++-------------- scanner/updater.go | 191 +++++++++++++++++++++++ 8 files changed, 355 insertions(+), 146 deletions(-) delete mode 100644 db/migrations/0002_census3.sql create mode 100644 scanner/helpers.go create mode 100644 scanner/updater.go diff --git a/db/migrations/0002_census3.sql b/db/migrations/0002_census3.sql deleted file mode 100644 index 55e86c5e..00000000 --- a/db/migrations/0002_census3.sql +++ /dev/null @@ -1,8 +0,0 @@ --- +goose Up -CREATE TABLE token_updates ( - id BLOB NOT NULL, - chain_id INTEGER NOT NULL DEFAULT 0, - filter_gob BLOB NOT NULL DEFAULT '', - last_block BIGINT NOT NULL DEFAULT 0, - PRIMARY KEY (id, chain_id) -); \ No newline at end of file diff --git a/db/queries/tokens.sql b/db/queries/tokens.sql index 72bc8be7..3e419f82 100644 --- a/db/queries/tokens.sql +++ b/db/queries/tokens.sql @@ -113,16 +113,3 @@ HAVING num_of_tokens = 1; -- name: DeleteToken :execresult DELETE FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ?; - --- name: SetTokenUpdate :execresult -INSERT INTO token_updates (id, chain_id, filter_gob, last_block) VALUES (?, ?, ?, ?); - --- name: UpdateTokenUpdate :execresult -UPDATE token_updates -SET last_block = sqlc.arg(last_block), - filter_gob = sqlc.arg(filter_gob) -WHERE id = sqlc.arg(id) - AND chain_id = sqlc.arg(chain_id); - --- name: GetTokenUpdate :one -SELECT * FROM token_updates WHERE id = ? AND chain_id = ?; diff --git a/go.mod b/go.mod index cd9d7600..0265776e 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/pressly/goose/v3 v3.10.0 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.17.0 + github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 go.vocdoni.io/dvote v1.10.2-0.20240312161355-15c06c28e91c go.vocdoni.io/proto v1.15.6-0.20240209115732-27836380ccae ) @@ -53,6 +54,7 @@ require ( github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect github.com/crate-crypto/go-kzg-4844 v0.7.0 // indirect github.com/cskr/pubsub v1.0.2 // indirect + github.com/d4l3k/messagediff v1.2.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect github.com/deckarep/golang-set/v2 v2.1.0 // indirect @@ -249,7 +251,6 @@ require ( github.com/syndtr/goleveldb v1.0.1-0.20220614013038-64ee5596c38a // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect - github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 // indirect github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb // indirect github.com/valyala/fastrand v1.1.0 // indirect github.com/valyala/histogram v1.2.0 // indirect diff --git a/go.sum b/go.sum index 121843f4..cb3e9b45 100644 --- a/go.sum +++ b/go.sum @@ -186,6 +186,8 @@ github.com/crate-crypto/go-kzg-4844 v0.7.0/go.mod h1:1kMhvPgI0Ky3yIa+9lFySEBUBXk github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0= github.com/cskr/pubsub v1.0.2/go.mod h1:/8MzYXk/NJAz782G8RPkFzXTZVu63VotefPnR9TIRis= +github.com/d4l3k/messagediff v1.2.1 h1:ZcAIMYsUg0EAp9X+tt8/enBE/Q8Yd5kzPynLyKptt9U= +github.com/d4l3k/messagediff v1.2.1/go.mod h1:Oozbb1TVXFac9FtSIxHBMnBCq2qeH/2KkEQxENCrlLo= github.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/scanner/helpers.go b/scanner/helpers.go new file mode 100644 index 00000000..2eea3a92 --- /dev/null +++ b/scanner/helpers.go @@ -0,0 +1,134 @@ +package scanner + +import ( + "context" + "database/sql" + "errors" + "fmt" + "math/big" +
"github.com/ethereum/go-ethereum/common" + "github.com/vocdoni/census3/db" + "github.com/vocdoni/census3/db/annotations" + queries "github.com/vocdoni/census3/db/sqlc" + "go.vocdoni.io/dvote/log" +) + +// SaveHolders saves the given holders in the database. It updates the token +// synced status if it is different from the received one. Then, it creates, +// updates or deletes the token holders in the database depending on the +// calculated balance. +// WARNING: the following code could produce holders with negative balances +// in the database. This is because the scanner does not know if the token +// holder is a contract or not, so it does not know if the balance is +// correct or not. The scanner assumes that the balance is correct and +// updates it in the database: +// 1. To get the correct holders from the database you must filter the +// holders with negative balances. +// 2. To get the correct balances you must use the contract methods to get +// the balances of the holders. +func SaveHolders(db *db.DB, ctx context.Context, token ScannerToken, + holders map[common.Address]*big.Int, newTransfers, lastBlock uint64, + synced bool, totalSupply *big.Int, +) (int, int, error) { + // create a tx to use it in the following queries + tx, err := db.RW.BeginTx(ctx, nil) + if err != nil { + return 0, 0, err + } + defer func() { + if err := tx.Rollback(); err != nil && !errors.Is(sql.ErrTxDone, err) { + log.Errorf("error rolling back tx: %v, token=%s chainID=%d externalID=%s", + err, token.Address.Hex(), token.ChainID, token.ExternalID) + } + }() + qtx := db.QueriesRW.WithTx(tx) + // create, update or delete token holders + created, updated := 0, 0 + for addr, balance := range holders { + // get the current token holder from the database + currentTokenHolder, err := qtx.GetTokenHolderEvenZero(ctx, queries.GetTokenHolderEvenZeroParams{ + TokenID: token.Address.Bytes(), + ChainID: token.ChainID, + ExternalID: token.ExternalID, + HolderID: addr.Bytes(), + }) + if err != nil { + if !errors.Is(sql.ErrNoRows, err) { + return created, updated, err + } + // if the token holder not exists, create it + _, err = qtx.CreateTokenHolder(ctx, queries.CreateTokenHolderParams{ + TokenID: token.Address.Bytes(), + ChainID: token.ChainID, + ExternalID: token.ExternalID, + HolderID: addr.Bytes(), + BlockID: lastBlock, + Balance: balance.String(), + }) + if err != nil { + return created, updated, err + } + created++ + continue + } + // parse the current balance of the holder + currentBalance, ok := new(big.Int).SetString(currentTokenHolder.Balance, 10) + if !ok { + return created, updated, fmt.Errorf("error parsing current token holder balance") + } + // if both balances are zero, continue with the next holder to prevent + // UNIQUES constraint errors + if balance.Cmp(big.NewInt(0)) == 0 && currentBalance.Cmp(big.NewInt(0)) == 0 { + continue + } + // calculate the new balance of the holder by adding the current balance + // and the new balance + newBalance := new(big.Int).Add(currentBalance, balance) + // update the token holder in the database with the new balance. + // WANING: the balance could be negative so you must filter the holders + // with negative balances to get the correct holders from the database. 
diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 51ffb7c3..57f715bb 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -156,7 +156,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } // encode the number of new transfers - newTransfers := uint64(len(logs)) + newTransfers := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { @@ -169,6 +169,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro if processed { continue } + newTransfers++ logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, newTransfers, lastBlock, false, big.NewInt(0), diff --git a/scanner/scanner.go b/scanner/scanner.go index 082968f2..81ccf5ac 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -14,7 +14,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/vocdoni/census3/db" - "github.com/vocdoni/census3/db/annotations" queries "github.com/vocdoni/census3/db/sqlc" "github.com/vocdoni/census3/helpers/web3" "github.com/vocdoni/census3/scanner/providers/manager" @@ -136,6 +135,7 @@ func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { atSyncGlobal.Store(false) } // save the new token holders + s.updateInternalTokenStatus(token, lastBlock, synced, totalSupply) if err = s.SaveHolders(ctx, token, holders, newTransfers, lastBlock, synced, totalSupply); err != nil { if strings.Contains(err.Error(), "database is closed") { return @@ -396,29 +396,8 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( return provider.HoldersBalances(ctx, []byte(token.ExternalID), token.LastBlock) } -// SaveHolders saves the given holders in the database. It updates the token -// synced status if it is different from the received one. Then, it creates, -// updates or deletes the token holders in the database depending on the -// calculated balance.
-// WARNING: the following code could produce holders with negative balances -// in the database. This is because the scanner does not know if the token -// holder is a contract or not, so it does not know if the balance is -// correct or not. The scanner assumes that the balance is correct and -// updates it in the database: -// 1. To get the correct holders from the database you must filter the -// holders with negative balances. -// 2. To get the correct balances you must use the contract methods to get -// the balances of the holders. -func (s *Scanner) SaveHolders(ctx context.Context, token ScannerToken, - holders map[common.Address]*big.Int, newTransfers, lastBlock uint64, - synced bool, totalSupply *big.Int, -) error { - log.Infow("saving token status and holders", - "token", token.Address.Hex(), - "chainID", token.ChainID, - "externalID", token.ExternalID, - "block", lastBlock, - "holders", len(holders)) +func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, + synced bool, totalSupply *big.Int) { s.tokensMtx.Lock() for i, t := range s.tokens { if t.Address == token.Address && t.ChainID == token.ChainID && t.ExternalID == token.ExternalID { @@ -432,79 +411,25 @@ func (s *Scanner) SaveHolders(ctx context.Context, token ScannerToken, } } s.tokensMtx.Unlock() +} + +// SaveHolders saves the given holders in the database. It calls the SaveHolders +// helper function to save the holders and the token status in the database. It +// prints the number of created and updated token holders if there are any, else +// it prints that there are no holders to save. If some error occurs, it returns +// the error. +func (s *Scanner) SaveHolders(ctx context.Context, token ScannerToken, + holders map[common.Address]*big.Int, newTransfers, lastBlock uint64, + synced bool, totalSupply *big.Int, +) error { + log.Infow("saving token status and holders", + "token", token.Address.Hex(), + "chainID", token.ChainID, + "externalID", token.ExternalID, + "block", lastBlock, + "holders", len(holders)) internalCtx, cancel := context.WithTimeout(ctx, SAVE_TIMEOUT) defer cancel() - // create a tx to use it in the following queries - tx, err := s.db.RW.BeginTx(internalCtx, nil) - if err != nil { - return err - } - defer func() { - if err := tx.Rollback(); err != nil && !errors.Is(sql.ErrTxDone, err) { - log.Errorf("error rolling back tx: %v, token=%s chainID=%d externalID=%s", - err, token.Address.Hex(), token.ChainID, token.ExternalID) - } - }() - qtx := s.db.QueriesRW.WithTx(tx) - // create, update or delete token holders - created, updated := 0, 0 - for addr, balance := range holders { - // get the current token holder from the database - currentTokenHolder, err := qtx.GetTokenHolderEvenZero(ctx, queries.GetTokenHolderEvenZeroParams{ - TokenID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - HolderID: addr.Bytes(), - }) - if err != nil { - if !errors.Is(sql.ErrNoRows, err) { - return err - } - // if the token holder not exists, create it - _, err = qtx.CreateTokenHolder(ctx, queries.CreateTokenHolderParams{ - TokenID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - HolderID: addr.Bytes(), - BlockID: lastBlock, - Balance: balance.String(), - }) - if err != nil { - return err - } - created++ - continue - } - // parse the current balance of the holder - currentBalance, ok := new(big.Int).SetString(currentTokenHolder.Balance, 10) - if !ok { - return fmt.Errorf("error parsing current token holder balance") - } - // if both 
balances are zero, continue with the next holder to prevent - // UNIQUES constraint errors - if balance.Cmp(big.NewInt(0)) == 0 && currentBalance.Cmp(big.NewInt(0)) == 0 { - continue - } - // calculate the new balance of the holder by adding the current balance - // and the new balance - newBalance := new(big.Int).Add(currentBalance, balance) - // update the token holder in the database with the new balance. - // WANING: the balance could be negative so you must filter the holders - // with negative balances to get the correct holders from the database. - _, err = qtx.UpdateTokenHolderBalance(ctx, queries.UpdateTokenHolderBalanceParams{ - TokenID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - HolderID: addr.Bytes(), - BlockID: currentTokenHolder.BlockID, - NewBlockID: lastBlock, - Balance: newBalance.String(), - }) - if err != nil { - return fmt.Errorf("error updating token holder: %w", err) - } - updated++ - } // print the number of created and updated token holders if there are any, // else, print that there are no holders to save if len(holders) == 0 { @@ -513,6 +438,10 @@ func (s *Scanner) SaveHolders(ctx context.Context, token ScannerToken, "chainID", token.ChainID, "externalID", token.ExternalID) } else { + created, updated, err := SaveHolders(s.db, internalCtx, token, holders, newTransfers, lastBlock, synced, totalSupply) + if err != nil { + return err + } log.Debugw("committing token holders", "token", token.Address.Hex(), "chainID", token.ChainID, @@ -522,30 +451,6 @@ func (s *Scanner) SaveHolders(ctx context.Context, token ScannerToken, "created", created, "updated", updated) } - // get the token info from the database to update ir - tokenInfo, err := qtx.GetToken(internalCtx, - queries.GetTokenParams{ - ID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - }) - if err != nil { - return err - } - // update the synced status, last block, the number of analysed transfers - // (for debug) and the total supply in the database - _, err = qtx.UpdateTokenStatus(internalCtx, queries.UpdateTokenStatusParams{ - ID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - Synced: synced, - LastBlock: int64(lastBlock), - AnalysedTransfers: tokenInfo.AnalysedTransfers + int64(newTransfers), - TotalSupply: annotations.BigInt(token.totalSupply.String()), - }) - if err != nil { - return err - } log.Debugw("token status saved", "synced", synced, "token", token.Address.Hex(), @@ -553,10 +458,6 @@ func (s *Scanner) SaveHolders(ctx context.Context, token ScannerToken, "externalID", token.ExternalID, "totalSupply", token.totalSupply.String(), "block", lastBlock) - // close the database tx and commit it - if err := tx.Commit(); err != nil { - return err - } return nil } diff --git a/scanner/updater.go b/scanner/updater.go new file mode 100644 index 00000000..bcb44735 --- /dev/null +++ b/scanner/updater.go @@ -0,0 +1,191 @@ +package scanner + +import ( + "context" + "fmt" + "math/big" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/vocdoni/census3/db" + queries "github.com/vocdoni/census3/db/sqlc" + "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner/providers/manager" + web3provider "github.com/vocdoni/census3/scanner/providers/web3" + "go.vocdoni.io/dvote/log" + "go.vocdoni.io/dvote/util" +) + +const ( + coolDown = 15 * time.Second + UPDATE_TIMEOUT = 5 * time.Minute +) + +type UpdateRequest struct { + Address common.Address + ChainID uint64 + Type 
uint64 + CreationBlock uint64 + EndBlock uint64 + LastBlock uint64 +} + +type Updater struct { + ctx context.Context + cancel context.CancelFunc + + db *db.DB + networks *web3.Web3Pool + providers *manager.ProviderManager + queue map[string]UpdateRequest + queueMtx sync.Mutex + waiter sync.WaitGroup +} + +func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager) *Updater { + return &Updater{ + db: db, + networks: networks, + providers: pm, + queue: make(map[string]UpdateRequest), + } +} + +func (u *Updater) Start(ctx context.Context) { + u.ctx, u.cancel = context.WithCancel(ctx) + + u.waiter.Add(1) + go func() { + defer u.waiter.Done() + for { + select { + case <-u.ctx.Done(): + return + default: + if u.IsEmpty() { + time.Sleep(coolDown) + continue + } + if err := u.process(); err != nil { + log.Errorf("error processing update request: %v", err) + } + } + } + }() +} + +func (u *Updater) Stop() { + u.cancel() + u.waiter.Wait() +} + +func (u *Updater) RequestStatus(id string) UpdateRequest { + u.queueMtx.Lock() + defer u.queueMtx.Unlock() + req := u.queue[id] + if req.LastBlock >= req.EndBlock { + delete(u.queue, id) + } + return req +} + +func (u *Updater) AddRequest(req UpdateRequest) { + if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { + return + } + if req.CreationBlock >= req.EndBlock || req.LastBlock >= req.EndBlock { + return + } + id := util.RandomHex(16) + u.queueMtx.Lock() + defer u.queueMtx.Unlock() + u.queue[id] = req +} + +func (u *Updater) IsEmpty() bool { + u.queueMtx.Lock() + defer u.queueMtx.Unlock() + return len(u.queue) == 0 +} + +func (u *Updater) process() error { + // make a copy of current queue + u.queueMtx.Lock() + queue := map[string]UpdateRequest{} + for k, v := range u.queue { + queue[k] = v + } + u.queueMtx.Unlock() + // iterate over the current queue items + for id, req := range queue { + internalCtx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) + defer cancel() + // get the provider by token type + provider, err := u.providers.GetProvider(u.ctx, req.Type) + if err != nil { + return err + } + // if the token is an external token, return an error + if provider.IsExternal() { + return fmt.Errorf("external providers are not supported yet") + } + // set the reference of the token to update in the provider + if err := provider.SetRef(web3provider.Web3ProviderRef{ + HexAddress: req.Address.Hex(), + ChainID: req.ChainID, + CreationBlock: req.CreationBlock, + }); err != nil { + return err + } + // get current token holders from database + results, err := u.db.QueriesRO.ListTokenHolders(internalCtx, queries.ListTokenHoldersParams{ + TokenID: req.Address.Bytes(), + ChainID: req.ChainID, + }) + if err != nil { + return err + } + currentHolders := map[common.Address]*big.Int{} + for _, holder := range results { + bBalance, ok := new(big.Int).SetString(holder.Balance, 10) + if !ok { + return fmt.Errorf("error parsing holder balance from database") + } + currentHolders[common.Address(holder.HolderID)] = bBalance + } + // set the current holders in the provider + if err := provider.SetLastBalances(internalCtx, nil, currentHolders, req.LastBlock); err != nil { + return err + } + // get range balances from the provider + rangeBalances, newTransfers, lastBlock, synced, totalSupply, err := provider.HoldersBalances(internalCtx, nil, req.EndBlock) + if err != nil { + return err + } + // update the request's last scanned block + if synced { + req.LastBlock = req.EndBlock + } else { + req.LastBlock = lastBlock + } + // save the new balances in the database + created, updated, err := SaveHolders(u.db, internalCtx, ScannerToken{ + Address: req.Address, + ChainID: req.ChainID, + }, rangeBalances, newTransfers, lastBlock, synced, totalSupply) + if err != nil { + return err + } + log.Debugw("missing token holders balances updated", + "token", req.Address.Hex(), + "chainID", req.ChainID, + "created", created, + "updated", updated) + // update the request in the queue + u.queueMtx.Lock() + u.queue[id] = req + u.queueMtx.Unlock() + } + return nil +}
From e2dabf51d2becf1b4c9729074b8c9d0dc5ef6b98 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Tue, 4 Jun 2024 10:32:24 +0200 Subject: [PATCH 03/21] new filter abstraction to load and save filters in the filesystem --- cmd/census3/main.go | 5 +- scanner/filter.go | 65 ++++++++++++++++++++++++ scanner/providers/web3/erc20_provider.go | 7 ++- scanner/providers/web3/web3_provider.go | 2 +- scanner/scanner.go | 18 ++++++- scanner/updater.go | 4 +- 6 files changed, 95 insertions(+), 6 deletions(-) create mode 100644 scanner/filter.go diff --git a/cmd/census3/main.go b/cmd/census3/main.go index fa89e72c..b1741f56 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -39,6 +39,7 @@ type Census3Config struct { adminToken string initialTokens string farcaster bool + filtersPath string } func main() { @@ -135,6 +136,8 @@ func main() { panic(err) } config.farcaster = pviper.GetBool("farcaster") + // set the filters path into the config + config.filtersPath = config.dataDir + "/filters" // init logger log.Init(config.logLevel, "stdout", nil) // check if the web3 providers are defined @@ -195,7 +198,7 @@ func main() { }) } // start the holder scanner with the database and the provider manager - hc := scanner.NewScanner(database, w3p, pm, config.scannerCoolDown) + hc := scanner.NewScanner(database, w3p, pm, config.scannerCoolDown, config.filtersPath) // if the admin token is not defined, generate a random one if config.adminToken != "" { if _, err := uuid.Parse(config.adminToken); err != nil { diff --git a/scanner/filter.go b/scanner/filter.go new file mode 100644 index 00000000..4b3acfcd --- /dev/null +++ b/scanner/filter.go @@ -0,0 +1,65 @@ +package scanner + +import ( + "fmt" + "os" + + "github.com/ethereum/go-ethereum/common" + boom "github.com/tylertreat/BoomFilters" +) + +type TokenFilter struct { + filter *boom.ScalableBloomFilter + address common.Address + chainID uint64 + path string +} + +func LoadFilter(basePath string, address common.Address, chainID uint64) (*TokenFilter, error) { + // compose the filter path: path/<address>-<chainID>.filter + // by default, create an empty filter + tf := &TokenFilter{ + filter: boom.NewDefaultScalableBloomFilter(0.01), + address: address, + chainID: chainID, + path: fmt.Sprintf("%s/%s-%d.filter", basePath, address.Hex(), chainID), + } + // read the filter from the file; if it does not exist, create a new one + bFilter, err := os.ReadFile(tf.path) + if err != nil { + if !os.IsNotExist(err) { + return nil, err + } + return tf, nil + } + // decode the filter from the file content + if err := tf.filter.GobDecode(bFilter); err != nil { + return nil, err + } + return tf, nil +} + +func (tf *TokenFilter) Add(key []byte) { + tf.filter.Add(key) +} + +func (tf *TokenFilter) Test(key []byte) bool { + return tf.filter.Test(key) +} + +func (tf *TokenFilter) TestAndAdd(key []byte) bool { + return tf.filter.TestAndAdd(key) +} + +func (tf *TokenFilter) Commit() error { + // encode the filter + bFilter, err := tf.filter.GobEncode() + if err != nil { + return err + } + // write the filter to the file + if err := os.WriteFile(tf.path, bFilter, 0644); err != nil { + return err + } + return nil +}
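The scanner wires this type in below: it loads the filter before scanning and commits it when the iteration finishes, so the on-disk copy only catches up with the in-memory filter at Commit time. A usage sketch of the lifecycle (hashOf stands in for the sha256 transfer ID shown in earlier commits):

filter, err := LoadFilter(filtersPath, token.Address, token.ChainID)
if err != nil {
	return err
}
// persist whatever was added during this iteration
defer func() {
	if err := filter.Commit(); err != nil {
		log.Error(err)
	}
}()
for _, l := range logs {
	if filter.TestAndAdd(hashOf(l)) {
		continue // already processed in a previous pass
	}
	// ... apply the transfer to the balances ...
}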
diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 57f715bb..13188f12 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -42,7 +42,6 @@ func (p *ERC20HolderProvider) Init(_ context.Context, iconf any) error { return errors.New("invalid config type, it must be Web3ProviderConfig") } p.endpoints = conf.Endpoints - p.filter = conf.filter p.synced.Store(false) // set the reference if the address and chainID are defined in the config if conf.HexAddress != "" && conf.ChainID > 0 { @@ -70,6 +69,8 @@ func (p *ERC20HolderProvider) SetRef(iref any) error { if err != nil { return fmt.Errorf("error getting web3 client for the given chainID: %w", err) } + // set the filter provided in the reference + p.filter = ref.Filter // set the client, parse the address and initialize the contract p.address = common.HexToAddress(ref.HexAddress) if p.contract, err = erc20.NewERC20Contract(p.address, p.client); err != nil { @@ -362,6 +363,10 @@ // or false if it has not been processed yet. If some error occurs, it returns // false and the error.
func (p *ERC20HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { + // if the filter is not defined, return false + if p.filter == nil { + return false, nil + } // get an identifier of each transfer: // data-blockNumber-logIndex transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) diff --git a/scanner/providers/web3/web3_provider.go b/scanner/providers/web3/web3_provider.go index 29350898..b8503b5d 100644 --- a/scanner/providers/web3/web3_provider.go +++ b/scanner/providers/web3/web3_provider.go @@ -22,13 +22,13 @@ type Web3ProviderRef struct { HexAddress string ChainID uint64 CreationBlock uint64 + Filter boom.Filter } type Web3ProviderConfig struct { Web3ProviderRef Endpoints *web3.Web3Pool DB *db.Database - filter boom.Filter } // creationBlock function returns the block number of the creation of a contract diff --git a/scanner/scanner.go b/scanner/scanner.go index 81ccf5ac..cb999136 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -46,6 +46,7 @@ type Scanner struct { networks *web3.Web3Pool providerManager *manager.ProviderManager coolDown time.Duration + filtersPath string tokens []*ScannerToken tokensMtx sync.Mutex @@ -56,12 +57,15 @@ // NewScanner returns a new scanner instance with the required parameters // initialized. -func NewScanner(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, coolDown time.Duration) *Scanner { +func NewScanner(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, + coolDown time.Duration, filtersPath string, +) *Scanner { return &Scanner{ db: db, networks: networks, providerManager: pm, coolDown: coolDown, + filtersPath: filtersPath, tokens: []*ScannerToken{}, tokensMtx: sync.Mutex{}, waiter: sync.WaitGroup{}, @@ -317,12 +321,22 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( // if the provider is not an external one, instance the current token if !provider.IsExternal() { // load filter of the token from the database - + filter, err := LoadFilter(s.filtersPath, token.Address, token.ChainID) + if err != nil { + return nil, 0, token.LastBlock, token.Synced, nil, err + } + // commit the filter when the function finishes + defer func() { + if err := filter.Commit(); err != nil { + log.Error(err) + } + }() // set the token reference in the provider if err := provider.SetRef(web3provider.Web3ProviderRef{ HexAddress: token.Address.Hex(), ChainID: token.ChainID, CreationBlock: token.CreationBlock, + Filter: filter.filter, }); err != nil { return nil, 0, token.LastBlock, token.Synced, nil, err } diff --git a/scanner/updater.go b/scanner/updater.go index bcb44735..0aca98c4 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -158,7 +158,9 @@ func (u *Updater) process() error { if err := provider.SetLastBalances(internalCtx, nil, currentHolders, req.LastBlock); err != nil { return err } - // get range balances from the provider + // get range balances from the provider; it will iterate again + // over the transfer logs, checking if there are new transfers using the + // bloom filter associated with the token rangeBalances, newTransfers, lastBlock, synced, totalSupply, err := provider.HoldersBalances(internalCtx, nil, req.EndBlock) if err != nil { return err From ff2d01ddefaf2115252b18385b4181b811639373 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Tue, 4 Jun 2024 12:52:22 +0200 Subject: [PATCH 04/21] new endpoints to rescan a token --- api/api.go | 7 ++++ api/errors.go | 10 +++++ api/tokens.go | 89
+++++++++++++++++++++++++++++++++++++++++++++ api/types.go | 10 +++++ cmd/census3/main.go | 3 ++ scanner/updater.go | 23 ++++++++---- 6 files changed, 134 insertions(+), 8 deletions(-) diff --git a/api/api.go b/api/api.go index 0e98cfce..f9f402f7 100644 --- a/api/api.go +++ b/api/api.go @@ -18,6 +18,7 @@ import ( queries "github.com/vocdoni/census3/db/sqlc" "github.com/vocdoni/census3/helpers/queue" "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner" "github.com/vocdoni/census3/scanner/providers" web3provider "github.com/vocdoni/census3/scanner/providers/web3" "go.vocdoni.io/dvote/api/censusdb" @@ -38,8 +39,10 @@ type Census3APIConf struct { Hostname string Port int DataDir string + FiltersPath string GroupKey string Web3Providers *web3.Web3Pool + TokenUpdater *scanner.Updater HolderProviders map[uint64]providers.HolderProvider AdminToken string } @@ -56,6 +59,8 @@ type census3API struct { holderProviders map[uint64]providers.HolderProvider cache *lru.Cache[CacheKey, any] router *httprouter.HTTProuter + tokenUpdater *scanner.Updater + filtersPath string } func Init(db *db.DB, conf Census3APIConf) (*census3API, error) { @@ -69,8 +74,10 @@ func Init(db *db.DB, conf Census3APIConf) (*census3API, error) { w3p: conf.Web3Providers, queue: queue.NewBackgroundQueue(), holderProviders: conf.HolderProviders, + tokenUpdater: conf.TokenUpdater, cache: cache, router: &httprouter.HTTProuter{}, + filtersPath: conf.FiltersPath, } // get the current chainID log.Infow("starting API", "web3Providers", conf.Web3Providers.String()) diff --git a/api/errors.go b/api/errors.go index 9d9eb5f7..78fd15d9 100644 --- a/api/errors.go +++ b/api/errors.go @@ -128,6 +128,16 @@ var ( HTTPstatus: apirest.HTTPstatusNotFound, Err: fmt.Errorf("token holder not found for the token provided"), } + ErrNoSyncedToken = apirest.APIerror{ + Code: 4024, + HTTPstatus: apirest.HTTPstatusBadRequest, + Err: fmt.Errorf("token is not synced yet"), + } + ErrMalformedRescanQueueID = apirest.APIerror{ + Code: 4025, + HTTPstatus: apirest.HTTPstatusBadRequest, + Err: fmt.Errorf("malformed queue ID"), + } ErrCantCreateToken = apirest.APIerror{ Code: 5000, HTTPstatus: apirest.HTTPstatusInternalErr, diff --git a/api/tokens.go b/api/tokens.go index b0c82233..023127d4 100644 --- a/api/tokens.go +++ b/api/tokens.go @@ -17,6 +17,7 @@ import ( queries "github.com/vocdoni/census3/db/sqlc" "github.com/vocdoni/census3/helpers/lexer" "github.com/vocdoni/census3/metrics" + "github.com/vocdoni/census3/scanner" "github.com/vocdoni/census3/scanner/providers" "github.com/vocdoni/census3/scanner/providers/web3" "go.vocdoni.io/dvote/httprouter" @@ -37,6 +38,14 @@ func (capi *census3API) initTokenHandlers() error { api.MethodAccessTypePublic, capi.getToken); err != nil { return err } + if err := capi.endpoint.RegisterMethod("/tokens/rescan/{tokenID}", "POST", + api.MethodAccessTypeAdmin, capi.rescanToken); err != nil { + return err + } + if err := capi.endpoint.RegisterMethod("/tokens/rescan/queue/{queueId}", "GET", + api.MethodAccessTypeAdmin, capi.checkRescanToken); err != nil { + return err + } if err := capi.endpoint.RegisterMethod("/tokens/{tokenID}", "DELETE", api.MethodAccessTypeAdmin, capi.launchDeleteToken); err != nil { return err @@ -600,6 +609,86 @@ func (capi *census3API) getToken(msg *api.APIdata, ctx *httprouter.HTTPContext) return ctx.Send(res, api.HTTPstatusOK) } +func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContext) error { + // get contract address from the tokenID query param and decode 
check if + // it is provided, if not return an error + strAddress := ctx.URLParam("tokenID") + if strAddress == "" { + return ErrMalformedToken.With("tokenID is required") + } + address := common.HexToAddress(strAddress) + // get chainID from query params and decode it as integer, if it's not + // provided or it's not a valid integer return an error + strChainID := ctx.Request.URL.Query().Get("chainID") + if strChainID == "" { + return ErrMalformedChainID.With("chainID is required") + } + chainID, err := strconv.Atoi(strChainID) + if err != nil { + return ErrMalformedChainID.WithErr(err) + } else if chainID < 0 { + return ErrMalformedChainID.With("chainID must be a positive number") + } + // get token information from the database + internalCtx, cancel := context.WithTimeout(ctx.Request.Context(), getTokenTimeout) + defer cancel() + tokenData, err := capi.db.QueriesRO.GetToken(internalCtx, + queries.GetTokenParams{ + ID: address.Bytes(), + ChainID: uint64(chainID), + }) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return ErrNotFoundToken.WithErr(err) + } + return ErrCantGetToken.WithErr(err) + } + // only the tokens that are already synced can be rescanned + if !tokenData.Synced { + return ErrNoSyncedToken + } + // enqueue the rescan token process + id, err := capi.tokenUpdater.AddRequest(scanner.UpdateRequest{ + Address: address, + ChainID: uint64(chainID), + Type: tokenData.TypeID, + CreationBlock: uint64(tokenData.CreationBlock), + LastBlock: uint64(tokenData.CreationBlock), + EndBlock: uint64(tokenData.LastBlock), + }) + if err != nil { + return ErrMalformedToken.WithErr(err) + } + // encoding the result and response it + res, err := json.Marshal(QueueResponse{id}) + if err != nil { + return ErrEncodeQueueItem.WithErr(err) + } + return ctx.Send(res, api.HTTPstatusOK) +} + +func (capi *census3API) checkRescanToken(msg *api.APIdata, ctx *httprouter.HTTPContext) error { + queueID := ctx.URLParam("queueID") + if queueID == "" { + return ErrMalformedRescanQueueID + } + // get the rescan status from the updater + status, err := capi.tokenUpdater.RequestStatus(queueID) + if err != nil { + return ErrNotFoundToken.Withf("the ID %s does not exist in the queue", queueID) + } + // encoding the result and response it + response, err := json.Marshal(RescanTokenStatus{ + Address: status.Address.String(), + ChainID: status.ChainID, + Done: status.Done(), + }) + if err != nil { + return ErrEncodeQueueItem.WithErr(err) + } + return ctx.Send(response, api.HTTPstatusOK) +} + func (capi *census3API) getTokenHolder(msg *api.APIdata, ctx *httprouter.HTTPContext) error { // get contract address from the tokenID query param and decode check if // it is provided, if not return an error diff --git a/api/types.go b/api/types.go index 39995869..652750f9 100644 --- a/api/types.go +++ b/api/types.go @@ -162,3 +162,13 @@ type DeleteTokenQueueResponse struct { Done bool `json:"done"` Error error `json:"error"` } + +type RescanTokenResponse struct { + ID string `json:"ID"` +} + +type RescanTokenStatus struct { + Address string `json:"address"` + ChainID uint64 `json:"chainID"` + Done bool `json:"done"` +} diff --git a/cmd/census3/main.go b/cmd/census3/main.go index b1741f56..c1c8c5e3 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -199,6 +199,8 @@ func main() { } // start the holder scanner with the database and the provider manager hc := scanner.NewScanner(database, w3p, pm, config.scannerCoolDown, config.filtersPath) + // start the token updater with the database and the provider manager + 
updater := scanner.NewUpdater(database, w3p, pm) // if the admin token is not defined, generate a random one if config.adminToken != "" { if _, err := uuid.Parse(config.adminToken); err != nil { @@ -219,6 +221,7 @@ func main() { GroupKey: config.connectKey, HolderProviders: pm.Providers(ctx), AdminToken: config.adminToken, + TokenUpdater: updater, }) if err != nil { log.Fatal(err) } diff --git a/scanner/updater.go b/scanner/updater.go index 0aca98c4..eda8f4a0 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -31,6 +31,10 @@ type UpdateRequest struct { LastBlock uint64 } +func (ur UpdateRequest) Done() bool { + return ur.LastBlock >= ur.EndBlock +} + type Updater struct { ctx context.Context cancel context.CancelFunc @@ -54,7 +58,6 @@ func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager) *Updater { func (u *Updater) Start(ctx context.Context) { u.ctx, u.cancel = context.WithCancel(ctx) - u.waiter.Add(1) go func() { defer u.waiter.Done() for { @@ -80,27 +83,31 @@ func (u *Updater) Stop() { u.cancel() u.waiter.Wait() } -func (u *Updater) RequestStatus(id string) UpdateRequest { +func (u *Updater) RequestStatus(id string) (UpdateRequest, error) { u.queueMtx.Lock() defer u.queueMtx.Unlock() - req := u.queue[id] - if req.LastBlock >= req.EndBlock { + req, ok := u.queue[id] + if !ok { + return UpdateRequest{}, fmt.Errorf("request not found") + } + if req.Done() { delete(u.queue, id) } - return req + return req, nil } -func (u *Updater) AddRequest(req UpdateRequest) { +func (u *Updater) AddRequest(req UpdateRequest) (string, error) { if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { - return + return "", fmt.Errorf("missing required fields") } if req.CreationBlock >= req.EndBlock || req.LastBlock >= req.EndBlock { - return + return "", fmt.Errorf("invalid block range") } id := util.RandomHex(16) u.queueMtx.Lock() defer u.queueMtx.Unlock() u.queue[id] = req + return id, nil }
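With patch 04, a rescan can be driven programmatically through the updater: AddRequest validates the block range and returns the random queue ID, and RequestStatus reports progress until the request is done (at which point it is dropped from the queue, so the next lookup fails). A sketch against the patch-04 signatures, with placeholder block numbers:

u := scanner.NewUpdater(database, w3p, pm)
u.Start(ctx)
defer u.Stop()

id, err := u.AddRequest(scanner.UpdateRequest{
	Address:       common.HexToAddress("0x..."), // token to rescan
	ChainID:       1,
	Type:          tokenType, // provider type ID from the tokens table
	CreationBlock: creationBlock,
	LastBlock:     creationBlock, // start from the beginning
	EndBlock:      lastSyncedBlock,
})
if err != nil {
	return err
}
for {
	status, err := u.RequestStatus(id)
	if err != nil || status.Done() {
		break
	}
	time.Sleep(time.Second)
}

The rescanToken and checkRescanToken handlers above wrap exactly this flow behind the admin API.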
From f320e45c812b82eabe4a23c9528981df62a30f29 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Tue, 4 Jun 2024 15:18:45 +0200 Subject: [PATCH 05/21] include comments --- .golangci.yml | 4 +-- scanner/const.go | 8 +++-- scanner/filter.go | 19 ++++++++++- scanner/providers/web3/erc721_provider.go | 40 ++++++++++++++++++++++- scanner/providers/web3/erc777_provider.go | 40 ++++++++++++++++++++++- scanner/scanner.go | 3 +- scanner/updater.go | 37 ++++++++++++++++++--- 7 files changed, 137 insertions(+), 14 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index c3746c33..9a527b23 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,10 +1,10 @@ run: go: '1.20' - skip-files: - - scanner/providers/gitcoin/gitcoin_stamps.go issues: max-same-issues: 0 exclude-use-default: false + exclude-files: + - scanner/providers/gitcoin/gitcoin_stamps.go linters: enable: - misspell diff --git a/scanner/const.go b/scanner/const.go index 0c46989e..49eed770 100644 --- a/scanner/const.go +++ b/scanner/const.go @@ -3,12 +3,14 @@ package scanner import "time" const ( - READ_TIMEOUT = time.Minute - SCAN_TIMEOUT = 5 * time.Minute - SAVE_TIMEOUT = 5 * time.Minute + READ_TIMEOUT = time.Minute + SCAN_TIMEOUT = 5 * time.Minute + SAVE_TIMEOUT = 5 * time.Minute + UPDATE_TIMEOUT = 5 * time.Minute ) const ( + coolDown = 15 * time.Second // time to wait between updates scanSleepTime = time.Second * 20 // time to sleep between scans scanSleepTimeOnceSync = time.Second * 120 // time to sleep between scans, once all the tokens are synced blockNumbersCooldown = 5 * time.Minute // time to wait to update latest block numbers of every supported network
diff --git a/scanner/filter.go b/scanner/filter.go index 4b3acfcd..62e7c605 100644 --- a/scanner/filter.go +++ b/scanner/filter.go @@ -1,5 +1,10 @@ package scanner +// This file provides a wrapper of boom.ScalableBloomFilter to store the +// filter to a file and load it from it. The filter is used to store the +// processed transactions to avoid re-processing them, but also rescanning a +// synced token to find missing transactions. + import ( "fmt" "os" @@ -8,6 +13,10 @@ import ( boom "github.com/tylertreat/BoomFilters" ) +// TokenFilter is a wrapper of boom.ScalableBloomFilter to store the filter to +// a file and load it from it. The file that stores the filter is named as +// <address>-<chainID>.filter, where address is the token contract address and +// chainID is the chain ID of the network where the token is deployed. type TokenFilter struct { filter *boom.ScalableBloomFilter address common.Address @@ -15,6 +24,9 @@ type TokenFilter struct { path string } +// LoadFilter loads the filter from the file; if the file does not exist, it +// creates a new filter and returns it. The filter is stored in the file named +// <address>-<chainID>.filter in the basePath directory. func LoadFilter(basePath string, address common.Address, chainID uint64) (*TokenFilter, error) { // compose the filter path: path/<address>-<chainID>.filter // by default, create an empty filter @@ -39,18 +51,23 @@ func LoadFilter(basePath string, address common.Address, chainID uint64) (*Token return tf, nil } +// Add adds a key to the filter. func (tf *TokenFilter) Add(key []byte) { tf.filter.Add(key) } +// Test checks if a key is in the filter. func (tf *TokenFilter) Test(key []byte) bool { return tf.filter.Test(key) } +// TestAndAdd checks if a key is in the filter and, if not, adds it to the +// filter. It is the combination of Test and conditional Add. func (tf *TokenFilter) TestAndAdd(key []byte) bool { return tf.filter.TestAndAdd(key) } +// Commit writes the filter to its file. func (tf *TokenFilter) Commit() error { // encode the filter bFilter, err := tf.filter.GobEncode() @@ -58,7 +75,7 @@ func (tf *TokenFilter) Commit() error { return err } // write the filter to the file - if err := os.WriteFile(tf.path, bFilter, 0644); err != nil { + if err := os.WriteFile(tf.path, bFilter, 0o644); err != nil { return err } return nil
diff --git a/scanner/providers/web3/erc721_provider.go b/scanner/providers/web3/erc721_provider.go index f83112a8..de347bd6 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -2,6 +2,7 @@ package web3 import ( "context" + "crypto/sha256" "errors" "fmt" "math/big" @@ -9,6 +10,8 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + boom "github.com/tylertreat/BoomFilters" erc721 "github.com/vocdoni/census3/contracts/erc/erc721" "github.com/vocdoni/census3/helpers/web3" "github.com/vocdoni/census3/scanner/providers" @@ -29,6 +32,7 @@ type ERC721HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool + filter boom.Filter } func (p *ERC721HolderProvider) Init(_ context.Context, iconf any) error { @@ -65,6 +69,8 @@ func (p *ERC721HolderProvider) SetRef(iref any) error { if err != nil { return fmt.Errorf("error getting web3 client for the given chainID: %w", err) } + // set the filter provided in the reference + p.filter = ref.Filter // set the client, parse the address and initialize the contract address := common.HexToAddress(ref.HexAddress) if p.contract, err = erc721.NewERC721Contract(address, p.client); err != nil { @@ -149,10 +155,20 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } // encode the number of new transfers - newTransfers := uint64(len(logs)) + newTransfers := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { + // check if the log has been already processed + processed, err := p.isLogAlreadyProcessed(currentLog) + if err != nil { + return nil, newTransfers, lastBlock, false, big.NewInt(0), + errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) + } + if processed { + continue + } + newTransfers++ logData, err := p.contract.ERC721ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, newTransfers, lastBlock, false, nil, fmt.Errorf("[ERC721] %w: %s: %w", ErrParsingTokenLogs, p.address.Hex(), err) @@ -329,3 +345,25 @@ func (p *ERC721HolderProvider) IconURI(_ []byte) (string, error) { func (p *ERC721HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[common.Address]*big.Int, error) { return data, nil } + +// isLogAlreadyProcessed returns true if the log with the given block number and +// log index has been already processed. It uses a filter to check if the log +// has been processed. To identify the log, it creates a hash with the log +// data, block number and log index. It returns true if the log has been already processed +// or false if it has not been processed yet. If some error occurs, it returns +// false and the error. +func (p *ERC721HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { + // if the filter is not defined, return false + if p.filter == nil { + return false, nil + } + // get an identifier of each transfer: + // data-blockNumber-logIndex + transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + hashFn := sha256.New() + if _, err := hashFn.Write([]byte(transferID)); err != nil { + return false, err + } + hID := hashFn.Sum(nil) + return p.filter.TestAndAdd(hID), nil +}
diff --git a/scanner/providers/web3/erc777_provider.go b/scanner/providers/web3/erc777_provider.go index b5b3fd6f..b9a130f9 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -2,6 +2,7 @@ package web3 import ( "context" + "crypto/sha256" "errors" "fmt" "math/big" @@ -9,6 +10,8 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + boom "github.com/tylertreat/BoomFilters" erc777 "github.com/vocdoni/census3/contracts/erc/erc777" "github.com/vocdoni/census3/helpers/web3" "github.com/vocdoni/census3/scanner/providers" @@ -29,6 +32,7 @@ type ERC777HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool + filter boom.Filter } func (p *ERC777HolderProvider) Init(_ context.Context, iconf any) error { @@ -65,6 +69,8 @@ func (p *ERC777HolderProvider) SetRef(iref any) error { if err != nil { return fmt.Errorf("error getting web3 client for the given chainID: %w", err) } + // set the filter provided in the reference + p.filter = ref.Filter // set the client, parse the address and initialize the contract address := common.HexToAddress(ref.HexAddress) if p.contract, err = erc777.NewERC777Contract(address, p.client); err != nil { @@ -149,10 +155,20 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } // encode the number of new transfers - newTransfers := uint64(len(logs)) + newTransfers := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { + // check if the log has been already processed + processed, err := p.isLogAlreadyProcessed(currentLog) + if err != nil { + return nil, newTransfers, lastBlock, false, big.NewInt(0), + errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) + } + if processed { + continue + } + newTransfers++ logData, err := p.contract.ERC777ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, newTransfers, lastBlock, false, nil, @@ -329,3 +345,25 @@ func (p *ERC777HolderProvider) IconURI(_ []byte) (string, error) { func (p *ERC777HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[common.Address]*big.Int, error) { return data, nil } + +// isLogAlreadyProcessed returns true if the log with the given block number and +// log index has been already processed. It uses a filter to check if the log +// has been processed. To identify the log, it creates a hash with the log +// data, block number and log index. It returns true if the log has been already processed +// or false if it has not been processed yet. If some error occurs, it returns +// false and the error. +func (p *ERC777HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { + // if the filter is not defined, return false + if p.filter == nil { + return false, nil + } + // get an identifier of each transfer: + // data-blockNumber-logIndex + transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + hashFn := sha256.New() + if _, err := hashFn.Write([]byte(transferID)); err != nil { + return false, err + } + hID := hashFn.Sum(nil) + return p.filter.TestAndAdd(hID), nil +}
diff --git a/scanner/scanner.go b/scanner/scanner.go index cb999136..5f3ebef1 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -411,7 +411,8 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( } func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, - synced bool, totalSupply *big.Int) { + synced bool, totalSupply *big.Int, +) { s.tokensMtx.Lock()
It returns true if the log has been already processed +// or false if it has not been processed yet. If some error occurs, it returns +// false and the error. +func (p *ERC777HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { + // if the filter is not defined, return false + if p.filter == nil { + return false, nil + } + // get an identifier of each transfer: + // data-blockNumber-logIndex + transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + hashFn := sha256.New() + if _, err := hashFn.Write([]byte(transferID)); err != nil { + return false, err + } + hID := hashFn.Sum(nil) + return p.filter.TestAndAdd(hID), nil +} diff --git a/scanner/scanner.go index cb999136..5f3ebef1 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -411,7 +411,8 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( } func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, - synced bool, totalSupply *big.Int) { + synced bool, totalSupply *big.Int, +) { s.tokensMtx.Lock() for i, t := range s.tokens { if t.Address == token.Address && t.ChainID == token.ChainID && t.ExternalID == token.ExternalID { diff --git a/scanner/updater.go index eda8f4a0..8fb7e1ed 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -17,11 +17,8 @@ import ( "go.vocdoni.io/dvote/util" ) -const ( - coolDown = 15 * time.Second - UPDATE_TIMEOUT = 5 * time.Minute -) - +// UpdateRequest is a struct to request a token update but also to query about +// the status of a request that is being processed. type UpdateRequest struct { Address common.Address ChainID uint64 @@ -31,10 +28,20 @@ type UpdateRequest struct { LastBlock uint64 } +// Done returns true if the request is done, that is, the last block is greater than +// or equal to the end block. func (ur UpdateRequest) Done() bool { return ur.LastBlock >= ur.EndBlock } +// Updater is a struct to manage the update requests of the tokens. It will +// iterate over the requests, repeating the process of getting the token holders +// balances and saving them in the database until the last block is greater than or +// equal to the end block. The end block is the block number where the token +// holders balances are up to date. The holders providers must include an +// instance of a TokenFilter to store the processed transactions to avoid +// re-processing them while still allowing a synced token to be rescanned to +// find missing transactions. type Updater struct { ctx context.Context cancel context.CancelFunc @@ -47,6 +54,7 @@ type Updater struct { waiter sync.WaitGroup } +// NewUpdater creates a new instance of Updater. func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager) *Updater { return &Updater{ db: db, networks: networks, providers: pm, queue: make(map[string]UpdateRequest), } } +// Start starts the updater process in a goroutine. func (u *Updater) Start(ctx context.Context) { u.ctx, u.cancel = context.WithCancel(ctx) u.waiter.Add(1) @@ -78,11 +87,15 @@ func (u *Updater) Start(ctx context.Context) { }() } +// Stop stops the updater process. func (u *Updater) Stop() { u.cancel() u.waiter.Wait() } +// RequestStatus returns the status of a request by its ID. If the request is +// done, it will be removed from the queue. If the request is not found, it will +// return an error.
func (u *Updater) RequestStatus(id string) (UpdateRequest, error) { u.queueMtx.Lock() defer u.queueMtx.Unlock() @@ -96,6 +109,10 @@ func (u *Updater) RequestStatus(id string) (UpdateRequest, error) { return u.queue[id], nil } +// AddRequest adds a new request to the queue. It will return an error if the +// request is missing required fields or the block range is invalid. The request +// will be added to the queue with a random ID, which will be returned to allow +// the client to query the status of the request. func (u *Updater) AddRequest(req UpdateRequest) (string, error) { if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { return "", fmt.Errorf("missing required fields") @@ -110,12 +127,18 @@ func (u *Updater) AddRequest(req UpdateRequest) (string, error) { return id, nil } +// IsEmpty returns true if the queue is empty. func (u *Updater) IsEmpty() bool { u.queueMtx.Lock() defer u.queueMtx.Unlock() return len(u.queue) == 0 } +// process iterates over the current queue items, getting the token holders +// balances and saving them in the database until the last block is greater than or +// equal to the end block. It updates the status of the request in the queue. It +// will return an error if the provider is not found, the token is external or +// there is an error getting the token holders balances. func (u *Updater) process() error { // make a copy of current queue u.queueMtx.Lock() @@ -126,6 +149,10 @@ func (u *Updater) process() error { u.queueMtx.Unlock() // iterate over the current queue items for id, req := range queue { + // check if the request is done + if req.Done() { + continue + } internalCtx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) defer cancel() // get the provider by token type From 54b6519737ec651324e9d42edaf0d05b4318d198 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Wed, 5 Jun 2024 11:32:35 +0200 Subject: [PATCH 06/21] last fixes about filters --- api/tokens.go | 8 +-- cmd/census3/main.go | 10 +++- scanner/filter.go | 6 +- scanner/providers/web3/erc20_provider.go | 4 +- scanner/scanner.go | 3 +- scanner/updater.go | 76 ++++++++++++++---------- 6 files changed, 65 insertions(+), 42 deletions(-) diff --git a/api/tokens.go index 023127d4..5a6b4005 100644 --- a/api/tokens.go +++ b/api/tokens.go @@ -42,7 +42,7 @@ func (capi *census3API) initTokenHandlers() error { api.MethodAccessTypeAdmin, capi.rescanToken); err != nil { return err } - if err := capi.endpoint.RegisterMethod("/tokens/rescan/queue/{queueId}", "GET", + if err := capi.endpoint.RegisterMethod("/tokens/rescan/queue/{queueID}", "GET", api.MethodAccessTypeAdmin, capi.checkRescanToken); err != nil { return err } @@ -648,7 +648,7 @@ func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContex return ErrNoSyncedToken } // enqueue the rescan token process - id, err := capi.tokenUpdater.AddRequest(scanner.UpdateRequest{ + id, err := capi.tokenUpdater.AddRequest(&scanner.UpdateRequest{ Address: address, ChainID: uint64(chainID), Type: tokenData.TypeID, @@ -674,14 +674,14 @@ func (capi *census3API) checkRescanToken(msg *api.APIdata, ctx *httprouter.HTTPC } // get the rescan status from the updater status, err := capi.tokenUpdater.RequestStatus(queueID) - if err != nil { + if err != nil || status == nil { return ErrNotFoundToken.Withf("the ID %s does not exist in the queue", queueID) } // encoding the result and response it response, err := json.Marshal(RescanTokenStatus{ Address: status.Address.String(), ChainID: status.ChainID, Done: 
status.Done(), + Done: status.Done, }) if err != nil { return ErrEncodeQueueItem.WithErr(err) diff --git a/cmd/census3/main.go index c1c8c5e3..10c04bc0 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -136,8 +136,12 @@ func main() { panic(err) } config.farcaster = pviper.GetBool("farcaster") - // set the filters path into the config + // set the filters path into the config, create the folder if it does not + // exist yet config.filtersPath = config.dataDir + "/filters" + if err := os.MkdirAll(config.filtersPath, os.ModePerm); err != nil { + log.Fatal(err) + } // init logger log.Init(config.logLevel, "stdout", nil) // check if the web3 providers are defined @@ -200,7 +204,7 @@ func main() { // start the holder scanner with the database and the provider manager hc := scanner.NewScanner(database, w3p, pm, config.scannerCoolDown, config.filtersPath) // start the token updater with the database and the provider manager - updater := scanner.NewUpdater(database, w3p, pm) + updater := scanner.NewUpdater(database, w3p, pm, config.filtersPath) // if the admin token is not defined, generate a random one if config.adminToken != "" { if _, err := uuid.Parse(config.adminToken); err != nil { @@ -235,6 +239,7 @@ func main() { }() // start the holder scanner go hc.Start(ctx, config.scannerConcurrentTokens) + go updater.Start(ctx) metrics.NewCounter(fmt.Sprintf("census3_info{version=%q,chains=%q}", internal.Version, w3p.String())).Set(1) @@ -249,6 +254,7 @@ func main() { // closing database go func() { hc.Stop() + updater.Stop() if err := apiService.Stop(); err != nil { log.Fatal(err) } diff --git a/scanner/filter.go index 62e7c605..01db0464 100644 --- a/scanner/filter.go +++ b/scanner/filter.go @@ -52,8 +52,8 @@ func LoadFilter(basePath string, address common.Address, chainID uint64) (*Token } // Add adds a key to the filter. -func (tf *TokenFilter) Add(key []byte) { - tf.filter.Add(key) +func (tf *TokenFilter) Add(key []byte) boom.Filter { + return tf.filter.Add(key) } // Test checks if a key is in the filter. 
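Taken together, the `TokenFilter` wrappers and the providers' `isLogAlreadyProcessed` helpers above implement one pattern: load a scalable bloom filter from disk, `TestAndAdd` a sha256 digest of each log's identity while scanning, and commit the filter back to its file. A minimal, self-contained sketch of that lifecycle (the file name and sample values are invented, and the `GobDecode` counterpart to the `GobEncode` call used by `Commit` is assumed to be what `LoadFilter` relies on):

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"os"

	boom "github.com/tylertreat/BoomFilters"
)

// seen hashes a log identity (data, block number, log index) and checks it
// against the filter in one step: true means it was (probably) seen before.
func seen(filter boom.Filter, data []byte, blockNumber, logIndex uint64) bool {
	transferID := fmt.Sprintf("%x-%d-%d", data, blockNumber, logIndex)
	hID := sha256.Sum256([]byte(transferID))
	return filter.TestAndAdd(hID[:])
}

func main() {
	filter := boom.NewDefaultScalableBloomFilter(0.01)      // ~1% false-positive rate
	fmt.Println(seen(filter, []byte{0x01}, 19_000_000, 7)) // false: first sighting
	fmt.Println(seen(filter, []byte{0x01}, 19_000_000, 7)) // true: duplicate

	// persist the filter, as TokenFilter.Commit does
	encoded, err := filter.GobEncode()
	if err != nil {
		panic(err)
	}
	path := "0x1234-1-.filter" // hypothetical <address>-<chainID>-<externalID> name
	if err := os.WriteFile(path, encoded, 0o644); err != nil {
		panic(err)
	}

	// load it back, as LoadFilter does when the file already exists
	raw, err := os.ReadFile(path)
	if err != nil {
		panic(err)
	}
	restored := boom.NewDefaultScalableBloomFilter(0.01)
	if err := restored.GobDecode(raw); err != nil {
		panic(err)
	}
	fmt.Println(seen(restored, []byte{0x01}, 19_000_000, 7)) // true: survives restarts
}
```

The trade-off is that a bloom filter can report false positives, so a genuinely new transfer is occasionally mistaken for an already-processed one and skipped; that is the price of keeping the per-token state small enough to gob-encode into a single file.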
@@ -75,7 +75,7 @@ func (tf *TokenFilter) Commit() error { return err } // write the filter to the file - if err := os.WriteFile(tf.path, bFilter, 0o644); err != nil { + if err := os.WriteFile(tf.path, bFilter, os.ModePerm); err != nil { return err } return nil diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 13188f12..4d19dfe2 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -168,8 +168,10 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } if processed { + log.Info("log already processed") continue } + log.Info("log not processed yet, processing...") newTransfers++ logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) if err != nil { @@ -197,7 +199,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro p.synced.Store(synced) totalSupply, err := p.TotalSupply(nil) if err != nil { - log.Warn("error getting total supply, it will retry in the next iteration", "error", err) + log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) return balances, newTransfers, lastBlock, synced, nil, nil } return balances, newTransfers, lastBlock, synced, totalSupply, nil diff --git a/scanner/scanner.go b/scanner/scanner.go index 5f3ebef1..7984194b 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -329,6 +329,7 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( defer func() { if err := filter.Commit(); err != nil { log.Error(err) + return } }() // set the token reference in the provider @@ -336,7 +337,7 @@ func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( HexAddress: token.Address.Hex(), ChainID: token.ChainID, CreationBlock: token.CreationBlock, - Filter: filter.filter, + Filter: filter, }); err != nil { return nil, 0, token.LastBlock, token.Synced, nil, err } diff --git a/scanner/updater.go b/scanner/updater.go index 8fb7e1ed..83d22501 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -26,12 +26,7 @@ type UpdateRequest struct { CreationBlock uint64 EndBlock uint64 LastBlock uint64 -} - -// Done returns true if the request is done, that is, the last block is greater -// or equal to the end block. -func (ur UpdateRequest) Done() bool { - return ur.LastBlock >= ur.EndBlock + Done bool } // Updater is a struct to manage the update requests of the tokens. It will @@ -46,21 +41,25 @@ type Updater struct { ctx context.Context cancel context.CancelFunc - db *db.DB - networks *web3.Web3Pool - providers *manager.ProviderManager - queue map[string]UpdateRequest - queueMtx sync.Mutex - waiter sync.WaitGroup + db *db.DB + networks *web3.Web3Pool + providers *manager.ProviderManager + queue map[string]*UpdateRequest + queueMtx sync.Mutex + waiter sync.WaitGroup + filtersPath string } // NewUpdater creates a new instance of Updater. 
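The constructor diff just below threads `filtersPath` into the `Updater`. Once built and started, the queue is driven by enqueueing a request and polling its ID. A sketch of a caller against the API as it stands at this patch (the address, type and block numbers are illustrative; patch 08 later makes the starting block internal, and patch 10 renames the entry point to `SetRequest`):

```go
package example

import (
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/vocdoni/census3/scanner"
)

// rescan enqueues a token rescan and polls the updater until it reports done.
func rescan(u *scanner.Updater) error {
	id, err := u.AddRequest(&scanner.UpdateRequest{
		Address:       common.HexToAddress("0x0000000000000000000000000000000000000001"),
		ChainID:       1,
		Type:          1, // provider type ID registered in the ProviderManager
		CreationBlock: 10_000_000,
		LastBlock:     10_000_000, // start scanning from the creation block
		EndBlock:      19_000_000,
	})
	if err != nil {
		return err
	}
	for {
		status, err := u.RequestStatus(id)
		if err != nil {
			return err // finished requests are dropped from the queue on read
		}
		if status.Done {
			return nil // the rescan reached the end block
		}
		time.Sleep(15 * time.Second)
	}
}
```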
-func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager) *Updater { +func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, + filtersPath string, +) *Updater { return &Updater{ - db: db, - networks: networks, - providers: pm, - queue: make(map[string]UpdateRequest), + db: db, + networks: networks, + providers: pm, + queue: make(map[string]*UpdateRequest), + filtersPath: filtersPath, } } @@ -80,7 +79,7 @@ func (u *Updater) Start(ctx context.Context) { continue } if err := u.process(); err != nil { - log.Error("Error processing update request: %w", err) + log.Errorf("Error processing update request: %v", err) } } } @@ -96,24 +95,24 @@ func (u *Updater) Stop() { // RequestStatus returns the status of a request by its ID. If the request is // done, it will be removed from the queue. If the request is not found, it will // return an error. -func (u *Updater) RequestStatus(id string) (UpdateRequest, error) { +func (u *Updater) RequestStatus(id string) (*UpdateRequest, error) { u.queueMtx.Lock() defer u.queueMtx.Unlock() req, ok := u.queue[id] if !ok { - return UpdateRequest{}, fmt.Errorf("request not found") + return nil, fmt.Errorf("request not found") } - if req.Done() { + if req.Done { delete(u.queue, id) } - return u.queue[id], nil + return req, nil } // AddRequest adds a new request to the queue. It will return an error if the // request is missing required fields or the block range is invalid. The request // will be added to the queue with a random ID, that will be returned to allow // the client to query the status of the request. -func (u *Updater) AddRequest(req UpdateRequest) (string, error) { +func (u *Updater) AddRequest(req *UpdateRequest) (string, error) { if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { return "", fmt.Errorf("missing required fields") } @@ -142,7 +141,7 @@ func (u *Updater) IsEmpty() bool { func (u *Updater) process() error { // make a copy of current queue u.queueMtx.Lock() - queue := map[string]UpdateRequest{} + queue := map[string]*UpdateRequest{} for k, v := range u.queue { queue[k] = v } @@ -150,9 +149,10 @@ func (u *Updater) process() error { // iterate over the current queue items for id, req := range queue { // check if the request is done - if req.Done() { + if req.Done { continue } + log.Infow("rescanning token", "address", req.Address.Hex(), "from", req.CreationBlock, "to", req.EndBlock, "current", req.LastBlock) internalCtx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) defer cancel() // get the provider by token type @@ -164,14 +164,30 @@ func (u *Updater) process() error { if provider.IsExternal() { return fmt.Errorf("external providers are not supported yet") } + // load filter of the token from the database + filter, err := LoadFilter(u.filtersPath, req.Address, req.ChainID) + if err != nil { + return err + } + // commit the filter when the function finishes + defer func() { + if err := filter.Commit(); err != nil { + log.Error(err) + return + } + }() // set the reference of the token to update in the provider if err := provider.SetRef(web3provider.Web3ProviderRef{ HexAddress: req.Address.Hex(), ChainID: req.ChainID, CreationBlock: req.CreationBlock, + Filter: filter, }); err != nil { return err } + // update the last block number of the provider to the last block of + // the request + provider.SetLastBlockNumber(req.EndBlock) // get current token holders from database results, err := u.db.QueriesRO.ListTokenHolders(internalCtx, 
queries.ListTokenHoldersParams{ TokenID: req.Address.Bytes(), @@ -195,16 +211,14 @@ // get range balances from the provider, it will iterate again // over transfers logs, checking if there are new transfers using the // bloom filter associated to the token - rangeBalances, newTransfers, lastBlock, synced, totalSupply, err := provider.HoldersBalances(internalCtx, nil, req.EndBlock) + rangeBalances, newTransfers, lastBlock, synced, totalSupply, err := provider.HoldersBalances(internalCtx, nil, req.CreationBlock) if err != nil { return err } + log.Infow("new logs received", "address", req.Address.Hex(), "from", req.LastBlock, "lastBlock", lastBlock, "newLogs", newTransfers) // update the token last - if synced { - req.LastBlock = req.EndBlock - } else { - req.LastBlock = lastBlock - } + req.LastBlock = lastBlock + req.Done = synced // save the new balances in the database created, updated, err := SaveHolders(u.db, internalCtx, ScannerToken{ Address: req.Address, ChainID: req.ChainID, }, rangeBalances, newTransfers, lastBlock, synced, totalSupply) if err != nil { return err } From b620177b10c126a9a2e61eb87a4c6205713d5955 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Wed, 5 Jun 2024 11:48:10 +0200 Subject: [PATCH 07/21] solve linter issues --- scanner/updater.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/scanner/updater.go index 83d22501..7482d5ee 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -152,8 +152,12 @@ func (u *Updater) process() error { if req.Done { continue } - log.Infow("rescanning token", "address", req.Address.Hex(), "from", req.CreationBlock, "to", req.EndBlock, "current", req.LastBlock) + log.Infow("rescanning token", + "address", req.Address.Hex(), + "from", req.CreationBlock, + "to", req.EndBlock, + "current", req.LastBlock) - internalCtx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) + ctx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) defer cancel() // get the provider by token type provider, err := u.providers.GetProvider(u.ctx, req.Type) @@ -189,7 +193,7 @@ ... - results, err := u.db.QueriesRO.ListTokenHolders(internalCtx, queries.ListTokenHoldersParams{ + results, err := u.db.QueriesRO.ListTokenHolders(ctx, queries.ListTokenHoldersParams{ TokenID: req.Address.Bytes(), ChainID: req.ChainID, }) @@ -205,25 +209,25 @@ ... - if err := provider.SetLastBalances(internalCtx, nil, currentHolders, req.LastBlock); err != nil { + if err := provider.SetLastBalances(ctx, nil, currentHolders, req.LastBlock); err != nil { return err } // get range balances from the provider, it will iterate again // over transfers logs, checking if there are new transfers using the // bloom filter associated to the token - rangeBalances, newTransfers, lastBlock, synced, totalSupply, err := provider.HoldersBalances(internalCtx, nil, req.CreationBlock) + balances, nTx, lastBlock, synced, totalSupply, err := provider.HoldersBalances(ctx, nil, req.CreationBlock) if err != nil { return err } - log.Infow("new logs received", "address", req.Address.Hex(), "from", req.LastBlock, "lastBlock", lastBlock, "newLogs", newTransfers) + log.Infow("new logs received", "address", req.Address.Hex(), "from", req.LastBlock, "lastBlock", lastBlock, "newLogs", nTx) // update the token last req.LastBlock = lastBlock req.Done = 
synced // save the new balances in the database - created, updated, err := SaveHolders(u.db, internalCtx, ScannerToken{ + created, updated, err := SaveHolders(u.db, ctx, ScannerToken{ Address: req.Address, ChainID: req.ChainID, - }, rangeBalances, newTransfers, lastBlock, synced, totalSupply) + }, balances, nTx, lastBlock, synced, totalSupply) if err != nil { return err } From cf7c29d4488709af043dd48e494527f6e34faa51 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Wed, 5 Jun 2024 18:39:54 +0200 Subject: [PATCH 08/21] make updater request last block internal to avoid looping forever --- api/tokens.go | 1 - scanner/updater.go | 19 ++++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/api/tokens.go index 5a6b4005..45163e86 100644 --- a/api/tokens.go +++ b/api/tokens.go @@ -653,7 +653,6 @@ func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContex ChainID: uint64(chainID), Type: tokenData.TypeID, CreationBlock: uint64(tokenData.CreationBlock), - LastBlock: uint64(tokenData.CreationBlock), EndBlock: uint64(tokenData.LastBlock), }) if err != nil { diff --git a/scanner/updater.go index 7482d5ee..35f945e0 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -25,7 +25,7 @@ type UpdateRequest struct { Type uint64 CreationBlock uint64 EndBlock uint64 - LastBlock uint64 + lastBlock uint64 Done bool } @@ -113,12 +113,17 @@ func (u *Updater) RequestStatus(id string) (*UpdateRequest, error) { // will be added to the queue with a random ID, which will be returned to allow // the client to query the status of the request. func (u *Updater) AddRequest(req *UpdateRequest) (string, error) { + // check required fields if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { return "", fmt.Errorf("missing required fields") } - if req.CreationBlock >= req.EndBlock || req.LastBlock >= req.EndBlock { + // ensure the block range is valid + if req.CreationBlock >= req.EndBlock { return "", fmt.Errorf("invalid block range") } + // set the last block to the creation block to start the process from there + req.lastBlock = req.CreationBlock + // generate a random ID for the request and insert it in the queue id := util.RandomHex(16) u.queueMtx.Lock() defer u.queueMtx.Unlock() @@ -156,7 +161,7 @@ ... "current", req.lastBlock) ctx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) defer cancel() ... @@ -209,19 +214,19 @@ ... - if err := provider.SetLastBalances(ctx, nil, currentHolders, req.LastBlock); err != nil { + if err := provider.SetLastBalances(ctx, nil, currentHolders, req.lastBlock); err != nil { return err } // get range balances from the provider, it will iterate again // over transfers logs, checking if there are new transfers using the // bloom filter associated to the token - balances, nTx, lastBlock, synced, totalSupply, err := provider.HoldersBalances(ctx, nil, req.CreationBlock) + balances, nTx, lastBlock, synced, totalSupply, err := provider.HoldersBalances(ctx, nil, req.lastBlock) if err != nil { return err } - log.Infow("new logs received", "address", req.Address.Hex(), "from", req.LastBlock, "lastBlock", lastBlock, "newLogs", nTx) + log.Infow("new logs received", 
"address", req.Address.Hex(), "from", req.lastBlock, "lastBlock", lastBlock, "newLogs", nTx) // update the token last - req.LastBlock = lastBlock + req.lastBlock = lastBlock req.Done = synced // save the new balances in the database created, updated, err := SaveHolders(u.db, ctx, ScannerToken{ From b6b42c73fe407819560ab12e031fa1ed447abdf8 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Wed, 5 Jun 2024 19:24:43 +0200 Subject: [PATCH 09/21] removing annoying logs --- scanner/providers/web3/erc20_provider.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 4d19dfe2..a6af94a1 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -159,6 +159,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro // encode the number of new transfers newTransfers := uint64(0) balances := make(map[common.Address]*big.Int) + alreadyProcessedLogs := 0 // iterate the logs and update the balances for _, currentLog := range logs { // check if the log has been already processed @@ -168,10 +169,9 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } if processed { - log.Info("log already processed") + alreadyProcessedLogs++ continue } - log.Info("log not processed yet, processing...") newTransfers++ logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) if err != nil { @@ -192,7 +192,8 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro } log.Infow("saving blocks", "count", len(balances), - "logs", len(logs), + "new_logs", newTransfers, + "already_processed_logs", alreadyProcessedLogs, "blocks/s", 1000*float32(lastBlock-fromBlock)/float32(time.Since(startTime).Milliseconds()), "took", time.Since(startTime).Seconds(), "progress", fmt.Sprintf("%d%%", (fromBlock*100)/toBlock)) From 803f80ab82dd04a19d87ea4969ab1273112898c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Wed, 12 Jun 2024 14:13:21 +0200 Subject: [PATCH 10/21] initial integration between updater and scanner, no concurrent --- api/tokens.go | 15 +- api/types.go | 4 + cmd/census3/main.go | 4 +- helpers/web3/web3_client.go | 8 +- scanner/const.go | 2 +- scanner/filter.go | 11 +- scanner/providers/farcaster/provider.go | 19 +- scanner/providers/gitcoin/gitcoin_provider.go | 19 +- .../gitcoin/gitcoin_provider_test.go | 8 +- scanner/providers/holders_provider.go | 11 +- scanner/providers/poap/poap_provider.go | 13 +- scanner/providers/poap/poap_provider_test.go | 6 +- scanner/providers/web3/erc20_provider.go | 54 +++- scanner/providers/web3/erc721_provider.go | 48 ++- scanner/providers/web3/erc777_provider.go | 50 +++- scanner/scanner.go | 278 ++++++++---------- scanner/updater.go | 139 ++++++--- 17 files changed, 410 insertions(+), 279 deletions(-) diff --git a/api/tokens.go b/api/tokens.go index 45163e86..46308ff4 100644 --- a/api/tokens.go +++ b/api/tokens.go @@ -648,7 +648,7 @@ func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContex return ErrNoSyncedToken } // enqueue the rescan token process - id, err := capi.tokenUpdater.AddRequest(&scanner.UpdateRequest{ + id, err := capi.tokenUpdater.SetRequest(&scanner.UpdateRequest{ Address: address, ChainID: uint64(chainID), Type: tokenData.TypeID, @@ -672,15 +672,18 @@ func (capi *census3API) checkRescanToken(msg 
*api.APIdata, ctx *httprouter.HTTPC return ErrMalformedRescanQueueID } // get the rescan status from the updater - status, err := capi.tokenUpdater.RequestStatus(queueID) - if err != nil || status == nil { + status := capi.tokenUpdater.RequestStatus(queueID, true) + if status == nil { return ErrNotFoundToken.Withf("the ID %s does not exist in the queue", queueID) } // encoding the result and response it response, err := json.Marshal(RescanTokenStatus{ - Address: status.Address.String(), - ChainID: status.ChainID, - Done: status.Done, + Address: status.Address.String(), + ChainID: status.ChainID, + Done: status.Done, + LogsScanned: status.TotalLogs, + NewLogs: status.TotalNewLogs, + DuplicatedLogs: status.TotalAlreadyProcessedLogs, }) if err != nil { return ErrEncodeQueueItem.WithErr(err) diff --git a/api/types.go b/api/types.go index 652750f9..4aff0eb1 100644 --- a/api/types.go +++ b/api/types.go @@ -171,4 +171,8 @@ type RescanTokenStatus struct { Address string `json:"address"` ChainID uint64 `json:"chainID"` Done bool `json:"done"` + + LogsScanned uint64 `json:"logsScanned"` + NewLogs uint64 `json:"newLogs"` + DuplicatedLogs uint64 `json:"duplicatedLogs"` } diff --git a/cmd/census3/main.go b/cmd/census3/main.go index 10c04bc0..66d08fef 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -201,10 +201,10 @@ func main() { DB: farcasterDB, }) } - // start the holder scanner with the database and the provider manager - hc := scanner.NewScanner(database, w3p, pm, config.scannerCoolDown, config.filtersPath) // start the token updater with the database and the provider manager updater := scanner.NewUpdater(database, w3p, pm, config.filtersPath) + // start the holder scanner with the database and the provider manager + hc := scanner.NewScanner(database, updater, w3p, pm, config.scannerCoolDown, config.filtersPath) // if the admin token is not defined, generate a random one if config.adminToken != "" { if _, err := uuid.Parse(config.adminToken); err != nil { diff --git a/helpers/web3/web3_client.go b/helpers/web3/web3_client.go index ca0ca6f5..0b145d64 100644 --- a/helpers/web3/web3_client.go +++ b/helpers/web3/web3_client.go @@ -16,7 +16,7 @@ const defaultRetries = 3 var ( defaultTimeout = 2 * time.Second - filterLogsTimeout = 3 * time.Second + filterLogsTimeout = 15 * time.Second retrySleep = 200 * time.Millisecond ) @@ -32,6 +32,12 @@ type Client struct { // EthClient method returns the ethclient.Client for the chainID of the Client // instance. It returns an error if the chainID is not found in the pool. func (c *Client) EthClient() (*ethclient.Client, error) { + if c == nil { + return nil, fmt.Errorf("web3 client is nil") + } + if c.w3p == nil { + return nil, fmt.Errorf("web3 pool is nil") + } endpoint, err := c.w3p.Endpoint(c.chainID) if err != nil { return nil, fmt.Errorf("error getting endpoint for chainID %d: %w", c.chainID, err) diff --git a/scanner/const.go b/scanner/const.go index 49eed770..b47f56fd 100644 --- a/scanner/const.go +++ b/scanner/const.go @@ -6,7 +6,7 @@ const ( READ_TIMEOUT = time.Minute SCAN_TIMEOUT = 5 * time.Minute SAVE_TIMEOUT = 5 * time.Minute - UPDATE_TIMEOUT = 5 * time.Minute + UPDATE_TIMEOUT = 15 * time.Minute ) const ( diff --git a/scanner/filter.go b/scanner/filter.go index 01db0464..71477b15 100644 --- a/scanner/filter.go +++ b/scanner/filter.go @@ -15,8 +15,9 @@ import ( // TokenFilter is a wrapper of boom.ScalableBloomFilter to store the filter to // a file and load it from it. The file that stores the filter is named as -//
<address>-<chainID>.filter, where address is the token contract address and -// chainID is the chain ID of the network where the token is deployed. +//
<address>-<chainID>-<externalID>.filter, where address is the token contract +// address and chainID is the chain ID of the network where the token is +// deployed. type TokenFilter struct { filter *boom.ScalableBloomFilter address common.Address @@ -26,9 +27,9 @@ // LoadFilter loads the filter from the file, if the file does not exist, create // a new filter and return it. The filter is stored in the file named as -//
<address>-<chainID>.filter in the basePath directory. -func LoadFilter(basePath string, address common.Address, chainID uint64) (*TokenFilter, error) { - // compose the filter path: path/
<address>-<chainID>.filter
<address>-<chainID>-<externalID>.filter in the basePath directory. +func LoadFilter(basePath string, address common.Address, chainID uint64, externalID string) (*TokenFilter, error) { + // compose the filter path: path/
<address>-<chainID>-<externalID>.filter // by default, create an empty filter tf := &TokenFilter{ filter: boom.NewDefaultScalableBloomFilter(0.01), address: address, chainID: chainID, } diff --git a/scanner/providers/farcaster/provider.go index 11d21023..173e1745 100644 --- a/scanner/providers/farcaster/provider.go +++ b/scanner/providers/farcaster/provider.go @@ -178,14 +178,20 @@ func (p *FarcasterProvider) SetLastBlockNumber(blockNumber uint64) { // internal database and the current holders from the scanner and calculates the // partial holders. func (p *FarcasterProvider) HoldersBalances(ctx context.Context, _ []byte, fromBlock uint64) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, + map[common.Address]*big.Int, *providers.BlocksDelta, error, ) { // check if both contracts are synced isSynced := globallySynced.Load() // get current holders from internal db appKeys, err := p.db.QueriesRO.ListAppKeys(ctx) if err != nil && !errors.Is(err, sql.ErrNoRows) { - return nil, 0, fromBlock, isSynced, nil, fmt.Errorf("cannot get app keys from farcaster DB %s", err.Error()) + return nil, &providers.BlocksDelta{ + LogsCount: 0, + NewLogsCount: 0, + AlreadyProcessedLogsCount: 0, + Block: fromBlock, + Synced: isSynced, + }, fmt.Errorf("cannot get app keys from farcaster DB %s", err.Error()) } currentHolders := make(map[common.Address]*big.Int) for _, appKey := range appKeys { @@ -200,7 +206,14 @@ } p.currentScannerHoldersMtx.Unlock() resultingHolders := providers.CalcPartialHolders(currentScannerHolders, currentHolders) - return resultingHolders, uint64(len(resultingHolders)), p.contracts.lastBlock.Load(), isSynced, totalSupply, nil + return resultingHolders, &providers.BlocksDelta{ + LogsCount: uint64(len(resultingHolders)), + NewLogsCount: uint64(len(resultingHolders)), + AlreadyProcessedLogsCount: uint64(len(resultingHolders)), + Block: p.contracts.lastBlock.Load(), + Synced: isSynced, + TotalSupply: totalSupply, + }, nil } // Close method is not implemented for Farcaster Key Registry. 
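One detail of the Farcaster change above worth noting: even its failure path returns a populated `*providers.BlocksDelta` carrying the block and the sync flag together with the error, so callers can record how far the scan got before handling the failure. An implementer-side sketch of that convention (the provider type and its fields are hypothetical, not part of the patch):

```go
package example

import (
	"context"
	"fmt"
	"math/big"

	"github.com/ethereum/go-ethereum/common"
	"github.com/vocdoni/census3/scanner/providers"
)

// toyProvider illustrates the error-path convention of the new interface:
// return a delta describing how far the scan got, alongside the error itself.
type toyProvider struct {
	synced bool
}

func (p *toyProvider) HoldersBalances(ctx context.Context, _ []byte, fromBlock uint64) (
	map[common.Address]*big.Int, *providers.BlocksDelta, error,
) {
	holders, err := p.fetch(ctx)
	if err != nil {
		// report the block we stalled at so the caller can resume from it
		return nil, &providers.BlocksDelta{Block: fromBlock, Synced: p.synced},
			fmt.Errorf("fetching holders: %w", err)
	}
	return holders, &providers.BlocksDelta{
		Block:        fromBlock,
		LogsCount:    uint64(len(holders)),
		NewLogsCount: uint64(len(holders)),
		Synced:       p.synced,
		TotalSupply:  big.NewInt(0),
	}, nil
}

func (p *toyProvider) fetch(context.Context) (map[common.Address]*big.Int, error) {
	return map[common.Address]*big.Int{}, nil
}
```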
diff --git a/scanner/providers/gitcoin/gitcoin_provider.go b/scanner/providers/gitcoin/gitcoin_provider.go index 8a11f309..cd1136f2 100644 --- a/scanner/providers/gitcoin/gitcoin_provider.go +++ b/scanner/providers/gitcoin/gitcoin_provider.go @@ -141,7 +141,7 @@ func (g *GitcoinPassport) SetLastBalances(_ context.Context, _ []byte, } func (g *GitcoinPassport) HoldersBalances(ctx context.Context, stamp []byte, _ uint64) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, + map[common.Address]*big.Int, *providers.BlocksDelta, error, ) { internalCtx, cancel := context.WithCancel(ctx) defer cancel() @@ -153,13 +153,13 @@ func (g *GitcoinPassport) HoldersBalances(ctx context.Context, stamp []byte, _ u if len(stamp) > 0 { dbStampScores, err := g.db.QueriesRW.GetStampScores(internalCtx, string(stamp)) if err != nil { - return nil, 0, 0, false, big.NewInt(0), fmt.Errorf("error getting stamp scores: %w", err) + return nil, nil, fmt.Errorf("error getting stamp scores: %w", err) } for _, dbStampScore := range dbStampScores { address := common.HexToAddress(string(dbStampScore.Address)) score, ok := new(big.Int).SetString(string(dbStampScore.Score), 10) if !ok { - return nil, 0, 0, false, big.NewInt(0), fmt.Errorf("error parsing score: %w", err) + return nil, nil, fmt.Errorf("error parsing score: %w", err) } currentScores[address] = score totalSupply.Add(totalSupply, score) @@ -167,13 +167,13 @@ func (g *GitcoinPassport) HoldersBalances(ctx context.Context, stamp []byte, _ u } else { dbScores, err := g.db.QueriesRW.GetScores(internalCtx) if err != nil { - return nil, 0, 0, false, big.NewInt(0), fmt.Errorf("error getting scores: %w", err) + return nil, nil, fmt.Errorf("error getting scores: %w", err) } for _, dbScore := range dbScores { address := common.HexToAddress(string(dbScore.Address)) score, ok := new(big.Int).SetString(string(dbScore.Score), 10) if !ok { - return nil, 0, 0, false, big.NewInt(0), fmt.Errorf("error parsing score: %w", err) + return nil, nil, fmt.Errorf("error parsing score: %w", err) } currentScores[address] = score totalSupply.Add(totalSupply, score) @@ -185,7 +185,14 @@ func (g *GitcoinPassport) HoldersBalances(ctx context.Context, stamp []byte, _ u holders := providers.CalcPartialHolders(g.currentBalances, currentScores) // return the balances, 1 new transfer, the current time as lastBlock, true // as a synced and the computed totalSupply - return holders, 1, uint64(time.Now().Unix()), synced, totalSupply, nil + return holders, &providers.BlocksDelta{ + LogsCount: 1, + NewLogsCount: 1, + AlreadyProcessedLogsCount: 0, + Block: uint64(time.Now().Unix()), + Synced: synced, + TotalSupply: totalSupply, + }, nil } // Close cancels the download context. 
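Both external providers lean on `providers.CalcPartialHolders` to turn two absolute snapshots into a delta. The patch never shows that function's body, so the following is only an illustration of the idea (a diff map whose entries can be added onto the stored balances), not the actual census3 implementation:

```go
package main

import (
	"fmt"
	"math/big"

	"github.com/ethereum/go-ethereum/common"
)

// partialHolders emits newBalance minus oldBalance for every address, so
// adding the result onto the stored balances reproduces the new snapshot.
func partialHolders(prev, curr map[common.Address]*big.Int) map[common.Address]*big.Int {
	partial := make(map[common.Address]*big.Int)
	for addr, balance := range curr {
		old := big.NewInt(0)
		if b, ok := prev[addr]; ok {
			old = b
		}
		if diff := new(big.Int).Sub(balance, old); diff.Sign() != 0 {
			partial[addr] = diff
		}
	}
	for addr, balance := range prev {
		if _, ok := curr[addr]; !ok {
			partial[addr] = new(big.Int).Neg(balance) // holder left: subtract it all
		}
	}
	return partial
}

func main() {
	a := common.HexToAddress("0x01")
	b := common.HexToAddress("0x02")
	prev := map[common.Address]*big.Int{a: big.NewInt(10), b: big.NewInt(5)}
	curr := map[common.Address]*big.Int{a: big.NewInt(15)}
	fmt.Println(partialHolders(prev, curr)) // a: +5, b: -5
}
```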
diff --git a/scanner/providers/gitcoin/gitcoin_provider_test.go b/scanner/providers/gitcoin/gitcoin_provider_test.go index cac7457e..121656f9 100644 --- a/scanner/providers/gitcoin/gitcoin_provider_test.go +++ b/scanner/providers/gitcoin/gitcoin_provider_test.go @@ -44,13 +44,13 @@ func TestGitcoinPassport(t *testing.T) { provider := new(GitcoinPassport) c.Assert(provider.Init(ctx, GitcoinPassportConf{endpoints["/original"], time.Second, testDB}), qt.IsNil) // start the first download - emptyBalances, _, _, _, _, err := provider.HoldersBalances(context.TODO(), nil, 0) + emptyBalances, _, err := provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) c.Assert(len(emptyBalances), qt.Equals, 0) // wait for the download to finish time.Sleep(2 * time.Second) // check the balances - holders, _, _, _, _, err := provider.HoldersBalances(context.TODO(), nil, 0) + holders, _, err := provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) c.Assert(len(holders), qt.Equals, len(expectedOriginalHolders)) for addr, balance := range holders { @@ -61,7 +61,7 @@ func TestGitcoinPassport(t *testing.T) { } c.Assert(provider.SetLastBalances(context.TODO(), nil, holders, 0), qt.IsNil) // start the second download expecting to use the cached data - sameBalances, _, _, _, _, err := provider.HoldersBalances(context.TODO(), nil, 0) + sameBalances, _, err := provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) // empty results because the data the same c.Assert(len(sameBalances), qt.Equals, 0) @@ -73,7 +73,7 @@ func TestGitcoinPassport(t *testing.T) { // new endpoint with one change time.Sleep(time.Second * 5) c.Assert(newProvider.SetLastBalances(context.TODO(), nil, holders, 0), qt.IsNil) - holders, _, _, _, _, err = newProvider.HoldersBalances(context.TODO(), nil, 1) + holders, _, err = newProvider.HoldersBalances(context.TODO(), nil, 1) c.Assert(err, qt.IsNil) c.Assert(len(holders), qt.Equals, len(expectedUpdatedHolders)) for addr, balance := range holders { diff --git a/scanner/providers/holders_provider.go b/scanner/providers/holders_provider.go index a87707d0..0b28a705 100644 --- a/scanner/providers/holders_provider.go +++ b/scanner/providers/holders_provider.go @@ -7,6 +7,15 @@ import ( "github.com/ethereum/go-ethereum/common" ) +type BlocksDelta struct { + LogsCount uint64 + NewLogsCount uint64 + AlreadyProcessedLogsCount uint64 + Block uint64 + Synced bool + TotalSupply *big.Int +} + // HolderProvider is the interface that wraps the basic methods to interact with // a holders provider. It is used by the HoldersScanner to get the balances of // the token holders. It allows to implement different providers, such as @@ -32,7 +41,7 @@ type HolderProvider interface { // HoldersBalances returns the balances of the token holders for the given // id and delta point in time, from the stored last snapshot. It also // returns the total supply of tokens as a *big.Int. - HoldersBalances(ctx context.Context, id []byte, to uint64) (map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error) + HoldersBalances(ctx context.Context, id []byte, to uint64) (map[common.Address]*big.Int, *BlocksDelta, error) // Close closes the provider and its internal structures. Close() error // IsExternal returns true if the provider is an external API. 
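Collapsing the old six-value return into `(balances, *BlocksDelta, error)` means every call site unpacks one struct instead of four loose scalars. A sketch of a consumer under the new interface (the logging keys are illustrative; as the provider diffs show, some error paths also carry a non-nil delta, so callers that want to checkpoint progress should inspect both):

```go
package example

import (
	"context"

	"github.com/vocdoni/census3/scanner/providers"
	"go.vocdoni.io/dvote/log"
)

// scanStep runs one HoldersBalances round and logs the delta counters.
func scanStep(ctx context.Context, p providers.HolderProvider, id []byte, from uint64) error {
	balances, delta, err := p.HoldersBalances(ctx, id, from)
	if err != nil {
		return err
	}
	log.Infow("holders balances scanned",
		"holders", len(balances),
		"lastBlock", delta.Block,
		"logs", delta.LogsCount,
		"newLogs", delta.NewLogsCount,
		"duplicated", delta.AlreadyProcessedLogsCount,
		"synced", delta.Synced)
	return nil
}
```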
diff --git a/scanner/providers/poap/poap_provider.go b/scanner/providers/poap/poap_provider.go index d264bdab..19223121 100644 --- a/scanner/providers/poap/poap_provider.go +++ b/scanner/providers/poap/poap_provider.go @@ -128,7 +128,7 @@ func (p *POAPHolderProvider) SetLastBalances(_ context.Context, id []byte, // API parsing every POAP holder for the event ID provided and calculate the // balances of the token holders from the last snapshot. func (p *POAPHolderProvider) HoldersBalances(_ context.Context, id []byte, delta uint64) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, + map[common.Address]*big.Int, *providers.BlocksDelta, error, ) { // parse eventID from id eventID := string(id) @@ -136,7 +136,7 @@ func (p *POAPHolderProvider) HoldersBalances(_ context.Context, id []byte, delta // get last snapshot newSnapshot, err := p.lastHolders(eventID) if err != nil { - return nil, 0, 0, false, big.NewInt(0), err + return nil, nil, err } p.snapshotsMtx.RLock() defer p.snapshotsMtx.RUnlock() @@ -159,7 +159,14 @@ func (p *POAPHolderProvider) HoldersBalances(_ context.Context, id []byte, delta totalSupply.Add(totalSupply, balance) } // return the final snapshot - return finalSnapshot, uint64(len(finalSnapshot)), from, true, totalSupply, nil + return finalSnapshot, &providers.BlocksDelta{ + LogsCount: uint64(len(finalSnapshot)), + NewLogsCount: uint64(len(newSnapshot)), + AlreadyProcessedLogsCount: 0, + Block: from, + Synced: true, + TotalSupply: totalSupply, + }, nil } // Close method is not implemented in the POAP external provider. By default it diff --git a/scanner/providers/poap/poap_provider_test.go b/scanner/providers/poap/poap_provider_test.go index adef6074..b50867b8 100644 --- a/scanner/providers/poap/poap_provider_test.go +++ b/scanner/providers/poap/poap_provider_test.go @@ -39,7 +39,7 @@ func TestPOAP(t *testing.T) { provider := new(POAPHolderProvider) c.Assert(provider.Init(ctx, POAPConfig{endpoints["/original"], "no-token"}), qt.IsNil) - holders, _, _, _, _, err := provider.HoldersBalances(context.TODO(), nil, 0) + holders, _, err := provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) c.Assert(len(holders), qt.Equals, len(expectedOriginalHolders)) for addr, balance := range holders { @@ -47,13 +47,13 @@ func TestPOAP(t *testing.T) { c.Assert(exists, qt.Equals, true) c.Assert(balance.String(), qt.Equals, expectedBalance) } - sameBalances, _, _, _, _, err := provider.HoldersBalances(context.TODO(), nil, 0) + sameBalances, _, err := provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) // empty results because the data the same c.Assert(len(sameBalances), qt.Equals, 0) provider.apiEndpoint = endpoints["/updated"] - holders, _, _, _, _, err = provider.HoldersBalances(context.TODO(), nil, 0) + holders, _, err = provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) c.Assert(len(holders), qt.Equals, len(expectedUpdatedHolders)) for addr, balance := range holders { diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index a6af94a1..844c5f97 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -120,15 +120,14 @@ func (p *ERC20HolderProvider) SetLastBlockNumber(blockNumber uint64) { // of new transfers, the last block scanned, if the provider is synced and an // error if it exists. 
func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fromBlock uint64) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, + map[common.Address]*big.Int, *providers.BlocksDelta, error, ) { // if the last network block is lower than the last scanned block, and the // last scanned block is equal to the creation block, it means that the // last network block is outdated, so it returns that it is not synced and // an error if fromBlock >= p.lastNetworkBlock && fromBlock == p.creationBlock { - return nil, 0, fromBlock, false, big.NewInt(0), - fmt.Errorf("outdated last network block, it will retry in the next iteration") + return nil, nil, fmt.Errorf("outdated last network block, it will retry in the next iteration") } // calculate the range of blocks to scan, by default take the last block // scanned and scan to the latest block, calculate the latest block if the @@ -138,7 +137,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro var err error toBlock, err = p.LatestBlockNumber(ctx, nil) if err != nil { - return nil, 0, fromBlock, false, big.NewInt(0), err + return nil, nil, err } } log.Infow("scan iteration", @@ -151,22 +150,28 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro startTime := time.Now() logs, lastBlock, synced, err := RangeOfLogs(ctx, p.client, p.address, fromBlock, toBlock, LOG_TOPIC_ERC20_TRANSFER) if err != nil && !errors.Is(err, ErrTooManyRequests) { - return nil, 0, fromBlock, false, big.NewInt(0), err + return nil, nil, err } if errors.Is(err, ErrTooManyRequests) { log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } // encode the number of new transfers newTransfers := uint64(0) + alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) - alreadyProcessedLogs := 0 // iterate the logs and update the balances for _, currentLog := range logs { // check if the log has been already processed processed, err := p.isLogAlreadyProcessed(currentLog) if err != nil { - return nil, newTransfers, lastBlock, false, big.NewInt(0), - errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + return nil, &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: false, + TotalSupply: big.NewInt(0), + }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } if processed { alreadyProcessedLogs++ @@ -175,8 +180,14 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro newTransfers++ logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) if err != nil { - return nil, newTransfers, lastBlock, false, big.NewInt(0), - errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + return nil, &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: false, + TotalSupply: big.NewInt(0), + }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } // update balances if toBalance, ok := balances[logData.To]; ok { @@ -189,6 +200,14 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro } else { balances[logData.From] = new(big.Int).Neg(logData.Value) } + target := 
common.HexToAddress("0x05887A1CB6230E40a39c020E9f7fB09d3fC9D8da") + if logData.To.Hex() == target.Hex() || logData.From.Hex() == target.Hex() { + log.Infow("target addrsss transfer", + "from", logData.From.Hex(), + "to", logData.To.Hex(), + "value", logData.Value.String()) + } + } log.Infow("saving blocks", "count", len(balances), @@ -198,12 +217,19 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro "took", time.Since(startTime).Seconds(), "progress", fmt.Sprintf("%d%%", (fromBlock*100)/toBlock)) p.synced.Store(synced) - totalSupply, err := p.TotalSupply(nil) - if err != nil { + + delta := &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: synced, + TotalSupply: big.NewInt(0), + } + if delta.TotalSupply, err = p.TotalSupply(nil); err != nil { log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) - return balances, newTransfers, lastBlock, synced, nil, nil } - return balances, newTransfers, lastBlock, synced, totalSupply, nil + return balances, delta, nil } // Close method is not implemented for ERC20 tokens. diff --git a/scanner/providers/web3/erc721_provider.go b/scanner/providers/web3/erc721_provider.go index de347bd6..fc9315cc 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -118,15 +118,14 @@ func (p *ERC721HolderProvider) SetLastBlockNumber(blockNumber uint64) { // of new transfers, the last block scanned, if the provider is synced and an // error if it exists. func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fromBlock uint64) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, + map[common.Address]*big.Int, *providers.BlocksDelta, error, ) { // if the last network block is lower than the last scanned block, and the // last scanned block is equal to the creation block, it means that the // last network block is outdated, so it returns that it is not synced and // an error if fromBlock >= p.lastNetworkBlock && fromBlock == p.creationBlock { - return nil, 0, fromBlock, false, big.NewInt(0), - errors.New("outdated last network block, it will retry in the next iteration") + return nil, nil, fmt.Errorf("outdated last network block, it will retry in the next iteration") } // calculate the range of blocks to scan, by default take the last block // scanned and scan to the latest block, calculate the latest block if the @@ -136,7 +135,7 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr var err error toBlock, err = p.LatestBlockNumber(ctx, nil) if err != nil { - return nil, 0, fromBlock, false, nil, err + return nil, nil, err } } log.Infow("scan iteration", @@ -149,29 +148,45 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr startTime := time.Now() logs, lastBlock, synced, err := RangeOfLogs(ctx, p.client, p.address, fromBlock, toBlock, LOG_TOPIC_ERC20_TRANSFER) if err != nil && !errors.Is(err, ErrTooManyRequests) { - return nil, 0, fromBlock, false, big.NewInt(0), err + return nil, nil, err } if errors.Is(err, ErrTooManyRequests) { log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } + log.Warnw("logs received", "number_of_logs", len(logs), "last_block", lastBlock) // encode the number of new transfers newTransfers := uint64(0) + alreadyProcessedLogs := uint64(0) balances := 
make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { // check if the log has been already processed processed, err := p.isLogAlreadyProcessed(currentLog) if err != nil { + return nil, &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: false, + TotalSupply: big.NewInt(0), + }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) } if processed { + alreadyProcessedLogs++ continue } newTransfers++ logData, err := p.contract.ERC721ContractFilterer.ParseTransfer(currentLog) if err != nil { - return nil, newTransfers, lastBlock, false, nil, fmt.Errorf("[ERC721] %w: %s: %w", ErrParsingTokenLogs, p.address.Hex(), err) + return nil, &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: false, + TotalSupply: big.NewInt(0), + }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) } // update balances if toBalance, ok := balances[logData.To]; ok { @@ -187,12 +202,25 @@ ... } log.Infow("saving blocks", "count", len(balances), - "logs", len(logs), + "new_logs", newTransfers, + "already_processed_logs", alreadyProcessedLogs, "blocks/s", 1000*float32(lastBlock-fromBlock)/float32(time.Since(startTime).Milliseconds()), "took", time.Since(startTime).Seconds(), "progress", fmt.Sprintf("%d%%", (fromBlock*100)/toBlock)) p.synced.Store(synced) + + delta := &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: synced, + TotalSupply: big.NewInt(0), + } + if delta.TotalSupply, err = p.TotalSupply(nil); err != nil { log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) } return balances, delta, nil } // Close method is not implemented for ERC721 tokens. diff --git a/scanner/providers/web3/erc777_provider.go index b9a130f9..6b9c1384 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -118,15 +118,14 @@ func (p *ERC777HolderProvider) SetLastBlockNumber(blockNumber uint64) { // of new transfers, the last block scanned, if the provider is synced and an // error if it exists. 
func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fromBlock uint64) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, + map[common.Address]*big.Int, *providers.BlocksDelta, error, ) { // if the last network block is lower than the last scanned block, and the // last scanned block is equal to the creation block, it means that the // last network block is outdated, so it returns that it is not synced and // an error if fromBlock >= p.lastNetworkBlock && fromBlock == p.creationBlock { - return nil, 0, fromBlock, false, big.NewInt(0), - errors.New("outdated last network block, it will retry in the next iteration") + return nil, nil, fmt.Errorf("outdated last network block, it will retry in the next iteration") } // calculate the range of blocks to scan, by default take the last block // scanned and scan to the latest block, calculate the latest block if the @@ -136,7 +135,7 @@ ... var err error toBlock, err = p.LatestBlockNumber(ctx, nil) if err != nil { - return nil, 0, fromBlock, false, nil, err + return nil, nil, err } } log.Infow("scan iteration", @@ -149,30 +148,45 @@ ... startTime := time.Now() logs, lastBlock, synced, err := RangeOfLogs(ctx, p.client, p.address, fromBlock, toBlock, LOG_TOPIC_ERC20_TRANSFER) if err != nil && !errors.Is(err, ErrTooManyRequests) { - return nil, 0, fromBlock, false, big.NewInt(0), err + return nil, nil, err } if errors.Is(err, ErrTooManyRequests) { log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } + log.Warnw("logs received", "number_of_logs", len(logs), "last_block", lastBlock) // encode the number of new transfers newTransfers := uint64(0) + alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { // check if the log has been already processed processed, err := p.isLogAlreadyProcessed(currentLog) if err != nil { - return nil, newTransfers, lastBlock, false, big.NewInt(0), - errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) + return nil, &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: false, + TotalSupply: big.NewInt(0), + }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) } if processed { + alreadyProcessedLogs++ continue } newTransfers++ logData, err := p.contract.ERC777ContractFilterer.ParseTransfer(currentLog) if err != nil { - return nil, newTransfers, lastBlock, false, nil, - errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) + return nil, &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: false, + TotalSupply: big.NewInt(0), + }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) } // update balances if toBalance, ok := balances[logData.To]; ok { @@ -188,11 +202,25 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr } log.Infow("saving blocks", "count", len(balances), - "logs", len(logs), + "new_logs", newTransfers, + "already_processed_logs", alreadyProcessedLogs, "blocks/s", 
1000*float32(lastBlock-fromBlock)/float32(time.Since(startTime).Milliseconds()), "took", time.Since(startTime).Seconds(), "progress", fmt.Sprintf("%d%%", (fromBlock*100)/toBlock)) - return balances, newTransfers, lastBlock, synced, nil, nil + p.synced.Store(synced) + + delta := &providers.BlocksDelta{ + Block: lastBlock, + LogsCount: uint64(len(logs)), + NewLogsCount: newTransfers, + AlreadyProcessedLogsCount: alreadyProcessedLogs, + Synced: synced, + TotalSupply: big.NewInt(0), + } + if delta.TotalSupply, err = p.TotalSupply(nil); err != nil { + log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) + } + return balances, delta, nil } // Close method is not implemented for ERC777 tokens. diff --git a/scanner/scanner.go b/scanner/scanner.go index 7984194b..6d02c4e0 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -4,10 +4,8 @@ import ( "context" "database/sql" "errors" - "fmt" "math/big" "sort" - "strings" "sync" "sync/atomic" "time" @@ -43,6 +41,7 @@ type Scanner struct { ctx context.Context cancel context.CancelFunc db *db.DB + updater *Updater networks *web3.Web3Pool providerManager *manager.ProviderManager coolDown time.Duration @@ -57,11 +56,12 @@ type Scanner struct { // NewScanner returns a new scanner instance with the required parameters // initialized. -func NewScanner(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, +func NewScanner(db *db.DB, updater *Updater, networks *web3.Web3Pool, pm *manager.ProviderManager, coolDown time.Duration, filtersPath string, ) *Scanner { return &Scanner{ db: db, + updater: updater, networks: networks, providerManager: pm, coolDown: coolDown, @@ -111,50 +111,64 @@ func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { var atSyncGlobal atomic.Bool atSyncGlobal.Store(true) for _, token := range tokens { - // get the semaphore - sem <- struct{}{} - go func(token ScannerToken) { - // release the semaphore when the goroutine finishes - defer func() { - <-sem - }() - log.Infow("scanning token", + log.Infow("checking token in the updater queue", + "address", token.Address.Hex(), + "chainID", token.ChainID, + "externalID", token.ExternalID) + // get the request ID of the token in the updater queue + reqID, err := RequestID(token.Address, token.ChainID, token.ExternalID) + if err != nil { + log.Error(err) + continue + } + // get the status of the token in the updater queue + status := s.updater.RequestStatus(reqID, true) + if status != nil { + log.Infow("token status in the updater queue", "address", token.Address.Hex(), "chainID", token.ChainID, "externalID", token.ExternalID, - "lastBlock", token.LastBlock, - "ready", token.Ready) - // scan the token - holders, newTransfers, lastBlock, synced, totalSupply, err := s.ScanHolders(ctx, token) - if err != nil { - atSyncGlobal.Store(false) - if errors.Is(err, context.Canceled) { - log.Info("scanner context cancelled, shutting down") - return - } - log.Error(err) - return - } - if !synced { - atSyncGlobal.Store(false) + "lastBlock", status.LastBlock, + "lastTotalSupply", status.LastTotalSupply, + "totalNewLogs", status.TotalNewLogs, + "totalAlreadyProcessedLogs", status.TotalAlreadyProcessedLogs, + "totalLogs", status.TotalLogs, + "done", status.Done) + // if the token is in the updater queue, update the + // internal token status and continue to the next token + // only if the token is done + defer s.updateInternalTokenStatus(*token, status.LastBlock, status.Done, status.LastTotalSupply) + if status.Done { + continue } - // save the 
new token holders - s.updateInternalTokenStatus(token, lastBlock, synced, totalSupply) - if err = s.SaveHolders(ctx, token, holders, newTransfers, lastBlock, synced, totalSupply); err != nil { - if strings.Contains(err.Error(), "database is closed") { - return + atSyncGlobal.Store(false) + } + // if it has been processed or it is not in the queue, load + // the last available block number of the network and + // enqueue it to the updater queue from the last scanned + // block + if iLastNetworkBlock, ok := s.latestBlockNumbers.Load(token.ChainID); ok { + if lastNetworkBlock, ok := iLastNetworkBlock.(uint64); ok { + if _, err := s.updater.SetRequest(&UpdateRequest{ + Address: token.Address, + ChainID: token.ChainID, + Type: token.Type, + ExternalID: token.ExternalID, + CreationBlock: token.CreationBlock, + EndBlock: lastNetworkBlock, + LastBlock: token.LastBlock, + }); err != nil { + log.Warnw("error enqueuing token", "error", err) + continue } - log.Warnw("error saving tokenholders", + log.Infow("token enqueued from the scanner", "address", token.Address.Hex(), "chainID", token.ChainID, "externalID", token.ExternalID, - "error", err) + "from", token.LastBlock, + "to", lastNetworkBlock) } - }(*token) - } - // wait for all the tokens to be scanned - for i := 0; i < concurrentTokens; i++ { - sem <- struct{}{} + } } log.Infow("scan iteration finished", "iteration", itCounter, @@ -199,7 +213,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { if !ok { totalSupply = nil } - tokens = append(tokens, &ScannerToken{ + st := &ScannerToken{ Address: common.BytesToAddress(token.ID), ChainID: token.ChainID, Type: token.TypeID, @@ -209,7 +223,12 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { Ready: token.CreationBlock > 0 && token.LastBlock >= token.CreationBlock, Synced: token.Synced, totalSupply: totalSupply, - }) + } + if err := s.prepareToken(st); err != nil { + log.Warnw("error preparing token", "error", err) + continue + } + tokens = append(tokens, st) } // get old not synced tokens from the database (2) oldNotSyncedTokens, err := s.db.QueriesRO.ListOldNoSyncedTokens(internalCtx) @@ -250,7 +269,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { if !ok { totalSupply = nil } - tokens = append(tokens, &ScannerToken{ + st := &ScannerToken{ Address: common.BytesToAddress(token.ID), ChainID: token.ChainID, Type: token.TypeID, @@ -260,7 +279,12 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { Ready: token.CreationBlock > 0 && token.LastBlock >= token.CreationBlock, Synced: token.Synced, totalSupply: totalSupply, - }) + } + if err := s.prepareToken(st); err != nil { + log.Warnw("error preparing token", "error", err) + continue + } + tokens = append(tokens, st) } } // get synced tokens from the database to scan them last (3) @@ -273,7 +297,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { if !ok { totalSupply = nil } - tokens = append(tokens, &ScannerToken{ + st := &ScannerToken{ Address: common.BytesToAddress(token.ID), ChainID: token.ChainID, Type: token.TypeID, @@ -283,7 +307,12 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { Ready: token.CreationBlock > 0 && token.LastBlock >= token.CreationBlock, Synced: token.Synced, totalSupply: totalSupply, - }) + } + if err := s.prepareToken(st); err != nil { + log.Warnw("error preparing token", "error", err) + continue + } + tokens = append(tokens, st) } // update the 
tokens to scan in the scanner and return them s.tokensMtx.Lock() @@ -292,125 +321,6 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { return tokens, nil } -// ScanHolders scans the holders of the given token. It get the current holders -// from the database, set them into the provider and get the new ones. It -// returns the new holders, the last block scanned and if the token is synced -// after the scan. -func (s *Scanner) ScanHolders(ctx context.Context, token ScannerToken) ( - map[common.Address]*big.Int, uint64, uint64, bool, *big.Int, error, -) { - internalCtx, cancel := context.WithTimeout(ctx, SCAN_TIMEOUT) - defer cancel() - // get the correct token holder provider for the current token - provider, err := s.providerManager.GetProvider(s.ctx, token.Type) - if err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, - fmt.Errorf("token type %d not supported: %w", token.Type, err) - } - // create a tx to use it in the following queries - tx, err := s.db.RW.BeginTx(internalCtx, nil) - if err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - defer func() { - if err := tx.Rollback(); err != nil && !errors.Is(sql.ErrTxDone, err) { - log.Error(err) - } - }() - qtx := s.db.QueriesRW.WithTx(tx) - // if the provider is not an external one, instance the current token - if !provider.IsExternal() { - // load filter of the token from the database - filter, err := LoadFilter(s.filtersPath, token.Address, token.ChainID) - if err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - // commit the filter when the function finishes - defer func() { - if err := filter.Commit(); err != nil { - log.Error(err) - return - } - }() - // set the token reference in the provider - if err := provider.SetRef(web3provider.Web3ProviderRef{ - HexAddress: token.Address.Hex(), - ChainID: token.ChainID, - CreationBlock: token.CreationBlock, - Filter: filter, - }); err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - // set the last block number of the network in the provider getting it - // from the latest block numbers cache - if iLastNetworkBlock, ok := s.latestBlockNumbers.Load(token.ChainID); ok { - if lastNetworkBlock, ok := iLastNetworkBlock.(uint64); ok { - provider.SetLastBlockNumber(lastNetworkBlock) - } - } - // if the token is not ready yet (its creation block has not been - // calculated yet), calculate it, update the token information and - // return - if !token.Ready { - log.Debugw("token not ready yet, calculating creation block and continue", - "address", token.Address.Hex(), - "chainID", token.ChainID, - "externalID", token.ExternalID) - creationBlock, err := provider.CreationBlock(internalCtx, []byte(token.ExternalID)) - if err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - _, err = qtx.UpdateTokenBlocks(internalCtx, queries.UpdateTokenBlocksParams{ - ID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - CreationBlock: int64(creationBlock), - LastBlock: int64(creationBlock), - }) - if err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - token.LastBlock = creationBlock - } - } - log.Infow("scanning holders", - "address", token.Address.Hex(), - "chainID", token.ChainID, - "externalID", token.ExternalID, - "lastBlock", token.LastBlock) - // get the current token holders from the database - results, err := qtx.ListTokenHolders(internalCtx, - queries.ListTokenHoldersParams{ - TokenID: token.Address.Bytes(), - ChainID: 
token.ChainID, - ExternalID: token.ExternalID, - }) - if err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - // set the current holders into the provider and get the new ones - currentHolders := map[common.Address]*big.Int{} - for _, result := range results { - bBalance, ok := new(big.Int).SetString(result.Balance, 10) - if !ok { - return nil, 0, token.LastBlock, token.Synced, nil, fmt.Errorf("error parsing token holder balance") - } - currentHolders[common.BytesToAddress(result.HolderID)] = bBalance - } - // close the database tx and commit it - if err := tx.Commit(); err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - // set the current holders into the provider and get the new ones - if err := provider.SetLastBalances(ctx, []byte(token.ExternalID), - currentHolders, token.LastBlock, - ); err != nil { - return nil, 0, token.LastBlock, token.Synced, nil, err - } - // get the new holders from the provider - return provider.HoldersBalances(ctx, []byte(token.ExternalID), token.LastBlock) -} - func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, synced bool, totalSupply *big.Int, ) { @@ -429,6 +339,48 @@ func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64 s.tokensMtx.Unlock() } +func (s *Scanner) prepareToken(token *ScannerToken) error { + ctx, cancel := context.WithTimeout(s.ctx, UPDATE_TIMEOUT) + defer cancel() + // get the provider by token type + provider, err := s.providerManager.GetProvider(ctx, token.Type) + if err != nil { + return err + } + // if the token is not ready yet (its creation block has not been + // calculated yet), calculate it, update the token information and + // return + if !provider.IsExternal() && !token.Ready { + if err := provider.SetRef(web3provider.Web3ProviderRef{ + HexAddress: token.Address.Hex(), + ChainID: token.ChainID, + CreationBlock: token.CreationBlock, + }); err != nil { + return err + } + log.Debugw("token not ready yet, calculating creation block and continue", + "address", token.Address.Hex(), + "chainID", token.ChainID, + "externalID", token.ExternalID) + creationBlock, err := provider.CreationBlock(ctx, []byte(token.ExternalID)) + if err != nil { + return err + } + _, err = s.db.QueriesRW.UpdateTokenBlocks(ctx, queries.UpdateTokenBlocksParams{ + ID: token.Address.Bytes(), + ChainID: token.ChainID, + ExternalID: token.ExternalID, + CreationBlock: int64(creationBlock), + LastBlock: int64(creationBlock), + }) + if err != nil { + return err + } + token.LastBlock = creationBlock + } + return nil +} + // SaveHolders saves the given holders in the database. It calls the SaveHolders // helper function to save the holders and the token status in the database. 
It // prints the number of created and updated token holders if there are any, else diff --git a/scanner/updater.go b/scanner/updater.go index 35f945e0..97c50301 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -2,6 +2,8 @@ package scanner import ( "context" + "crypto/sha256" + "encoding/hex" "fmt" "math/big" "sync" @@ -14,7 +16,6 @@ import ( "github.com/vocdoni/census3/scanner/providers/manager" web3provider "github.com/vocdoni/census3/scanner/providers/web3" "go.vocdoni.io/dvote/log" - "go.vocdoni.io/dvote/util" ) // UpdateRequest is a struct to request a token update but also to query about @@ -22,11 +23,17 @@ import ( type UpdateRequest struct { Address common.Address ChainID uint64 + ExternalID string Type uint64 CreationBlock uint64 EndBlock uint64 - lastBlock uint64 + LastBlock uint64 Done bool + + TotalLogs uint64 + TotalNewLogs uint64 + TotalAlreadyProcessedLogs uint64 + LastTotalSupply *big.Int } // Updater is a struct to manage the update requests of the tokens. It will @@ -95,24 +102,25 @@ func (u *Updater) Stop() { // RequestStatus returns the status of a request by its ID. If the request is // done, it will be removed from the queue. If the request is not found, it will // return an error. -func (u *Updater) RequestStatus(id string) (*UpdateRequest, error) { +func (u *Updater) RequestStatus(id string, deleteOnDone bool) *UpdateRequest { u.queueMtx.Lock() defer u.queueMtx.Unlock() req, ok := u.queue[id] if !ok { - return nil, fmt.Errorf("request not found") + return nil } - if req.Done { + res := *req + if deleteOnDone && req.Done { delete(u.queue, id) } - return req, nil + return &res } -// AddRequest adds a new request to the queue. It will return an error if the +// SetRequest adds a new request to the queue. It will return an error if the // request is missing required fields or the block range is invalid. The request // will be added to the queue with a random ID, that will be returned to allow // the client to query the status of the request. -func (u *Updater) AddRequest(req *UpdateRequest) (string, error) { +func (u *Updater) SetRequest(req *UpdateRequest) (string, error) { // check required fields if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { return "", fmt.Errorf("missing required fields") @@ -122,9 +130,15 @@ func (u *Updater) AddRequest(req *UpdateRequest) (string, error) { return "", fmt.Errorf("invalid block range") } // set the last block to the creation block to start the process from there - req.lastBlock = req.CreationBlock + // if it is not set by the client + if req.LastBlock == 0 { + req.LastBlock = req.CreationBlock + } // generate a random ID for the request and insert it in the queue - id := util.RandomHex(16) + id, err := RequestID(req.Address, req.ChainID, req.ExternalID) + if err != nil { + return "", fmt.Errorf("error generating request ID") + } u.queueMtx.Lock() defer u.queueMtx.Unlock() u.queue[id] = req @@ -138,6 +152,20 @@ func (u *Updater) IsEmpty() bool { return len(u.queue) == 0 } +// RequestID returns the ID of a request given the address, chainID and external +// ID. The raw ID is a string with the format "chainID:address:externalID". The +// resulting ID is the first 4 bytes of the hash of the raw ID using the sha256 +// algorithm, encoded in hexadecimal. 
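+//
+// For illustration (hypothetical values): chainID 1, the zero address and an
+// empty external ID produce the raw ID
+// "1:0x0000000000000000000000000000000000000000:"; its sha256 digest is then
+// truncated to the first 4 bytes and hex-encoded into an 8-character ID.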
+func RequestID(address common.Address, chainID uint64, externalID string) (string, error) { + rawID := fmt.Sprintf("%d:%s:%s", chainID, address.Hex(), externalID) + hashFn := sha256.New() + if _, err := hashFn.Write([]byte(rawID)); err != nil { + return "", err + } + bHash := hashFn.Sum(nil) + return hex.EncodeToString(bHash[:4]), nil +} + // process iterates over the current queue items, getting the token holders // balances and saving them in the database until the last block is greater or // equal to the end block. It updates th status of the request in the queue. It @@ -161,38 +189,37 @@ func (u *Updater) process() error { "address", req.Address.Hex(), "from", req.CreationBlock, "to", req.EndBlock, - "current", req.lastBlock) + "current", req.LastBlock) ctx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) defer cancel() // get the provider by token type - provider, err := u.providers.GetProvider(u.ctx, req.Type) + provider, err := u.providers.GetProvider(ctx, req.Type) if err != nil { return err } // if the token is a external token, return an error - if provider.IsExternal() { - return fmt.Errorf("external providers are not supported yet") - } - // load filter of the token from the database - filter, err := LoadFilter(u.filtersPath, req.Address, req.ChainID) - if err != nil { - return err - } - // commit the filter when the function finishes - defer func() { - if err := filter.Commit(); err != nil { - log.Error(err) - return + if !provider.IsExternal() { + // load filter of the token from the database + filter, err := LoadFilter(u.filtersPath, req.Address, req.ChainID, req.ExternalID) + if err != nil { + return err + } + // commit the filter when the function finishes + defer func() { + if err := filter.Commit(); err != nil { + log.Error(err) + return + } + }() + // set the reference of the token to update in the provider + if err := provider.SetRef(web3provider.Web3ProviderRef{ + HexAddress: req.Address.Hex(), + ChainID: req.ChainID, + CreationBlock: req.CreationBlock, + Filter: filter, + }); err != nil { + return err } - }() - // set the reference of the token to update in the provider - if err := provider.SetRef(web3provider.Web3ProviderRef{ - HexAddress: req.Address.Hex(), - ChainID: req.ChainID, - CreationBlock: req.CreationBlock, - Filter: filter, - }); err != nil { - return err } // update the last block number of the provider to the last block of // the request @@ -214,37 +241,57 @@ func (u *Updater) process() error { currentHolders[common.Address(holder.HolderID)] = bBalance } // set the current holders in the provider - if err := provider.SetLastBalances(ctx, nil, currentHolders, req.lastBlock); err != nil { + if err := provider.SetLastBalances(ctx, nil, currentHolders, req.LastBlock); err != nil { return err } + // update with expected results in the queue once the function finishes + defer func() { + log.Infow("updating request in the queue", "lastBlock", req.LastBlock, "done", req.Done) + u.queueMtx.Lock() + u.queue[id] = req + u.queueMtx.Unlock() + }() // get range balances from the provider, it will check itereate again // over transfers logs, checking if there are new transfers using the // bloom filter associated to the token - balances, nTx, lastBlock, synced, totalSupply, err := provider.HoldersBalances(ctx, nil, req.lastBlock) + balances, delta, err := provider.HoldersBalances(ctx, nil, req.LastBlock) + // update the token last block in the request before checking the error + if delta != nil { + req.TotalLogs += delta.LogsCount + req.TotalNewLogs += 
delta.NewLogsCount + req.TotalAlreadyProcessedLogs += delta.AlreadyProcessedLogsCount + req.LastTotalSupply = delta.TotalSupply + + req.Done = delta.Synced + if delta.Synced { + req.LastBlock = req.EndBlock + } else if delta.Block >= req.LastBlock { + req.LastBlock = delta.Block + } + } if err != nil { return err } - log.Infow("new logs received", "address", req.Address.Hex(), "from", req.lastBlock, "lastBlock", lastBlock, "newLogs", nTx) - // update the token last - req.lastBlock = lastBlock - req.Done = synced + log.Infow("new logs received", + "address", req.Address.Hex(), + "from", req.LastBlock, + "lastBlock", delta.Block, + "newLogs", delta.NewLogsCount, + "alreadyProcessedLogs", delta.AlreadyProcessedLogsCount, + "totalLogs", delta.LogsCount) // save the new balances in the database created, updated, err := SaveHolders(u.db, ctx, ScannerToken{ Address: req.Address, ChainID: req.ChainID, - }, balances, nTx, lastBlock, synced, totalSupply) + }, balances, delta.NewLogsCount, delta.Block, delta.Synced, delta.TotalSupply) if err != nil { return err } - log.Debugw("missing token holders balances updated", + log.Debugw("token holders balances updated", "token", req.Address.Hex(), "chainID", req.ChainID, "created", created, "updated", updated) - // update the request in the queue - u.queueMtx.Lock() - u.queue[id] = req - u.queueMtx.Unlock() } return nil } From df310ca1d6ad33020ea058aac3e7546288a50bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Mon, 17 Jun 2024 18:21:26 +0200 Subject: [PATCH 11/21] debug txs and filters --- api/tokens.go | 9 +- go.mod | 4 +- go.sum | 8 +- scanner/filter.go | 83 ----------- scanner/filter/filter.go | 164 ++++++++++++++++++++++ scanner/providers/web3/erc20_provider.go | 69 +++++---- scanner/providers/web3/erc721_provider.go | 4 +- scanner/providers/web3/erc777_provider.go | 4 +- scanner/providers/web3/web3_provider.go | 4 +- scanner/scanner.go | 158 ++++++++++----------- scanner/updater.go | 37 +++-- scripts/block_logs/main.go | 89 ++++++++++++ 12 files changed, 417 insertions(+), 216 deletions(-) delete mode 100644 scanner/filter.go create mode 100644 scanner/filter/filter.go create mode 100644 scripts/block_logs/main.go diff --git a/api/tokens.go b/api/tokens.go index 46308ff4..1c117042 100644 --- a/api/tokens.go +++ b/api/tokens.go @@ -23,6 +23,7 @@ import ( "go.vocdoni.io/dvote/httprouter" api "go.vocdoni.io/dvote/httprouter/apirest" "go.vocdoni.io/dvote/log" + "go.vocdoni.io/dvote/util" ) func (capi *census3API) initTokenHandlers() error { @@ -310,7 +311,7 @@ func (capi *census3API) createToken(msg *api.APIdata, ctx *httprouter.HTTPContex Symbol: symbol, Decimals: decimals, TotalSupply: annotations.BigInt(sTotalSupply), - CreationBlock: 0, + CreationBlock: int64(req.StartBlock), TypeID: tokenType, Synced: false, Tags: req.Tags, @@ -648,14 +649,14 @@ func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContex return ErrNoSyncedToken } // enqueue the rescan token process - id, err := capi.tokenUpdater.SetRequest(&scanner.UpdateRequest{ + id := util.RandomHex(4) + if err := capi.tokenUpdater.SetRequest(id, &scanner.UpdateRequest{ Address: address, ChainID: uint64(chainID), Type: tokenData.TypeID, CreationBlock: uint64(tokenData.CreationBlock), EndBlock: uint64(tokenData.LastBlock), - }) - if err != nil { + }); err != nil { return ErrMalformedToken.WithErr(err) } // encoding the result and response it diff --git a/go.mod b/go.mod index 0265776e..1e8ba726 100644 --- a/go.mod +++ b/go.mod @@ -12,10 +12,10 @@ 
require ( github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/ipfs/go-cid v0.4.1 github.com/mattn/go-sqlite3 v1.14.20 + github.com/panmari/cuckoofilter v1.0.6 github.com/pressly/goose/v3 v3.10.0 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.17.0 - github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 go.vocdoni.io/dvote v1.10.2-0.20240312161355-15c06c28e91c go.vocdoni.io/proto v1.15.6-0.20240209115732-27836380ccae ) @@ -54,13 +54,13 @@ require ( github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect github.com/crate-crypto/go-kzg-4844 v0.7.0 // indirect github.com/cskr/pubsub v1.0.2 // indirect - github.com/d4l3k/messagediff v1.2.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect github.com/deckarep/golang-set/v2 v2.1.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/dgraph-io/badger v1.6.2 // indirect github.com/dgraph-io/ristretto v0.1.1 // indirect + github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/elastic/gosigar v0.14.2 // indirect diff --git a/go.sum b/go.sum index cb3e9b45..bbb2af0d 100644 --- a/go.sum +++ b/go.sum @@ -186,8 +186,6 @@ github.com/crate-crypto/go-kzg-4844 v0.7.0/go.mod h1:1kMhvPgI0Ky3yIa+9lFySEBUBXk github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0= github.com/cskr/pubsub v1.0.2/go.mod h1:/8MzYXk/NJAz782G8RPkFzXTZVu63VotefPnR9TIRis= -github.com/d4l3k/messagediff v1.2.1 h1:ZcAIMYsUg0EAp9X+tt8/enBE/Q8Yd5kzPynLyKptt9U= -github.com/d4l3k/messagediff v1.2.1/go.mod h1:Oozbb1TVXFac9FtSIxHBMnBCq2qeH/2KkEQxENCrlLo= github.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -211,6 +209,8 @@ github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkz github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WAFKLNi6ZS0675eEUC9y3AlwSbQu1Y= github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 h1:BS21ZUJ/B5X2UVUbczfmdWH7GapPWAhxcMsDnjJTU1E= +github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= @@ -898,6 +898,8 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/openzipkin/zipkin-go v0.4.1 h1:kNd/ST2yLLWhaWrkgchya40TJabe8Hioj9udfPcEO5A= github.com/openzipkin/zipkin-go v0.4.1/go.mod h1:qY0VqDSN1pOBN94dBc6w2GJlWLiovAyg7Qt6/I9HecM= +github.com/panmari/cuckoofilter v1.0.6 
h1:WKb1aSj16h22x0CKVtTCaRkJiCnVGPLEMGbNY8xwXf8= +github.com/panmari/cuckoofilter v1.0.6/go.mod h1:bKADbQPGbN6TxUvo/IbMEIUbKuASnpsOvrLTgpSX0aU= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= @@ -1143,8 +1145,6 @@ github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDH github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM= github.com/tyler-smith/go-bip39 v1.1.0 h1:5eUemwrMargf3BSLRRCalXT93Ns6pQJIjYQN2nyfOP8= github.com/tyler-smith/go-bip39 v1.1.0/go.mod h1:gUYDtqQw1JS3ZJ8UWVcGTGqqr6YIN3CWg+kkNaLt55U= -github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 h1:QEePdg0ty2r0t1+qwfZmQ4OOl/MB2UXIeJSpIZv56lg= -github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43/go.mod h1:OYRfF6eb5wY9VRFkXJH8FFBi3plw2v+giaIu7P054pM= github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb h1:Ywfo8sUltxogBpFuMOFRrrSifO788kAFxmvVw31PtQQ= github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb/go.mod h1:ikPs9bRWicNw3S7XpJ8sK/smGwU9WcSVU3dy9qahYBM= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= diff --git a/scanner/filter.go b/scanner/filter.go deleted file mode 100644 index 71477b15..00000000 --- a/scanner/filter.go +++ /dev/null @@ -1,83 +0,0 @@ -package scanner - -// The filter package provides a wrapper of boom.ScalableBloomFilter to store -// the filter to a file and load it from it. The filter is used to store the -// processed transactions to avoid re-processing them, but also rescanning a -// synced token to find missing transactions. - -import ( - "fmt" - "os" - - "github.com/ethereum/go-ethereum/common" - boom "github.com/tylertreat/BoomFilters" -) - -// TokenFilter is a wrapper of boom.ScalableBloomFilter to store the filter to -// a file and load it from it. The file that stores the filter is named as -//
-// <address>-<chainID>-<externalID>.filter, where address is the token contract
-// address and chainID is the chain ID of the network where the token is
-// deployed.
-type TokenFilter struct {
-	filter  *boom.ScalableBloomFilter
-	address common.Address
-	chainID uint64
-	path    string
-}
-
-// LoadFilter loads the filter from the file, if the file does not exist, create
-// a new filter and return it. The filter is stored in the file named as
-// <address>-<chainID>-<externalID>.filter in the basePath directory.
-func LoadFilter(basePath string, address common.Address, chainID uint64, externalID string) (*TokenFilter, error) {
-	// compose the filter path: path/<address>-<chainID>-<externalID>.filter
-	// by default, create a empty filter
-	tf := &TokenFilter{
-		filter:  boom.NewDefaultScalableBloomFilter(0.01),
-		address: address,
-		chainID: chainID,
-		path:    fmt.Sprintf("%s/%s-%d.filter", basePath, address.Hex(), chainID),
-	}
-	// read the filter from the file, if it not exists, create a new one
-	bFilter, err := os.ReadFile(tf.path)
-	if err != nil {
-		if !os.IsNotExist(err) {
-			return nil, err
-		}
-		return tf, nil
-	}
-	// decode the filter from the file content
-	if err := tf.filter.GobDecode(bFilter); err != nil {
-		return nil, err
-	}
-	return tf, nil
-}
-
-// Add adds a key to the filter.
-func (tf *TokenFilter) Add(key []byte) boom.Filter {
-	return tf.filter.Add(key)
-}
-
-// Test checks if a key is in the filter.
-func (tf *TokenFilter) Test(key []byte) bool {
-	return tf.filter.Test(key)
-}
-
-// TestAndAdd checks if a key is in the filter, if not, add it to the filter. It
-// is the combination of Test and conditional Add.
-func (tf *TokenFilter) TestAndAdd(key []byte) bool {
-	return tf.filter.TestAndAdd(key)
-}
-
-// Commit writes the filter to its file.
-func (tf *TokenFilter) Commit() error {
-	// encode the filter
-	bFilter, err := tf.filter.GobEncode()
-	if err != nil {
-		return err
-	}
-	// write the filter to the file
-	if err := os.WriteFile(tf.path, bFilter, os.ModePerm); err != nil {
-		return err
-	}
-	return nil
-}
diff --git a/scanner/filter/filter.go b/scanner/filter/filter.go
new file mode 100644
index 00000000..b255d026
--- /dev/null
+++ b/scanner/filter/filter.go
@@ -0,0 +1,164 @@
+package filter

+// The filter package provides a wrapper around a set of cuckoo filters to
+// store them to a file and load them from it. The filter is used to store the
+// processed transactions to avoid re-processing them, but also rescanning a
+// synced token to find missing transactions.
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/ethereum/go-ethereum/common"
+	cuckoo "github.com/panmari/cuckoofilter"
+	"go.vocdoni.io/dvote/log"
+)
+
+const (
+	FilterSize = 5000000 // 5M items
+	MaxSize    = 4000000 // 4M items
+)
+
+type filterDump struct {
+	Filters [][]byte `json:"filters"`
+}
+
+type batchFilter struct {
+	filter *cuckoo.Filter
+	size   uint
+	full   bool
+}
+
+// TokenFilter is a wrapper around a list of cuckoo filters that stores them to
+// a file and loads them back from it. The file that stores the filter is named
+// as <address>-<chainID>-<externalID>.filter, where address is the token
+// contract address, chainID is the chain ID of the network where the token is
+// deployed, and externalID distinguishes tokens from external providers.
+type TokenFilter struct {
+	path       string
+	filters    []*batchFilter
+	address    common.Address
+	chainID    uint64
+	externalID string
+}
+
+// LoadFilter loads the filter from the file; if the file does not exist, it
+// creates a new filter and returns it. The filter is stored in the file named
+// as <address>-<chainID>-<externalID>.filter in the basePath directory.
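+//
+// Typical lifecycle (sketch, with hypothetical arguments):
+//
+//	f, _ := LoadFilter("./filters", addr, chainID, externalID)
+//	seen := f.TestAndAdd(transferID) // false on first sight, true afterwards
+//	_ = f.Commit()                   // persist all batch filters to disk
+//
+// TestAndAdd may rarely report true for a never-seen key, up to the cuckoo
+// filter's false-positive rate.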
+func LoadFilter(basePath string, address common.Address, chainID uint64, externalID string) (*TokenFilter, error) {
+	tf := &TokenFilter{
+		path:       fmt.Sprintf("%s/%s-%d-%s.filter", basePath, address.Hex(), chainID, externalID),
+		filters:    []*batchFilter{},
+		address:    address,
+		chainID:    chainID,
+		externalID: externalID,
+	}
+	// load filters from the local file
+	loaded, err := tf.loadLocalFilters()
+	if err != nil {
+		return nil, err
+	}
+	// append a new filter if none could be loaded from the local file
+	if !loaded {
+		tf.addFilter()
+	}
+	return tf, nil
+}
+
+// Add adds a key to the filter.
+func (tf *TokenFilter) Add(key []byte) {
+	for _, f := range tf.filters {
+		if f.full {
+			continue
+		}
+		if f.filter.Insert(key) {
+			f.size++
+			f.full = f.size >= MaxSize
+			return
+		}
+	}
+	// add a new filter if all filters are full
+	tf.addFilter(key)
+}
+
+// Test checks if a key is in the filter.
+func (tf *TokenFilter) Test(key []byte) bool {
+	for _, f := range tf.filters {
+		if f.filter.Lookup(key) {
+			return true
+		}
+	}
+	return false
+}
+
+// TestAndAdd checks if a key is in the filter, if not, add it to the filter. It
+// is the combination of Test and conditional Add.
+func (tf *TokenFilter) TestAndAdd(key []byte) bool {
+	if tf.Test(key) {
+		return true
+	}
+	tf.Add(key)
+	return false
+}
+
+// Commit writes the filter to its file.
+func (tf *TokenFilter) Commit() error {
+	filterDump := filterDump{
+		Filters: make([][]byte, 0, len(tf.filters)),
+	}
+	for _, f := range tf.filters {
+		filterDump.Filters = append(filterDump.Filters, f.filter.Encode())
+	}
+	filterBytes, err := json.Marshal(filterDump)
+	if err != nil {
+		return err
+	}
+	return os.WriteFile(tf.path, filterBytes, 0644)
+}
+
+// loadLocalFilters reads the filter file and decodes every batch filter found
+// in it. It returns true if at least one filter could be loaded, and false if
+// the file does not exist or contains no filters.
+func (tf *TokenFilter) loadLocalFilters() (bool, error) {
+	filterBytes, err := os.ReadFile(tf.path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	var filterDump filterDump
+	if err := json.Unmarshal(filterBytes, &filterDump); err != nil {
+		return false, err
+	}
+	if len(filterDump.Filters) == 0 {
+		return false, nil
+	}
+	for _, bFilter := range filterDump.Filters {
+		filter, err := cuckoo.Decode(bFilter)
+		if err != nil {
+			return false, err
+		}
+		tf.filters = append(tf.filters, &batchFilter{
+			filter: filter,
+			size:   filter.Count(),
+			full:   filter.Count() >= MaxSize,
+		})
+	}
+	return true, nil
+}
+
+// addFilter appends a new empty batch filter, inserting the given keys into
+// it, and returns it.
+func (tf *TokenFilter) addFilter(keys ...[]byte) *batchFilter {
+	log.Info("adding new filter")
+	f := &batchFilter{
+		filter: cuckoo.NewFilter(FilterSize),
+		size:   0,
+		full:   false,
+	}
+	for _, key := range keys {
+		if f.filter.Insert(key) {
+			f.size++
+		}
+	}
+	tf.filters = append(tf.filters, f)
+	return f
+}
diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go
index 844c5f97..c468636e 100644
--- a/scanner/providers/web3/erc20_provider.go
+++ b/scanner/providers/web3/erc20_provider.go
@@ -11,13 +11,15 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/types"
-	boom "github.com/tylertreat/BoomFilters"
 	erc20 "github.com/vocdoni/census3/contracts/erc/erc20"
 	"github.com/vocdoni/census3/helpers/web3"
+	"github.com/vocdoni/census3/scanner/filter"
 	"github.com/vocdoni/census3/scanner/providers"
 	"go.vocdoni.io/dvote/log"
 )
 
+var processedLogs = make(map[string]bool)
+
 type ERC20HolderProvider struct {
 	endpoints *web3.Web3Pool
 	client    *web3.Client
@@ -32,7 +34,7 @@ type ERC20HolderProvider struct {
 	creationBlock    uint64
 	lastNetworkBlock uint64
synced atomic.Bool - filter boom.Filter + filter *filter.TokenFilter } func (p *ERC20HolderProvider) Init(_ context.Context, iconf any) error { @@ -159,10 +161,20 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro newTransfers := uint64(0) alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) + // debug + targetTx := common.HexToHash("0x68fbbe59012bf2a60e94c1cbd11bbafdb50ce149aeb881e63dcb2da1f102186b") // iterate the logs and update the balances for _, currentLog := range logs { - // check if the log has been already processed - processed, err := p.isLogAlreadyProcessed(currentLog) + // debug + if currentLog.TxHash.Hex() == targetTx.Hex() { + log.Warnw("target", "log", currentLog) + } + // skip the log if it has been removed + if currentLog.Removed { + continue + } + // parse log data + logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -171,14 +183,11 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), - }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) - } - if processed { - alreadyProcessedLogs++ - continue + }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } - newTransfers++ - logData, err := p.contract.ERC20ContractFilterer.ParseTransfer(currentLog) + // check if the log has been already processed and add it to the filter + // if it is not already included + processed, err := p.isLogAlreadyProcessed(currentLog) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -187,7 +196,19 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), - }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + } + // debug + if currentLog.TxHash.Hex() == targetTx.Hex() { + log.Warnw("target", "log", currentLog, "data", logData, "processed", processed) + } + // if it is the first scan, it will not check if the log has been + // already processed + if processed { + alreadyProcessedLogs++ + // continue + } else { + newTransfers++ } // update balances if toBalance, ok := balances[logData.To]; ok { @@ -200,14 +221,6 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro } else { balances[logData.From] = new(big.Int).Neg(logData.Value) } - target := common.HexToAddress("0x05887A1CB6230E40a39c020E9f7fB09d3fC9D8da") - if logData.To.Hex() == target.Hex() || logData.From.Hex() == target.Hex() { - log.Infow("target addrsss transfer", - "from", logData.From.Hex(), - "to", logData.To.Hex(), - "value", logData.Value.String()) - } - } log.Infow("saving blocks", "count", len(balances), @@ -391,18 +404,26 @@ func (p *ERC20HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[ // number and log index. It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error. 
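+// For illustration (hypothetical values): the transfer log at block 19000000,
+// transaction hash 0xabc… and log index 5 produces the preimage
+// "19000000-abc…-5" (tx hash hex-encoded without the 0x prefix), whose sha256
+// digest is the key tested against the filter.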
-func (p *ERC20HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { +func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { + // if the filter is not defined, return false if p.filter == nil { return false, nil } // get a identifier of each transfer: - // blockNumber-logIndex - transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + // sha256(blockNumber-txHash-log.Index) + transferID := fmt.Sprintf("%d-%x-%d", l.BlockNumber, l.TxHash, l.Index) hashFn := sha256.New() if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } hID := hashFn.Sum(nil) - return p.filter.TestAndAdd(hID), nil + processed := p.filter.TestAndAdd(hID) + + // local filter for debug + if processed && !processedLogs[transferID] { + log.Infow("false positive", "log", l) + } + processedLogs[transferID] = processed + return processed, nil } diff --git a/scanner/providers/web3/erc721_provider.go b/scanner/providers/web3/erc721_provider.go index fc9315cc..005dc4aa 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -11,9 +11,9 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - boom "github.com/tylertreat/BoomFilters" erc721 "github.com/vocdoni/census3/contracts/erc/erc721" "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/log" ) @@ -32,7 +32,7 @@ type ERC721HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool - filter boom.Filter + filter *filter.TokenFilter } func (p *ERC721HolderProvider) Init(_ context.Context, iconf any) error { diff --git a/scanner/providers/web3/erc777_provider.go b/scanner/providers/web3/erc777_provider.go index 6b9c1384..2abb8117 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -11,9 +11,9 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - boom "github.com/tylertreat/BoomFilters" erc777 "github.com/vocdoni/census3/contracts/erc/erc777" "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/log" ) @@ -32,7 +32,7 @@ type ERC777HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool - filter boom.Filter + filter *filter.TokenFilter } func (p *ERC777HolderProvider) Init(_ context.Context, iconf any) error { diff --git a/scanner/providers/web3/web3_provider.go b/scanner/providers/web3/web3_provider.go index b8503b5d..d518bf99 100644 --- a/scanner/providers/web3/web3_provider.go +++ b/scanner/providers/web3/web3_provider.go @@ -11,8 +11,8 @@ import ( "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - boom "github.com/tylertreat/BoomFilters" "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/db" "go.vocdoni.io/dvote/log" @@ -22,7 +22,7 @@ type Web3ProviderRef struct { HexAddress string ChainID uint64 CreationBlock uint64 - Filter boom.Filter + Filter *filter.TokenFilter } type Web3ProviderConfig struct { diff --git a/scanner/scanner.go b/scanner/scanner.go index 6d02c4e0..8026cd2d 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -111,6 +111,13 @@ func (s *Scanner) 
Start(ctx context.Context, concurrentTokens int) { var atSyncGlobal atomic.Bool atSyncGlobal.Store(true) for _, token := range tokens { + if !token.Ready { + if err := s.prepareToken(token); err != nil { + log.Warnw("error preparing token", "error", err) + continue + } + } + log.Infow("checking token in the updater queue", "address", token.Address.Hex(), "chainID", token.ChainID, @@ -149,7 +156,7 @@ func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { // block if iLastNetworkBlock, ok := s.latestBlockNumbers.Load(token.ChainID); ok { if lastNetworkBlock, ok := iLastNetworkBlock.(uint64); ok { - if _, err := s.updater.SetRequest(&UpdateRequest{ + if _, err := s.updater.AddRequest(&UpdateRequest{ Address: token.Address, ChainID: token.ChainID, Type: token.Type, @@ -157,6 +164,7 @@ func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { CreationBlock: token.CreationBlock, EndBlock: lastNetworkBlock, LastBlock: token.LastBlock, + Initial: token.LastBlock == 0 || token.LastBlock == token.CreationBlock, }); err != nil { log.Warnw("error enqueuing token", "error", err) continue @@ -213,7 +221,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { if !ok { totalSupply = nil } - st := &ScannerToken{ + tokens = append(tokens, &ScannerToken{ Address: common.BytesToAddress(token.ID), ChainID: token.ChainID, Type: token.TypeID, @@ -223,12 +231,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { Ready: token.CreationBlock > 0 && token.LastBlock >= token.CreationBlock, Synced: token.Synced, totalSupply: totalSupply, - } - if err := s.prepareToken(st); err != nil { - log.Warnw("error preparing token", "error", err) - continue - } - tokens = append(tokens, st) + }) } // get old not synced tokens from the database (2) oldNotSyncedTokens, err := s.db.QueriesRO.ListOldNoSyncedTokens(internalCtx) @@ -269,7 +272,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { if !ok { totalSupply = nil } - st := &ScannerToken{ + tokens = append(tokens, &ScannerToken{ Address: common.BytesToAddress(token.ID), ChainID: token.ChainID, Type: token.TypeID, @@ -279,12 +282,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { Ready: token.CreationBlock > 0 && token.LastBlock >= token.CreationBlock, Synced: token.Synced, totalSupply: totalSupply, - } - if err := s.prepareToken(st); err != nil { - log.Warnw("error preparing token", "error", err) - continue - } - tokens = append(tokens, st) + }) } } // get synced tokens from the database to scan them last (3) @@ -297,7 +295,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { if !ok { totalSupply = nil } - st := &ScannerToken{ + tokens = append(tokens, &ScannerToken{ Address: common.BytesToAddress(token.ID), ChainID: token.ChainID, Type: token.TypeID, @@ -307,12 +305,7 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { Ready: token.CreationBlock > 0 && token.LastBlock >= token.CreationBlock, Synced: token.Synced, totalSupply: totalSupply, - } - if err := s.prepareToken(st); err != nil { - log.Warnw("error preparing token", "error", err) - continue - } - tokens = append(tokens, st) + }) } // update the tokens to scan in the scanner and return them s.tokensMtx.Lock() @@ -321,66 +314,6 @@ func (s *Scanner) TokensToScan(ctx context.Context) ([]*ScannerToken, error) { return tokens, nil } -func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, - 
synced bool, totalSupply *big.Int, -) { - s.tokensMtx.Lock() - for i, t := range s.tokens { - if t.Address == token.Address && t.ChainID == token.ChainID && t.ExternalID == token.ExternalID { - s.tokens[i].LastBlock = lastBlock - s.tokens[i].Synced = synced - if totalSupply != nil && totalSupply.Cmp(big.NewInt(0)) > 0 { - s.tokens[i].totalSupply = totalSupply - token.totalSupply = totalSupply - } - break - } - } - s.tokensMtx.Unlock() -} - -func (s *Scanner) prepareToken(token *ScannerToken) error { - ctx, cancel := context.WithTimeout(s.ctx, UPDATE_TIMEOUT) - defer cancel() - // get the provider by token type - provider, err := s.providerManager.GetProvider(ctx, token.Type) - if err != nil { - return err - } - // if the token is not ready yet (its creation block has not been - // calculated yet), calculate it, update the token information and - // return - if !provider.IsExternal() && !token.Ready { - if err := provider.SetRef(web3provider.Web3ProviderRef{ - HexAddress: token.Address.Hex(), - ChainID: token.ChainID, - CreationBlock: token.CreationBlock, - }); err != nil { - return err - } - log.Debugw("token not ready yet, calculating creation block and continue", - "address", token.Address.Hex(), - "chainID", token.ChainID, - "externalID", token.ExternalID) - creationBlock, err := provider.CreationBlock(ctx, []byte(token.ExternalID)) - if err != nil { - return err - } - _, err = s.db.QueriesRW.UpdateTokenBlocks(ctx, queries.UpdateTokenBlocksParams{ - ID: token.Address.Bytes(), - ChainID: token.ChainID, - ExternalID: token.ExternalID, - CreationBlock: int64(creationBlock), - LastBlock: int64(creationBlock), - }) - if err != nil { - return err - } - token.LastBlock = creationBlock - } - return nil -} - // SaveHolders saves the given holders in the database. It calls the SaveHolders // helper function to save the holders and the token status in the database. 
It // prints the number of created and updated token holders if there are any, else @@ -454,3 +387,64 @@ func (s *Scanner) getLatestBlockNumbersUpdates() { } } } + +func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, + synced bool, totalSupply *big.Int, +) { + s.tokensMtx.Lock() + for i, t := range s.tokens { + if t.Address == token.Address && t.ChainID == token.ChainID && t.ExternalID == token.ExternalID { + s.tokens[i].LastBlock = lastBlock + s.tokens[i].Synced = synced + if totalSupply != nil && totalSupply.Cmp(big.NewInt(0)) > 0 { + s.tokens[i].totalSupply = totalSupply + token.totalSupply = totalSupply + } + break + } + } + s.tokensMtx.Unlock() +} + +func (s *Scanner) prepareToken(token *ScannerToken) error { + ctx, cancel := context.WithTimeout(s.ctx, UPDATE_TIMEOUT) + defer cancel() + // get the provider by token type + provider, err := s.providerManager.GetProvider(ctx, token.Type) + if err != nil { + return err + } + // if the token is not ready yet (its creation block has not been + // calculated yet), calculate it, update the token information and + // return + if !provider.IsExternal() && !token.Ready { + if err := provider.SetRef(web3provider.Web3ProviderRef{ + HexAddress: token.Address.Hex(), + ChainID: token.ChainID, + CreationBlock: token.CreationBlock, + }); err != nil { + return err + } + log.Debugw("token not ready yet, calculating creation block and continue", + "address", token.Address.Hex(), + "chainID", token.ChainID, + "externalID", token.ExternalID) + creationBlock, err := provider.CreationBlock(ctx, []byte(token.ExternalID)) + if err != nil { + return err + } + _, err = s.db.QueriesRW.UpdateTokenBlocks(ctx, queries.UpdateTokenBlocksParams{ + ID: token.Address.Bytes(), + ChainID: token.ChainID, + ExternalID: token.ExternalID, + CreationBlock: int64(creationBlock), + LastBlock: int64(creationBlock), + }) + if err != nil { + return err + } + token.CreationBlock = creationBlock + token.Ready = true + } + return nil +} diff --git a/scanner/updater.go b/scanner/updater.go index 97c50301..22339b75 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -13,6 +13,7 @@ import ( "github.com/vocdoni/census3/db" queries "github.com/vocdoni/census3/db/sqlc" "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers/manager" web3provider "github.com/vocdoni/census3/scanner/providers/web3" "go.vocdoni.io/dvote/log" @@ -29,6 +30,7 @@ type UpdateRequest struct { EndBlock uint64 LastBlock uint64 Done bool + Initial bool TotalLogs uint64 TotalNewLogs uint64 @@ -118,30 +120,43 @@ func (u *Updater) RequestStatus(id string, deleteOnDone bool) *UpdateRequest { // SetRequest adds a new request to the queue. It will return an error if the // request is missing required fields or the block range is invalid. The request -// will be added to the queue with a random ID, that will be returned to allow -// the client to query the status of the request. -func (u *Updater) SetRequest(req *UpdateRequest) (string, error) { +// will be added to the queue with a given ID. 
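+//
+// A typical call (sketch; the caller supplies the ID, e.g. derived with
+// RequestID or generated with util.RandomHex):
+//
+//	id, _ := RequestID(addr, chainID, externalID)
+//	err := u.SetRequest(id, &UpdateRequest{ /* required fields */ })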
+func (u *Updater) SetRequest(id string, req *UpdateRequest) error { // check required fields + if id == "" { + return fmt.Errorf("missing request ID") + } if req.ChainID == 0 || req.Type == 0 || req.CreationBlock == 0 || req.EndBlock == 0 { - return "", fmt.Errorf("missing required fields") + return fmt.Errorf("missing required fields") } // ensure the block range is valid if req.CreationBlock >= req.EndBlock { - return "", fmt.Errorf("invalid block range") + return fmt.Errorf("invalid block range") } // set the last block to the creation block to start the process from there // if it is not set by the client if req.LastBlock == 0 { req.LastBlock = req.CreationBlock } - // generate a random ID for the request and insert it in the queue - id, err := RequestID(req.Address, req.ChainID, req.ExternalID) - if err != nil { - return "", fmt.Errorf("error generating request ID") - } u.queueMtx.Lock() defer u.queueMtx.Unlock() u.queue[id] = req + return nil +} + +// AddRequest adds a new request to the queue. It will return an error if the +// request is missing required fields or the block range is invalid. The request +// will be added to the queue with a ID generated from the address, chainID and +// externalID, that will be returned to allow the client to query the status of +// the request. +func (u *Updater) AddRequest(req *UpdateRequest) (string, error) { + id, err := RequestID(req.Address, req.ChainID, req.ExternalID) + if err != nil { + return "", err + } + if err := u.SetRequest(id, req); err != nil { + return "", err + } return id, nil } @@ -200,7 +215,7 @@ func (u *Updater) process() error { // if the token is a external token, return an error if !provider.IsExternal() { // load filter of the token from the database - filter, err := LoadFilter(u.filtersPath, req.Address, req.ChainID, req.ExternalID) + filter, err := filter.LoadFilter(u.filtersPath, req.Address, req.ChainID, req.ExternalID) if err != nil { return err } diff --git a/scripts/block_logs/main.go b/scripts/block_logs/main.go new file mode 100644 index 00000000..7f155eaf --- /dev/null +++ b/scripts/block_logs/main.go @@ -0,0 +1,89 @@ +package main + +import ( + "context" + "flag" + "fmt" + "log" + "math/big" + "strings" + + "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/accounts/abi" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethclient" +) + +var ( + rpcURL string + contractAddress string + blockNumber int64 +) + +func init() { + flag.StringVar(&rpcURL, "rpc", "https://mainnet.infura.io/v3/YOUR_INFURA_PROJECT_ID", "Ethereum RPC URL") + flag.StringVar(&contractAddress, "contract", "", "ERC20 contract address") + flag.Int64Var(&blockNumber, "block", 0, "Block number") +} + +var transferEventSignature = []byte("Transfer(address,address,uint256)") +var transferEventSigHash = common.BytesToHash(crypto.Keccak256(transferEventSignature)) + +func main() { + flag.Parse() + + if rpcURL == "" || contractAddress == "" || blockNumber == 0 { + log.Fatalf("All flags (rpc, contract, block) are required") + } + fmt.Println("event signature hash:", transferEventSigHash.Hex()) + + client, err := ethclient.Dial(rpcURL) + if err != nil { + log.Fatalf("Failed to connect to the Ethereum client: %v", err) + } + + blockNum := big.NewInt(blockNumber) + contractAddr := common.HexToAddress(contractAddress) + + query := ethereum.FilterQuery{ + FromBlock: blockNum, + ToBlock: blockNum, + Addresses: []common.Address{contractAddr}, + Topics: 
[][]common.Hash{{transferEventSigHash}}, + } + + logs, err := client.FilterLogs(context.Background(), query) + if err != nil { + log.Fatalf("Failed to retrieve logs: %v", err) + } + + transferEventABI := `[{"anonymous":false,"inputs":[{"indexed":true,"name":"from","type":"address"},{"indexed":true,"name":"to","type":"address"},{"indexed":false,"name":"value","type":"uint256"}],"name":"Transfer","type":"event"}]` + + contractABI, err := abi.JSON(strings.NewReader(transferEventABI)) + if err != nil { + log.Fatalf("Failed to parse contract ABI: %v", err) + } + + for _, vLog := range logs { + event := struct { + From common.Address + To common.Address + Value *big.Int + }{} + + err := contractABI.UnpackIntoInterface(&event, "Transfer", vLog.Data) + if err != nil { + log.Fatalf("Failed to unpack log data: %v", err) + } + + event.From = common.HexToAddress(vLog.Topics[1].Hex()) + event.To = common.HexToAddress(vLog.Topics[2].Hex()) + + fmt.Printf("Log Block Number: %d\n", vLog.BlockNumber) + fmt.Printf("Log Index: %d\n", vLog.Index) + fmt.Printf("From: %s\n", event.From.Hex()) + fmt.Printf("To: %s\n", event.To.Hex()) + fmt.Printf("Value: %s\n", event.Value.String()) + } +} From ab5dc1ac2a2c1edf38d4754ba8c4f6479467e062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Wed, 19 Jun 2024 00:11:16 +0200 Subject: [PATCH 12/21] concurrent updater --- cmd/census3/main.go | 4 +- scanner/providers/web3/erc20_provider.go | 27 +- scanner/providers/web3/erc721_provider.go | 34 ++- scanner/providers/web3/erc777_provider.go | 34 ++- scanner/scanner.go | 9 +- scanner/updater.go | 295 +++++++++++----------- 6 files changed, 201 insertions(+), 202 deletions(-) diff --git a/cmd/census3/main.go b/cmd/census3/main.go index 66d08fef..76ad5a17 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -238,8 +238,8 @@ func main() { log.Info("initial tokens created, or at least tried to") }() // start the holder scanner - go hc.Start(ctx, config.scannerConcurrentTokens) - go updater.Start(ctx) + go hc.Start(ctx) + go updater.Start(ctx, config.scannerConcurrentTokens) metrics.NewCounter(fmt.Sprintf("census3_info{version=%q,chains=%q}", internal.Version, w3p.String())).Set(1) diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index c468636e..e1b7c6cb 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -18,8 +18,6 @@ import ( "go.vocdoni.io/dvote/log" ) -var processedLogs = make(map[string]bool) - type ERC20HolderProvider struct { endpoints *web3.Web3Pool client *web3.Client @@ -161,14 +159,8 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro newTransfers := uint64(0) alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) - // debug - targetTx := common.HexToHash("0x68fbbe59012bf2a60e94c1cbd11bbafdb50ce149aeb881e63dcb2da1f102186b") // iterate the logs and update the balances for _, currentLog := range logs { - // debug - if currentLog.TxHash.Hex() == targetTx.Hex() { - log.Warnw("target", "log", currentLog) - } // skip the log if it has been removed if currentLog.Removed { continue @@ -198,18 +190,13 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro TotalSupply: big.NewInt(0), }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } - // debug - if currentLog.TxHash.Hex() == targetTx.Hex() { - log.Warnw("target", "log", currentLog, "data", logData, "processed", processed) - } // 
if it is the first scan, it will not check if the log has been // already processed if processed { alreadyProcessedLogs++ - // continue - } else { - newTransfers++ + continue } + newTransfers++ // update balances if toBalance, ok := balances[logData.To]; ok { balances[logData.To] = new(big.Int).Add(toBalance, logData.Value) @@ -405,7 +392,6 @@ func (p *ERC20HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[ // or false if it has not been processed yet. If some error occurs, it returns // false and the error. func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { - // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -418,12 +404,5 @@ func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { return false, err } hID := hashFn.Sum(nil) - processed := p.filter.TestAndAdd(hID) - - // local filter for debug - if processed && !processedLogs[transferID] { - log.Infow("false positive", "log", l) - } - processedLogs[transferID] = processed - return processed, nil + return p.filter.TestAndAdd(hID), nil } diff --git a/scanner/providers/web3/erc721_provider.go b/scanner/providers/web3/erc721_provider.go index 005dc4aa..df2c4d89 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -160,8 +160,12 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { - // check if the log has been already processed - processed, err := p.isLogAlreadyProcessed(currentLog) + // skip the log if it has been removed + if currentLog.Removed { + continue + } + // parse log data + logData, err := p.contract.ERC721ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -170,14 +174,11 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), - }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) - } - if processed { - alreadyProcessedLogs++ - continue + }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) } - newTransfers++ - logData, err := p.contract.ERC721ContractFilterer.ParseTransfer(currentLog) + // check if the log has been already processed and add it to the filter + // if it is not already included + processed, err := p.isLogAlreadyProcessed(currentLog) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -186,8 +187,15 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), - }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) + } + // if it is the first scan, it will not check if the log has been + // already processed + if processed { + alreadyProcessedLogs++ + continue } + newTransfers++ // update balances if toBalance, ok := balances[logData.To]; ok { balances[logData.To] = new(big.Int).Add(toBalance, big.NewInt(1)) @@ -380,14 +388,14 @@ func (p *ERC721HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map // number and log index. 
It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error. -func (p *ERC721HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { +func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil } // get a identifier of each transfer: - // blockNumber-logIndex - transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + // sha256(blockNumber-txHash-log.Index) + transferID := fmt.Sprintf("%d-%x-%d", l.BlockNumber, l.TxHash, l.Index) hashFn := sha256.New() if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err diff --git a/scanner/providers/web3/erc777_provider.go b/scanner/providers/web3/erc777_provider.go index 2abb8117..abd41bd0 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -160,8 +160,12 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances for _, currentLog := range logs { - // check if the log has been already processed - processed, err := p.isLogAlreadyProcessed(currentLog) + // skip the log if it has been removed + if currentLog.Removed { + continue + } + // parse log data + logData, err := p.contract.ERC777ContractFilterer.ParseTransfer(currentLog) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -170,14 +174,11 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), - }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) - } - if processed { - alreadyProcessedLogs++ - continue + }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) } - newTransfers++ - logData, err := p.contract.ERC777ContractFilterer.ParseTransfer(currentLog) + // check if the log has been already processed and add it to the filter + // if it is not already included + processed, err := p.isLogAlreadyProcessed(currentLog) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -186,8 +187,15 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), - }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) + } + // if it is the first scan, it will not check if the log has been + // already processed + if processed { + alreadyProcessedLogs++ + continue } + newTransfers++ // update balances if toBalance, ok := balances[logData.To]; ok { balances[logData.To] = new(big.Int).Add(toBalance, big.NewInt(1)) @@ -380,14 +388,14 @@ func (p *ERC777HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map // number and log index. It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error. 
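For illustration, a minimal standalone sketch of the deduplication key these patches converge on, sha256 over "blockNumber-txHash-logIndex"; the function name and sample values are hypothetical, and only the Go standard library is assumed:

package main

import (
	"crypto/sha256"
	"fmt"
)

// transferKey derives the same kind of identifier that isLogAlreadyProcessed
// builds: a sha256 hash of "<blockNumber>-<txHash>-<logIndex>", which is
// unique per emitted log even when two transfers carry identical event data.
func transferKey(blockNumber uint64, txHash [32]byte, logIndex uint) []byte {
	transferID := fmt.Sprintf("%d-%x-%d", blockNumber, txHash, logIndex)
	sum := sha256.Sum256([]byte(transferID))
	return sum[:]
}

func main() {
	var txHash [32]byte // placeholder transaction hash
	fmt.Printf("%x\n", transferKey(19000000, txHash, 3))
}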
-func (p *ERC777HolderProvider) isLogAlreadyProcessed(log types.Log) (bool, error) { +func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil } // get a identifier of each transfer: - // blockNumber-logIndex - transferID := fmt.Sprintf("%x-%d-%d", log.Data, log.BlockNumber, log.Index) + // sha256(blockNumber-txHash-log.Index) + transferID := fmt.Sprintf("%d-%x-%d", l.BlockNumber, l.TxHash, l.Index) hashFn := sha256.New() if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err diff --git a/scanner/scanner.go b/scanner/scanner.go index 8026cd2d..d4377047 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -77,10 +77,7 @@ func NewScanner(db *db.DB, updater *Updater, networks *web3.Web3Pool, pm *manage // Start starts the scanner. It starts a loop that scans the tokens in the // database and saves the holders in the database. It stops when the context is // cancelled. -func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { - if concurrentTokens < 1 { - concurrentTokens = 1 - } +func (s *Scanner) Start(ctx context.Context) { s.ctx, s.cancel = context.WithCancel(ctx) itCounter := 0 // keep the latest block numbers updated @@ -104,9 +101,6 @@ func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { log.Error(err) continue } - // calculate number of batches - sem := make(chan struct{}, concurrentTokens) - defer close(sem) // iterate over the tokens to scan var atSyncGlobal atomic.Bool atSyncGlobal.Store(true) @@ -164,7 +158,6 @@ func (s *Scanner) Start(ctx context.Context, concurrentTokens int) { CreationBlock: token.CreationBlock, EndBlock: lastNetworkBlock, LastBlock: token.LastBlock, - Initial: token.LastBlock == 0 || token.LastBlock == token.CreationBlock, }); err != nil { log.Warnw("error enqueuing token", "error", err) continue diff --git a/scanner/updater.go b/scanner/updater.go index 22339b75..f3662a7f 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -13,7 +13,6 @@ import ( "github.com/vocdoni/census3/db" queries "github.com/vocdoni/census3/db/sqlc" "github.com/vocdoni/census3/helpers/web3" - "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers/manager" web3provider "github.com/vocdoni/census3/scanner/providers/web3" "go.vocdoni.io/dvote/log" @@ -30,7 +29,6 @@ type UpdateRequest struct { EndBlock uint64 LastBlock uint64 Done bool - Initial bool TotalLogs uint64 TotalNewLogs uint64 @@ -55,6 +53,7 @@ type Updater struct { providers *manager.ProviderManager queue map[string]*UpdateRequest queueMtx sync.Mutex + processing sync.Map waiter sync.WaitGroup filtersPath string } @@ -73,26 +72,40 @@ func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, } // Start starts the updater process in a goroutine. 
-func (u *Updater) Start(ctx context.Context) { +func (u *Updater) Start(ctx context.Context, concurrentTokens int) { u.ctx, u.cancel = context.WithCancel(ctx) - u.waiter.Add(1) - go func() { - defer u.waiter.Done() - for { - select { - case <-u.ctx.Done(): - return - default: - if u.IsEmpty() { - time.Sleep(coolDown) - continue - } - if err := u.process(); err != nil { - log.Errorf("Error processing update request: %v", err) - } + for { + select { + case <-u.ctx.Done(): + return + default: + pending := u.pendingRequests() + if len(pending) == 0 { + time.Sleep(coolDown) + continue + } + sem := make(chan struct{}, concurrentTokens) + defer close(sem) + for id, req := range u.pendingRequests() { + u.processing.Store(id, true) + sem <- struct{}{} + go func(id string, req *UpdateRequest) { + defer func() { + <-sem + u.processing.Store(id, false) + }() + if err := u.process(req); err != nil && err != context.Canceled { + log.Errorf("Error processing update request: %v", err) + return + } + // update the request in the queue + u.queueMtx.Lock() + u.queue[id] = req + u.queueMtx.Unlock() + }(id, req) } } - }() + } } // Stop stops the updater process. @@ -113,6 +126,7 @@ func (u *Updater) RequestStatus(id string, deleteOnDone bool) *UpdateRequest { } res := *req if deleteOnDone && req.Done { + u.processing.Delete(id) delete(u.queue, id) } return &res @@ -141,6 +155,7 @@ func (u *Updater) SetRequest(id string, req *UpdateRequest) error { u.queueMtx.Lock() defer u.queueMtx.Unlock() u.queue[id] = req + u.processing.Store(id, false) return nil } @@ -154,19 +169,15 @@ func (u *Updater) AddRequest(req *UpdateRequest) (string, error) { if err != nil { return "", err } + if _, ok := u.processing.Load(id); ok { + return "", nil + } if err := u.SetRequest(id, req); err != nil { return "", err } return id, nil } -// IsEmpty returns true if the queue is empty. -func (u *Updater) IsEmpty() bool { - u.queueMtx.Lock() - defer u.queueMtx.Unlock() - return len(u.queue) == 0 -} - // RequestID returns the ID of a request given the address, chainID and external // ID. The raw ID is a string with the format "chainID:address:externalID". The // resulting ID is the first 4 bytes of the hash of the raw ID using the sha256 @@ -181,132 +192,132 @@ func RequestID(address common.Address, chainID uint64, externalID string) (strin return hex.EncodeToString(bHash[:4]), nil } +func (u *Updater) pendingRequests() map[string]*UpdateRequest { + u.queueMtx.Lock() + defer u.queueMtx.Unlock() + queue := map[string]*UpdateRequest{} + for k, v := range u.queue { + if processing, ok := u.processing.Load(k); v.Done || !ok || processing.(bool) { + continue + } + queue[k] = v + } + return queue +} + // process iterates over the current queue items, getting the token holders // balances and saving them in the database until the last block is greater or // equal to the end block. It updates th status of the request in the queue. It // will return an error if the provider is not found, the token is external or // there is an error getting the token holders balances. 
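For illustration, the bounded-concurrency pattern this patch introduces in Start, reduced to a standalone sketch; the worker body is a hypothetical stand-in for process(req), and only the standard library is used:

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	const concurrentTokens = 3 // at most this many requests run at once
	sem := make(chan struct{}, concurrentTokens)
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		sem <- struct{}{} // blocks while concurrentTokens workers are busy
		wg.Add(1)
		go func(id int) {
			defer func() { <-sem; wg.Done() }() // release the slot
			time.Sleep(10 * time.Millisecond)   // stand-in for process(req)
			fmt.Println("processed request", id)
		}(i)
	}
	wg.Wait()
}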
-func (u *Updater) process() error { - // make a copy of current queue - u.queueMtx.Lock() - queue := map[string]*UpdateRequest{} - for k, v := range u.queue { - queue[k] = v +func (u *Updater) process(req *UpdateRequest) error { + // log the start of the process + log.Infow("rescanning token", + "address", req.Address.Hex(), + "from", req.CreationBlock, + "to", req.EndBlock, + "current", req.LastBlock) + ctx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) + defer cancel() + // get the provider by token type + provider, err := u.providers.GetProvider(ctx, req.Type) + if err != nil { + return fmt.Errorf("error getting provider for token: %v", err) } - u.queueMtx.Unlock() - // iterate over the current queue items - for id, req := range queue { - // check if the request is done - if req.Done { - continue - } - log.Infow("rescanning token", - "address", req.Address.Hex(), - "from", req.CreationBlock, - "to", req.EndBlock, - "current", req.LastBlock) - ctx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) - defer cancel() - // get the provider by token type - provider, err := u.providers.GetProvider(ctx, req.Type) - if err != nil { - return err - } - // if the token is a external token, return an error - if !provider.IsExternal() { - // load filter of the token from the database - filter, err := filter.LoadFilter(u.filtersPath, req.Address, req.ChainID, req.ExternalID) - if err != nil { - return err - } - // commit the filter when the function finishes - defer func() { - if err := filter.Commit(); err != nil { - log.Error(err) - return - } - }() - // set the reference of the token to update in the provider - if err := provider.SetRef(web3provider.Web3ProviderRef{ - HexAddress: req.Address.Hex(), - ChainID: req.ChainID, - CreationBlock: req.CreationBlock, - Filter: filter, - }); err != nil { - return err - } - } - // update the last block number of the provider to the last block of - // the request - provider.SetLastBlockNumber(req.EndBlock) - // get current token holders from database - results, err := u.db.QueriesRO.ListTokenHolders(ctx, queries.ListTokenHoldersParams{ - TokenID: req.Address.Bytes(), - ChainID: req.ChainID, - }) - if err != nil { - return nil + // if the token is a external token, return an error + if !provider.IsExternal() { + // // load filter of the token from the database + // filter, err := filter.LoadFilter(u.filtersPath, req.Address, req.ChainID, req.ExternalID) + // if err != nil { + // return err + // } + // // commit the filter when the function finishes + // defer func() { + // if err := filter.Commit(); err != nil { + // log.Error(err) + // return + // } + // }() + // set the reference of the token to update in the provider + if err := provider.SetRef(web3provider.Web3ProviderRef{ + HexAddress: req.Address.Hex(), + ChainID: req.ChainID, + CreationBlock: req.CreationBlock, + // Filter: filter, + }); err != nil { + return fmt.Errorf("error setting provider reference: %v", err) } - currentHolders := map[common.Address]*big.Int{} - for _, holder := range results { - bBalance, ok := new(big.Int).SetString(holder.Balance, 10) - if !ok { - return fmt.Errorf("error parsing holder balance from database") - } - currentHolders[common.Address(holder.HolderID)] = bBalance - } - // set the current holders in the provider - if err := provider.SetLastBalances(ctx, nil, currentHolders, req.LastBlock); err != nil { - return err + } + // update the last block number of the provider to the last block of + // the request + provider.SetLastBlockNumber(req.EndBlock) + // get 
current token holders from database + results, err := u.db.QueriesRO.ListTokenHolders(ctx, queries.ListTokenHoldersParams{ + TokenID: req.Address.Bytes(), + ChainID: req.ChainID, + }) + if err != nil { + return fmt.Errorf("error getting token holders from database: %v", err) + } + currentHolders := map[common.Address]*big.Int{} + for _, holder := range results { + bBalance, ok := new(big.Int).SetString(holder.Balance, 10) + if !ok { + log.Warnw("error parsing balance from database", + "balance", holder.Balance, + "holder", holder.HolderID, + "address", req.Address.Hex(), + "chainID", req.ChainID, + "externalID", req.ExternalID) + continue } - // update with expected results in the queue once the function finishes - defer func() { - log.Infow("updating request in the queue", "lastBlock", req.LastBlock, "done", req.Done) - u.queueMtx.Lock() - u.queue[id] = req - u.queueMtx.Unlock() - }() - // get range balances from the provider, it will check itereate again - // over transfers logs, checking if there are new transfers using the - // bloom filter associated to the token - balances, delta, err := provider.HoldersBalances(ctx, nil, req.LastBlock) - // update the token last block in the request before checking the error - if delta != nil { - req.TotalLogs += delta.LogsCount - req.TotalNewLogs += delta.NewLogsCount - req.TotalAlreadyProcessedLogs += delta.AlreadyProcessedLogsCount - req.LastTotalSupply = delta.TotalSupply + currentHolders[common.Address(holder.HolderID)] = bBalance + } + // set the current holders in the provider + if err := provider.SetLastBalances(ctx, nil, currentHolders, req.LastBlock); err != nil { + return fmt.Errorf("error setting last balances in provider: %v", err) + } + // get range balances from the provider, it will check itereate again + // over transfers logs, checking if there are new transfers using the + // bloom filter associated to the token + balances, delta, err := provider.HoldersBalances(ctx, nil, req.LastBlock) + // update the token last block in the request before checking the error + if delta != nil { + req.TotalLogs += delta.LogsCount + req.TotalNewLogs += delta.NewLogsCount + req.TotalAlreadyProcessedLogs += delta.AlreadyProcessedLogsCount + req.LastTotalSupply = delta.TotalSupply - req.Done = delta.Synced - if delta.Synced { - req.LastBlock = req.EndBlock - } else if delta.Block >= req.LastBlock { - req.LastBlock = delta.Block - } - } - if err != nil { - return err + req.Done = delta.Synced + if delta.Synced { + req.LastBlock = req.EndBlock + } else if delta.Block >= req.LastBlock { + req.LastBlock = delta.Block } - log.Infow("new logs received", - "address", req.Address.Hex(), - "from", req.LastBlock, - "lastBlock", delta.Block, - "newLogs", delta.NewLogsCount, - "alreadyProcessedLogs", delta.AlreadyProcessedLogsCount, - "totalLogs", delta.LogsCount) - // save the new balances in the database - created, updated, err := SaveHolders(u.db, ctx, ScannerToken{ - Address: req.Address, - ChainID: req.ChainID, - }, balances, delta.NewLogsCount, delta.Block, delta.Synced, delta.TotalSupply) - if err != nil { - return err - } - log.Debugw("token holders balances updated", - "token", req.Address.Hex(), - "chainID", req.ChainID, - "created", created, - "updated", updated) } + if err != nil { + return fmt.Errorf("error getting token holders balances: %v", err) + } + log.Debugw("new logs received", + "address", req.Address.Hex(), + "from", req.LastBlock, + "lastBlock", delta.Block, + "newLogs", delta.NewLogsCount, + "alreadyProcessedLogs", 
delta.AlreadyProcessedLogsCount, + "totalLogs", delta.LogsCount) + // save the new balances in the database + created, updated, err := SaveHolders(u.db, ctx, ScannerToken{ + Address: req.Address, + ChainID: req.ChainID, + }, balances, delta.NewLogsCount, delta.Block, delta.Synced, delta.TotalSupply) + if err != nil { + return fmt.Errorf("error saving token holders balances: %v", err) + } + log.Debugw("token holders balances updated", + "token", req.Address.Hex(), + "chainID", req.ChainID, + "created", created, + "updated", updated) + log.Infow("updating request in the queue", "lastBlock", req.LastBlock, "done", req.Done) return nil } From 08f70b2473afe8be0f833c78fd9090cea568e014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Wed, 19 Jun 2024 13:02:40 +0200 Subject: [PATCH 13/21] mock filter --- scanner/filter/filter.go | 164 ++--------------------- scanner/providers/web3/erc20_provider.go | 2 +- scanner/updater.go | 34 +++-- 3 files changed, 35 insertions(+), 165 deletions(-) diff --git a/scanner/filter/filter.go b/scanner/filter/filter.go index b255d026..81e3d777 100644 --- a/scanner/filter/filter.go +++ b/scanner/filter/filter.go @@ -1,164 +1,28 @@ package filter -// The filter package provides a wrapper of boom.ScalableBloomFilter to store -// the filter to a file and load it from it. The filter is used to store the -// processed transactions to avoid re-processing them, but also rescanning a -// synced token to find missing transactions. +// The filter package provides a wrapper of mechanism to store a list of +// identifiers to avoid re-processing them. It splits the list of identifiers +// into buckets, each bucket has a fixed size, and stores the buckets to files. +// This allows to reduce the write operations to the file system. -import ( - "encoding/json" - "fmt" - "os" +type TokenFilter struct{} - "github.com/ethereum/go-ethereum/common" - cuckoo "github.com/panmari/cuckoofilter" - "go.vocdoni.io/dvote/log" -) - -const ( - FilterSize = 5000000 // 5M items - MaxSize = 4000000 // 4M items -) - -type filterDump struct { - Filters [][]byte `json:"filters"` -} - -type batchFilter struct { - filter *cuckoo.Filter - size uint - full bool -} - -// TokenFilter is a wrapper of boom.ScalableBloomFilter to store the filter to -// a file and load it from it. The file that stores the filter is named as -//
<address>-<chainID>-<externalID>.filter, where address is the token contract -// address and chainID is the chain ID of the network where the token is -// deployed. -type TokenFilter struct { - path string - filters []*batchFilter - address common.Address - chainID uint64 - externalID string -} - -// LoadFilter loads the filter from the file, if the file does not exist, create -// a new filter and return it. The filter is stored in the file named as -//
<address>-<chainID>-<externalID>.filter in the basePath directory. -func LoadFilter(basePath string, address common.Address, chainID uint64, externalID string) (*TokenFilter, error) { - tf := &TokenFilter{ - path: fmt.Sprintf("%s/%s-%d-%s.filter", basePath, address.Hex(), chainID, externalID), - filters: []*batchFilter{}, - address: address, - chainID: chainID, - externalID: externalID, - } - // load filters from the local file - empty, err := tf.loadLocalFilters() - if err != nil { - return nil, err - } - // append a new filter if there is no filter in the local file - if empty { - tf.addFilter() - } - return tf, nil -} +// LoadFilter loads a filter from a file. +func LoadFilter(basePath, fileName string) (*TokenFilter, error) { return &TokenFilter{}, nil } // Add adds a key to the filter. -func (tf *TokenFilter) Add(key []byte) { - for _, f := range tf.filters { - if f.full { - continue - } - if f.filter.Insert(key) { - f.size++ - f.full = f.size >= MaxSize - return - } - } - // add a new filter if all filters are full - tf.addFilter(key) -} +func (tf *TokenFilter) Add(key []byte) {} // Test checks if a key is in the filter. -func (tf *TokenFilter) Test(key []byte) bool { - for _, f := range tf.filters { - if f.filter.Lookup(key) { - return true - } - } - return false -} +func (tf *TokenFilter) Test(key []byte) bool { return false } // TestAndAdd checks if a key is in the filter, if not, add it to the filter. It // is the combination of Test and conditional Add. -func (tf *TokenFilter) TestAndAdd(key []byte) bool { - if tf.Test(key) { - return true - } - tf.Add(key) - return false -} +func (tf *TokenFilter) TestAndAdd(key []byte) bool { return false } // Commit writes the filter to its file. -func (tf *TokenFilter) Commit() error { - filterDump := filterDump{ - Filters: make([][]byte, 0, len(tf.filters)), - } - for _, f := range tf.filters { - filterDump.Filters = append(filterDump.Filters, f.filter.Encode()) - } - filterBytes, err := json.Marshal(filterDump) - if err != nil { - return err - } - return os.WriteFile(tf.path, filterBytes, 0644) -} - -func (tf *TokenFilter) loadLocalFilters() (bool, error) { - filterBytes, err := os.ReadFile(tf.path) - if err != nil { - if os.IsNotExist(err) { - return false, nil - } - return false, err - } - var filterDump filterDump - if err := json.Unmarshal(filterBytes, &filterDump); err != nil { - return false, err - } - if len(filterDump.Filters) == 0 { - return false, nil - } - for _, bFilter := range filterDump.Filters { - filter, err := cuckoo.Decode(bFilter) - if err != nil { - return false, err - } - tf.filters = append(tf.filters, &batchFilter{ - filter: filter, - size: filter.Count(), - full: filter.Count() >= MaxSize, - }) - } - return true, nil -} - -func (tf *TokenFilter) addFilter(keys ...[]byte) *batchFilter { - log.Info("adding new filter") - f := &batchFilter{ - filter: cuckoo.NewFilter(FilterSize), - size: 0, - full: false, - } - for _, key := range keys { - if f.filter.Insert(key) { - f.size++ - } - } +// Commit writes the filter to its file. +func (tf *TokenFilter) Commit() error { return nil } - tf.filters = append(tf.filters, f) - return f -} +func (tf *TokenFilter) loadLocalFilters() error { return nil } +func (tf *TokenFilter) add(key ...string) {} +func (tf *TokenFilter) test(key string) bool { return false } diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index e1b7c6cb..db4ee7a2 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -403,6 +403,6 @@ func (p *ERC20HolderProvider) isLogAlreadyProcessed(l 
types.Log) (bool, error) { if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } - hID := hashFn.Sum(nil) + hID := hashFn.Sum(nil)[:8] return p.filter.TestAndAdd(hID), nil } diff --git a/scanner/updater.go b/scanner/updater.go index f3662a7f..33390f23 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -13,6 +13,7 @@ import ( "github.com/vocdoni/census3/db" queries "github.com/vocdoni/census3/db/sqlc" "github.com/vocdoni/census3/helpers/web3" + "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers/manager" web3provider "github.com/vocdoni/census3/scanner/providers/web3" "go.vocdoni.io/dvote/log" @@ -24,6 +25,7 @@ type UpdateRequest struct { Address common.Address ChainID uint64 ExternalID string + ChainAddress string Type uint64 CreationBlock uint64 EndBlock uint64 @@ -94,7 +96,7 @@ func (u *Updater) Start(ctx context.Context, concurrentTokens int) { <-sem u.processing.Store(id, false) }() - if err := u.process(req); err != nil && err != context.Canceled { + if err := u.process(req); err != nil { log.Errorf("Error processing update request: %v", err) return } @@ -226,24 +228,28 @@ func (u *Updater) process(req *UpdateRequest) error { } // if the token is a external token, return an error if !provider.IsExternal() { - // // load filter of the token from the database - // filter, err := filter.LoadFilter(u.filtersPath, req.Address, req.ChainID, req.ExternalID) - // if err != nil { - // return err - // } - // // commit the filter when the function finishes - // defer func() { - // if err := filter.Commit(); err != nil { - // log.Error(err) - // return - // } - // }() + chainAddress, ok := u.networks.ChainAddress(req.ChainID, req.ChainAddress) + if !ok { + return fmt.Errorf("error getting chain address for token: %v", err) + } + // load filter of the token from the database + filter, err := filter.LoadFilter(u.filtersPath, chainAddress) + if err != nil { + return err + } + // commit the filter when the function finishes + defer func() { + if err := filter.Commit(); err != nil { + log.Error(err) + return + } + }() // set the reference of the token to update in the provider if err := provider.SetRef(web3provider.Web3ProviderRef{ HexAddress: req.Address.Hex(), ChainID: req.ChainID, CreationBlock: req.CreationBlock, - // Filter: filter, + Filter: filter, }); err != nil { return fmt.Errorf("error setting provider reference: %v", err) } From 08b1e7611c5dc5544e09070099fed2f86426d051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Wed, 19 Jun 2024 13:39:35 +0200 Subject: [PATCH 14/21] update dependencies --- go.mod | 6 ++---- go.sum | 8 ++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 3c2ca056..84916d76 100644 --- a/go.mod +++ b/go.mod @@ -11,9 +11,8 @@ require ( github.com/google/uuid v1.6.0 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/ipfs/go-cid v0.4.1 - github.com/mattn/go-sqlite3 v1.14.20 - github.com/panmari/cuckoofilter v1.0.6 - github.com/pressly/goose/v3 v3.10.0 + github.com/mattn/go-sqlite3 v1.14.22 + github.com/pressly/goose/v3 v3.20.0 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.18.2 go.vocdoni.io/dvote v1.10.2-0.20240617105531-77480ae05205 @@ -62,7 +61,6 @@ require ( github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/dgraph-io/badger v1.6.2 // indirect github.com/dgraph-io/ristretto v0.1.1 // indirect - github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect github.com/docker/go-units v0.5.0 // 
indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/elastic/gosigar v0.14.2 // indirect diff --git a/go.sum b/go.sum index 73c1b9fb..2c4ff7d3 100644 --- a/go.sum +++ b/go.sum @@ -197,8 +197,6 @@ github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkz github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WAFKLNi6ZS0675eEUC9y3AlwSbQu1Y= github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= -github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 h1:BS21ZUJ/B5X2UVUbczfmdWH7GapPWAhxcMsDnjJTU1E= -github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= @@ -816,10 +814,8 @@ github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFSt github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= -github.com/openzipkin/zipkin-go v0.4.1 h1:kNd/ST2yLLWhaWrkgchya40TJabe8Hioj9udfPcEO5A= -github.com/openzipkin/zipkin-go v0.4.1/go.mod h1:qY0VqDSN1pOBN94dBc6w2GJlWLiovAyg7Qt6/I9HecM= -github.com/panmari/cuckoofilter v1.0.6 h1:WKb1aSj16h22x0CKVtTCaRkJiCnVGPLEMGbNY8xwXf8= -github.com/panmari/cuckoofilter v1.0.6/go.mod h1:bKADbQPGbN6TxUvo/IbMEIUbKuASnpsOvrLTgpSX0aU= +github.com/openzipkin/zipkin-go v0.4.2 h1:zjqfqHjUpPmB3c1GlCvvgsM1G4LkvqQbBDueDOCg/jA= +github.com/openzipkin/zipkin-go v0.4.2/go.mod h1:ZeVkFjuuBiSy13y8vpSDCjMi9GoI3hPpCJSBx/EYFhY= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= From 276976b0136962df386ba7c6640ba1c8d1187961 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Wed, 19 Jun 2024 15:34:11 +0200 Subject: [PATCH 15/21] linter fixes --- .golangci.yml | 1 + scanner/filter/filter.go | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 9a527b23..e082039c 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -5,6 +5,7 @@ issues: exclude-use-default: false exclude-files: - scanner/providers/gitcoin/gitcoin_stamps.go + - scripts/* linters: enable: - misspell diff --git a/scanner/filter/filter.go b/scanner/filter/filter.go index 81e3d777..46226217 100644 --- a/scanner/filter/filter.go +++ b/scanner/filter/filter.go @@ -8,13 +8,17 @@ package filter type TokenFilter struct{} // LoadFilter loads a filter from a file. -func LoadFilter(basePath, fileName string) (*TokenFilter, error) { return &TokenFilter{}, nil } +func LoadFilter(basePath, fileName string) (*TokenFilter, error) { + tf := &TokenFilter{} + _ = tf.loadLocalFilters() + return tf, nil +} // Add adds a key to the filter. 
-func (tf *TokenFilter) Add(key []byte) {} +func (tf *TokenFilter) Add(key []byte) { tf.add(key) } // Test checks if a key is in the filter. -func (tf *TokenFilter) Test(key []byte) bool { return false } +func (tf *TokenFilter) Test(key []byte) bool { return tf.test(key) } // TestAndAdd checks if a key is in the filter, if not, add it to the filter. It // is the combination of Test and conditional Add. @@ -24,5 +28,5 @@ func (tf *TokenFilter) TestAndAdd(key []byte) bool { return false } func (tf *TokenFilter) Commit() error { return nil } func (tf *TokenFilter) loadLocalFilters() error { return nil } -func (tf *TokenFilter) add(key ...string) {} -func (tf *TokenFilter) test(key string) bool { return false } +func (tf *TokenFilter) add(key ...[]byte) {} +func (tf *TokenFilter) test(key []byte) bool { return false } From 3979a7dc60ce8717d3096c6616a137e0d09af78a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Fri, 21 Jun 2024 20:23:18 +0200 Subject: [PATCH 16/21] including arbo as key-value database to be used with updater filter to avoid reprocessing transactions --- cmd/census3/main.go | 11 +- db/treedb/treedb.go | 128 ++++++++++++++++++++++ scanner/filter/filter.go | 32 ------ scanner/providers/holders_provider.go | 15 +++ scanner/providers/web3/erc20_provider.go | 5 +- scanner/providers/web3/erc721_provider.go | 5 +- scanner/providers/web3/erc777_provider.go | 5 +- scanner/providers/web3/web3_provider.go | 3 +- scanner/scanner.go | 6 +- scanner/updater.go | 42 +++---- 10 files changed, 179 insertions(+), 73 deletions(-) create mode 100644 db/treedb/treedb.go delete mode 100644 scanner/filter/filter.go diff --git a/cmd/census3/main.go b/cmd/census3/main.go index 8791f340..2e951e07 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -24,6 +24,8 @@ import ( "github.com/vocdoni/census3/scanner/providers/manager" "github.com/vocdoni/census3/scanner/providers/poap" web3provider "github.com/vocdoni/census3/scanner/providers/web3" + dvotedb "go.vocdoni.io/dvote/db" + "go.vocdoni.io/dvote/db/metadb" "go.vocdoni.io/dvote/log" ) @@ -201,10 +203,15 @@ func main() { DB: farcasterDB, }) } + // init the filters database + filtersDB, err := metadb.New(dvotedb.TypePebble, config.filtersPath) + if err != nil { + log.Fatal(err) + } // start the token updater with the database and the provider manager - updater := scanner.NewUpdater(database, w3p, pm, config.filtersPath) + updater := scanner.NewUpdater(database, w3p, pm, filtersDB) // start the holder scanner with the database and the provider manager - hc := scanner.NewScanner(database, updater, w3p, pm, config.scannerCoolDown, config.filtersPath) + hc := scanner.NewScanner(database, updater, w3p, pm, config.scannerCoolDown) // if the admin token is not defined, generate a random one if config.adminToken != "" { if _, err := uuid.Parse(config.adminToken); err != nil { diff --git a/db/treedb/treedb.go b/db/treedb/treedb.go new file mode 100644 index 00000000..3f5446eb --- /dev/null +++ b/db/treedb/treedb.go @@ -0,0 +1,128 @@ +package treedb + +// The treedb package provides a wrapper of key-value database that uses a +// merkle tree under the hood. Every tree is stored in the same database, but +// with a different prefix. + +import ( + "fmt" + + "go.vocdoni.io/dvote/db" + "go.vocdoni.io/dvote/db/prefixeddb" + "go.vocdoni.io/dvote/log" + "go.vocdoni.io/dvote/tree" + "go.vocdoni.io/dvote/tree/arbo" +) + +// filterTreeLevels is the number of levels of the tree used to store the +// filter. 
It is a constant to avoid re-creating the tree with a different +// number of levels. The available number of leaves is 2^filterTreeLevels. +// It also limits the size of the key to filterTreeLevels/8 bytes. +const filterTreeLevels = 64 + +// ErrNotInitialized is returned when no tree is initialized in a TreeDB +// instance, which means that LoadTree has not been called and the tree is +// not ready to be used. +var ErrNotInitialized = fmt.Errorf("tree not initialized, call Load first") + +// TreeDB is a key-value database wrapper that stores a merkle tree under a +// prefix of the parent database. +type TreeDB struct { + prefix string + parentDB db.Database + tree *tree.Tree +} + +// LoadTree loads a tree from the database identified by the given prefix. If it +// does not exist, it creates a new tree with the given prefix. It also creates +// the index if it does not exist. It returns an error if the tree cannot be +// loaded or created. +func LoadTree(db db.Database, prefix string) (*TreeDB, error) { + treeDB := prefixeddb.NewPrefixedDatabase(db, []byte(prefix)) + tree, err := tree.New(nil, tree.Options{ + DB: treeDB, + MaxLevels: filterTreeLevels, + HashFunc: arbo.HashFunctionBlake2b, + }) + if err != nil { + return nil, err + } + // ensure index is created + wTx := tree.DB().WriteTx() + defer wTx.Discard() + return &TreeDB{ + prefix: prefix, + parentDB: db, + tree: tree, + }, wTx.Commit() +} + +func (tdb *TreeDB) Close() error { + if tdb.tree != nil { + if err := tdb.tree.DB().Close(); err != nil { + return err + } + } + if tdb.parentDB != nil { + return tdb.parentDB.Close() + } + return nil +} + +// Delete deletes the tree from the database identified by the current prefix. +// It iterates over all the keys in the tree and deletes them. If some key +// cannot be deleted, it logs a warning and continues with the next key. It +// commits the transaction at the end. +func (tdb *TreeDB) Delete() error { + treeDB := prefixeddb.NewPrefixedDatabase(tdb.parentDB, []byte(tdb.prefix)) + wTx := treeDB.WriteTx() + if err := treeDB.Iterate(nil, func(k, _ []byte) bool { + if err := wTx.Delete(k); err != nil { + log.Warnw("error deleting key", "key", k, "err", err) + } + return true + }); err != nil { + return err + } + return wTx.Commit() +} + +// Add adds a key to the tree. +func (tdb *TreeDB) Add(key, value []byte) error { + if tdb.tree == nil { + return ErrNotInitialized + } + wTx := tdb.tree.DB().WriteTx() + defer wTx.Discard() + if err := tdb.tree.Add(wTx, key, value); err != nil { + return err + } + return wTx.Commit() +} + +// Test checks if a key is in the tree. +func (tdb *TreeDB) Test(key []byte) (bool, error) { + if tdb.tree == nil { + return false, ErrNotInitialized + } + _, err := tdb.tree.Get(nil, key) + if err != nil { + if err == arbo.ErrKeyNotFound { + return false, nil + } + return false, err + } + return true, nil +} + +// TestAndAdd checks if a key is in the tree, if not, add it to the tree. It +// is the combination of Test and conditional Add. +func (tdb *TreeDB) TestAndAdd(key, value []byte) (bool, error) { + exists, err := tdb.Test(key) + if err != nil { + return false, err + } + if exists { + return true, nil + } + return false, tdb.Add(key, value) +} diff --git a/scanner/filter/filter.go b/scanner/filter/filter.go deleted file mode 100644 index 46226217..00000000 --- a/scanner/filter/filter.go +++ /dev/null @@ -1,32 +0,0 @@ -package filter - -// The filter package provides a wrapper of mechanism to store a list of -// identifiers to avoid re-processing them. 
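For illustration, a sketch of how the treedb wrapper above is meant to be driven end to end, using only calls that appear in this series (metadb.New from main.go, LoadTree, TestAndAdd); the database path and tree prefix are hypothetical:

package main

import (
	"fmt"
	"log"

	"github.com/vocdoni/census3/db/treedb"
	dvotedb "go.vocdoni.io/dvote/db"
	"go.vocdoni.io/dvote/db/metadb"
)

func main() {
	// a single pebble database holds every token filter, one tree per prefix
	kvdb, err := metadb.New(dvotedb.TypePebble, "/tmp/census3-filters")
	if err != nil {
		log.Fatal(err)
	}
	filter, err := treedb.LoadTree(kvdb, "1-0xToken") // hypothetical prefix
	if err != nil {
		log.Fatal(err)
	}
	// the first call stores the key, the second reports it as already seen
	for i := 0; i < 2; i++ {
		seen, err := filter.TestAndAdd([]byte("transfer-id"), nil)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println("already processed:", seen)
	}
}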
It splits the list of identifiers -// into buckets, each bucket has a fixed size, and stores the buckets to files. -// This allows to reduce the write operations to the file system. - -type TokenFilter struct{} - -// LoadFilter loads a filter from a file. -func LoadFilter(basePath, fileName string) (*TokenFilter, error) { - tf := &TokenFilter{} - _ = tf.loadLocalFilters() - return tf, nil -} - -// Add adds a key to the filter. -func (tf *TokenFilter) Add(key []byte) { tf.add(key) } - -// Test checks if a key is in the filter. -func (tf *TokenFilter) Test(key []byte) bool { return tf.test(key) } - -// TestAndAdd checks if a key is in the filter, if not, add it to the filter. It -// is the combination of Test and conditional Add. -func (tf *TokenFilter) TestAndAdd(key []byte) bool { return false } - -// Commit writes the filter to its file. -func (tf *TokenFilter) Commit() error { return nil } - -func (tf *TokenFilter) loadLocalFilters() error { return nil } -func (tf *TokenFilter) add(key ...[]byte) {} -func (tf *TokenFilter) test(key []byte) bool { return false } diff --git a/scanner/providers/holders_provider.go b/scanner/providers/holders_provider.go index 0b28a705..144d2948 100644 --- a/scanner/providers/holders_provider.go +++ b/scanner/providers/holders_provider.go @@ -7,6 +7,11 @@ import ( "github.com/ethereum/go-ethereum/common" ) +// BlocksDelta struct defines the delta of blocks processed by any +// HolderProvider. It includes the total number of logs processed, the new logs +// processed, the logs already processed, the last block processed, and if the +// provider is synced. It also includes the current total supply of the token +// set in the provider. type BlocksDelta struct { LogsCount uint64 NewLogsCount uint64 @@ -16,6 +21,16 @@ type BlocksDelta struct { TotalSupply *big.Int } +// Filter interface defines the basic methods to interact with a filter to +// store the processed transfer identifiers and avoid processing them again, +// for example, if a token is rescanned. It allows to implement different +// filters, such as in-memory, disk, merkle tree, etc. +type Filter interface { + Add(key, value []byte) error + Test(key []byte) (bool, error) + TestAndAdd(key, value []byte) (bool, error) +} + // HolderProvider is the interface that wraps the basic methods to interact with // a holders provider. It is used by the HoldersScanner to get the balances of // the token holders. 
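Since the Filter interface above decouples the providers from the storage backend, a trivial map-backed implementation is enough to satisfy it, which can be handy in tests; this is a hypothetical sketch, not code from the series:

package providers

type memFilter map[string]struct{}

func (m memFilter) Add(key, _ []byte) error {
	m[string(key)] = struct{}{}
	return nil
}

func (m memFilter) Test(key []byte) (bool, error) {
	_, ok := m[string(key)]
	return ok, nil
}

func (m memFilter) TestAndAdd(key, value []byte) (bool, error) {
	seen, _ := m.Test(key)
	if !seen {
		_ = m.Add(key, value)
	}
	return seen, nil
}

// compile-time check that memFilter implements Filter
var _ Filter = memFilter{}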
It allows to implement different providers, such as diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index db4ee7a2..81edce35 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -13,7 +13,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" erc20 "github.com/vocdoni/census3/contracts/erc/erc20" "github.com/vocdoni/census3/helpers/web3" - "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/log" ) @@ -32,7 +31,7 @@ type ERC20HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool - filter *filter.TokenFilter + filter providers.Filter } func (p *ERC20HolderProvider) Init(_ context.Context, iconf any) error { @@ -404,5 +403,5 @@ func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { return false, err } hID := hashFn.Sum(nil)[:8] - return p.filter.TestAndAdd(hID), nil + return p.filter.TestAndAdd(hID, nil) } diff --git a/scanner/providers/web3/erc721_provider.go b/scanner/providers/web3/erc721_provider.go index df2c4d89..e7404c9a 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -13,7 +13,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" erc721 "github.com/vocdoni/census3/contracts/erc/erc721" "github.com/vocdoni/census3/helpers/web3" - "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/log" ) @@ -32,7 +31,7 @@ type ERC721HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool - filter *filter.TokenFilter + filter providers.Filter } func (p *ERC721HolderProvider) Init(_ context.Context, iconf any) error { @@ -401,5 +400,5 @@ func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) return false, err } hID := hashFn.Sum(nil) - return p.filter.TestAndAdd(hID), nil + return p.filter.TestAndAdd(hID, nil) } diff --git a/scanner/providers/web3/erc777_provider.go b/scanner/providers/web3/erc777_provider.go index abd41bd0..2ea955f0 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -13,7 +13,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" erc777 "github.com/vocdoni/census3/contracts/erc/erc777" "github.com/vocdoni/census3/helpers/web3" - "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/log" ) @@ -32,7 +31,7 @@ type ERC777HolderProvider struct { creationBlock uint64 lastNetworkBlock uint64 synced atomic.Bool - filter *filter.TokenFilter + filter providers.Filter } func (p *ERC777HolderProvider) Init(_ context.Context, iconf any) error { @@ -401,5 +400,5 @@ func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) return false, err } hID := hashFn.Sum(nil) - return p.filter.TestAndAdd(hID), nil + return p.filter.TestAndAdd(hID, nil) } diff --git a/scanner/providers/web3/web3_provider.go b/scanner/providers/web3/web3_provider.go index 6c16e274..401ccab6 100644 --- a/scanner/providers/web3/web3_provider.go +++ b/scanner/providers/web3/web3_provider.go @@ -12,7 +12,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/vocdoni/census3/helpers/web3" - "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers" "go.vocdoni.io/dvote/db" "go.vocdoni.io/dvote/log" @@ -22,7 +21,7 @@ 
type Web3ProviderRef struct { HexAddress string ChainID uint64 CreationBlock uint64 - Filter *filter.TokenFilter + Filter providers.Filter } type Web3ProviderConfig struct { diff --git a/scanner/scanner.go b/scanner/scanner.go index d4377047..367f0f8f 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -45,7 +45,6 @@ type Scanner struct { networks *web3.Web3Pool providerManager *manager.ProviderManager coolDown time.Duration - filtersPath string tokens []*ScannerToken tokensMtx sync.Mutex @@ -56,8 +55,8 @@ type Scanner struct { // NewScanner returns a new scanner instance with the required parameters // initialized. -func NewScanner(db *db.DB, updater *Updater, networks *web3.Web3Pool, pm *manager.ProviderManager, - coolDown time.Duration, filtersPath string, +func NewScanner(db *db.DB, updater *Updater, networks *web3.Web3Pool, + pm *manager.ProviderManager, coolDown time.Duration, ) *Scanner { return &Scanner{ db: db, @@ -65,7 +64,6 @@ func NewScanner(db *db.DB, updater *Updater, networks *web3.Web3Pool, pm *manage networks: networks, providerManager: pm, coolDown: coolDown, - filtersPath: filtersPath, tokens: []*ScannerToken{}, tokensMtx: sync.Mutex{}, waiter: sync.WaitGroup{}, diff --git a/scanner/updater.go b/scanner/updater.go index 33390f23..7c8090f4 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -12,10 +12,11 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/vocdoni/census3/db" queries "github.com/vocdoni/census3/db/sqlc" + "github.com/vocdoni/census3/db/treedb" "github.com/vocdoni/census3/helpers/web3" - "github.com/vocdoni/census3/scanner/filter" "github.com/vocdoni/census3/scanner/providers/manager" web3provider "github.com/vocdoni/census3/scanner/providers/web3" + dvotedb "go.vocdoni.io/dvote/db" "go.vocdoni.io/dvote/log" ) @@ -50,26 +51,26 @@ type Updater struct { ctx context.Context cancel context.CancelFunc - db *db.DB - networks *web3.Web3Pool - providers *manager.ProviderManager - queue map[string]*UpdateRequest - queueMtx sync.Mutex - processing sync.Map - waiter sync.WaitGroup - filtersPath string + db *db.DB + networks *web3.Web3Pool + providers *manager.ProviderManager + queue map[string]*UpdateRequest + queueMtx sync.Mutex + processing sync.Map + waiter sync.WaitGroup + kvdb dvotedb.Database } // NewUpdater creates a new instance of Updater. 
func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, - filtersPath string, + kvdb dvotedb.Database, ) *Updater { return &Updater{ - db: db, - networks: networks, - providers: pm, - queue: make(map[string]*UpdateRequest), - filtersPath: filtersPath, + db: db, + networks: networks, + providers: pm, + queue: make(map[string]*UpdateRequest), + kvdb: kvdb, } } @@ -228,22 +229,15 @@ func (u *Updater) process(req *UpdateRequest) error { } // if the token is a external token, return an error if !provider.IsExternal() { - chainAddress, ok := u.networks.ChainAddress(req.ChainID, req.ChainAddress) + chainAddress, ok := u.networks.ChainAddress(req.ChainID, req.Address.Hex()) if !ok { return fmt.Errorf("error getting chain address for token: %v", err) } // load filter of the token from the database - filter, err := filter.LoadFilter(u.filtersPath, chainAddress) + filter, err := treedb.LoadTree(u.kvdb, chainAddress) if err != nil { return err } - // commit the filter when the function finishes - defer func() { - if err := filter.Commit(); err != nil { - log.Error(err) - return - } - }() // set the reference of the token to update in the provider if err := provider.SetRef(web3provider.Web3ProviderRef{ HexAddress: req.Address.Hex(), ChainID: req.ChainID, CreationBlock: req.CreationBlock, Filter: filter, }); err != nil { return fmt.Errorf("error setting provider reference: %v", err) } } From 09f83922208205df52f2d54b5244b5c25126e892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Sun, 23 Jun 2024 20:27:34 +0200 Subject: [PATCH 17/21] new 'next' method for updater to get the next token to update in each iteration and support sorted scans --- cmd/census3/main.go | 2 +- scanner/providers/web3/erc20_provider.go | 3 +- scanner/providers/web3/erc721_provider.go | 3 +- scanner/providers/web3/erc777_provider.go | 3 +- scanner/scanner.go | 2 +- scanner/updater.go | 172 ++++++++++++++-------- 6 files changed, 114 insertions(+), 71 deletions(-) diff --git a/cmd/census3/main.go b/cmd/census3/main.go index 2e951e07..5fe4a6e5 100644 --- a/cmd/census3/main.go +++ b/cmd/census3/main.go @@ -209,7 +209,7 @@ func main() { log.Fatal(err) } // start the token updater with the database and the provider manager - updater := scanner.NewUpdater(database, w3p, pm, filtersDB) + updater := scanner.NewUpdater(database, w3p, pm, filtersDB, config.scannerCoolDown) // start the holder scanner with the database and the provider manager hc := scanner.NewScanner(database, updater, w3p, pm, config.scannerCoolDown) // if the admin token is not defined, generate a random one diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 81edce35..adee6a9a 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -159,6 +159,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances + log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -208,7 +209,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro balances[logData.From] = new(big.Int).Neg(logData.Value) } } - log.Infow("saving blocks", + log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, "already_processed_logs", alreadyProcessedLogs, diff --git a/scanner/providers/web3/erc721_provider.go b/scanner/providers/web3/erc721_provider.go index e7404c9a..d5a0253e 100644 --- 
a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -152,7 +152,6 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr if errors.Is(err, ErrTooManyRequests) { log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } - log.Warnw("logs received", "number_of_logs", len(logs), "last_block", lastBlock) // encode the number of new transfers newTransfers := uint64(0) alreadyProcessedLogs := uint64(0) @@ -399,6 +398,6 @@ func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } - hID := hashFn.Sum(nil) + hID := hashFn.Sum(nil)[:8] return p.filter.TestAndAdd(hID, nil) } diff --git a/scanner/providers/web3/erc777_provider.go b/scanner/providers/web3/erc777_provider.go index 2ea955f0..f3fbbba6 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -152,7 +152,6 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr if errors.Is(err, ErrTooManyRequests) { log.Warnf("too many requests, the provider will continue in the next iteration from block %d", lastBlock) } - log.Warnw("logs received", "number_of_logs", len(logs), "last_block", lastBlock) // encode the number of new transfers newTransfers := uint64(0) alreadyProcessedLogs := uint64(0) @@ -399,6 +398,6 @@ func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } - hID := hashFn.Sum(nil) + hID := hashFn.Sum(nil)[:8] return p.filter.TestAndAdd(hID, nil) } diff --git a/scanner/scanner.go b/scanner/scanner.go index 367f0f8f..d0406a4d 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -109,7 +109,6 @@ func (s *Scanner) Start(ctx context.Context) { continue } } - log.Infow("checking token in the updater queue", "address", token.Address.Hex(), "chainID", token.ChainID, @@ -435,6 +434,7 @@ func (s *Scanner) prepareToken(token *ScannerToken) error { return err } token.CreationBlock = creationBlock + token.LastBlock = creationBlock token.Ready = true } return nil diff --git a/scanner/updater.go b/scanner/updater.go index 7c8090f4..a43a1663 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -7,6 +7,7 @@ import ( "fmt" "math/big" "sync" + "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" @@ -51,62 +52,73 @@ type Updater struct { ctx context.Context cancel context.CancelFunc - db *db.DB - networks *web3.Web3Pool - providers *manager.ProviderManager - queue map[string]*UpdateRequest - queueMtx sync.Mutex - processing sync.Map - waiter sync.WaitGroup - kvdb dvotedb.Database + db *db.DB + networks *web3.Web3Pool + providers *manager.ProviderManager + sortedQueue []string + queue map[string]*UpdateRequest + queueMtx sync.Mutex + processing sync.Map + nextReq atomic.Uint64 + waiter sync.WaitGroup + kvdb dvotedb.Database + coolDown time.Duration } // NewUpdater creates a new instance of Updater. 
func NewUpdater(db *db.DB, networks *web3.Web3Pool, pm *manager.ProviderManager, - kvdb dvotedb.Database, + kvdb dvotedb.Database, coolDown time.Duration, ) *Updater { return &Updater{ - db: db, - networks: networks, - providers: pm, - queue: make(map[string]*UpdateRequest), - kvdb: kvdb, + db: db, + networks: networks, + providers: pm, + sortedQueue: []string{}, + queue: make(map[string]*UpdateRequest), + kvdb: kvdb, + coolDown: coolDown, } } // Start starts the updater process in a goroutine. func (u *Updater) Start(ctx context.Context, concurrentTokens int) { u.ctx, u.cancel = context.WithCancel(ctx) + sem := make(chan struct{}, concurrentTokens) + defer close(sem) for { select { case <-u.ctx.Done(): return default: - pending := u.pendingRequests() - if len(pending) == 0 { - time.Sleep(coolDown) + req, id := u.next() + if req == nil { + log.Info("no more requests to process, sleeping...") + time.Sleep(u.coolDown) continue } - sem := make(chan struct{}, concurrentTokens) - defer close(sem) - for id, req := range u.pendingRequests() { - u.processing.Store(id, true) - sem <- struct{}{} - go func(id string, req *UpdateRequest) { - defer func() { - <-sem - u.processing.Store(id, false) - }() - if err := u.process(req); err != nil { - log.Errorf("Error processing update request: %v", err) - return - } - // update the request in the queue - u.queueMtx.Lock() - u.queue[id] = req - u.queueMtx.Unlock() - }(id, req) - } + sem <- struct{}{} + u.waiter.Add(1) + go func(id string, req UpdateRequest) { + defer func() { + u.waiter.Done() + <-sem + }() + log.Infow("processing token", + "address", req.Address.Hex(), + "from", req.CreationBlock, + "to", req.EndBlock, + "current", req.LastBlock) + res, err := u.process(id, req) + if err != nil { + log.Errorf("error processing update request: %v", err) + return + } + // update the request in the queue + log.Infow("updating request in the queue", "lastBlock", req.LastBlock, "done", req.Done) + if err := u.SetRequest(id, &res); err != nil { + log.Errorf("error updating request in the queue: %v", err) + } + }(id, *req) } } } @@ -127,12 +139,20 @@ func (u *Updater) RequestStatus(id string, deleteOnDone bool) *UpdateRequest { if !ok { return nil } - res := *req if deleteOnDone && req.Done { + // remove the request from the processing map u.processing.Delete(id) + // remove the request from the queue delete(u.queue, id) + // remove the request from the sorted queue + for i, v := range u.sortedQueue { + if v == id { + u.sortedQueue = append(u.sortedQueue[:i], u.sortedQueue[i+1:]...) + break + } + } } - return &res + return req } // SetRequest adds a new request to the queue. 
It will return an error if the @@ -157,8 +177,10 @@ func (u *Updater) SetRequest(id string, req *UpdateRequest) error { } u.queueMtx.Lock() defer u.queueMtx.Unlock() + if _, exists := u.queue[id]; !exists { + u.sortedQueue = append(u.sortedQueue, id) + } u.queue[id] = req - u.processing.Store(id, false) return nil } @@ -195,17 +217,42 @@ func RequestID(address common.Address, chainID uint64, externalID string) (strin return hex.EncodeToString(bHash[:4]), nil } -func (u *Updater) pendingRequests() map[string]*UpdateRequest { +func (u *Updater) next() (*UpdateRequest, string) { u.queueMtx.Lock() defer u.queueMtx.Unlock() - queue := map[string]*UpdateRequest{} - for k, v := range u.queue { - if processing, ok := u.processing.Load(k); v.Done || !ok || processing.(bool) { - continue + // if the queue is empty return nil + if len(u.sortedQueue) == 0 { + return nil, "" + } + // get the next request in the queue, if the next request is out of the + // range of the sorted queue, return nil and set the next request index to 0 + i := u.nextReq.Load() + max := uint64(len(u.sortedQueue)) + if i >= max { + u.nextReq.Store(0) + return nil, "" + } + // iterate over the sorted queue to find the next request that is not being + // processed or already done + for ; i < max; i++ { + id := u.sortedQueue[i] + req, exists := u.queue[id] + if !exists { + // if the request is not found, remove the ID from the sorted queue and + // return nil setting the next request index to 0 + u.sortedQueue = append(u.sortedQueue[:i], u.sortedQueue[i+1:]...) + u.nextReq.Store(0) + return nil, "" + } + // if request is not done and not being processed, return it + if isProcessing, ok := u.processing.Load(id); !req.Done && (!ok || !isProcessing.(bool)) { + u.nextReq.Store(i + 1) + return req, id } - queue[k] = v } - return queue + // if the next request is not found, set the next request index to 0 + u.nextReq.Store(0) + return nil, "" } // process iterates over the current queue items, getting the token holders @@ -213,30 +260,28 @@ func (u *Updater) pendingRequests() map[string]*UpdateRequest { // equal to the end block. It updates th status of the request in the queue. It // will return an error if the provider is not found, the token is external or // there is an error getting the token holders balances. 
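For illustration, the ID scheme documented above (hash of "chainID:address:externalID", first 4 bytes, hex-encoded) as a standalone sketch; the function name and sample address are hypothetical:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

func requestID(address string, chainID uint64, externalID string) string {
	raw := fmt.Sprintf("%d:%s:%s", chainID, address, externalID)
	sum := sha256.Sum256([]byte(raw))
	return hex.EncodeToString(sum[:4]) // 4 bytes -> 8 hex characters
}

func main() {
	fmt.Println(requestID("0x0000000000000000000000000000000000000001", 1, ""))
}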
-func (u *Updater) process(req *UpdateRequest) error { - // log the start of the process - log.Infow("rescanning token", - "address", req.Address.Hex(), - "from", req.CreationBlock, - "to", req.EndBlock, - "current", req.LastBlock) +func (u *Updater) process(id string, req UpdateRequest) (UpdateRequest, error) { + // set the request as processing and defer to set it as not processing + u.processing.Store(id, true) + defer u.processing.Store(id, false) + // create a context with a timeout to avoid blocking the process ctx, cancel := context.WithTimeout(u.ctx, UPDATE_TIMEOUT) defer cancel() // get the provider by token type provider, err := u.providers.GetProvider(ctx, req.Type) if err != nil { - return fmt.Errorf("error getting provider for token: %v", err) + return req, fmt.Errorf("error getting provider for token: %v", err) } // if the token is a external token, return an error if !provider.IsExternal() { chainAddress, ok := u.networks.ChainAddress(req.ChainID, req.Address.Hex()) if !ok { - return fmt.Errorf("error getting chain address for token: %v", err) + return req, fmt.Errorf("error getting chain address for token: %v", err) } // load filter of the token from the database filter, err := treedb.LoadTree(u.kvdb, chainAddress) if err != nil { - return err + return req, err } // set the reference of the token to update in the provider if err := provider.SetRef(web3provider.Web3ProviderRef{ @@ -245,7 +290,7 @@ func (u *Updater) process(req *UpdateRequest) error { CreationBlock: req.CreationBlock, Filter: filter, }); err != nil { - return fmt.Errorf("error setting provider reference: %v", err) + return req, fmt.Errorf("error setting provider reference: %v", err) } } // update the last block number of the provider to the last block of @@ -257,7 +302,7 @@ func (u *Updater) process(req *UpdateRequest) error { ChainID: req.ChainID, }) if err != nil { - return fmt.Errorf("error getting token holders from database: %v", err) + return req, fmt.Errorf("error getting token holders from database: %v", err) } currentHolders := map[common.Address]*big.Int{} for _, holder := range results { @@ -275,7 +320,7 @@ func (u *Updater) process(req *UpdateRequest) error { } // set the current holders in the provider if err := provider.SetLastBalances(ctx, nil, currentHolders, req.LastBlock); err != nil { - return fmt.Errorf("error setting last balances in provider: %v", err) + return req, fmt.Errorf("error setting last balances in provider: %v", err) } // get range balances from the provider, it will check itereate again // over transfers logs, checking if there are new transfers using the @@ -296,7 +341,7 @@ func (u *Updater) process(req *UpdateRequest) error { } } if err != nil { - return fmt.Errorf("error getting token holders balances: %v", err) + return req, fmt.Errorf("error getting token holders balances: %v", err) } log.Debugw("new logs received", "address", req.Address.Hex(), @@ -311,13 +356,12 @@ func (u *Updater) process(req *UpdateRequest) error { ChainID: req.ChainID, }, balances, delta.NewLogsCount, delta.Block, delta.Synced, delta.TotalSupply) if err != nil { - return fmt.Errorf("error saving token holders balances: %v", err) + return req, fmt.Errorf("error saving token holders balances: %v", err) } log.Debugw("token holders balances updated", "token", req.Address.Hex(), "chainID", req.ChainID, "created", created, "updated", updated) - log.Infow("updating request in the queue", "lastBlock", req.LastBlock, "done", req.Done) - return nil + return req, nil } From 
4c4b3185545a6f4c30776e26cb346b6c3756ff59 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Sun, 23 Jun 2024 21:05:34 +0200 Subject: [PATCH 18/21] filter logs partially and then update the filter in a single call --- db/treedb/treedb.go | 27 +++++++++++++++----- scanner/providers/holders_provider.go | 6 ++--- scanner/providers/web3/erc20_provider.go | 27 +++++++++++++++++--- scanner/providers/web3/erc721_provider.go | 30 ++++++++++++++++++++--- scanner/providers/web3/erc777_provider.go | 30 ++++++++++++++++++++--- scanner/providers/web3/errors.go | 1 + scanner/providers/web3/web3_provider.go | 8 ++++++ scanner/scanner.go | 16 ------------ scanner/updater.go | 1 - 9 files changed, 109 insertions(+), 37 deletions(-) diff --git a/db/treedb/treedb.go index 3f5446eb..2544bf67 100644 --- a/db/treedb/treedb.go +++ b/db/treedb/treedb.go @@ -99,8 +99,23 @@ func (tdb *TreeDB) Add(key, value []byte) error { return wTx.Commit() } -// Test checks if a key is in the tree. -func (tdb *TreeDB) Test(key []byte) (bool, error) { +// AddKey adds a key to the tree with nil value. It accepts variadic keys. +func (tdb *TreeDB) AddKey(key ...[]byte) error { + if tdb.tree == nil { + return ErrNotInitialized + } + wTx := tdb.tree.DB().WriteTx() + defer wTx.Discard() + for _, k := range key { + if err := tdb.tree.Add(wTx, k, nil); err != nil { + return err + } + } + return wTx.Commit() +} + +// TestKey checks if a key is in the tree. +func (tdb *TreeDB) TestKey(key []byte) (bool, error) { if tdb.tree == nil { return false, ErrNotInitialized } @@ -114,15 +129,15 @@ func (tdb *TreeDB) TestKey(key []byte) (bool, error) { return true, nil } -// TestAndAdd checks if a key is in the tree, if not, add it to the tree. It +// TestAndAddKey checks if a key is in the tree, if not, adds it to the tree. It // is the combination of TestKey and conditional AddKey. -func (tdb *TreeDB) TestAndAdd(key, value []byte) (bool, error) { - exists, err := tdb.Test(key) +func (tdb *TreeDB) TestAndAddKey(key []byte) (bool, error) { + exists, err := tdb.TestKey(key) if err != nil { return false, err } if exists { return true, nil } - return false, tdb.Add(key, value) + return false, tdb.AddKey(key) } diff --git a/scanner/providers/holders_provider.go index 144d2948..f8cde985 100644 --- a/scanner/providers/holders_provider.go +++ b/scanner/providers/holders_provider.go @@ -26,9 +26,9 @@ type BlocksDelta struct { // for example, if a token is rescanned. It allows implementing different // filters, such as in-memory, disk, merkle tree, etc.
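As an illustration of the in-memory variant this comment mentions, a map-backed implementation of the interface below could look roughly like this (a hypothetical helper, not part of the patch):

    // memFilter is a sketch of an in-memory Filter backed by a Go map.
    type memFilter struct {
        seen map[string]struct{}
    }

    func newMemFilter() *memFilter {
        return &memFilter{seen: make(map[string]struct{})}
    }

    func (f *memFilter) AddKey(key ...[]byte) error {
        for _, k := range key {
            f.seen[string(k)] = struct{}{}
        }
        return nil
    }

    func (f *memFilter) TestKey(key []byte) (bool, error) {
        _, ok := f.seen[string(key)]
        return ok, nil
    }

    func (f *memFilter) TestAndAddKey(key []byte) (bool, error) {
        exists, err := f.TestKey(key)
        if err != nil || exists {
            return exists, err
        }
        return false, f.AddKey(key)
    }

The tree-backed TreeDB above satisfies the same contract, trading memory for persistence across rescans.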
type Filter interface { - Add(key, value []byte) error - Test(key []byte) (bool, error) - TestAndAdd(key, value []byte) (bool, error) + AddKey(key ...[]byte) error + TestKey(key []byte) (bool, error) + TestAndAddKey(key []byte) (bool, error) } // HolderProvider is the interface that wraps the basic methods to interact with diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index adee6a9a..3eb3c746 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -1,6 +1,7 @@ package web3 import ( + "bytes" "context" "crypto/sha256" "errors" @@ -160,6 +161,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) + processedLogs := &partialProcessedLogs{} for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -179,7 +181,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro } // check if the log has been already processed and add it to the filter // if it is not already included - processed, err := p.isLogAlreadyProcessed(currentLog) + processed, err := p.isLogAlreadyProcessed(currentLog, processedLogs) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -209,6 +211,9 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro balances[logData.From] = new(big.Int).Neg(logData.Value) } } + if err := p.filter.AddKey(processedLogs.ids...); err != nil { + return nil, nil, errors.Join(ErrAddingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) + } log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, @@ -391,7 +396,7 @@ func (p *ERC20HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[ // number and log index. It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error. 
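The dedup key used by isLogAlreadyProcessed is derived by hashing a per-log transfer ID and keeping only the first 8 bytes of the digest. A minimal sketch of that derivation, assuming the transfer ID is built from the log's block number, transaction index and log index (the exact concatenation lives in context not shown in this diff):

    // logID sketches the 8-byte dedup key derivation; the transferID
    // layout here is an assumption for illustration.
    func logID(l types.Log) ([]byte, error) {
        transferID := fmt.Sprintf("%d-%d-%d", l.BlockNumber, l.TxIndex, l.Index)
        hashFn := sha256.New()
        if _, err := hashFn.Write([]byte(transferID)); err != nil {
            return nil, err
        }
        return hashFn.Sum(nil)[:8], nil
    }

Truncating to 8 bytes keeps the filter keys small while, for realistic per-token log counts, keeping the collision probability negligible.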
-func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { +func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProcessedLogs) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -403,6 +408,22 @@ func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } + // check if the hash is in the filter hID := hashFn.Sum(nil)[:8] - return p.filter.TestAndAdd(hID, nil) + exists, err := p.filter.TestKey(hID) + if err != nil { + return false, err + } + if exists { + return true, nil + } + // if the hash is not in the filter, check if it is in the partial filter + for _, id := range pl.ids { + if bytes.Equal(id, hID) { + return true, nil + } + } + // add the hash to the partial filter if it has not been processed and return false + pl.ids = append(pl.ids, hID) + return false, nil } diff --git a/scanner/providers/web3/erc721_provider.go index d5a0253e..8e188ebb 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -1,6 +1,7 @@ package web3 import ( + "bytes" "context" "crypto/sha256" "errors" @@ -157,6 +158,8 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances + log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) + processedLogs := &partialProcessedLogs{} for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -176,7 +179,7 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr } // check if the log has been already processed and add it to the filter // if it is not already included - processed, err := p.isLogAlreadyProcessed(currentLog) + processed, err := p.isLogAlreadyProcessed(currentLog, processedLogs) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -206,7 +209,10 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances[logData.From] = big.NewInt(-1) } } - log.Infow("saving blocks", + if err := p.filter.AddKey(processedLogs.ids...); err != nil { + return nil, nil, errors.Join(ErrAddingProcessedLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) + } + log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, "already_processed_logs", alreadyProcessedLogs, @@ -386,7 +392,7 @@ func (p *ERC721HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map // number and log index. It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error.
-func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { +func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProcessedLogs) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -398,6 +404,22 @@ func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } + // check if the hash is in the filter hID := hashFn.Sum(nil)[:8] - return p.filter.TestAndAdd(hID, nil) + exists, err := p.filter.TestKey(hID) + if err != nil { + return false, err + } + if exists { + return true, nil + } + // if the hash is not in the filter, check if it is in the partial filter + for _, id := range pl.ids { + if bytes.Equal(id, hID) { + return true, nil + } + } + // add the hash to the partial filter if it has not been processed and return false + pl.ids = append(pl.ids, hID) + return false, nil } diff --git a/scanner/providers/web3/erc777_provider.go index f3fbbba6..71bec291 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -1,6 +1,7 @@ package web3 import ( + "bytes" "context" "crypto/sha256" "errors" @@ -157,6 +158,8 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr alreadyProcessedLogs := uint64(0) balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances + log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) + processedLogs := &partialProcessedLogs{} for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -176,7 +179,7 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr } // check if the log has been already processed and add it to the filter // if it is not already included - processed, err := p.isLogAlreadyProcessed(currentLog) + processed, err := p.isLogAlreadyProcessed(currentLog, processedLogs) if err != nil { return nil, &providers.BlocksDelta{ Block: lastBlock, @@ -206,7 +209,10 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances[logData.From] = big.NewInt(-1) } } - log.Infow("saving blocks", + if err := p.filter.AddKey(processedLogs.ids...); err != nil { + return nil, nil, errors.Join(ErrAddingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) + } + log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, "already_processed_logs", alreadyProcessedLogs, @@ -386,7 +392,7 @@ func (p *ERC777HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map // number and log index.
-func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) { +func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProcessedLogs) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -398,6 +404,22 @@ func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log) (bool, error) if _, err := hashFn.Write([]byte(transferID)); err != nil { return false, err } + // check if the hash is in the filter hID := hashFn.Sum(nil)[:8] - return p.filter.TestAndAdd(hID, nil) + exists, err := p.filter.TestKey(hID) + if err != nil { + return false, err + } + if exists { + return true, nil + } + // if the hash is not in the filter, check if it is in the partial filter + for _, id := range pl.ids { + if bytes.Equal(id, hID) { + return true, nil + } + } + // add the hash to the partial filter if it has not been processed and return false + pl.ids = append(pl.ids, hID) + return false, nil } diff --git a/scanner/providers/web3/errors.go index b08122d3..e51d92d8 100644 --- a/scanner/providers/web3/errors.go +++ b/scanner/providers/web3/errors.go @@ -11,4 +11,5 @@ var ( ErrParsingTokenLogs = fmt.Errorf("error parsing token logs") ErrCheckingProcessedLogs = fmt.Errorf("error checking processed logs") ErrGettingTotalSupply = fmt.Errorf("error getting total supply") + ErrAddingProcessedLogs = fmt.Errorf("error adding processed logs to the filter") ) diff --git a/scanner/providers/web3/web3_provider.go index 401ccab6..206bed30 100644 --- a/scanner/providers/web3/web3_provider.go +++ b/scanner/providers/web3/web3_provider.go @@ -30,6 +30,14 @@ type Web3ProviderConfig struct { DB *db.Database } +// partialProcessedLogs struct is used to store the logs that are partially +// processed by the provider. It is used to avoid processing the same logs +// multiple times if the provider is rescanned, and to add the newly processed +// logs to the token filter in a single call. +type partialProcessedLogs struct { + ids [][]byte +} + // creationBlock function returns the block number of the creation of a contract // address. It uses the `eth_getCode` method to get the contract code at the // block number provided. If the method is not supported, it returns 0 and nil.
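Taken together, this patch gives each provider a two-tier duplicate check: the persistent token filter answers whether a log was seen in a previous scan, while partialProcessedLogs buffers the IDs seen during the current call so they can be flushed to the filter with a single AddKey call instead of one write per log. A rough sketch of the flow, simplified from the ERC20 provider (error wrapping and balance bookkeeping omitted):

    // processLogs sketches the two-tier dedup flow inside HoldersBalances.
    func (p *ERC20HolderProvider) processLogs(logs []types.Log) error {
        pending := &partialProcessedLogs{}
        for _, l := range logs {
            // consults the persistent filter first, then the in-call buffer
            processed, err := p.isLogAlreadyProcessed(l, pending)
            if err != nil {
                return err
            }
            if processed {
                continue // duplicate from a previous scan or this batch
            }
            // ... apply the transfer to the balances map ...
        }
        // one write to the persistent filter instead of one per log
        return p.filter.AddKey(pending.ids...)
    }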
diff --git a/scanner/scanner.go b/scanner/scanner.go index d0406a4d..a9a39310 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -122,16 +122,6 @@ func (s *Scanner) Start(ctx context.Context) { // get the status of the token in the updater queue status := s.updater.RequestStatus(reqID, true) if status != nil { - log.Infow("token status in the updater queue", - "address", token.Address.Hex(), - "chainID", token.ChainID, - "externalID", token.ExternalID, - "lastBlock", status.LastBlock, - "lastTotalSupply", status.LastTotalSupply, - "totalNewLogs", status.TotalNewLogs, - "totalAlreadyProcessedLogs", status.TotalAlreadyProcessedLogs, - "totalLogs", status.TotalLogs, - "done", status.Done) // if the token is in the updater queue, update the // internal token status and continue to the next token // only if the token is done @@ -159,12 +149,6 @@ func (s *Scanner) Start(ctx context.Context) { log.Warnw("error enqueuing token", "error", err) continue } - log.Infow("token enqueued from the scanner", - "address", token.Address.Hex(), - "chainID", token.ChainID, - "externalID", token.ExternalID, - "from", token.LastBlock, - "to", lastNetworkBlock) } } } diff --git a/scanner/updater.go b/scanner/updater.go index a43a1663..8fec3cb3 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -92,7 +92,6 @@ func (u *Updater) Start(ctx context.Context, concurrentTokens int) { default: req, id := u.next() if req == nil { - log.Info("no more requests to process, sleeping...") time.Sleep(u.coolDown) continue } From 5299dced00af53df1474ee3822cfbb7b72494771 Mon Sep 17 00:00:00 2001 From: Lucas Menendez Date: Sun, 23 Jun 2024 21:10:03 +0200 Subject: [PATCH 19/21] trying to fix annoying gitcoin test --- scanner/providers/gitcoin/gitcoin_provider_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanner/providers/gitcoin/gitcoin_provider_test.go b/scanner/providers/gitcoin/gitcoin_provider_test.go index 121656f9..10d320df 100644 --- a/scanner/providers/gitcoin/gitcoin_provider_test.go +++ b/scanner/providers/gitcoin/gitcoin_provider_test.go @@ -48,7 +48,7 @@ func TestGitcoinPassport(t *testing.T) { c.Assert(err, qt.IsNil) c.Assert(len(emptyBalances), qt.Equals, 0) // wait for the download to finish - time.Sleep(2 * time.Second) + time.Sleep(5 * time.Second) // check the balances holders, _, err := provider.HoldersBalances(context.TODO(), nil, 0) c.Assert(err, qt.IsNil) From ef978c21346e50f07ce6ed52cc6fedd7a3517d27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Mon, 24 Jun 2024 11:11:23 +0200 Subject: [PATCH 20/21] final minor changes --- api/tokens.go | 18 +++++++++++++++--- db/queries/tokens.sql | 2 +- scanner/scanner.go | 11 +++++++++++ scanner/updater.go | 15 ++++++++++----- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/api/tokens.go b/api/tokens.go index f0f6ee81..45f5e022 100644 --- a/api/tokens.go +++ b/api/tokens.go @@ -45,11 +45,11 @@ func (capi *census3API) initTokenHandlers() error { api.MethodAccessTypePublic, capi.tokenStartBlock); err != nil { return err } - if err := capi.endpoint.RegisterMethod("/tokens/rescan/{tokenID}", "POST", + if err := capi.endpoint.RegisterMethod("/tokens/update/{tokenID}", "POST", api.MethodAccessTypeAdmin, capi.rescanToken); err != nil { return err } - if err := capi.endpoint.RegisterMethod("/tokens/rescan/queue/{queueID}", "GET", + if err := capi.endpoint.RegisterMethod("/tokens/update/queue/{queueID}", "GET", api.MethodAccessTypeAdmin, capi.checkRescanToken); err != nil { return err } @@ -617,6 
+617,12 @@ func (capi *census3API) getToken(msg *api.APIdata, ctx *httprouter.HTTPContext) return ctx.Send(res, api.HTTPstatusOK) } +// rescanToken function handler enqueues the rescan process for the token with +// the given ID. The token is scanned from the creation block to the last block +// stored in the database. It returns a 400 error if the provided ID is wrong or +// empty, a 404 error if the token is not found, a 500 error if something fails +// or a 200 response if the process is enqueued. It returns a queue ID to track +// the status of the process. func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContext) error { // get contract address from the tokenID query param and decode it; check if // it is provided, if not return an error @@ -664,7 +670,7 @@ func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContex CreationBlock: uint64(tokenData.CreationBlock), EndBlock: uint64(tokenData.LastBlock), }); err != nil { - return ErrMalformedToken.WithErr(err) + return ErrEncodeQueueItem.WithErr(err) } // encode the result and respond with it res, err := json.Marshal(QueueResponse{id}) @@ -674,6 +680,12 @@ func (capi *census3API) rescanToken(msg *api.APIdata, ctx *httprouter.HTTPContex return ctx.Send(res, api.HTTPstatusOK) } +// checkRescanToken function handler returns the status of the rescan process +// with the given queue ID. It returns a 400 error if the provided ID is wrong +// or empty, a 404 error if the token is not found in the queue or a 500 error +// if something fails. The response contains the address of the token, the chain +// ID, the status of the process, the number of logs scanned, the number of new +// logs found, and the number of duplicated logs. func (capi *census3API) checkRescanToken(msg *api.APIdata, ctx *httprouter.HTTPContext) error { queueID := ctx.URLParam("queueID") if queueID == "" { diff --git a/db/queries/tokens.sql index 3e419f82..f8ea29b4 100644 --- a/db/queries/tokens.sql +++ b/db/queries/tokens.sql @@ -112,4 +112,4 @@ FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ? HAVING num_of_tokens = 1; -- name: DeleteToken :execresult -DELETE FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ?; +DELETE FROM tokens WHERE id = ? AND chain_id = ? AND external_id = ?; \ No newline at end of file diff --git a/scanner/scanner.go index a9a39310..aa8f2158 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -362,6 +362,12 @@ func (s *Scanner) getLatestBlockNumbersUpdates() { } } +// updateInternalTokenStatus updates the internal status of the given token in +// the scanner, setting its last block number, synced flag and total supply. +// Keeping this status in memory avoids overloading the database with token +// information requests on every iteration of the scanner. It is called from +// the SaveHolders function to update the token status after saving the holders +// in the database. func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64, synced bool, totalSupply *big.Int, ) { @@ -380,6 +386,11 @@ func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64 s.tokensMtx.Unlock() } +// prepareToken prepares the token to be scanned. It calculates the creation +// block of the token if it is not ready yet. It updates the token in the +// scanner but also the token information in the database. It returns an error +// if something fails in the process.
It sets the last block of the token to +// the creation block, so that scanning starts from there. func (s *Scanner) prepareToken(token *ScannerToken) error { ctx, cancel := context.WithTimeout(s.ctx, UPDATE_TIMEOUT) defer cancel() diff --git a/scanner/updater.go index 8fec3cb3..d0c7885f 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -44,10 +44,7 @@ type UpdateRequest struct { // iterate over the requests, repeating the process of getting the token holders // balances and saving them in the database until the last block is greater or // equal to the end block. The end block is the block number where the token -// holders balances are up to date. The holders providers must include an -// instance of a TokenFilter to store the processed transactions to avoid -// re-processing them, but also rescanning a synced token to find missing -// transactions. +// holders balances are up to date. type Updater struct { ctx context.Context cancel context.CancelFunc @@ -113,10 +110,13 @@ func (u *Updater) Start(ctx context.Context, concurrentTokens int) { return } // update the request in the queue - log.Infow("updating request in the queue", "lastBlock", req.LastBlock, "done", req.Done) if err := u.SetRequest(id, &res); err != nil { log.Errorf("error updating request in the queue: %v", err) } + log.Infow("token processed", + "address", res.Address.Hex(), + "lastBlock", res.LastBlock, + "done", res.Done) }(id, *req) } } @@ -216,6 +216,11 @@ func RequestID(address common.Address, chainID uint64, externalID string) (strin return hex.EncodeToString(bHash[:4]), nil } +// next returns the next request in the queue that is not being processed or +// already done, along with its ID. If the queue is empty or the next request +// index is out of the range of the sorted queue, it returns nil and an empty +// string. If a pending request is found, it advances the next request index to +// the following position in the sorted queue. func (u *Updater) next() (*UpdateRequest, string) { u.queueMtx.Lock() defer u.queueMtx.Unlock() From cb943ab43891048751e63c550e2f0d55eacb2fe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Men=C3=A9ndez?= Date: Thu, 27 Jun 2024 17:05:53 +0200 Subject: [PATCH 21/21] extend and fix treedb api, return the new log ids from holder providers to add them to the token filter after saving holders --- db/treedb/treedb.go | 103 +++++++++++++++++----- scanner/const.go | 12 ++- scanner/providers/holders_provider.go | 6 +- scanner/providers/web3/erc20_provider.go | 18 ++-- scanner/providers/web3/erc721_provider.go | 18 ++-- scanner/providers/web3/erc777_provider.go | 18 ++-- scanner/providers/web3/web3_provider.go | 6 +- scanner/scanner.go | 2 +- scanner/updater.go | 9 +- 9 files changed, 135 insertions(+), 57 deletions(-) diff --git a/db/treedb/treedb.go index 2544bf67..4804d825 100644 --- a/db/treedb/treedb.go +++ b/db/treedb/treedb.go @@ -23,7 +23,7 @@ const filterTreeLevels = 64 // ErrNotInitialized is returned when no tree is initialized in a TreeDB // instance, which means that LoadTree has not been called and the tree is // not ready to be used. -var ErrNotInitialized = fmt.Errorf("tree not initialized, call Load first") +var ErrNotInitialized = fmt.Errorf("tree not initialized, call LoadTree first") // TreeDB is a tree-backed filter associated with a token.
type TreeDB struct { @@ -56,6 +56,9 @@ func LoadTree(db db.Database, prefix string) (*TreeDB, error) { }, wTx.Commit() } +// Close closes the tree database. If the tree is not nil, it closes the +// underlying database. If the parent database is not nil, it closes it too. +// It returns an error if any of the databases cannot be closed. func (tdb *TreeDB) Close() error { if tdb.tree != nil { if err := tdb.tree.DB().Close(); err != nil { @@ -68,11 +71,11 @@ func (tdb *TreeDB) Close() error { return nil } -// DeleteTree deletes a tree from the database identified by current prefix. -// It iterates over all the keys in the tree and deletes them. If some key -// cannot be deleted, it logs a warning and continues with the next key. It -// commits the transaction at the end. -func (tdb *TreeDB) Delete() error { +// Purge deletes a tree from the database identified by the current prefix. It +// iterates over all the keys in the tree and deletes them. If some key cannot +// be deleted, it logs a warning and continues with the next key. It commits the +// transaction at the end. +func (tdb *TreeDB) Purge() error { treeDB := prefixeddb.NewPrefixedDatabase(tdb.parentDB, []byte(tdb.prefix)) wTx := treeDB.WriteTx() if err := treeDB.Iterate(nil, func(k, _ []byte) bool { @@ -86,36 +89,94 @@ func (tdb *TreeDB) Delete() error { return wTx.Commit() } -// Add adds a key to the tree. -func (tdb *TreeDB) Add(key, value []byte) error { +// Add adds a key to the tree. If no write transaction is provided, it creates +// a new one and commits it at the end. It returns an error if the tree is not +// initialized, if there is an error adding the key-value pair or committing +// the transaction if it was created. If a transaction is provided, it does +// not commit or discard it. +func (tdb *TreeDB) Add(wtx db.WriteTx, key, value []byte) error { if tdb.tree == nil { return ErrNotInitialized } + commitTx := wtx == nil + if commitTx { + wtx = tdb.tree.DB().WriteTx() + defer wtx.Discard() + } + if err := tdb.tree.Add(wtx, key, value); err != nil { + return err + } + if commitTx { + return wtx.Commit() + } + return nil +} + +// Del deletes a key from the tree. If no write transaction is provided, it +// creates a new one and commits it at the end. It returns an error if the tree +// is not initialized, if there is an error deleting the key-value pair or +// committing the transaction if it was created. If a transaction is provided, +// it does not commit or discard it. +func (tdb *TreeDB) Del(wtx db.WriteTx, key []byte) error { + if tdb.tree == nil { + return ErrNotInitialized + } + commitTx := wtx == nil + if commitTx { + wtx = tdb.tree.DB().WriteTx() + defer wtx.Discard() + } + if err := tdb.tree.Del(wtx, key); err != nil { + return err + } + if commitTx { + return wtx.Commit() + } + return nil +} + +// AddBatch adds a batch of keys and values to the tree. It is more efficient +// than calling Add for each key-value pair. It returns an error if the length +// of keys and values is different, if the tree is not initialized, if there +// is an error adding a key-value pair or committing the transaction. It uses +// a new write transaction to add all the keys and commits it at the end. If +// something goes wrong, it returns an error and discards the transaction.
+func (tdb *TreeDB) AddBatch(keys, values [][]byte) error { + if tdb.tree == nil { + return ErrNotInitialized + } + if len(keys) != len(values) { + return fmt.Errorf("keys and values must have the same length") + } wTx := tdb.tree.DB().WriteTx() defer wTx.Discard() - if err := tdb.tree.Add(wTx, key, value); err != nil { - return err + for i := range keys { + if err := tdb.tree.Add(wTx, keys[i], values[i]); err != nil { + return err + } } return wTx.Commit() } -// AddKey adds a key to the tree with nil value. It accepts variadic keys. +// AddKey adds a key to the tree with nil value. It accepts variadic keys. It +// uses a new write transaction to add all the keys and commits it at the end. +// If something goes wrong, it returns an error and discards the transaction. func (tdb *TreeDB) AddKey(key ...[]byte) error { if tdb.tree == nil { return ErrNotInitialized } - wTx := tdb.tree.DB().WriteTx() - defer wTx.Discard() + wtx := tdb.tree.DB().WriteTx() + defer wtx.Discard() for _, k := range key { - if err := tdb.tree.Add(wTx, k, nil); err != nil { + if err := tdb.tree.Add(wtx, k, nil); err != nil { return err } } - return wTx.Commit() + return wtx.Commit() } -// TestKey checks if a key is in the tree. -func (tdb *TreeDB) TestKey(key []byte) (bool, error) { +// CheckKey checks if a key is in the tree. +func (tdb *TreeDB) CheckKey(key []byte) (bool, error) { if tdb.tree == nil { return false, ErrNotInitialized } @@ -129,10 +190,10 @@ func (tdb *TreeDB) TestKey(key []byte) (bool, error) { return true, nil } -// TestAndAddKey checks if a key is in the tree, if not, adds it to the tree. It -// is the combination of TestKey and conditional AddKey. -func (tdb *TreeDB) TestAndAddKey(key []byte) (bool, error) { - exists, err := tdb.TestKey(key) +// CheckAndAddKey checks if a key is in the tree, if not, adds it to the tree. It +// is the combination of CheckKey and conditional AddKey. +func (tdb *TreeDB) CheckAndAddKey(key []byte) (bool, error) { + exists, err := tdb.CheckKey(key) if err != nil { return false, err } diff --git a/scanner/const.go index b47f56fd..4b56ab5a 100644 --- a/scanner/const.go +++ b/scanner/const.go @@ -3,9 +3,15 @@ package scanner import "time" const ( - READ_TIMEOUT = time.Minute - SCAN_TIMEOUT = 5 * time.Minute - SAVE_TIMEOUT = 5 * time.Minute + // READ_TIMEOUT is the timeout to get sorted tokens to scan from the database + READ_TIMEOUT = time.Minute + // SAVE_TIMEOUT is the timeout to save the scanned tokens to the database + SAVE_TIMEOUT = 5 * time.Minute + // PREPARE_TIMEOUT is the timeout to prepare the tokens to scan (calculate + // the creation block number, etc.) + PREPARE_TIMEOUT = 5 * time.Minute + // UPDATE_TIMEOUT is the timeout to update the tokens from their holders + // providers UPDATE_TIMEOUT = 15 * time.Minute ) diff --git a/scanner/providers/holders_provider.go index f8cde985..c031b62c 100644 --- a/scanner/providers/holders_provider.go +++ b/scanner/providers/holders_provider.go @@ -19,6 +19,7 @@ type BlocksDelta struct { Block uint64 Synced bool TotalSupply *big.Int + NewLogs [][]byte } // Filter interface defines the basic methods to interact with a filter to // for example, if a token is rescanned. It allows implementing different // filters, such as in-memory, disk, merkle tree, etc.
type Filter interface { - AddKey(key ...[]byte) error - TestKey(key []byte) (bool, error) - TestAndAddKey(key []byte) (bool, error) + CheckKey(key []byte) (bool, error) + CheckAndAddKey(key []byte) (bool, error) } // HolderProvider is the interface that wraps the basic methods to interact with diff --git a/scanner/providers/web3/erc20_provider.go b/scanner/providers/web3/erc20_provider.go index 3eb3c746..8d9765b0 100644 --- a/scanner/providers/web3/erc20_provider.go +++ b/scanner/providers/web3/erc20_provider.go @@ -161,7 +161,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) - processedLogs := &partialProcessedLogs{} + processedLogs := &PartialProcessedLogs{} for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -177,6 +177,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } // check if the log has been already processed and add it to the filter @@ -190,6 +191,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) } // if it is the first scan, it will not check if the log has been @@ -211,9 +213,6 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro balances[logData.From] = new(big.Int).Neg(logData.Value) } } - if err := p.filter.AddKey(processedLogs.ids...); err != nil { - return nil, nil, errors.Join(ErrAddingProcessedLogs, fmt.Errorf("[ERC20] %s: %w", p.address, err)) - } log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, @@ -230,6 +229,7 @@ func (p *ERC20HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fro AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: synced, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, } if delta.TotalSupply, err = p.TotalSupply(nil); err != nil { log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) @@ -396,7 +396,7 @@ func (p *ERC20HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map[ // number and log index. It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error. 
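Because PartialProcessedLogs is now a plain slice type rather than a struct, the providers below grow it through a pointer: append can reallocate the backing array, so the new slice header has to be written back through *pl, or the caller would keep seeing the old, shorter slice. The pattern in isolation:

    // appendID records a new log ID in the partial buffer; pl must be a
    // pointer so the grown slice remains visible to the caller.
    func appendID(pl *PartialProcessedLogs, hID []byte) {
        *pl = append(*pl, hID)
    }

This is also why HoldersBalances dereferences the buffer (*processedLogs) when filling BlocksDelta.NewLogs: the value read at that point reflects every append made through the pointer.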
-func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProcessedLogs) (bool, error) { +func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log, pl *PartialProcessedLogs) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -410,7 +410,7 @@ func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProc } // check if the hash is in the filter hID := hashFn.Sum(nil)[:8] - exists, err := p.filter.TestKey(hID) + exists, err := p.filter.CheckKey(hID) if err != nil { return false, err } @@ -418,12 +418,12 @@ func (p *ERC20HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProc return true, nil } // if the hash is not in the filter, check if it is in the partial filter - for _, id := range pl.ids { + for _, id := range *pl { if bytes.Equal(id, hID) { return true, nil } } // add the hash to the partial filter if it has not been processed and return false - pl.ids = append(pl.ids, hID) + *pl = append(*pl, hID) return false, nil } diff --git a/scanner/providers/web3/erc721_provider.go index 8e188ebb..d50cd270 100644 --- a/scanner/providers/web3/erc721_provider.go +++ b/scanner/providers/web3/erc721_provider.go @@ -159,7 +159,7 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) - processedLogs := &partialProcessedLogs{} + processedLogs := &PartialProcessedLogs{} for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -175,6 +175,7 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) } // check if the log has been already processed and add it to the filter @@ -188,6 +189,7 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) } // if it is the first scan, it will not check if the log has been @@ -209,9 +211,6 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances[logData.From] = big.NewInt(-1) } } - if err := p.filter.AddKey(processedLogs.ids...); err != nil { - return nil, nil, errors.Join(ErrAddingProcessedLogs, fmt.Errorf("[ERC721] %s: %w", p.address, err)) - } log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, @@ -228,6 +227,7 @@ func (p *ERC721HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: synced, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, } if delta.TotalSupply, err = p.TotalSupply(nil); err != nil { log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) @@ -392,7 +392,7 @@ func (p *ERC721HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map // number and log index. It returns true if the log has been already processed // or false if it has not been processed yet.
If some error occurs, it returns // false and the error. -func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProcessedLogs) (bool, error) { +func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log, pl *PartialProcessedLogs) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -406,7 +406,7 @@ func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialPro } // check if the hash is in the filter hID := hashFn.Sum(nil)[:8] - exists, err := p.filter.TestKey(hID) + exists, err := p.filter.CheckKey(hID) if err != nil { return false, err } @@ -414,12 +414,12 @@ func (p *ERC721HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialPro return true, nil } // if the hash is not in the filter, check if it is in the partial filter - for _, id := range pl.ids { + for _, id := range *pl { if bytes.Equal(id, hID) { return true, nil } } // add the hash to the partial filter if it has not been processed and return false - pl.ids = append(pl.ids, hID) + *pl = append(*pl, hID) return false, nil } diff --git a/scanner/providers/web3/erc777_provider.go index 71bec291..a9d790a6 100644 --- a/scanner/providers/web3/erc777_provider.go +++ b/scanner/providers/web3/erc777_provider.go @@ -159,7 +159,7 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances := make(map[common.Address]*big.Int) // iterate the logs and update the balances log.Infow("parsing logs", "address", p.address, "type", p.TypeName(), "count", len(logs)) - processedLogs := &partialProcessedLogs{} + processedLogs := &PartialProcessedLogs{} for _, currentLog := range logs { // skip the log if it has been removed if currentLog.Removed { @@ -175,6 +175,7 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, }, errors.Join(ErrParsingTokenLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) } // check if the log has been already processed and add it to the filter @@ -188,6 +189,7 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: false, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, }, errors.Join(ErrCheckingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) } // if it is the first scan, it will not check if the log has been @@ -209,9 +211,6 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr balances[logData.From] = big.NewInt(-1) } } - if err := p.filter.AddKey(processedLogs.ids...); err != nil { - return nil, nil, errors.Join(ErrAddingProcessedLogs, fmt.Errorf("[ERC777] %s: %w", p.address, err)) - } log.Infow("logs parsed", "count", len(balances), "new_logs", newTransfers, @@ -228,6 +227,7 @@ func (p *ERC777HolderProvider) HoldersBalances(ctx context.Context, _ []byte, fr AlreadyProcessedLogsCount: alreadyProcessedLogs, Synced: synced, TotalSupply: big.NewInt(0), + NewLogs: *processedLogs, } if delta.TotalSupply, err = p.TotalSupply(nil); err != nil { log.Warnw("error getting total supply, it will retry in the next iteration", "error", err) @@ -392,7 +392,7 @@ func (p *ERC777HolderProvider) CensusKeys(data map[common.Address]*big.Int) (map // number and log index.
It returns true if the log has been already processed // or false if it has not been processed yet. If some error occurs, it returns // false and the error. -func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialProcessedLogs) (bool, error) { +func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log, pl *PartialProcessedLogs) (bool, error) { // if the filter is not defined, return false if p.filter == nil { return false, nil @@ -406,7 +406,7 @@ func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialPro } // check if the hash is in the filter hID := hashFn.Sum(nil)[:8] - exists, err := p.filter.TestKey(hID) + exists, err := p.filter.CheckKey(hID) if err != nil { return false, err } @@ -414,12 +414,12 @@ func (p *ERC777HolderProvider) isLogAlreadyProcessed(l types.Log, pl *partialPro return true, nil } // if the hash is not in the filter, check if it is in the partial filter - for _, id := range pl.ids { + for _, id := range *pl { if bytes.Equal(id, hID) { return true, nil } } // add the hash to the partial filter if it has not been processed and return false - pl.ids = append(pl.ids, hID) + *pl = append(*pl, hID) return false, nil } diff --git a/scanner/providers/web3/web3_provider.go index 206bed30..4c338e66 100644 --- a/scanner/providers/web3/web3_provider.go +++ b/scanner/providers/web3/web3_provider.go @@ -30,13 +30,11 @@ type Web3ProviderConfig struct { DB *db.Database } -// partialProcessedLogs struct is used to store the logs that are partially +// PartialProcessedLogs is used to store the logs that are partially // processed by the provider. It is used to avoid processing the same logs // multiple times if the provider is rescanned, and to add the newly processed // logs to the token filter in a single call. -type partialProcessedLogs struct { - ids [][]byte -} +type PartialProcessedLogs [][]byte // creationBlock function returns the block number of the creation of a contract // address. It uses the `eth_getCode` method to get the contract code at the diff --git a/scanner/scanner.go index aa8f2158..0b23d3fb 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -392,7 +392,7 @@ func (s *Scanner) updateInternalTokenStatus(token ScannerToken, lastBlock uint64 // if something fails in the process. It sets the last block of the token to // the creation block, so that scanning starts from there.
func (s *Scanner) prepareToken(token *ScannerToken) error { - ctx, cancel := context.WithTimeout(s.ctx, UPDATE_TIMEOUT) + ctx, cancel := context.WithTimeout(s.ctx, PREPARE_TIMEOUT) defer cancel() // get the provider by token type provider, err := s.providerManager.GetProvider(ctx, token.Type) diff --git a/scanner/updater.go index d0c7885f..70d64f1f 100644 --- a/scanner/updater.go +++ b/scanner/updater.go @@ -277,13 +277,14 @@ func (u *Updater) process(id string, req UpdateRequest) (UpdateRequest, error) { return req, fmt.Errorf("error getting provider for token: %v", err) } // if the token is not an external token, load its filter and set the provider reference + var filter *treedb.TreeDB if !provider.IsExternal() { chainAddress, ok := u.networks.ChainAddress(req.ChainID, req.Address.Hex()) if !ok { return req, fmt.Errorf("error getting chain address for token %s on chain %d", req.Address.Hex(), req.ChainID) } // load filter of the token from the database - filter, err := treedb.LoadTree(u.kvdb, chainAddress) + filter, err = treedb.LoadTree(u.kvdb, chainAddress) if err != nil { return req, err } @@ -362,6 +363,12 @@ func (u *Updater) process(id string, req UpdateRequest) (UpdateRequest, error) { if err != nil { return req, fmt.Errorf("error saving token holders balances: %v", err) } + // add the new keys to the filter if it is defined (not external token) + if filter != nil && delta.NewLogs != nil { + if err := filter.AddKey(delta.NewLogs...); err != nil { + return req, fmt.Errorf("error adding keys to filter: %v", err) + } + } log.Debugw("token holders balances updated", "token", req.Address.Hex(), "chainID", req.ChainID,