diff --git a/changelog.md b/changelog.md index 184b657ed9..f2ec931d7a 100644 --- a/changelog.md +++ b/changelog.md @@ -20,6 +20,7 @@ ### Fixes +* [1530](https://github.com/zeta-chain/node/pull/1530) - Outbound tx confirmation/inclusion enhancement * [1496](https://github.com/zeta-chain/node/issues/1496) - post block header for enabled EVM chains only * [1518](https://github.com/zeta-chain/node/pull/1518) - Avoid duplicate keysign if an outTx is already pending * fix Code4rena issue - zetaclients potentially miss inTx when PostSend (or other RPC) fails diff --git a/zetaclient/bitcoin_client.go b/zetaclient/bitcoin_client.go index 4e0651b45e..2b9f242b56 100644 --- a/zetaclient/bitcoin_client.go +++ b/zetaclient/bitcoin_client.go @@ -57,9 +57,9 @@ type BitcoinChainClient struct { Mu *sync.Mutex // lock for all the maps, utxos and core params pendingNonce uint64 - includedTxHashes map[string]uint64 // key: tx hash - includedTxResults map[string]btcjson.GetTransactionResult // key: chain-tss-nonce - broadcastedTx map[string]string // key: chain-tss-nonce, value: outTx hash + includedTxHashes map[string]uint64 // key: tx hash + includedTxResults map[string]*btcjson.GetTransactionResult // key: chain-tss-nonce + broadcastedTx map[string]string // key: chain-tss-nonce, value: outTx hash utxos []btcjson.ListUnspentResult params observertypes.ChainParams @@ -148,7 +148,7 @@ func NewBitcoinClient( ob.zetaClient = bridge ob.Tss = tss ob.includedTxHashes = make(map[string]uint64) - ob.includedTxResults = make(map[string]btcjson.GetTransactionResult) + ob.includedTxResults = make(map[string]*btcjson.GetTransactionResult) ob.broadcastedTx = make(map[string]string) ob.params = btcCfg.ChainParams @@ -442,24 +442,23 @@ func (ob *BitcoinChainClient) IsSendOutTxProcessed(sendHash string, nonce uint64 } // Try including this outTx broadcasted by myself - inMempool, err := ob.checkNSaveIncludedTx(txnHash, params) - if err != nil { - ob.logger.ObserveOutTx.Error().Err(err).Msg("IsSendOutTxProcessed: checkNSaveIncludedTx failed") + txResult, inMempool := ob.checkIncludedTx(txnHash, params) + if txResult == nil { + ob.logger.ObserveOutTx.Error().Err(err).Msg("IsSendOutTxProcessed: checkIncludedTx failed") return false, false, err - } - if inMempool { // to avoid unnecessary Tss keysign + } else if inMempool { // still in mempool (should avoid unnecessary Tss keysign) ob.logger.ObserveOutTx.Info().Msgf("IsSendOutTxProcessed: outTx %s is still in mempool", outTxID) return true, false, nil + } else { // included + ob.setIncludedTx(nonce, txResult) } // Get tx result again in case it is just included - ob.Mu.Lock() - res, included = ob.includedTxResults[outTxID] - ob.Mu.Unlock() - if !included { + res = ob.getIncludedTx(nonce) + if res == nil { return false, false, nil } - ob.logger.ObserveOutTx.Info().Msgf("IsSendOutTxProcessed: checkNSaveIncludedTx succeeded for outTx %s", outTxID) + ob.logger.ObserveOutTx.Info().Msgf("IsSendOutTxProcessed: setIncludedTx succeeded for outTx %s", outTxID) } // It's safe to use cctx's amount to post confirmation because it has already been verified in observeOutTx() @@ -830,14 +829,11 @@ func (ob *BitcoinChainClient) refreshPendingNonce() { } func (ob *BitcoinChainClient) getOutTxidByNonce(nonce uint64, test bool) (string, error) { - ob.Mu.Lock() - res, included := ob.includedTxResults[ob.GetTxID(nonce)] - ob.Mu.Unlock() // There are 2 types of txids an observer can trust // 1. The ones had been verified and saved by observer self. // 2. The ones had been finalized in zetacore based on majority vote. - if included { + if res := ob.getIncludedTx(nonce); res != nil { return res.TxID, nil } if !test { // if not unit test, get cctx from zetacore @@ -1020,13 +1016,28 @@ func (ob *BitcoinChainClient) observeOutTx() { if len(tracker.HashList) > 1 { ob.logger.ObserveOutTx.Warn().Msgf("observeOutTx: oops, outTxID %s got multiple (%d) outTx hashes", outTxID, len(tracker.HashList)) } - // verify outTx hashes + // iterate over all txHashes to find the truly included one. + // we do it this (inefficient) way because we don't rely on the first one as it may be a false positive (for unknown reason). + txCount := 0 + var txResult *btcjson.GetTransactionResult for _, txHash := range tracker.HashList { - _, err := ob.checkNSaveIncludedTx(txHash.TxHash, params) - if err != nil { - ob.logger.ObserveOutTx.Error().Err(err).Msg("observeOutTx: checkNSaveIncludedTx failed") + result, inMempool := ob.checkIncludedTx(txHash.TxHash, params) + if result != nil && !inMempool { // included + txCount++ + txResult = result + ob.logger.ObserveOutTx.Info().Msgf("observeOutTx: included outTx %s for chain %d nonce %d", txHash.TxHash, ob.chain.ChainId, tracker.Nonce) + if txCount > 1 { + ob.logger.ObserveOutTx.Error().Msgf( + "observeOutTx: checkIncludedTx passed, txCount %d chain %d nonce %d result %v", txCount, ob.chain.ChainId, tracker.Nonce, result) + } } } + if txCount == 1 { // should be only one txHash included for each nonce + ob.setIncludedTx(tracker.Nonce, txResult) + } else if txCount > 1 { + ob.removeIncludedTx(tracker.Nonce) // we can't tell which txHash is true, so we remove all (if any) to be safe + ob.logger.ObserveOutTx.Error().Msgf("observeOutTx: included multiple (%d) outTx for chain %d nonce %d", txCount, ob.chain.ChainId, tracker.Nonce) + } } ticker.UpdateInterval(ob.GetChainParams().OutTxTicker, ob.logger.ObserveOutTx) case <-ob.stop: @@ -1036,50 +1047,69 @@ func (ob *BitcoinChainClient) observeOutTx() { } } -// checkNSaveIncludedTx either includes a new outTx or update an existing outTx result. -// Returns inMempool, error -func (ob *BitcoinChainClient) checkNSaveIncludedTx(txHash string, params types.OutboundTxParams) (bool, error) { +// checkIncludedTx checks if a txHash is included and returns (txResult, inMempool) +// Note: if txResult is nil, then inMempool flag should be ignored. +func (ob *BitcoinChainClient) checkIncludedTx(txHash string, params types.OutboundTxParams) (*btcjson.GetTransactionResult, bool) { outTxID := ob.GetTxID(params.OutboundTxTssNonce) hash, getTxResult, err := ob.GetTxResultByHash(txHash) if err != nil { - return false, errors.Wrapf(err, "checkNSaveIncludedTx: error GetTxResultByHash: %s", txHash) + ob.logger.ObserveOutTx.Error().Err(err).Msgf("checkIncludedTx: error GetTxResultByHash: %s", txHash) + return nil, false + } + if txHash != getTxResult.TxID { // just in case, we'll use getTxResult.TxID later + ob.logger.ObserveOutTx.Error().Msgf("checkIncludedTx: inconsistent txHash %s and getTxResult.TxID %s", txHash, getTxResult.TxID) + return nil, false } if getTxResult.Confirmations >= 0 { // check included tx only err = ob.checkTssOutTxResult(hash, getTxResult, params, params.OutboundTxTssNonce) if err != nil { - return false, errors.Wrapf(err, "checkNSaveIncludedTx: error verify bitcoin outTx %s outTxID %s", txHash, outTxID) + ob.logger.ObserveOutTx.Error().Err(err).Msgf("checkIncludedTx: error verify bitcoin outTx %s outTxID %s", txHash, outTxID) + return nil, false } + return getTxResult, false // included + } + return getTxResult, true // in mempool +} - ob.Mu.Lock() - defer ob.Mu.Unlock() - nonce, foundHash := ob.includedTxHashes[txHash] - res, foundRes := ob.includedTxResults[outTxID] - - // include new outTx and enforce rigid 1-to-1 mapping: outTxID(nonce) <===> txHash - if !foundHash && !foundRes { - ob.includedTxHashes[txHash] = params.OutboundTxTssNonce - ob.includedTxResults[outTxID] = *getTxResult - if params.OutboundTxTssNonce >= ob.pendingNonce { // try increasing pending nonce on every newly included outTx - ob.pendingNonce = params.OutboundTxTssNonce + 1 - } - ob.logger.ObserveOutTx.Info().Msgf("checkNSaveIncludedTx: included new bitcoin outTx %s outTxID %s pending nonce %d", txHash, outTxID, ob.pendingNonce) - } - // update saved tx result as confirmations may increase - if foundHash && foundRes { - ob.includedTxResults[outTxID] = *getTxResult - if getTxResult.Confirmations > res.Confirmations { - ob.logger.ObserveOutTx.Info().Msgf("checkNSaveIncludedTx: bitcoin outTx %s got confirmations %d", txHash, getTxResult.Confirmations) - } - } - if !foundHash && foundRes { // be alert for duplicate payment!!! As we got a new hash paying same cctx. It might happen (e.g. majority of signers get crupted) - ob.logger.ObserveOutTx.Error().Msgf("checkNSaveIncludedTx: duplicate payment by bitcoin outTx %s outTxID %s, prior result %v, current result %v", txHash, outTxID, res, *getTxResult) +// setIncludedTx saves included tx result in memory +func (ob *BitcoinChainClient) setIncludedTx(nonce uint64, getTxResult *btcjson.GetTransactionResult) { + txHash := getTxResult.TxID + outTxID := ob.GetTxID(nonce) + + ob.Mu.Lock() + defer ob.Mu.Unlock() + res, found := ob.includedTxResults[outTxID] + + if !found { // not found. + ob.includedTxResults[outTxID] = getTxResult // include new outTx and enforce rigid 1-to-1 mapping: nonce <===> txHash + if nonce >= ob.pendingNonce { // try increasing pending nonce on every newly included outTx + ob.pendingNonce = nonce + 1 } - if foundHash && !foundRes { - ob.logger.ObserveOutTx.Error().Msgf("checkNSaveIncludedTx: unreachable code path! outTx %s outTxID %s, prior nonce %d, current nonce %d", txHash, outTxID, nonce, params.OutboundTxTssNonce) + ob.logger.ObserveOutTx.Info().Msgf("setIncludedTx: included new bitcoin outTx %s outTxID %s pending nonce %d", txHash, outTxID, ob.pendingNonce) + } else if txHash == res.TxID { // found same hash. + ob.includedTxResults[outTxID] = getTxResult // update tx result as confirmations may increase + if getTxResult.Confirmations > res.Confirmations { + ob.logger.ObserveOutTx.Info().Msgf("setIncludedTx: bitcoin outTx %s got confirmations %d", txHash, getTxResult.Confirmations) } - return false, nil + } else { // found other hash. + // be alert for duplicate payment!!! As we got a new hash paying same cctx (for whatever reason). + delete(ob.includedTxResults, outTxID) // we can't tell which txHash is true, so we remove all to be safe + ob.logger.ObserveOutTx.Error().Msgf("setIncludedTx: duplicate payment by bitcoin outTx %s outTxID %s, prior outTx %s", txHash, outTxID, res.TxID) } - return true, nil // in mempool +} + +// getIncludedTx gets the receipt and transaction from memory +func (ob *BitcoinChainClient) getIncludedTx(nonce uint64) *btcjson.GetTransactionResult { + ob.Mu.Lock() + defer ob.Mu.Unlock() + return ob.includedTxResults[ob.GetTxID(nonce)] +} + +// removeIncludedTx removes included tx's result from memory +func (ob *BitcoinChainClient) removeIncludedTx(nonce uint64) { + ob.Mu.Lock() + defer ob.Mu.Unlock() + delete(ob.includedTxResults, ob.GetTxID(nonce)) } // Basic TSS outTX checks: diff --git a/zetaclient/btc_signer_test.go b/zetaclient/btc_signer_test.go index 8bee941a98..5c12e67876 100644 --- a/zetaclient/btc_signer_test.go +++ b/zetaclient/btc_signer_test.go @@ -398,7 +398,7 @@ func createTestClient(t *testing.T) *BitcoinChainClient { client := &BitcoinChainClient{ Tss: tss, Mu: &sync.Mutex{}, - includedTxResults: make(map[string]btcjson.GetTransactionResult), + includedTxResults: make(map[string]*btcjson.GetTransactionResult), } // Create 10 dummy UTXOs (22.44 BTC in total) @@ -413,7 +413,7 @@ func createTestClient(t *testing.T) *BitcoinChainClient { func mineTxNSetNonceMark(ob *BitcoinChainClient, nonce uint64, txid string, preMarkIndex int) { // Mine transaction outTxID := ob.GetTxID(nonce) - ob.includedTxResults[outTxID] = btcjson.GetTransactionResult{TxID: txid} + ob.includedTxResults[outTxID] = &btcjson.GetTransactionResult{TxID: txid} // Set nonce mark tssAddress := ob.Tss.BTCAddressWitnessPubkeyHash().EncodeAddress() diff --git a/zetaclient/evm_client.go b/zetaclient/evm_client.go index 005fa10260..badce4eca6 100644 --- a/zetaclient/evm_client.go +++ b/zetaclient/evm_client.go @@ -296,11 +296,11 @@ func (ob *EVMChainClient) Stop() { // returns: isIncluded, isConfirmed, Error // If isConfirmed, it also post to ZetaCore func (ob *EVMChainClient) IsSendOutTxProcessed(sendHash string, nonce uint64, cointype common.CoinType, logger zerolog.Logger) (bool, bool, error) { - if !ob.isTxConfirmed(nonce) { - return false, false, nil - } params := ob.GetChainParams() receipt, transaction := ob.GetTxNReceipt(nonce) + if receipt == nil || transaction == nil { // not confirmed yet + return false, false, nil + } sendID := fmt.Sprintf("%s-%d", ob.chain.String(), nonce) logger = logger.With().Str("sendID", sendID).Logger() @@ -543,13 +543,6 @@ func (ob *EVMChainClient) IsSendOutTxProcessed(sendHash string, nonce uint64, co return false, false, nil } -// The lowest nonce we observe outTx for each chain -var lowestOutTxNonceToObserve = map[int64]uint64{ - 5: 113000, // Goerli - 97: 102600, // BSC testnet - 80001: 154500, // Mumbai -} - // FIXME: there's a chance that a txhash in OutTxChan may not deliver when Stop() is called // observeOutTx periodically checks all the txhash in potential outbound txs func (ob *EVMChainClient) observeOutTx() { @@ -558,7 +551,7 @@ func (ob *EVMChainClient) observeOutTx() { if err != nil || timeoutNonce <= 0 { timeoutNonce = 100 * 3 // process up to 100 hashes } - ob.logger.ObserveOutTx.Info().Msgf("observeOutTx using timeoutNonce %d seconds", timeoutNonce) + ob.logger.ObserveOutTx.Info().Msgf("observeOutTx: using timeoutNonce %d seconds", timeoutNonce) ticker, err := NewDynamicTicker(fmt.Sprintf("EVM_observeOutTx_%d", ob.chain.ChainId), ob.GetChainParams().OutTxTicker) if err != nil { @@ -577,28 +570,37 @@ func (ob *EVMChainClient) observeOutTx() { //FIXME: remove this timeout here to ensure that all trackers are queried outTimeout := time.After(time.Duration(timeoutNonce) * time.Second) TRACKERLOOP: - // Skip old gabbage trackers as we spent too much time on querying them for _, tracker := range trackers { nonceInt := tracker.Nonce - if nonceInt < lowestOutTxNonceToObserve[ob.chain.ChainId] { - continue - } if ob.isTxConfirmed(nonceInt) { // Go to next tracker if this one already has a confirmed tx continue } + txCount := 0 + var receipt *ethtypes.Receipt + var transaction *ethtypes.Transaction for _, txHash := range tracker.HashList { select { case <-outTimeout: - ob.logger.ObserveOutTx.Warn().Msgf("observeOutTx timeout on chain %d nonce %d", ob.chain.ChainId, nonceInt) + ob.logger.ObserveOutTx.Warn().Msgf("observeOutTx: timeout on chain %d nonce %d", ob.chain.ChainId, nonceInt) break TRACKERLOOP default: - if ob.confirmTxByHash(txHash.TxHash, nonceInt) { - ob.logger.ObserveOutTx.Info().Msgf("observeOutTx confirmed outTx %s for chain %d nonce %d", txHash.TxHash, ob.chain.ChainId, nonceInt) - break + if recpt, tx, ok := ob.checkConfirmedTx(txHash.TxHash, nonceInt); ok { + txCount++ + receipt = recpt + transaction = tx + ob.logger.ObserveOutTx.Info().Msgf("observeOutTx: confirmed outTx %s for chain %d nonce %d", txHash.TxHash, ob.chain.ChainId, nonceInt) + if txCount > 1 { + ob.logger.ObserveOutTx.Error().Msgf( + "observeOutTx: checkConfirmedTx passed, txCount %d chain %d nonce %d receipt %v transaction %v", txCount, ob.chain.ChainId, nonceInt, receipt, transaction) + } } - ob.logger.ObserveOutTx.Debug().Msgf("observeOutTx outTx %s for chain %d nonce %d not confirmed yet", txHash.TxHash, ob.chain.ChainId, nonceInt) } } + if txCount == 1 { // should be only one txHash confirmed for each nonce. + ob.SetTxNReceipt(nonceInt, receipt, transaction) + } else if txCount > 1 { // should not happen. We can't tell which txHash is true. It might happen (e.g. glitchy/hacked endpoint) + ob.logger.ObserveOutTx.Error().Msgf("observeOutTx: confirmed multiple (%d) outTx for chain %d nonce %d", txCount, ob.chain.ChainId, nonceInt) + } } ticker.UpdateInterval(ob.GetChainParams().OutTxTicker, ob.logger.ObserveOutTx) case <-ob.stop: @@ -611,47 +613,45 @@ func (ob *EVMChainClient) observeOutTx() { // SetPendingTx sets the pending transaction in memory func (ob *EVMChainClient) SetPendingTx(nonce uint64, transaction *ethtypes.Transaction) { ob.Mu.Lock() + defer ob.Mu.Unlock() ob.outTxPendingTransactions[ob.GetTxID(nonce)] = transaction - ob.Mu.Unlock() } // GetPendingTx gets the pending transaction from memory func (ob *EVMChainClient) GetPendingTx(nonce uint64) *ethtypes.Transaction { ob.Mu.Lock() - transaction := ob.outTxPendingTransactions[ob.GetTxID(nonce)] - ob.Mu.Unlock() - return transaction + defer ob.Mu.Unlock() + return ob.outTxPendingTransactions[ob.GetTxID(nonce)] } // SetTxNReceipt sets the receipt and transaction in memory func (ob *EVMChainClient) SetTxNReceipt(nonce uint64, receipt *ethtypes.Receipt, transaction *ethtypes.Transaction) { ob.Mu.Lock() + defer ob.Mu.Unlock() delete(ob.outTxPendingTransactions, ob.GetTxID(nonce)) // remove pending transaction, if any ob.outTXConfirmedReceipts[ob.GetTxID(nonce)] = receipt ob.outTXConfirmedTransactions[ob.GetTxID(nonce)] = transaction - ob.Mu.Unlock() } -// getTxNReceipt gets the receipt and transaction from memory +// GetTxNReceipt gets the receipt and transaction from memory func (ob *EVMChainClient) GetTxNReceipt(nonce uint64) (*ethtypes.Receipt, *ethtypes.Transaction) { ob.Mu.Lock() + defer ob.Mu.Unlock() receipt := ob.outTXConfirmedReceipts[ob.GetTxID(nonce)] transaction := ob.outTXConfirmedTransactions[ob.GetTxID(nonce)] - ob.Mu.Unlock() return receipt, transaction } // isTxConfirmed returns true if there is a confirmed tx for 'nonce' func (ob *EVMChainClient) isTxConfirmed(nonce uint64) bool { ob.Mu.Lock() - confirmed := ob.outTXConfirmedReceipts[ob.GetTxID(nonce)] != nil && ob.outTXConfirmedTransactions[ob.GetTxID(nonce)] != nil - ob.Mu.Unlock() - return confirmed + defer ob.Mu.Unlock() + return ob.outTXConfirmedReceipts[ob.GetTxID(nonce)] != nil && ob.outTXConfirmedTransactions[ob.GetTxID(nonce)] != nil } -// confirmTxByHash checks if a txHash is confirmed and saves transaction and receipt in memory -// returns true if confirmed or false otherwise -func (ob *EVMChainClient) confirmTxByHash(txHash string, nonce uint64) bool { +// checkConfirmedTx checks if a txHash is confirmed +// returns (receipt, transaction, true) if confirmed or (nil, nil, false) otherwise +func (ob *EVMChainClient) checkConfirmedTx(txHash string, nonce uint64) (*ethtypes.Receipt, *ethtypes.Transaction, bool) { ctxt, cancel := context.WithTimeout(context.Background(), 3*time.Second) defer cancel() @@ -659,15 +659,34 @@ func (ob *EVMChainClient) confirmTxByHash(txHash string, nonce uint64) bool { transaction, isPending, err := ob.evmClient.TransactionByHash(ctxt, ethcommon.HexToHash(txHash)) if err != nil { log.Error().Err(err).Msgf("confirmTxByHash: TransactionByHash error, txHash %s nonce %d", txHash, nonce) - return false + return nil, nil, false } if transaction == nil { // should not happen log.Error().Msgf("confirmTxByHash: transaction is nil for txHash %s nonce %d", txHash, nonce) - return false + return nil, nil, false + } + + // check tx sender and nonce + signer := ethtypes.NewLondonSigner(big.NewInt(ob.chain.ChainId)) + from, err := signer.Sender(transaction) + if err != nil { + log.Error().Err(err).Msgf("confirmTxByHash: local recovery of sender address failed for txHash %s chain %d", transaction.Hash().Hex(), ob.chain.ChainId) + return nil, nil, false + } + if from != ob.Tss.EVMAddress() { // must be TSS address + log.Error().Msgf("confirmTxByHash: sender %s for txHash %s chain %d is not TSS address %s", + from.Hex(), transaction.Hash().Hex(), ob.chain.ChainId, ob.Tss.EVMAddress().Hex()) + return nil, nil, false } - if isPending { // save pending transaction + if transaction.Nonce() != nonce { // must match cctx nonce + log.Error().Msgf("confirmTxByHash: txHash %s nonce mismatch: wanted %d, got tx nonce %d", txHash, nonce, transaction.Nonce()) + return nil, nil, false + } + + // save pending transaction + if isPending { ob.SetPendingTx(nonce, transaction) - return false + return nil, nil, false } // query receipt @@ -676,33 +695,26 @@ func (ob *EVMChainClient) confirmTxByHash(txHash string, nonce uint64) bool { if err != ethereum.NotFound { log.Warn().Err(err).Msgf("confirmTxByHash: TransactionReceipt error, txHash %s nonce %d", txHash, nonce) } - return false + return nil, nil, false } if receipt == nil { // should not happen log.Error().Msgf("confirmTxByHash: receipt is nil for txHash %s nonce %d", txHash, nonce) - return false + return nil, nil, false } - // check nonce and confirmations - if transaction.Nonce() != nonce { - log.Error().Msgf("confirmTxByHash: txHash %s nonce mismatch: wanted %d, got tx nonce %d", txHash, nonce, transaction.Nonce()) - return false - } + // check confirmations confHeight := receipt.BlockNumber.Uint64() + ob.GetChainParams().ConfirmationCount if confHeight >= math.MaxInt64 { log.Error().Msgf("confirmTxByHash: confHeight is too large for txHash %s nonce %d", txHash, nonce) - return false + return nil, nil, false } if confHeight > ob.GetLastBlockHeight() { log.Info().Msgf("confirmTxByHash: txHash %s nonce %d included but not confirmed: receipt block %d, current block %d", txHash, nonce, receipt.BlockNumber, ob.GetLastBlockHeight()) - return false + return nil, nil, false } - // confirmed, save receipt and transaction - ob.SetTxNReceipt(nonce, receipt, transaction) - - return true + return receipt, transaction, true } // SetLastBlockHeightScanned set last block height scanned (not necessarily caught up with external block; could be slow/paused)