feat: parse inscription like witness data (#2524)

* parse inscription like witness data * more comment * remove unused code * Update zetaclient/chains/bitcoin/tx_script.go Co-authored-by: Dmitry S <[email protected]> * Update zetaclient/chains/bitcoin/observer/inbound.go Co-authored-by: Dmitry S <[email protected]> * Update zetaclient/chains/bitcoin/tx_script.go Co-authored-by: Dmitry S <[email protected]> * Update zetaclient/chains/bitcoin/tx_script.go Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * pull origin * Update zetaclient/chains/bitcoin/observer/inbound.go Co-authored-by: Dmitry S <[email protected]> * review feedbacks * update review feedbacks * update make generate * fix linter * remove over flow * Update zetaclient/chains/bitcoin/observer/inbound.go Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]> * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]> * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]> * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]> * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]> * update review feedback * update code commnet * update comment * more comments * Update changelog.md --------- Co-authored-by: Dmitry S <[email protected]> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]>
zeta-chain · Jul 30, 2024 · 6e9dc53 · 6e9dc53
1 parent 5771df3
commit 6e9dc53
Show file tree

Hide file tree

Showing 5 changed files with 334 additions and 1 deletion.
diff --git a/changelog.md b/changelog.md
@@ -42,6 +42,7 @@
 * [2518](https://github.com/zeta-chain/node/pull/2518) - add support for Solana address in zetacore
 * [2483](https://github.com/zeta-chain/node/pull/2483) - add priorityFee (gasTipCap) gas to the state
 * [2567](https://github.com/zeta-chain/node/pull/2567) - add sign latency metric to zetaclient (zetaclient_sign_latency)
+* [2524](https://github.com/zeta-chain/node/pull/2524) - add inscription envelope parsing
 
 ### Refactor
 
@@ -588,4 +589,4 @@ Getting the correct TSS address for Bitcoin now requires proviidng the Bitcoin c
 ### CI
 
 * [1218](https://github.com/zeta-chain/node/pull/1218) - cross-compile release binaries and simplify PR testings
-* [1302](https://github.com/zeta-chain/node/pull/1302) - add mainnet builds to goreleaser
+* [1302](https://github.com/zeta-chain/node/pull/1302) - add mainnet builds to goreleaser
diff --git a/zetaclient/chains/bitcoin/observer/inbound.go b/zetaclient/chains/bitcoin/observer/inbound.go
@@ -477,3 +477,39 @@ func GetBtcEvent(
 	}
 	return nil, nil
 }
+
+// GetBtcEventWithWitness returns a BTCInboundEvent for the given transaction, or nil
+// when the transaction carries no event.
+//
+// It is the entry point meant to support memos longer than 80 bytes by looking for a
+// tapscript in the witness. OP_RETURN data always wins: the transaction is first handed
+// to GetBtcEvent and its result is returned whenever it yields an event.
+// The witness/tapscript path is not wired in yet (see the TODO below), so for now a
+// transaction without an OP_RETURN event produces (nil, nil).
+//
+// Any error reported by GetBtcEvent is wrapped and returned.
+func GetBtcEventWithWitness(
+	client interfaces.BTCRPCClient,
+	tx btcjson.TxRawResult,
+	tssAddress string,
+	blockNumber uint64,
+	logger zerolog.Logger,
+	netParams *chaincfg.Params,
+	depositorFee float64,
+) (*BTCInboundEvent, error) {
+	// OP_RETURN has priority over anything found in the witness
+	event, err := GetBtcEvent(client, tx, tssAddress, blockNumber, logger, netParams, depositorFee)
+	switch {
+	case err != nil:
+		return nil, errors.Wrap(err, "unable to get btc event")
+	case event != nil:
+		return event, nil
+	}
+
+	// TODO: integrate parsing script
+
+	return nil, nil
+}
diff --git a/zetaclient/chains/bitcoin/tokenizer.go b/zetaclient/chains/bitcoin/tokenizer.go
@@ -0,0 +1,162 @@
+package bitcoin
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/btcsuite/btcd/txscript"
+)
+
+func newScriptTokenizer(script []byte) scriptTokenizer {
+	return scriptTokenizer{
+		script: script,
+		offset: 0,
+	}
+}
+
+// scriptTokenizer is a simplified stand-in for txscript.ScriptTokenizer, which the
+// btcsuite version currently in use does not appear to provide. It is modelled on
+// that API (Next/Done/Opcode/Data/Err) but only understands the opcodes needed for
+// the inscription-like envelope; any other opcode is reported as a parse error.
+// Consider upgrading txscript and removing this implementation.
+type scriptTokenizer struct {
+	// script is the raw script being tokenized
+	script []byte
+	// offset is the index in script of the next opcode to parse
+	offset int
+	// op is the most recently successfully parsed opcode
+	op     byte
+	// data is the data pushed by the most recently parsed opcode, nil if it pushed none
+	data   []byte
+	// err is the parse failure, if any; once set the tokenizer is done
+	err    error
+}
+
+// Done reports whether tokenizing has finished, either because a parse failure
+// has been recorded or because every byte of the script has been consumed.
+func (t *scriptTokenizer) Done() bool {
+	if t.err != nil {
+		return true
+	}
+	return t.offset >= len(t.script)
+}
+
+// Data returns the data associated with the most recently successfully parsed
+// opcode. It is nil for opcodes that push no data. The returned slice is a
+// sub-slice of the script being tokenized, not a copy.
+func (t *scriptTokenizer) Data() []byte {
+	return t.data
+}
+
+// Err returns the error currently associated with the tokenizer. It is only
+// non-nil when a parsing error was encountered by Next; once set, Done reports
+// true and Next keeps returning false.
+func (t *scriptTokenizer) Err() error {
+	return t.err
+}
+
+// Opcode returns the most recently successfully parsed opcode. It is 0 until
+// Next has succeeded at least once and is left unchanged by a failed Next.
+func (t *scriptTokenizer) Opcode() byte {
+	return t.op
+}
+
+// Next advances the tokenizer by one opcode and reports whether it succeeded.
+//
+// On success the opcode and any pushed data are available through Opcode and
+// Data, and the offset points at the following opcode (or at the end of the
+// script when the final opcode was just parsed).
+//
+// On failure the previously parsed opcode and data are kept and the offset is
+// not moved. A parse failure is recorded and can be retrieved with Err.
+//
+// Calling Next when already at the end of the script, or after an earlier
+// parse failure, is not an error in itself and simply returns false.
+//
+// Only the opcodes used by the inscription-like envelope are accepted:
+// OP_FALSE, OP_IF, OP_ENDIF, OP_CHECKSIG, OP_DATA_1..OP_DATA_75 and
+// OP_PUSHDATA{1,2,4}. Every other opcode is a parse failure.
+func (t *scriptTokenizer) Next() bool {
+	if t.Done() {
+		return false
+	}
+
+	opcode := t.script[t.offset]
+	// Everything that follows the opcode byte itself.
+	rest := t.script[t.offset+1:]
+
+	switch {
+	// Opcodes that carry no data.
+	case opcode == txscript.OP_FALSE,
+		opcode == txscript.OP_IF,
+		opcode == txscript.OP_ENDIF,
+		opcode == txscript.OP_CHECKSIG:
+		t.offset++
+		t.op, t.data = opcode, nil
+		return true
+
+	// OP_DATA_[1-75]: the opcode value is the number of bytes pushed,
+	// because txscript.OP_DATA_1 is 1.
+	case opcode >= txscript.OP_DATA_1 && opcode <= txscript.OP_DATA_75:
+		size := int(opcode)
+		if len(rest) < size {
+			// The reported remainder counts the opcode byte as well.
+			t.err = fmt.Errorf("opcode %d detected, but script only %d bytes remaining", opcode, len(rest)+1)
+			return false
+		}
+
+		// One byte for the opcode plus the pushed data.
+		t.offset += 1 + size
+		t.op, t.data = opcode, rest[:size]
+		return true
+	}
+
+	// What is left must be OP_PUSHDATA{1,2,4}; width is the size in bytes of
+	// the little endian length prefix that follows the opcode.
+	var width int
+	switch opcode {
+	case txscript.OP_PUSHDATA1:
+		width = 1
+	case txscript.OP_PUSHDATA2:
+		width = 2
+	case txscript.OP_PUSHDATA4:
+		width = 4
+	default:
+		t.err = fmt.Errorf("unexpected op code %d", opcode)
+		return false
+	}
+
+	if len(rest) < width {
+		t.err = fmt.Errorf("opcode %d requires %d bytes, only %d remaining", opcode, width, len(rest))
+		return false
+	}
+
+	// Decode the little endian length prefix.
+	var size int
+	switch width {
+	case 1:
+		size = int(rest[0])
+	case 2:
+		size = int(binary.LittleEndian.Uint16(rest[:width]))
+	default:
+		size = int(binary.LittleEndian.Uint32(rest[:width]))
+	}
+
+	// The pushed data starts right after the length prefix.
+	payload := rest[width:]
+
+	// Reject pushes that do not fit in the script or whose length went negative
+	// when converted to int.
+	if size < 0 || size > len(payload) {
+		t.err = fmt.Errorf("opcode %d pushes %d bytes, only %d remaining", opcode, size, len(payload))
+		return false
+	}
+
+	// Skip the opcode byte (always 1 byte, unrelated to the opcode value),
+	// the length prefix and the data.
+	t.offset += 1 + width + size
+	t.op, t.data = opcode, payload[:size]
+	return true
+}
diff --git a/zetaclient/chains/bitcoin/tx_script.go b/zetaclient/chains/bitcoin/tx_script.go
@@ -192,6 +192,36 @@ func DecodeOpReturnMemo(scriptHex string, txid string) ([]byte, bool, error) {
 	return nil, false, nil
 }
 
+// DecodeScript extracts the memo wrapped in an inscription-like script found in the witness.
+// It returns (memo, found, error); found is true whenever the error is nil.
+//
+// Note: the script layout follows the "inscription" envelope defined in ordinal theory,
+// simplified because this use case is not an NFT. The expected bitcoin envelope script is:
+// OP_DATA_32 <32 byte of public key> OP_CHECKSIG
+// OP_FALSE
+// OP_IF
+//
+//	OP_PUSH 0x...
+//	OP_PUSH 0x...
+//
+// OP_ENDIF
+// There is no content-type or any other attribute; the pushes are raw bytes that are
+// concatenated, in order, to form the memo.
+func DecodeScript(script []byte) ([]byte, bool, error) {
+	tokenizer := newScriptTokenizer(script)
+
+	// header: OP_DATA_32 <public key> OP_CHECKSIG
+	err := checkInscriptionEnvelope(&tokenizer)
+	if err != nil {
+		return nil, false, errors.Wrap(err, "checkInscriptionEnvelope: unable to check the envelope")
+	}
+
+	// body: OP_FALSE OP_IF <pushes> OP_ENDIF
+	memo, err := decodeInscriptionPayload(&tokenizer)
+	if err != nil {
+		return nil, false, errors.Wrap(err, "decodeInscriptionPayload: unable to decode the payload")
+	}
+
+	return memo, true, nil
+}
+
 // EncodeAddress returns a human-readable payment address given a ripemd160 hash
 // and netID which encodes the bitcoin network and address type.  It is used
 // in both pay-to-pubkey-hash (P2PKH) and pay-to-script-hash (P2SH) address
@@ -245,3 +275,44 @@ func DecodeTSSVout(vout btcjson.Vout, receiverExpected string, chain chains.Chai
 
 	return receiverVout, amount, nil
 }
+
+func decodeInscriptionPayload(t *scriptTokenizer) ([]byte, error) {
+	if !t.Next() || t.Opcode() != txscript.OP_FALSE {
+		return nil, fmt.Errorf("OP_FALSE not found")
+	}
+
+	if !t.Next() || t.Opcode() != txscript.OP_IF {
+		return nil, fmt.Errorf("OP_IF not found")
+	}
+
+	memo := make([]byte, 0)
+	var next byte
+	for t.Next() {
+		next = t.Opcode()
+		if next == txscript.OP_ENDIF {
+			return memo, nil
+		}
+		if next < txscript.OP_DATA_1 || next > txscript.OP_PUSHDATA4 {
+			return nil, fmt.Errorf("expecting data push, found %d", next)
+		}
+		memo = append(memo, t.Data()...)
+	}
+	if t.Err() != nil {
+		return nil, t.Err()
+	}
+	return nil, fmt.Errorf("should contain more data, but script ended")
+}
+
+// checkInscriptionEnvelope consumes and validates the header of the envelope from t.
+// The expected format is
+// OP_PUSHBYTES_32 <32 bytes> OP_CHECKSIG <Content>
+// and on success t is left positioned at the start of <Content>.
+//
+// The returned error reports the last opcode the tokenizer parsed together with the
+// tokenizer error. The tokenizer error is nil when an opcode was parsed successfully
+// but is not the expected one, hence the %v verb: %s would render a nil error as
+// "%!s(<nil>)".
+func checkInscriptionEnvelope(t *scriptTokenizer) error {
+	if !t.Next() || t.Opcode() != txscript.OP_DATA_32 {
+		return fmt.Errorf("cannot obtain public key bytes op %d or err %v", t.Opcode(), t.Err())
+	}
+
+	if !t.Next() || t.Opcode() != txscript.OP_CHECKSIG {
+		return fmt.Errorf("cannot parse OP_CHECKSIG, op %d or err %v", t.Opcode(), t.Err())
+	}
+
+	return nil
+}
diff --git a/zetaclient/chains/bitcoin/tx_script_test.go b/zetaclient/chains/bitcoin/tx_script_test.go
@@ -491,3 +491,66 @@ func TestDecodeTSSVoutErrors(t *testing.T) {
 		require.Zero(t, amount)
 	})
 }
+
+func TestDecodeScript(t *testing.T) {
+	t.Run("should decode longer data ok", func(t *testing.T) {
+		// 600 bytes of random data generated offline
+		data := "2001a7bae79bd61c2368fe41a565061d6cf22b4f509fbc1652caea06d98b8fd0c7ac00634d0802c7faa771dd05f27993d22c42988758882d20080241074462884c8774e1cdf4b04e5b3b74b6568bd1769722708306c66270b6b2a7f68baced83627eeeb2d494e8a1749277b92a4c5a90b1b4f6038e5f704405515109d4d0021612ad298b8dad6e12245f8f0020e11a7a319652ba6abe261958201ce5e83131cd81302c0ecec60d4afa9f72540fc84b6b9c1f3d903ab25686df263b192a403a4aa22b799ba24369c49ff4042012589a07d4211e05f80f18a1262de5a1577ce0ec9e1fa9283cfa25d98d7d0b4217951dfcb8868570318c63f1e1424cfdb7d7a33c6b9e3ced4b2ffa0178b3a5fac8bace2991e382a402f56a2c6a9191463740910056483e4fd0f5ac729ffac66bf1b3ec4570c4e75c116f7d9fd65718ec3ed6c7647bf335b77e7d6a4e2011276dc8031b78403a1ad82c92fb339ec916c263b6dd0f003ba4381ad5410e90e88effbfa7f961b8e8a6011c525643a434f7abe2c1928a892cc57d6291831216c4e70cb80a39a79a3889211070e767c23db396af9b4c2093c3743d8cbcbfcb73d29361ecd3857e94ab3c800be1299fd36a5685ec60607a60d8c2e0f99ff0b8b9e86354d39a43041f7d552e95fe2d33b6fc0f540715da0e7e1b344c778afe73f82d00881352207b719f67dcb00b4ff645974d4fd7711363d26400e2852890cb6ea9cbfe63ac43080870049b1023be984331560c6350bb64da52b4b81bc8910934915f0a96701f4c50646d5386146596443bee9b2d116706e1687697fb42542196c1d764419c23a914896f9212946518ac59e1ba5d1fc37e503313133ebdf2ced5785e0eaa9738fe3f9ad73646e733931ebb7cff26e96106fe68"
+		script, err := hex.DecodeString(data)
+		require.NoError(t, err)
+
+		memo, isFound, err := DecodeScript(script)
+		require.NoError(t, err)
+		require.True(t, isFound)
+
+		// the expected memo: the data of both pushes, concatenated
+		expected := "c7faa771dd05f27993d22c42988758882d20080241074462884c8774e1cdf4b04e5b3b74b6568bd1769722708306c66270b6b2a7f68baced83627eeeb2d494e8a1749277b92a4c5a90b1b4f6038e5f704405515109d4d0021612ad298b8dad6e12245f8f0020e11a7a319652ba6abe261958201ce5e83131cd81302c0ecec60d4afa9f72540fc84b6b9c1f3d903ab25686df263b192a403a4aa22b799ba24369c49ff4042012589a07d4211e05f80f18a1262de5a1577ce0ec9e1fa9283cfa25d98d7d0b4217951dfcb8868570318c63f1e1424cfdb7d7a33c6b9e3ced4b2ffa0178b3a5fac8bace2991e382a402f56a2c6a9191463740910056483e4fd0f5ac729ffac66bf1b3ec4570c4e75c116f7d9fd65718ec3ed6c7647bf335b77e7d6a4e2011276dc8031b78403a1ad82c92fb339ec916c263b6dd0f003ba4381ad5410e90e88effbfa7f961b8e8a6011c525643a434f7abe2c1928a892cc57d6291831216c4e70cb80a39a79a3889211070e767c23db396af9b4c2093c3743d8cbcbfcb73d29361ecd3857e94ab3c800be1299fd36a5685ec60607a60d8c2e0f99ff0b8b9e86354d39a43041f7d552e95fe2d33b6fc0f540715da0e7e1b344c778afe73f82d00881352207b719f67dcb00b4ff645974d4fd7711363d26400e2852890cb6ea9cbfe63ac43080870049b1023be984331560c6350bb64da52b4b81bc8910934915f0a96701f646d5386146596443bee9b2d116706e1687697fb42542196c1d764419c23a914896f9212946518ac59e1ba5d1fc37e503313133ebdf2ced5785e0eaa9738fe3f9ad73646e733931ebb7cff26e96106fe"
+		// require.Equal takes (expected, actual); the order matters for failure output
+		require.Equal(t, expected, hex.EncodeToString(memo))
+	})
+
+	t.Run("should decode shorter data ok", func(t *testing.T) {
+		// 81 bytes of random data generated offline
+		data := "20d6f59371037bf30115d9fd6016f0e3ef552cdfc0367ee20aa9df3158f74aaeb4ac00634c51bdd33073d76f6b4ae6510d69218100575eafabadd16e5faf9f42bd2fbbae402078bdcaa4c0413ce96d053e3c0bbd4d5944d6857107d640c248bdaaa7de959d9c1e6b9962b51428e5a554c28c397160881668"
+		script, err := hex.DecodeString(data)
+		require.NoError(t, err)
+
+		memo, isFound, err := DecodeScript(script)
+		require.NoError(t, err)
+		require.True(t, isFound)
+
+		// the expected memo
+		expected := "bdd33073d76f6b4ae6510d69218100575eafabadd16e5faf9f42bd2fbbae402078bdcaa4c0413ce96d053e3c0bbd4d5944d6857107d640c248bdaaa7de959d9c1e6b9962b51428e5a554c28c3971608816"
+		// require.Equal takes (expected, actual); the order matters for failure output
+		require.Equal(t, expected, hex.EncodeToString(memo))
+	})
+
+	t.Run("decode error due to missing data byte", func(t *testing.T) {
+		// missing OP_ENDIF at the end
+		data := "20cabd6ecc0245c40f27ca6299dcd3732287c317f3946734f04e27568fc5334218ac00634d0802000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004c500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000068"
+		script, _ := hex.DecodeString(data)
+
+		memo, isFound, err := DecodeScript(script)
+		require.ErrorContains(t, err, "should contain more data, but script ended")
+		require.False(t, isFound)
+		require.Nil(t, memo)
+	})
+
+	t.Run("decode error due to missing data for public key", func(t *testing.T) {
+		// the public key push is truncated: OP_DATA_32 (0x20) is followed by fewer than 32 bytes
+		data := "2001a7bae79bd61c2368fe41a565061d6cf22b4f509fbc1652caea06d98b8fd0"
+		script, err := hex.DecodeString(data)
+		require.NoError(t, err)
+
+		memo, isFound, err := DecodeScript(script)
+		require.ErrorContains(t, err, "cannot obtain public key bytes")
+		require.False(t, isFound)
+		require.Nil(t, memo)
+	})
+
+	t.Run("decode error due to missing OP_CHECKSIG", func(t *testing.T) {
+		// the 32-byte public key is followed by 0xab instead of OP_CHECKSIG (0xac)
+		data := "2001a7bae79bd61c2368fe41a565061d6cf22b4f509fbc1652caea06d98b8fd0c7ab"
+		script, err := hex.DecodeString(data)
+		require.NoError(t, err)
+
+		memo, isFound, err := DecodeScript(script)
+		require.ErrorContains(t, err, "cannot parse OP_CHECKSIG")
+		require.False(t, isFound)
+		require.Nil(t, memo)
+	})
+}