diff --git a/README.md b/README.md index 3ea5345..adf94d4 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,207 @@ # abi-functions -Utility functions for using an ABI to decode EVM calls and events. +This repo contains BigQuery UDFs to decode EVM calls and events **without knowing an ABI** using 2 key functions: -## Examples +* `abi_functions.PARSE_CALL(input)` +* `abi_functions.PARSE_EVENT(data, topics)` -### Example 1 +To support ABI-less decoding, each of the above functions uses a large +1MB library containing 2 compressed maps, one covering known call +methods and one covering known event topics. Using the call 4byte methodID or the +event's topic[0], the most likely ABI is synthesized from the map and +decoding is attempted. For events, given the number of topics +observed and the ABI guess, any mismatch results in combinatorial +search on all possible indexed combinations, with the first success +considered valid. + + +### How It Works: +* Big Picture: the approach taken in `PARSE_CALL` and `PARSE_EVENT` is to build a 1MB library of 2 _compressed_ maps: `call_map` and `event_map` +* A `signatures` table is compiled by aggregating ABI signatures with 2.8MM+ records from multiple open-source repos. However most of these hex signatures have never been observed on chain. +* So, we tally _actual_ observations (`numObservations` from `logs` and `transactions`) and presence in ABI contracts (`numContracts` from `contracts`) +in `crypto_ethereum.{` EVM Chains to build a reduced dataset +* The current size of this reduced dataset with known ABIs is: + +``` ++-----------------------+--------------+----------+ +| length(hex_signature) | abiAvailable | count(*) | ++-----------------------+--------------+----------+ +| 10 | 20147 | 63600 | +| 66 | 11008 | 11008 | ++-----------------------+--------------+----------+ +``` + +* To maximize decoding rates, we actually only include records with 2 or more observations in 2023 to fit in a single 1MB map. 
Potentially the maps could be split into 2 files and increase coverage a bit further. + +### Plan: + +This repo is a work in progress + +* Measure decoding rates and time to decode a day. + +* Rebuild underlying map with GitHub Actions on a weekly basis. We will update the `abi_functions` dataset weekly to support new call methods and new topics. + +* Support EVM chain-specific libraries and EVM ecosystem wide libraries. + +## Key Decoding User-defined Functions, with Examples + +### [PARSE_CALL](sql/PARSE_CALL.sql) + +This function parses `transaction.input` *without* an ABI to produce key/value pairs decoding the input: + +Usage: (single record) +```sql +select `substrate-etl.abi_functions.PARSE_CALL`("0xe343fe12000000000000000000000000bd402a0cf18148389c6f10b6e67aea915c3960ec000000000000000000000000c02aaa39b223fe8d0a0e5c4f27ead9083c756cc20000000000000000000000000258f474786ddfd37abce6df6bbb1dd5dfc4434a0000000000000000000000000000000000000000000000000ed8db3e1827446c00000000000000000000000000000000000000000000000000000032bfdfa8d0") as call_args +``` +returns + +``` +[{ + "call_args": [{ + "name": "fromToken", + "value": "0xbd402A0cf18148389c6f10b6e67aea915C3960ec" + }, { + "name": "toToken", + "value": "0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2" + }, { + "name": "recipient", + "value": "0x0258F474786DdFd37ABCE6df6BBb1Dd5dfC4434a" + }, { + "name": "shareToMin", + "value": "1069845971240174700" + }, { + "name": "shareFrom", + "value": "217967470800" + }] +}] +``` + +This can be used on _arbitrary_ inputs in bulk processing with _varying_ calls / ABIs: + +Usage: (bulk processing) +``` +SELECT + `hash`, + LEFT(input, 10) AS methodID, + input, + `substrate-etl.abi_functions.PARSE_CALL`(input) +FROM + `bigquery-public-data.crypto_ethereum.transactions` AS transactions +WHERE + DATE(block_timestamp) = "2023-06-01" and length(input) >= 10 + limit 2000; +``` + +This can be used on _arbitrary_ inputs in bulk call processing with the same ABI as well: + +Usage: (bulk 
processing) +```sql +SELECT `substrate-etl.abi_functions`.PARSE_CALL(input) +FROM + `bigquery-public-data.crypto_ethereum.transactions` AS tx +WHERE tx.to_address = '0x084b1c3c81545d370f3634392de611caabff8148' + --filter to method `claimWithResolver(address,address)` + AND tx.input LIKE '0x0f5a5466%' + AND tx.receipt_status = 1 +``` + +### [PARSE_EVENT](sql/PARSE_EVENT.sql) + +This function parses `logs.topics` and `logs.data` **without an ABI** to produce key/value pairs. + +* `PARSE_EVENT(data, topics)` + +Usage: (single record) +``` +select `substrate-etl.abi_functions.PARSE_EVENT`("0x00000000000000000000000000000000000000000000000000036cc84b90729200000000000000000000000000000000000000000000000000000000000000400000000000000000000000000000000000000000000000000000000000000002000000000000000000000000c02aaa39b223fe8d0a0e5c4f27ead9083c756cc2000000000000000000000000323012d0a49fbd86dd1c4d363c7ea63244852e21", ["0x6fd378a9d8b7345c2e5b18229aaf1e39d32b177b501d0a0d26a0a858a23a9624"]) as events +``` + +returns: +```json +[{ + "events": [{ + "name": "from", + "value": "0x40CB4DA705a044016e66dB2E30AdE93EbFe4abD4" + }, { + "name": "to", + "value": "0x9A44630bD49001645291f2A08F4F07eB04bC184a" + }, { + "name": "value", + "value": "50000000000000000000" + }] +}] +``` + +This can be used for bulk processing events with _varying_ ABIs: + +Usage: (bulk processing of 500 records of a day) +``` +SELECT + transaction_hash, + DATA, + topics, + `substrate-etl.abi_functions.PARSE_EVENT`(data, topics) AS events +FROM + `bigquery-public-data.crypto_ethereum.logs` AS logs +WHERE + DATE(block_timestamp) = "2023-06-01" +LIMIT + 500; +``` + +Usage: (bulk processing of `PunkBidEntered` events from calls to the `CryptoPunksMarket` contract: + +```sql +SELECT CryptoPunksMarket_evt_PunkBidEntered(`etherscan`.PARSE_EVENT(lg.data,lg.topics)).* +FROM + `bigquery-public-data.crypto_ethereum.transactions` AS tx + ,`bigquery-public-data.crypto_ethereum.logs` AS lg +WHERE TRUE + AND tx.to_address = 
'0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb' + AND lg.address = tx.to_address + AND ARRAY_LENGTH(lg.topics) > 0 + --this is the keccak256 hash of `PunkBidEntered(uint256,uint256,address)` from the ABI + AND lg.topics[OFFSET(0)] = '0x5b859394fabae0c1ba88baffe67e751ab5248d2e879028b8c8d6897b0519f56a' + AND tx.hash = lg.transaction_hash + AND tx.receipt_status = 1 +``` + +### [GENERATE_ABI_FUNCTIONS](sql/GENERATE_ABI_FUNCTIONS.sql) + +Calling this function with a name prefix and an ABI will return two arrays of `CREATE FUNCTION` statements that can be used with a contract that implements the ABI. +- an array for processing method calls (parses `transactions.input`) +- an array for processing log events (parses `logs.topics`) + +Consider the [ABI](abi/CryptoPunksMarket.abi) for the `CryptoPunksMarket` contract at [`0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb`](https://etherscan.io/address/0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb). + +We can invoke the code generator as: + +```sql +SELECT `abi_functions`.GENERATE_ABI_FUNCTIONS("CryptoPunksMarket",ABI) +``` + +This produces a number of statements, for example this one for processing calls to the contract's `name()` function: + +```sql +CREATE TEMP FUNCTION CryptoPunksMarket_call_name(kv ARRAY>) RETURNS STRUCT<> LANGUAGE js AS """ +want=new Set([]);res=[];kv.forEach(function(el){if(want.has(el.arg)){res[el.arg]=el.val}});return res; +""" +OPTIONS(library="gs://blockchain-etl-bigquery/ethers.js"); +``` + +and this one for processing `Assign` event logs + +```sql +CREATE TEMP FUNCTION CryptoPunksMarket_evt_Assign(kv ARRAY>) RETURNS STRUCT LANGUAGE js AS """ +want=new Set(['to','punkIndex']);res=[];kv.forEach(function(el){if(want.has(el.arg)){res[el.arg]=el.val}});return res; +""" +OPTIONS(library="gs://blockchain-etl-bigquery/ethers.js"); +``` + +Note that each of these requires an array of key/value pairs, `kv ARRAY>`. 
+ + +#### Example 1 Assuming you have the [ABI](https://github.com/blockchain-etl/abi-functions/blob/main/abi/CryptoPunksMarket.abi) in a table mapping contract to address, this will return a table of all bids to buy CryptoPunks: @@ -42,7 +239,7 @@ Generating a result like: | 728 | 5000000000000 | 0x664e23e4a17a4c7da26c706be5a861c0f7ff569d | | 2207 | 48500000000000000000 | 0x1919db36ca2fa2e15f9000fd9cdc2edcf863e685 | -### Example 2 +#### Example 2 ```sql CREATE TEMP FUNCTION parse_ReverseRegistrar_call_claimWithResolver(kv ARRAY>) RETURNS STRUCT LANGUAGE js AS """want=new Set(['owner','resolver']);res=[];kv.forEach(function(el){if(want.has(el.arg)){res[el.arg]=el.val}});return res;""" OPTIONS(library="gs://blockchain-etl-bigquery/ethers.js"); @@ -70,72 +267,3 @@ Generating a result like: | owner | `0xe11d762cc7b0448ed8c565734b742ce39bbb38a6` | | resolver | `0x0465719485db64e24d73d1619e03950830e4a5b3` | -## Functions in this repo - -### [GENERATE_ABI_FUNCTIONS](sql/GENERATE_ABI_FUNCTIONS.sql) - -Calling this function with a name prefix and an ABI will return two arrays of `CREATE FUNCTION` statements that can be used with a contract that implements the ABI. -- an array for processing method calls (parses `transactions.input`) -- an array for processing log events (parses `logs.topics`) - -Consider the [ABI](abi/CryptoPunksMarket.abi) for the `CryptoPunksMarket` contract at [`0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb`](https://etherscan.io/address/0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb). 
- -We can invoke the code generator as: - -```sql -SELECT `abi_functions`.GENERATE_ABI_FUNCTIONS("CryptoPunksMarket",ABI) -``` - -This produces a number of statments, for example this one for processing calls to the contract's `name()` function: - -```sql -CREATE TEMP FUNCTION CryptoPunksMarket_call_name(kv ARRAY>) RETURNS STRUCT<> LANGUAGE js AS """ -want=new Set([]);res=[];kv.forEach(function(el){if(want.has(el.arg)){res[el.arg]=el.val}});return res; -""" -OPTIONS(library="gs://blockchain-etl-bigquery/ethers.js"); -``` - -and this one for processing `Assign` event logs - -```sql -CREATE TEMP FUNCTION CryptoPunksMarket_evt_Assign(kv ARRAY>) RETURNS STRUCT LANGUAGE js AS """ -want=new Set(['to','punkIndex']);res=[];kv.forEach(function(el){if(want.has(el.arg)){res[el.arg]=el.val}});return res; -""" -OPTIONS(library="gs://blockchain-etl-bigquery/ethers.js"); -``` - -Note that each of these requires an array of key/value pairs, `kv ARRAY>`. - -### [PARSE_CALL](sql/PARSE_CALL.sql) - -This function parses `transaction.input` in the context of an ABI to produce key/value pairs. Here's an example using the ENS ReverseRegistrar contract: - -```sql -SELECT `abi_functions`.PARSE_CALL(ABI,input) -FROM - `bigquery-public-data.crypto_ethereum.transactions` AS tx -WHERE tx.to_address = '0x084b1c3c81545d370f3634392de611caabff8148' - --filter to method `claimWithResolver(address,address)` - AND tx.input LIKE '0x0f5a5466%' - AND tx.receipt_status = 1 -``` - -### [PARSE_EVENT](sql/PARSE_EVENT.sql) - -This function parses `logs.topics` and `logs.data` in the context of an ABI to produce key/value pairs. 
Here's an example extracting `PunkBidEntered` events from calls to the CryptoPunksMarket contract: - -```sql -SELECT CryptoPunksMarket_evt_PunkBidEntered(`etherscan`.PARSE_EVENT(ABI,'PunkBidEntered',lg.topics,lg.data)).* -FROM - `bigquery-public-data.crypto_ethereum.transactions` AS tx - ,`bigquery-public-data.crypto_ethereum.logs` AS lg -WHERE TRUE - AND tx.to_address = '0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb' - AND lg.address = tx.to_address - AND ARRAY_LENGTH(lg.topics) > 0 - --this is the keccak256 hash of `PunkBidEntered(uint256,uint256,address)` from the ABI - AND lg.topics[OFFSET(0)] = '0x5b859394fabae0c1ba88baffe67e751ab5248d2e879028b8c8d6897b0519f56a' - AND tx.hash = lg.transaction_hash - AND tx.receipt_status = 1 -``` - diff --git a/sql/PARSE_CALL.sql b/sql/PARSE_CALL.sql index 9f0629e..aa9361c 100644 --- a/sql/PARSE_CALL.sql +++ b/sql/PARSE_CALL.sql @@ -1,35 +1,46 @@ -CREATE OR REPLACE FUNCTION `abi_functions`.PARSE_CALL(abi STRING, dat STRING) RETURNS ARRAY> LANGUAGE js AS """ - abi = JSON.parse(abi); - var interface_instance = new ethers.utils.Interface(abi); - txargs = []; - txtypes = []; - res = []; - - // try-catch - input may be malformed. 
-- PARSE_CALL(data): decodes a transaction `input` hex string without a caller-supplied ABI.
--
-- The first 10 characters of `data` ("0x" plus the 4-byte method ID) are looked up in
-- `cn_calls`. That map is not defined in this statement; presumably it is provided by the
-- decoderData library listed in OPTIONS (verify). A hit is a "|"-separated string:
--   pieces[0] = function name
--   pieces[1] = comma-separated input names
--   pieces[2] = comma-separated input types
-- from which a one-function ABI is synthesized and passed to ethers' parseTransaction.
--
-- Returns:
--   * one {name, value} row per decoded input, in ABI order;
--   * NULL when the method ID is not in the map or the entry has fewer than 3 pieces;
--   * a single {name: "err", value: <message>} row when anything in the decode throws
--     (including a NULL or malformed `data`).
--
-- NOTE(review): the type parameters of the RETURNS clause were missing from the text this
-- statement was recovered from. ARRAY<STRUCT<name STRING, value STRING>> is reconstructed
-- from the {name, value} objects returned below -- confirm before deploying.
CREATE OR REPLACE FUNCTION `substrate-etl.abi_functions.PARSE_CALL`(data STRING)
RETURNS ARRAY<STRUCT<name STRING, value STRING>>
LANGUAGE js AS
"""
// Returns `val` with every ethers BigNumber (detected via its `_isBigNumber` flag)
// replaced by its toString() form, recursing into nested arrays.
function convertBN(val) {
  if (Array.isArray(val)) {
    // Build a fresh array rather than assigning into `val`: arrays decoded by ethers
    // may be frozen, in which case in-place writes are dropped (or throw in strict mode)
    // and nested BigNumbers would be left unconverted.
    return Array.from(val, function (item) {
      return convertBN(item);
    });
  }
  // Guard against null/undefined before probing the BigNumber marker.
  if (val != null && val._isBigNumber) {
    return val.toString();
  }
  return val;
}

try {
  // Key is "0x" + 8 hex chars, i.e. the 4-byte method selector.
  const abiencoded = cn_calls[data.substring(0, 10)];
  if (abiencoded) {
    const pieces = abiencoded.split("|");
    if (pieces.length >= 3) {
      const functionName = pieces[0];
      // The map carries no mutability information, so "nonpayable" is used as a fixed
      // placeholder in the synthesized ABI fragment.
      const abi = {"name": functionName, "type": "function", "inputs": [], "outputs": [], "stateMutability": "nonpayable"};
      // An empty segment means "no inputs"; "".split(",") would otherwise yield [""].
      const flds = pieces[1].length > 0 ? pieces[1].split(",") : [];
      const types_unindexed = pieces[2].length > 0 ? pieces[2].split(",") : [];
      for (let idx = 0; idx < flds.length; idx++) {
        abi.inputs.push({name: flds[idx], type: types_unindexed[idx]});
      }
      const iface = new ethers.utils.Interface([abi]);
      const decodedData = iface.parseTransaction({ data });
      return abi.inputs.map(function (input, idx) {
        return {name: input.name, value: convertBN(decodedData.args[idx])};
      });
    }
  }
} catch (err) {
  // Report the failure as data instead of failing the whole query.
  return [{name: "err", value: `${err.toString()}`}];
}
// Unknown method ID, or a map entry with fewer than 3 pieces.
return null;
"""
OPTIONS(
  library = ["gs://cdn.polkaholic.io/pako.min.js", "gs://cdn.polkaholic.io/colorfulnotion-decoderData.js", "gs://blockchain-etl-bigquery/ethers-v5.js"]
);
-- PARSE_EVENT(data, topics): decodes an event log without a caller-supplied ABI.
--
-- The first 10 characters of topics[0] are looked up in `cn_events`. That map is not
-- defined in this statement; presumably it is provided by the decoderData library listed
-- in OPTIONS (verify). A hit is a "|"-separated string with exactly 4 pieces:
--   pieces[0] = event name
--   pieces[1] = comma-separated field names; a trailing "*" marks an indexed field
--   pieces[2] = comma-separated types of the non-indexed fields
--   pieces[3] = comma-separated types of the indexed fields
-- An event ABI is synthesized from it. If the number of topics observed does not match
-- the number of indexed fields in that guess, alternative indexed/non-indexed assignments
-- are generated and tried in order; the first one that decodes wins.
--
-- Returns:
--   * one {name, value} row per event input, plus a trailing {name: "tries"} row when the
--     first candidate was not the one that decoded;
--   * NULL when topics[0] is not in the map;
--   * a single {name: "err"} row when something throws outside the per-candidate decode,
--     or when no candidate decodes ("decode failure").
--
-- NOTE(review): the type parameters of `topics` and of the RETURNS clause were missing from
-- the text this statement was recovered from. ARRAY<STRING> and
-- ARRAY<STRUCT<name STRING, value STRING>> are reconstructed from how the body uses
-- `topics` and from the {name, value} objects it returns -- confirm before deploying.
CREATE OR REPLACE FUNCTION `substrate-etl.abi_functions.PARSE_EVENT`(data STRING, topics ARRAY<STRING>)
RETURNS ARRAY<STRUCT<name STRING, value STRING>>
LANGUAGE js AS
"""
// Returns every string of '0'/'1' characters of length N containing exactly k '1's.
// Each string is one way of choosing which k of N event inputs are indexed.
function generateBinaryNumbers(N, k) {
  const result = [];
  function generateCombinations(n, k, prefix = '') {
    if (k === 0) {
      // All ones are placed; the remaining leading positions are zeros.
      result.push(prefix.padStart(N, '0'));
      return;
    }
    if (n === 0) return; // positions exhausted with ones still to place: dead end
    generateCombinations(n - 1, k - 1, '1' + prefix);
    generateCombinations(n - 1, k, '0' + prefix);
  }
  generateCombinations(N, k);
  return result;
}

// Returns `val` with every ethers BigNumber (detected via its `_isBigNumber` flag)
// replaced by its toString() form, recursing into nested arrays.
function convertBN(val) {
  if (Array.isArray(val)) {
    // Build a fresh array rather than assigning into `val`: arrays decoded by ethers
    // may be frozen, in which case in-place writes are dropped (or throw in strict mode)
    // and nested BigNumbers would be left unconverted.
    return Array.from(val, function (item) {
      return convertBN(item);
    });
  }
  // Guard against null/undefined before probing the BigNumber marker.
  if (val != null && val._isBigNumber) {
    return val.toString();
  }
  return val;
}

let abiCandidates = [];
try {
  let abi_maybe = null;       // JSON text of the synthesized ABI; re-parsed to get deep copies
  let text_signature = null;  // "Name(type1,type2,...)" in field order
  const abiencoded = cn_events[topics[0].substring(0, 10)];
  if (abiencoded) {
    const pieces = abiencoded.split("|");
    // If the entry does not have exactly 4 pieces, abi_maybe stays null and the
    // `abiMaybe.inputs` access below throws; the outer catch reports it as an err row.
    if (pieces.length == 4) {
      const eventName = pieces[0];
      const abi = {"name": eventName, "type": "event", "inputs": [], "outputs": [], "stateMutability": "nonpayable"};
      // An empty segment means "none"; "".split(",") would otherwise yield [""].
      const flds = pieces[1].length > 0 ? pieces[1].split(",") : [];
      const types_unindexed = pieces[2].length > 0 ? pieces[2].split(",") : [];
      const types_indexed = pieces[3].length > 0 ? pieces[3].split(",") : [];
      const types = [];
      let u = 0; // next position in types_unindexed
      let i = 0; // next position in types_indexed
      for (let idx = 0; idx < flds.length; idx++) {
        const f = flds[idx];
        const lastchar = f.substring(f.length - 1, f.length);
        if (lastchar == "*") {
          // Trailing "*" marks an indexed field; strip it from the name.
          abi.inputs.push({"indexed": true, name: f.substring(0, f.length - 1), type: types_indexed[i]});
          types.push(types_indexed[i]);
          i++;
        } else {
          abi.inputs.push({"indexed": false, name: f, type: types_unindexed[u]});
          types.push(types_unindexed[u]);
          u++;
        }
      }
      abi_maybe = JSON.stringify(abi);
      text_signature = `${eventName}(${types.join(",")})`
    }
  } else {
    // topics[0] is not in the map.
    return null;
  }

  const abiMaybe = JSON.parse(abi_maybe);
  const N = abiMaybe.inputs.length;
  let topicLen = 0; // number of inputs the guessed ABI marks as indexed
  abiMaybe.inputs.forEach(function (x) {
    if (x.indexed) topicLen++;
  });

  // topics[0] is the event signature hash, so an ABI with topicLen indexed inputs
  // is expected to come with topicLen + 1 topics.
  if (topics.length != topicLen + 1) {
    if (topics.length == 1 || topics.length == N + 1) {
      // Either no input is indexed, or every input is indexed: a single candidate.
      const v = topics.length == 1 ? false : true;
      const cand = JSON.parse(abi_maybe);
      for (let i = 0; i < N; i++) {
        cand.inputs[i].indexed = v;
      }
      abiCandidates.push(cand);
    } else if ((topics.length == 2 || topics.length == N)) {
      // Exactly one input is indexed (2 topics), or exactly one is not (N topics):
      // one candidate per position of the odd one out.
      const v = (topics.length == 2) ? true : false;
      for (let i = 0; i < N; i++) {
        const cand = JSON.parse(abi_maybe);
        for (let j = 0; j < N; j++) {
          cand.inputs[j].indexed = (i == j) ? v : !v;
        }
        abiCandidates.push(cand);
      }
    } else {
      // General case: every way of marking (topics.length - 1) of the N inputs as indexed.
      const binaryStrings = generateBinaryNumbers(N, topics.length - 1);
      for (const b of binaryStrings) {
        const cand = JSON.parse(abi_maybe);
        for (let j = 0; j < b.length; j++) {
          cand.inputs[j].indexed = (b[j] == "1") ? true : false;
        }
        abiCandidates.push(cand);
      }
    }
  } else {
    // Topic count agrees with the guess: try it as-is.
    abiCandidates.push(abiMaybe);
  }

  let tries = 0; // number of candidates that failed before the successful one
  for (const abiCandidate of abiCandidates) {
    try {
      const iface = new ethers.utils.Interface([abiCandidate]);
      const decodedData = iface.decodeEventLog(ethers.utils.id(text_signature), data, topics);
      const res = abiCandidate.inputs.map(function (input, idx) {
        return {name: input.name, value: convertBN(decodedData[idx])};
      });
      if (tries > 0) {
        res.push({"name": "tries", "value": tries});
      }
      return res;
    } catch (err) {
      // This assignment did not decode; move on to the next candidate.
      tries++;
    }
  }
} catch (err) {
  // Report the failure as data instead of failing the whole query.
  return [{name: "err", value: `${err.toString()}`}];
}

// Every candidate failed to decode (or there were none).
return [{name: "err", value: `decode failure`}];
"""
OPTIONS(
  library = ["gs://cdn.polkaholic.io/pako.min.js", "gs://cdn.polkaholic.io/colorfulnotion-decoderData.js", "gs://blockchain-etl-bigquery/ethers-v5.js"]
);