From b7b5db63ee5922057dfe8f3464935fa032770220 Mon Sep 17 00:00:00 2001
From: zeapoz
Date: Fri, 29 Sep 2023 14:08:37 +0200
Subject: [PATCH 1/2] feat: merkle tree

---
 .gitignore  |  11 ++++
 Cargo.toml  |   7 ++-
 src/lib.rs  |  48 +++++++++------
 src/tree.rs | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 215 insertions(+), 21 deletions(-)
 create mode 100644 src/tree.rs

diff --git a/.gitignore b/.gitignore
index 6985cf1..a072a6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,14 @@ Cargo.lock
 
 # MSVC Windows builds of rustc generate these, which store debugging information
 *.pdb
+
+# Direnv files.
+.direnv/
+.envrc
+
+# Nix files.
+flake.nix
+flake.lock
+
+# db
+db/
diff --git a/Cargo.toml b/Cargo.toml
index 4331cde..c13032a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,13 +2,16 @@
 name = "state-reconstruct"
 version = "0.1.0"
 edition = "2021"
-
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
 clap = { version = "4.4.0", features = ["string"] }
-ethers = "2.0.10"
+ethers = "1"
 eyre = "0.6.8"
+hex = "0.4.3"
+indexmap = "2.0.1"
 serde_json = "1.0.107"
 thiserror = "1.0"
 tokio = { version = "1.32.0", features = ["macros"] }
+zk_evm = { git = "https://github.com/matter-labs/era-zk_evm.git" }
+zksync_merkle_tree = { git = "https://github.com/matter-labs/zksync-era.git" }
diff --git a/src/lib.rs b/src/lib.rs
index 2c9c360..46b7675 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,14 +1,14 @@
 #![feature(array_chunks)]
-
-use std::fs;
+// #![warn(clippy::pedantic)]
 
 mod state;
+mod tree;
 
 use crate::state::CommitBlockInfoV1;
 use ethers::{
     abi::{Contract, Function},
     prelude::*,
-    providers::{Http, Provider},
+    providers::Provider,
 };
 use eyre::Result;
 
@@ -17,11 +17,6 @@ pub const ZK_SYNC_ADDR: &str = "0x32400084C286CF3E17e7B677ea9583e60a000324";
 pub const GENESIS_BLOCK: u64 = 16_627_460;
 pub const BLOCK_STEP: u64 = 128;
 
-pub fn create_initial_state() {
-    let _input = fs::read_to_string(INITAL_STATE_PATH).unwrap();
-    todo!();
-}
-
 pub async fn init_eth_adapter(http_url: &str) -> (Provider<Http>, Contract) {
     let provider =
         Provider::<Http>::try_from(http_url).expect("could not instantiate HTTP Provider");
@@ -32,7 +27,10 @@ pub async fn init_eth_adapter(http_url: &str) -> (Provider<Http>, Contract) {
     (provider, contract)
 }
 
-fn parse_calldata(commit_blocks_fn: &Function, calldata: &[u8]) -> Result<Vec<CommitBlockInfoV1>> {
+pub fn parse_calldata(
+    commit_blocks_fn: &Function,
+    calldata: &[u8],
+) -> Result<Vec<CommitBlockInfoV1>> {
     let mut parsed_input = commit_blocks_fn
         .decode_input(&calldata[4..])
         .map_err(|e| state::ParseError::InvalidCalldata(e.to_string()))?;
@@ -58,7 +56,7 @@ fn parse_calldata(commit_blocks_fn: &Function, calldata: &[u8]) -> Result<Vec<CommitBlockInfoV1>>
 #[cfg(test)]
 mod tests {
+    use std::env;
+
     use ethers::{
         providers::Middleware,
         types::{Address, BlockNumber, Filter},
     };
     use eyre::Result;
 
+    use crate::tree::TreeWrapper;
+
     use super::*;
 
     #[ignore]
     #[tokio::test]
     async fn it_works() -> Result<()> {
+        // TODO: This should be an env variable / CLI argument.
+        let db_dir = env::current_dir()?.join("db");
+        // TODO: Save / Load from existing db.
+        if db_dir.exists() {
+            std::fs::remove_dir_all(&db_dir)?;
+        }
+        let mut tree = TreeWrapper::new(db_dir.as_path())?;
+
         let (provider, contract) = init_eth_adapter("https://eth.llamarpc.com").await;
-        let latest_block = provider
+        let latest_l1_block = provider
             .get_block(BlockNumber::Latest)
             .await?
             .unwrap()
@@ -126,7 +136,7 @@ mod tests {
         let mut current_block = GENESIS_BLOCK;
         let mut latest_l2_block_number = U256::default();
 
-        while current_block <= latest_block.0[0] {
+        while current_block <= latest_l1_block.0[0] {
             // Create a filter showing only `BlockCommit`s from the [`ZK_SYNC_ADDR`].
             // TODO: Filter by executed blocks too.
             let filter = Filter::new()
@@ -137,8 +147,6 @@ mod tests {
 
             // Grab all relevant logs.
             let logs = provider.get_logs(&filter).await?;
-
-            println!("{}", logs.iter().len());
             for log in logs {
                 println!("{:?}", log);
                 // log.topics:
@@ -153,14 +161,16 @@ mod tests {
                 }
 
                 if let Some(tx_hash) = log.transaction_hash {
-                    let tx = provider.get_transaction(tx_hash).await?;
-                    let calldata = tx.unwrap().input;
+                    let tx = provider.get_transaction(tx_hash).await?.unwrap();
+                    let calldata = tx.input;
                     let blocks = parse_calldata(&function, &calldata)?;
 
-                    // TODO: Apply transaction to L2.
-                    latest_l2_block_number = new_l2_block_number;
+                    let num_blocks = blocks.len();
+                    println!("Parsed {} new blocks", num_blocks);
 
-                    println!("parsed {} new blocks", blocks.len());
+                    for block in blocks {
+                        latest_l2_block_number = tree.insert_block(block);
+                    }
                 }
             }
 
diff --git a/src/tree.rs b/src/tree.rs
new file mode 100644
index 0000000..645a14a
--- /dev/null
+++ b/src/tree.rs
@@ -0,0 +1,170 @@
+// FIXME: Remove once we have a binary in place.
+#![allow(dead_code)]
+use std::{fs, path::Path, str::FromStr};
+
+use ethers::types::{Address, H256, U256};
+use zk_evm::aux_structures::LogQuery;
+use zksync_merkle_tree::{Database, MerkleTree, RocksDBWrapper};
+
+use eyre::Result;
+
+use crate::{state::CommitBlockInfoV1, INITAL_STATE_PATH};
+
+pub struct TreeWrapper<'a> {
+    pub tree: MerkleTree<'a, RocksDBWrapper>,
+    // FIXME: How to save this for persistent storage?
+    pub index_to_key: Vec<U256>,
+}
+
+impl TreeWrapper<'static> {
+    pub fn new(db_dir: &Path) -> Result<Self> {
+        let db = RocksDBWrapper::new(db_dir);
+        let mut tree = MerkleTree::new(db);
+        let index_to_key = reconstruct_genesis_state(&mut tree, INITAL_STATE_PATH)?;
+
+        Ok(Self { tree, index_to_key })
+    }
+
+    /// Inserts a block into the tree and returns the new block number.
+    pub fn insert_block(&mut self, block: CommitBlockInfoV1) -> U256 {
+        let new_l2_block_number = block.block_number;
+        // INITIAL CALLDATA.
+        let mut key_value_pairs: Vec<(U256, H256)> =
+            Vec::with_capacity(block.initial_storage_changes.len());
+        for (key, value) in &block.initial_storage_changes {
+            let key = U256::from_little_endian(key);
+            let value = H256::from(value);
+
+            key_value_pairs.push((key, value));
+            self.index_to_key.push(key);
+        }
+
+        // REPEATED CALLDATA.
+        for (index, value) in &block.repeated_storage_changes {
+            let index = *index as usize;
+            // Index is 1-based so we subtract 1.
+            let key = *self.index_to_key.get(index - 1).unwrap();
+            let value = H256::from(value);
+
+            key_value_pairs.push((key, value));
+        }
+
+        let output = self.tree.extend(key_value_pairs);
+        let root_hash = output.root_hash;
+
+        assert_eq!(root_hash.as_bytes(), block.new_state_root);
+        println!(
+            "Root hash of block {} = {}",
+            new_l2_block_number,
+            hex::encode(root_hash)
+        );
+
+        U256::from(new_l2_block_number)
+    }
+}
+
+/// Attempts to reconstruct the genesis state from a CSV file.
+fn reconstruct_genesis_state(
+    tree: &mut MerkleTree<RocksDBWrapper>,
+    path: &str,
+) -> Result<Vec<U256>> {
+    fn cleanup_encoding(input: &'_ str) -> &'_ str {
+        input
+            .strip_prefix("E'\\\\x")
+            .unwrap()
+            .strip_suffix('\'')
+            .unwrap()
+    }
+
+    let mut block_batched_accesses = vec![];
+
+    let input = fs::read_to_string(path)?;
+    for line in input.lines() {
+        let mut separated = line.split(',');
+        let _derived_key = separated.next().unwrap();
+        let address = separated.next().unwrap();
+        let key = separated.next().unwrap();
+        let value = separated.next().unwrap();
+        let op_number: u32 = separated.next().unwrap().parse()?;
+        let _ = separated.next().unwrap();
+        let miniblock_number: u32 = separated.next().unwrap().parse()?;
+
+        if miniblock_number != 0 {
+            break;
+        }
+
+        let address = Address::from_str(cleanup_encoding(address))?;
+        let key = U256::from_str_radix(cleanup_encoding(key), 16)?;
+        let value = U256::from_str_radix(cleanup_encoding(value), 16)?;
+
+        let record = (address, key, value, op_number);
+        block_batched_accesses.push(record);
+    }
+
+    // Sort in block order.
+    block_batched_accesses.sort_by(|a, b| match a.0.cmp(&b.0) {
+        std::cmp::Ordering::Equal => match a.1.cmp(&b.1) {
+            std::cmp::Ordering::Equal => match a.3.cmp(&b.3) {
+                std::cmp::Ordering::Equal => {
+                    panic!("must be unique")
+                }
+                a => a,
+            },
+            a => a,
+        },
+        a => a,
+    });
+
+    let mut key_set = std::collections::HashSet::new();
+
+    // Batch.
+    for el in &block_batched_accesses {
+        let derived_key = LogQuery::derive_final_address_for_params(&el.0, &el.1);
+        key_set.insert(derived_key);
+    }
+
+    let mut batched = vec![];
+    let mut it = block_batched_accesses.into_iter();
+    let mut previous = it.next().unwrap();
+    for el in it {
+        if el.0 != previous.0 || el.1 != previous.1 {
+            batched.push((previous.0, previous.1, previous.2));
+        }
+
+        previous = el;
+    }
+
+    // Finalize.
+    batched.push((previous.0, previous.1, previous.2));
+
+    println!("Have {} unique keys in the tree", key_set.len());
+
+    let mut index_to_key = Vec::with_capacity(batched.len());
+    let mut key_value_pairs: Vec<(U256, H256)> = Vec::with_capacity(batched.len());
+    for (address, key, value) in batched {
+        let derived_key = LogQuery::derive_final_address_for_params(&address, &key);
+        // TODO: what to do here?
+        // let version = tree.latest_version().unwrap_or_default();
+        // let _leaf = tree.read_leaves(version, &[key]);
+
+        // let existing_value = U256::from_big_endian(existing_leaf.leaf.value());
+        // if existing_value == value {
+        // // we downgrade to read
+        // // println!("Downgrading to read")
+        // } else {
+        // we write
+        let mut tmp = [0u8; 32];
+        value.to_big_endian(&mut tmp);
+
+        let key = U256::from_little_endian(&derived_key);
+        let value = H256::from(tmp);
+        key_value_pairs.push((key, value));
+        index_to_key.push(key);
+    }
+
+    let output = tree.extend(key_value_pairs);
+    dbg!(tree.latest_version());
+    println!("Initial state root = {}", hex::encode(output.root_hash));
+
+    Ok(index_to_key)
+}

From 6dd4e065d89b13135171bb170f056e353a84e484 Mon Sep 17 00:00:00 2001
From: zeapoz
Date: Fri, 29 Sep 2023 14:09:09 +0200
Subject: [PATCH 2/2] chore: clippy lints

---
 src/state.rs | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/state.rs b/src/state.rs
index 17685f0..8573da6 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,9 +1,10 @@
 use ethers::{abi, types::U256};
 use eyre::Result;
-use std::collections::HashMap;
+use indexmap::IndexMap;
 use std::vec::Vec;
 use thiserror::Error;
 
+#[allow(clippy::enum_variant_names)]
 #[derive(Error, Debug)]
 pub enum ParseError {
     #[error("invalid Calldata: {0}")]
@@ -36,9 +37,9 @@ pub struct CommitBlockInfoV1 {
     /// Hash of all priority operations from this block.
     pub priority_operations_hash: Vec<u8>,
     /// Storage write access as a concatenation key-value.
-    pub initial_storage_changes: HashMap<[u8; 32], [u8; 32]>,
+    pub initial_storage_changes: IndexMap<[u8; 32], [u8; 32]>,
     /// Storage write access as a concatenation index-value.
-    pub repeated_storage_changes: HashMap<u64, [u8; 32]>,
+    pub repeated_storage_changes: IndexMap<u64, [u8; 32]>,
     /// Concatenation of all L2 -> L1 logs in the block.
     pub l2_logs: Vec<u8>,
     /// (contract bytecodes) array of L2 bytecodes that were deployed.
     pub factory_deps: Vec<Vec<u8>>,
@@ -176,8 +177,8 @@ impl TryFrom<&abi::Token> for CommitBlockInfoV1 {
             number_of_l1_txs,
             l2_logs_tree_root,
             priority_operations_hash,
-            initial_storage_changes: HashMap::default(),
-            repeated_storage_changes: HashMap::default(),
+            initial_storage_changes: IndexMap::default(),
+            repeated_storage_changes: IndexMap::default(),
             l2_logs: l2_logs.to_vec(),
             factory_deps: smartcontracts,
         };
@@ -295,21 +296,21 @@ pub enum L2ToL1Pubdata {
 /// Data needed to commit new block
 pub struct CommitBlockInfoV2 {
     /// L2 block number.
-    block_number: u64,
+    pub block_number: u64,
     /// Unix timestamp denoting the start of the block execution.
-    timestamp: u64,
+    pub timestamp: u64,
     /// The serial number of the shortcut index that's used as a unique identifier for storage keys that were used twice or more.
-    index_repeated_storage_changes: u64,
+    pub index_repeated_storage_changes: u64,
     /// The state root of the full state tree.
-    new_state_root: Vec<u8>,
+    pub new_state_root: Vec<u8>,
     /// Number of priority operations to be processed.
-    number_of_l1_txs: U256,
+    pub number_of_l1_txs: U256,
     /// Hash of all priority operations from this block.
-    priority_operations_hash: Vec<u8>,
+    pub priority_operations_hash: Vec<u8>,
     /// Concatenation of all L2 -> L1 system logs in the block.
-    system_logs: Vec<u8>,
+    pub system_logs: Vec<u8>,
     /// Total pubdata committed to as part of bootloader run. Contents are: l2Tol1Logs <> l2Tol1Messages <> publishedBytecodes <> stateDiffs.
-    total_l2_to_l1_pubdata: Vec<u8>,
+    pub total_l2_to_l1_pubdata: Vec<u8>,
 }
 
 impl CommitBlockInfoV1 {