Skip to content

Commit

Permalink
Introduce RocksDB-backed Merkle Tree (#6)
Browse files Browse the repository at this point in the history
* feat: merkle tree

* chore: clippy lints
  • Loading branch information
zeapoz authored Oct 2, 2023
1 parent 1ee13e0 commit 9952538
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 34 deletions.
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,14 @@ Cargo.lock

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

# Direnv files.
.direnv/
.envrc

# Nix files.
flake.nix
flake.lock

# db
db/
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
name = "state-reconstruct"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.4.0", features = ["string"] }
ethers = "2.0.10"
ethers = "1"
eyre = "0.6.8"
hex = "0.4.3"
indexmap = "2.0.1"
serde_json = "1.0.107"
thiserror = "1.0"
tokio = { version = "1.32.0", features = ["macros"] }
zk_evm = { git = "https://github.com/matter-labs/era-zk_evm.git" }
zksync_merkle_tree = { git = "https://github.com/matter-labs/zksync-era.git" }
48 changes: 29 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#![feature(array_chunks)]

use std::fs;
// #![warn(clippy::pedantic)]

mod state;
mod tree;
use crate::state::CommitBlockInfoV1;

use ethers::{
abi::{Contract, Function},
prelude::*,
providers::{Http, Provider},
providers::Provider,
};
use eyre::Result;

Expand All @@ -17,11 +17,6 @@ pub const ZK_SYNC_ADDR: &str = "0x32400084C286CF3E17e7B677ea9583e60a000324";
pub const GENESIS_BLOCK: u64 = 16_627_460;
pub const BLOCK_STEP: u64 = 128;

pub fn create_initial_state() {
let _input = fs::read_to_string(INITAL_STATE_PATH).unwrap();
todo!();
}

pub async fn init_eth_adapter(http_url: &str) -> (Provider<Http>, Contract) {
let provider =
Provider::<Http>::try_from(http_url).expect("could not instantiate HTTP Provider");
Expand All @@ -32,7 +27,10 @@ pub async fn init_eth_adapter(http_url: &str) -> (Provider<Http>, Contract) {
(provider, contract)
}

fn parse_calldata(commit_blocks_fn: &Function, calldata: &[u8]) -> Result<Vec<CommitBlockInfoV1>> {
pub fn parse_calldata(
commit_blocks_fn: &Function,
calldata: &[u8],
) -> Result<Vec<CommitBlockInfoV1>> {
let mut parsed_input = commit_blocks_fn
.decode_input(&calldata[4..])
.map_err(|e| state::ParseError::InvalidCalldata(e.to_string()))?;
Expand All @@ -58,7 +56,7 @@ fn parse_calldata(commit_blocks_fn: &Function, calldata: &[u8]) -> Result<Vec<Co
);
};

let abi::Token::Uint(previous_l2_block_number) = stored_block_info[0].clone() else {
let abi::Token::Uint(_previous_l2_block_number) = stored_block_info[0].clone() else {
return Err(state::ParseError::InvalidStoredBlockInfo(
"cannot parse previous L2 block number".to_string(),
)
Expand Down Expand Up @@ -101,20 +99,32 @@ fn parse_commit_block_info(data: &abi::Token) -> Result<Vec<CommitBlockInfoV1>>

#[cfg(test)]
mod tests {
use std::env;

use ethers::{
providers::Middleware,
types::{Address, BlockNumber, Filter},
};

use eyre::Result;

use crate::tree::TreeWrapper;

use super::*;

#[ignore]
#[tokio::test]
async fn it_works() -> Result<()> {
// TODO: This should be an env variable / CLI argument.
let db_dir = env::current_dir()?.join("db");
// TODO: Save / Load from existing db.
if db_dir.exists() {
std::fs::remove_dir_all(&db_dir)?;
}
let mut tree = TreeWrapper::new(db_dir.as_path())?;

let (provider, contract) = init_eth_adapter("https://eth.llamarpc.com").await;
let latest_block = provider
let latest_l1_block = provider
.get_block(BlockNumber::Latest)
.await?
.unwrap()
Expand All @@ -126,7 +136,7 @@ mod tests {

let mut current_block = GENESIS_BLOCK;
let mut latest_l2_block_number = U256::default();
while current_block <= latest_block.0[0] {
while current_block <= latest_l1_block.0[0] {
// Create a filter showing only `BlockCommit`s from the [`ZK_SYNC_ADDR`].
// TODO: Filter by executed blocks too.
let filter = Filter::new()
Expand All @@ -137,8 +147,6 @@ mod tests {

// Grab all relevant logs.
let logs = provider.get_logs(&filter).await?;

println!("{}", logs.iter().len());
for log in logs {
println!("{:?}", log);
// log.topics:
Expand All @@ -153,14 +161,16 @@ mod tests {
}

if let Some(tx_hash) = log.transaction_hash {
let tx = provider.get_transaction(tx_hash).await?;
let calldata = tx.unwrap().input;
let tx = provider.get_transaction(tx_hash).await?.unwrap();
let calldata = tx.input;
let blocks = parse_calldata(&function, &calldata)?;

// TODO: Apply transaction to L2.
latest_l2_block_number = new_l2_block_number;
let num_blocks = blocks.len();
println!("Parsed {} new blocks", num_blocks);

println!("parsed {} new blocks", blocks.len());
for block in blocks {
latest_l2_block_number = tree.insert_block(block);
}
}
}

Expand Down
27 changes: 14 additions & 13 deletions src/state.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use ethers::{abi, types::U256};
use eyre::Result;
use std::collections::HashMap;
use indexmap::IndexMap;
use std::vec::Vec;
use thiserror::Error;

#[allow(clippy::enum_variant_names)]
#[derive(Error, Debug)]
pub enum ParseError {
#[error("invalid Calldata: {0}")]
Expand Down Expand Up @@ -36,9 +37,9 @@ pub struct CommitBlockInfoV1 {
/// Hash of all priority operations from this block.
pub priority_operations_hash: Vec<u8>,
/// Storage write access as a concatenation key-value.
pub initial_storage_changes: HashMap<[u8; 32], [u8; 32]>,
pub initial_storage_changes: IndexMap<[u8; 32], [u8; 32]>,
/// Storage write access as a concatenation index-value.
pub repeated_storage_changes: HashMap<u64, [u8; 32]>,
pub repeated_storage_changes: IndexMap<u64, [u8; 32]>,
/// Concatenation of all L2 -> L1 logs in the block.
pub l2_logs: Vec<u8>,
/// (contract bytecodes) array of L2 bytecodes that were deployed.
Expand Down Expand Up @@ -176,8 +177,8 @@ impl TryFrom<&abi::Token> for CommitBlockInfoV1 {
number_of_l1_txs,
l2_logs_tree_root,
priority_operations_hash,
initial_storage_changes: HashMap::default(),
repeated_storage_changes: HashMap::default(),
initial_storage_changes: IndexMap::default(),
repeated_storage_changes: IndexMap::default(),
l2_logs: l2_logs.to_vec(),
factory_deps: smartcontracts,
};
Expand Down Expand Up @@ -295,21 +296,21 @@ pub enum L2ToL1Pubdata {
/// Data needed to commit new block
pub struct CommitBlockInfoV2 {
/// L2 block number.
block_number: u64,
pub block_number: u64,
/// Unix timestamp denoting the start of the block execution.
timestamp: u64,
pub timestamp: u64,
/// The serial number of the shortcut index that's used as a unique identifier for storage keys that were used twice or more.
index_repeated_storage_changes: u64,
pub index_repeated_storage_changes: u64,
/// The state root of the full state tree.
new_state_root: Vec<u8>,
pub new_state_root: Vec<u8>,
/// Number of priority operations to be processed.
number_of_l1_txs: U256,
pub number_of_l1_txs: U256,
/// Hash of all priority operations from this block.
priority_operations_hash: Vec<u8>,
pub priority_operations_hash: Vec<u8>,
/// Concatenation of all L2 -> L1 system logs in the block.
system_logs: Vec<u8>,
pub system_logs: Vec<u8>,
/// Total pubdata committed to as part of bootloader run. Contents are: l2Tol1Logs <> l2Tol1Messages <> publishedBytecodes <> stateDiffs.
total_l2_to_l1_pubdata: Vec<L2ToL1Pubdata>,
pub total_l2_to_l1_pubdata: Vec<L2ToL1Pubdata>,
}

impl CommitBlockInfoV1 {
Expand Down
170 changes: 170 additions & 0 deletions src/tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// FIXME: Remove once we have a binary in place.
#![allow(dead_code)]
use std::{fs, path::Path, str::FromStr};

use ethers::types::{Address, H256, U256};
use zk_evm::aux_structures::LogQuery;
use zksync_merkle_tree::{Database, MerkleTree, RocksDBWrapper};

use eyre::Result;

use crate::{state::CommitBlockInfoV1, INITAL_STATE_PATH};

/// Bundles a [`MerkleTree`] stored through a [`RocksDBWrapper`] with the bookkeeping needed
/// to resolve repeated storage writes back to their tree keys.
pub struct TreeWrapper<'a> {
    /// The underlying Merkle tree.
    pub tree: MerkleTree<'a, RocksDBWrapper>,
    /// Tree keys in the order they were first written. Repeated storage changes refer to a
    /// key by its 1-based position in this list.
    // FIXME: How to save this for persistent storage?
    pub index_to_key: Vec<U256>,
}

impl TreeWrapper<'static> {
    /// Creates a [`RocksDBWrapper`] for `db_dir`, builds a Merkle tree on top of it and seeds
    /// the tree with the genesis state read from [`INITAL_STATE_PATH`].
    ///
    /// # Errors
    ///
    /// Returns an error if the genesis state cannot be reconstructed.
    pub fn new(db_dir: &Path) -> Result<Self> {
        let db = RocksDBWrapper::new(db_dir);
        let mut tree = MerkleTree::new(db);
        let index_to_key = reconstruct_genesis_state(&mut tree, INITAL_STATE_PATH)?;

        Ok(Self { tree, index_to_key })
    }

    /// Inserts a block into the tree and returns the new block number.
    ///
    /// Initial storage changes are written under their own key, which is also appended to
    /// `index_to_key`. Repeated storage changes are resolved to a key through their 1-based
    /// index into `index_to_key`.
    ///
    /// # Panics
    ///
    /// Panics if a repeated storage change uses index 0 or an index that has not been
    /// assigned yet, or if the resulting root hash differs from the block's `new_state_root`.
    pub fn insert_block(&mut self, block: CommitBlockInfoV1) -> U256 {
        let new_l2_block_number = block.block_number;

        // Both kinds of changes end up in the same batch, so reserve room for all of them.
        let mut key_value_pairs: Vec<(U256, H256)> = Vec::with_capacity(
            block.initial_storage_changes.len() + block.repeated_storage_changes.len(),
        );

        // INITIAL CALLDATA.
        for (key, value) in &block.initial_storage_changes {
            let key = U256::from_little_endian(key);
            let value = H256::from(value);

            key_value_pairs.push((key, value));
            self.index_to_key.push(key);
        }

        // REPEATED CALLDATA.
        for (index, value) in &block.repeated_storage_changes {
            // Index is 1-based so we subtract 1; index 0 and unknown indices are rejected
            // explicitly instead of underflowing or hitting a bare `unwrap`.
            let key = usize::try_from(*index)
                .ok()
                .and_then(|one_based| one_based.checked_sub(1))
                .and_then(|position| self.index_to_key.get(position))
                .copied()
                .unwrap_or_else(|| {
                    panic!(
                        "repeated storage change refers to unknown key index {}",
                        index
                    )
                });
            let value = H256::from(value);

            key_value_pairs.push((key, value));
        }

        let output = self.tree.extend(key_value_pairs);
        let root_hash = output.root_hash;

        assert_eq!(
            root_hash.as_bytes(),
            block.new_state_root,
            "state root mismatch after inserting block {}",
            new_l2_block_number
        );
        println!(
            "Root hash of block {} = {}",
            new_l2_block_number,
            hex::encode(root_hash)
        );

        U256::from(new_l2_block_number)
    }
}

/// Attempts to reconstruct the genesis state from a CSV file.
fn reconstruct_genesis_state<D: Database>(
tree: &mut MerkleTree<D>,
path: &str,
) -> Result<Vec<U256>> {
fn cleanup_encoding(input: &'_ str) -> &'_ str {
input
.strip_prefix("E'\\\\x")
.unwrap()
.strip_suffix('\'')
.unwrap()
}

let mut block_batched_accesses = vec![];

let input = fs::read_to_string(path)?;
for line in input.lines() {
let mut separated = line.split(',');
let _derived_key = separated.next().unwrap();
let address = separated.next().unwrap();
let key = separated.next().unwrap();
let value = separated.next().unwrap();
let op_number: u32 = separated.next().unwrap().parse()?;
let _ = separated.next().unwrap();
let miniblock_number: u32 = separated.next().unwrap().parse()?;

if miniblock_number != 0 {
break;
}

let address = Address::from_str(cleanup_encoding(address))?;
let key = U256::from_str_radix(cleanup_encoding(key), 16)?;
let value = U256::from_str_radix(cleanup_encoding(value), 16)?;

let record = (address, key, value, op_number);
block_batched_accesses.push(record);
}

// Sort in block block.
block_batched_accesses.sort_by(|a, b| match a.0.cmp(&b.0) {
std::cmp::Ordering::Equal => match a.1.cmp(&b.1) {
std::cmp::Ordering::Equal => match a.3.cmp(&b.3) {
std::cmp::Ordering::Equal => {
panic!("must be unique")
}
a => a,
},
a => a,
},
a => a,
});

let mut key_set = std::collections::HashSet::new();

// Batch.
for el in &block_batched_accesses {
let derived_key = LogQuery::derive_final_address_for_params(&el.0, &el.1);
key_set.insert(derived_key);
}

let mut batched = vec![];
let mut it = block_batched_accesses.into_iter();
let mut previous = it.next().unwrap();
for el in it {
if el.0 != previous.0 || el.1 != previous.1 {
batched.push((previous.0, previous.1, previous.2));
}

previous = el;
}

// Finalize.
batched.push((previous.0, previous.1, previous.2));

println!("Have {} unique keys in the tree", key_set.len());

let mut index_to_key = Vec::with_capacity(batched.len());
let mut key_value_pairs: Vec<(U256, H256)> = Vec::with_capacity(batched.len());
for (address, key, value) in batched {
let derived_key = LogQuery::derive_final_address_for_params(&address, &key);
// TODO: what to do here?
// let version = tree.latest_version().unwrap_or_default();
// let _leaf = tree.read_leaves(version, &[key]);

// let existing_value = U256::from_big_endian(existing_leaf.leaf.value());
// if existing_value == value {
// // we downgrade to read
// // println!("Downgrading to read")
// } else {
// we write
let mut tmp = [0u8; 32];
value.to_big_endian(&mut tmp);

let key = U256::from_little_endian(&derived_key);
let value = H256::from(tmp);
key_value_pairs.push((key, value));
index_to_key.push(key);
}

let output = tree.extend(key_value_pairs);
dbg!(tree.latest_version());
println!("Initial state root = {}", hex::encode(output.root_hash));

Ok(index_to_key)
}

0 comments on commit 9952538

Please sign in to comment.