Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce RocksDB-backed Merkle Tree #6

Merged
merged 2 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,14 @@ Cargo.lock

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

# Direnv files.
.direnv/
.envrc

# Nix files.
flake.nix
flake.lock

# db
db/
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
name = "state-reconstruct"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.4.0", features = ["string"] }
ethers = "2.0.10"
ethers = "1"
tuommaki marked this conversation as resolved.
Show resolved Hide resolved
eyre = "0.6.8"
hex = "0.4.3"
indexmap = "2.0.1"
serde_json = "1.0.107"
thiserror = "1.0"
tokio = { version = "1.32.0", features = ["macros"] }
zk_evm = { git = "https://github.com/matter-labs/era-zk_evm.git" }
zksync_merkle_tree = { git = "https://github.com/matter-labs/zksync-era.git" }
48 changes: 29 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#![feature(array_chunks)]

use std::fs;
// #![warn(clippy::pedantic)]

mod state;
mod tree;
use crate::state::CommitBlockInfoV1;

use ethers::{
abi::{Contract, Function},
prelude::*,
providers::{Http, Provider},
providers::Provider,
};
use eyre::Result;

Expand All @@ -17,11 +17,6 @@ pub const ZK_SYNC_ADDR: &str = "0x32400084C286CF3E17e7B677ea9583e60a000324";
pub const GENESIS_BLOCK: u64 = 16_627_460;
pub const BLOCK_STEP: u64 = 128;

pub fn create_initial_state() {
let _input = fs::read_to_string(INITAL_STATE_PATH).unwrap();
todo!();
}

pub async fn init_eth_adapter(http_url: &str) -> (Provider<Http>, Contract) {
let provider =
Provider::<Http>::try_from(http_url).expect("could not instantiate HTTP Provider");
Expand All @@ -32,7 +27,10 @@ pub async fn init_eth_adapter(http_url: &str) -> (Provider<Http>, Contract) {
(provider, contract)
}

fn parse_calldata(commit_blocks_fn: &Function, calldata: &[u8]) -> Result<Vec<CommitBlockInfoV1>> {
pub fn parse_calldata(
commit_blocks_fn: &Function,
calldata: &[u8],
) -> Result<Vec<CommitBlockInfoV1>> {
let mut parsed_input = commit_blocks_fn
.decode_input(&calldata[4..])
.map_err(|e| state::ParseError::InvalidCalldata(e.to_string()))?;
Expand All @@ -58,7 +56,7 @@ fn parse_calldata(commit_blocks_fn: &Function, calldata: &[u8]) -> Result<Vec<Co
);
};

let abi::Token::Uint(previous_l2_block_number) = stored_block_info[0].clone() else {
let abi::Token::Uint(_previous_l2_block_number) = stored_block_info[0].clone() else {
return Err(state::ParseError::InvalidStoredBlockInfo(
"cannot parse previous L2 block number".to_string(),
)
Expand Down Expand Up @@ -101,20 +99,32 @@ fn parse_commit_block_info(data: &abi::Token) -> Result<Vec<CommitBlockInfoV1>>

#[cfg(test)]
mod tests {
use std::env;

use ethers::{
providers::Middleware,
types::{Address, BlockNumber, Filter},
};

use eyre::Result;

use crate::tree::TreeWrapper;

use super::*;

#[ignore]
#[tokio::test]
async fn it_works() -> Result<()> {
// TODO: This should be an env variable / CLI argument.
let db_dir = env::current_dir()?.join("db");
// TODO: Save / Load from existing db.
if db_dir.exists() {
std::fs::remove_dir_all(&db_dir)?;
}
let mut tree = TreeWrapper::new(db_dir.as_path())?;

let (provider, contract) = init_eth_adapter("https://eth.llamarpc.com").await;
let latest_block = provider
let latest_l1_block = provider
.get_block(BlockNumber::Latest)
.await?
.unwrap()
Expand All @@ -126,7 +136,7 @@ mod tests {

let mut current_block = GENESIS_BLOCK;
let mut latest_l2_block_number = U256::default();
while current_block <= latest_block.0[0] {
while current_block <= latest_l1_block.0[0] {
// Create a filter showing only `BlockCommit`s from the [`ZK_SYNC_ADDR`].
// TODO: Filter by executed blocks too.
let filter = Filter::new()
Expand All @@ -137,8 +147,6 @@ mod tests {

// Grab all relevant logs.
let logs = provider.get_logs(&filter).await?;

println!("{}", logs.iter().len());
for log in logs {
println!("{:?}", log);
// log.topics:
Expand All @@ -153,14 +161,16 @@ mod tests {
}

if let Some(tx_hash) = log.transaction_hash {
let tx = provider.get_transaction(tx_hash).await?;
let calldata = tx.unwrap().input;
let tx = provider.get_transaction(tx_hash).await?.unwrap();
let calldata = tx.input;
let blocks = parse_calldata(&function, &calldata)?;

// TODO: Apply transaction to L2.
latest_l2_block_number = new_l2_block_number;
let num_blocks = blocks.len();
println!("Parsed {} new blocks", num_blocks);

println!("parsed {} new blocks", blocks.len());
for block in blocks {
latest_l2_block_number = tree.insert_block(block);
}
}
}

Expand Down
27 changes: 14 additions & 13 deletions src/state.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use ethers::{abi, types::U256};
use eyre::Result;
use std::collections::HashMap;
use indexmap::IndexMap;
use std::vec::Vec;
use thiserror::Error;

#[allow(clippy::enum_variant_names)]
#[derive(Error, Debug)]
pub enum ParseError {
#[error("invalid Calldata: {0}")]
Expand Down Expand Up @@ -36,9 +37,9 @@ pub struct CommitBlockInfoV1 {
/// Hash of all priority operations from this block.
pub priority_operations_hash: Vec<u8>,
/// Storage write access as a concatenation key-value.
pub initial_storage_changes: HashMap<[u8; 32], [u8; 32]>,
pub initial_storage_changes: IndexMap<[u8; 32], [u8; 32]>,
/// Storage write access as a concatenation index-value.
pub repeated_storage_changes: HashMap<u64, [u8; 32]>,
pub repeated_storage_changes: IndexMap<u64, [u8; 32]>,
/// Concatenation of all L2 -> L1 logs in the block.
pub l2_logs: Vec<u8>,
/// (contract bytecodes) array of L2 bytecodes that were deployed.
Expand Down Expand Up @@ -176,8 +177,8 @@ impl TryFrom<&abi::Token> for CommitBlockInfoV1 {
number_of_l1_txs,
l2_logs_tree_root,
priority_operations_hash,
initial_storage_changes: HashMap::default(),
repeated_storage_changes: HashMap::default(),
initial_storage_changes: IndexMap::default(),
repeated_storage_changes: IndexMap::default(),
l2_logs: l2_logs.to_vec(),
factory_deps: smartcontracts,
};
Expand Down Expand Up @@ -295,21 +296,21 @@ pub enum L2ToL1Pubdata {
/// Data needed to commit new block
pub struct CommitBlockInfoV2 {
/// L2 block number.
block_number: u64,
pub block_number: u64,
/// Unix timestamp denoting the start of the block execution.
timestamp: u64,
pub timestamp: u64,
/// The serial number of the shortcut index that's used as a unique identifier for storage keys that were used twice or more.
index_repeated_storage_changes: u64,
pub index_repeated_storage_changes: u64,
/// The state root of the full state tree.
new_state_root: Vec<u8>,
pub new_state_root: Vec<u8>,
/// Number of priority operations to be processed.
number_of_l1_txs: U256,
pub number_of_l1_txs: U256,
/// Hash of all priority operations from this block.
priority_operations_hash: Vec<u8>,
pub priority_operations_hash: Vec<u8>,
/// Concatenation of all L2 -> L1 system logs in the block.
system_logs: Vec<u8>,
pub system_logs: Vec<u8>,
/// Total pubdata committed to as part of bootloader run. Contents are: l2Tol1Logs <> l2Tol1Messages <> publishedBytecodes <> stateDiffs.
total_l2_to_l1_pubdata: Vec<L2ToL1Pubdata>,
pub total_l2_to_l1_pubdata: Vec<L2ToL1Pubdata>,
}

impl CommitBlockInfoV1 {
Expand Down
170 changes: 170 additions & 0 deletions src/tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// FIXME: Remove once we have a binary in place.
#![allow(dead_code)]
use std::{fs, path::Path, str::FromStr};

use ethers::types::{Address, H256, U256};
use zk_evm::aux_structures::LogQuery;
use zksync_merkle_tree::{Database, MerkleTree, RocksDBWrapper};

use eyre::Result;

use crate::{state::CommitBlockInfoV1, INITAL_STATE_PATH};

pub struct TreeWrapper<'a> {
pub tree: MerkleTree<'a, RocksDBWrapper>,
// FIXME: How to save this for persistent storage?
pub index_to_key: Vec<U256>,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we just write it ~as is? i.e. <num values: 32bit unsigned>,<value0>, <value1>, <value2>... - all numbers big endian encoded?

Copy link
Member Author

@zeapoz zeapoz Oct 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not fully sure of what you want, would it still be a Vec, containing the length as the first element and the rest of the values (keys) in their non-parsed variant?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, sorry I expressed myself a bit poorly. I basically responded to your FIXME comment question from above. No need to change anything here, but my thinking was that one possibility for persistent storage solution here would be to just write the values out to file, prefixing them by number of elements.

No need to do anything here 🙂

}

impl TreeWrapper<'static> {
/// Creates a new `TreeWrapper`, opening (or creating) a RocksDB-backed
/// Merkle tree at `db_dir` and seeding it from the genesis state file.
///
/// # Errors
/// Returns an error if the genesis CSV at `INITAL_STATE_PATH` cannot be
/// read or parsed by `reconstruct_genesis_state`.
pub fn new(db_dir: &Path) -> Result<Self> {
let db = RocksDBWrapper::new(db_dir);
let mut tree = MerkleTree::new(db);
// Replay the genesis state so that `index_to_key` records the initial
// key-insertion order; repeated-write indices in later blocks refer to
// positions in this list.
let index_to_key = reconstruct_genesis_state(&mut tree, INITAL_STATE_PATH)?;

Ok(Self { tree, index_to_key })
}

/// Inserts a block into the tree and returns the new block number.
pub fn insert_block(&mut self, block: CommitBlockInfoV1) -> U256 {
let new_l2_block_number = block.block_number;
// INITIAL CALLDATA.
let mut key_value_pairs: Vec<(U256, H256)> =
Vec::with_capacity(block.initial_storage_changes.len());
for (key, value) in &block.initial_storage_changes {
let key = U256::from_little_endian(key);
let value = H256::from(value);

key_value_pairs.push((key, value));
self.index_to_key.push(key);
}

// REPEATED CALLDATA.
for (index, value) in &block.repeated_storage_changes {
let index = *index as usize;
// Index is 1-based so we subtract 1.
let key = *self.index_to_key.get(index - 1).unwrap();
let value = H256::from(value);

key_value_pairs.push((key, value));
}

let output = self.tree.extend(key_value_pairs);
let root_hash = output.root_hash;

assert_eq!(root_hash.as_bytes(), block.new_state_root);
println!(
"Root hash of block {} = {}",
new_l2_block_number,
hex::encode(root_hash)
);

U256::from(new_l2_block_number)
}
}

/// Attempts to reconstruct the genesis state from a CSV file.
fn reconstruct_genesis_state<D: Database>(
tree: &mut MerkleTree<D>,
path: &str,
) -> Result<Vec<U256>> {
fn cleanup_encoding(input: &'_ str) -> &'_ str {
input
.strip_prefix("E'\\\\x")
.unwrap()
.strip_suffix('\'')
.unwrap()
}

let mut block_batched_accesses = vec![];

let input = fs::read_to_string(path)?;
for line in input.lines() {
let mut separated = line.split(',');
let _derived_key = separated.next().unwrap();
let address = separated.next().unwrap();
let key = separated.next().unwrap();
let value = separated.next().unwrap();
let op_number: u32 = separated.next().unwrap().parse()?;
let _ = separated.next().unwrap();
let miniblock_number: u32 = separated.next().unwrap().parse()?;

if miniblock_number != 0 {
break;
}

let address = Address::from_str(cleanup_encoding(address))?;
let key = U256::from_str_radix(cleanup_encoding(key), 16)?;
let value = U256::from_str_radix(cleanup_encoding(value), 16)?;

let record = (address, key, value, op_number);
block_batched_accesses.push(record);
}

// Sort the accesses within the block.
block_batched_accesses.sort_by(|a, b| match a.0.cmp(&b.0) {
std::cmp::Ordering::Equal => match a.1.cmp(&b.1) {
std::cmp::Ordering::Equal => match a.3.cmp(&b.3) {
std::cmp::Ordering::Equal => {
panic!("must be unique")
}
a => a,
},
a => a,
},
a => a,
});

let mut key_set = std::collections::HashSet::new();

// Batch.
for el in &block_batched_accesses {
let derived_key = LogQuery::derive_final_address_for_params(&el.0, &el.1);
key_set.insert(derived_key);
}

let mut batched = vec![];
let mut it = block_batched_accesses.into_iter();
let mut previous = it.next().unwrap();
for el in it {
if el.0 != previous.0 || el.1 != previous.1 {
batched.push((previous.0, previous.1, previous.2));
}

previous = el;
}

// Finalize.
batched.push((previous.0, previous.1, previous.2));

println!("Have {} unique keys in the tree", key_set.len());

let mut index_to_key = Vec::with_capacity(batched.len());
let mut key_value_pairs: Vec<(U256, H256)> = Vec::with_capacity(batched.len());
for (address, key, value) in batched {
let derived_key = LogQuery::derive_final_address_for_params(&address, &key);
// TODO: what to do here?
// let version = tree.latest_version().unwrap_or_default();
// let _leaf = tree.read_leaves(version, &[key]);

// let existing_value = U256::from_big_endian(existing_leaf.leaf.value());
// if existing_value == value {
// // we downgrade to read
// // println!("Downgrading to read")
// } else {
// we write
let mut tmp = [0u8; 32];
value.to_big_endian(&mut tmp);
tuommaki marked this conversation as resolved.
Show resolved Hide resolved

let key = U256::from_little_endian(&derived_key);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these endianness conversions work correctly on all platforms? i.e. is derived_key little-endian even on big-endian platform?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would assume so, though I can't guarantee. Do you have an idea on how to assert this?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe checking the LogQuery implementation is one way? I'm not sure if this is a big concern right now, since there are very little big endian platforms alive anymore, and probably none of them will run this code, but I got curious since there's both endianness conversion / assumption in our code base. 🙂

let value = H256::from(tmp);
key_value_pairs.push((key, value));
index_to_key.push(key);
}

let output = tree.extend(key_value_pairs);
dbg!(tree.latest_version());
println!("Initial state root = {}", hex::encode(output.root_hash));

Ok(index_to_key)
}