Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support importing legacy snapshots #114

Merged
merged 3 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,072 changes: 1,006 additions & 66 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[workspace]
members = ["state-reconstruct-fetcher", "state-reconstruct-storage"]
members = [
"state-reconstruct-fetcher",
"state-reconstruct-storage",
"state-reconstruct-utils",
]

[dependencies]
async-trait = "0.1.74"
Expand All @@ -24,6 +28,7 @@ serde = { version = "1.0.189", features = ["derive"] }
serde_json = { version = "1.0.107", features = ["std"] }
state-reconstruct-fetcher = { path = "./state-reconstruct-fetcher" }
state-reconstruct-storage = { path = "./state-reconstruct-storage" }
state-reconstruct-utils = { path = "./state-reconstruct-utils" }
thiserror = "1.0.50"
tikv-jemallocator = "0.5"
tokio = { version = "1.33.0", features = ["macros"] }
Expand Down
13 changes: 5 additions & 8 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

mod cli;
mod processor;
mod util;

use std::{
env,
Expand All @@ -23,17 +22,15 @@ use state_reconstruct_fetcher::{
l1_fetcher::{L1Fetcher, L1FetcherOptions},
types::CommitBlock,
};
use state_reconstruct_utils::json;
use tikv_jemallocator::Jemalloc;
use tokio::sync::mpsc;
use tracing_subscriber::{filter::LevelFilter, EnvFilter};

use crate::{
processor::{
json::JsonSerializationProcessor,
tree::{query_tree::QueryTree, TreeProcessor},
Processor,
},
util::json,
use crate::processor::{
json::JsonSerializationProcessor,
tree::{query_tree::QueryTree, TreeProcessor},
Processor,
};

#[global_allocator]
Expand Down
9 changes: 7 additions & 2 deletions src/processor/snapshot/importer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ use ethers::types::U64;
use eyre::Result;
use regex::{Captures, Regex};
use state_reconstruct_storage::types::{
Proto, SnapshotFactoryDependencies, SnapshotHeader, SnapshotStorageLogsChunk,
LegacyProto, Proto, SnapshotFactoryDependencies, SnapshotHeader, SnapshotStorageLogsChunk,
SnapshotStorageLogsChunkMetadata,
SnapshotVersion::{Version0, Version1},
};
use tokio::sync::mpsc::{self, Sender};

Expand Down Expand Up @@ -87,7 +88,11 @@ impl SnapshotImporter {
let total_chunks = filepaths.len();
for (i, path) in filepaths.into_iter().enumerate() {
let bytes = fs::read(path)?;
let storage_logs_chunk = SnapshotStorageLogsChunk::decode(&bytes)?;

let storage_logs_chunk = match header.version {
Version0 => SnapshotStorageLogsChunk::decode_legacy(&bytes)?,
Version1 => SnapshotStorageLogsChunk::decode(&bytes)?,
};
tracing::info!("Read chunk {}/{}, processing...", i + 1, total_chunks);
tx.send(storage_logs_chunk).await?;
}
Expand Down
16 changes: 2 additions & 14 deletions src/processor/snapshot/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ pub mod exporter;
pub mod importer;

use async_trait::async_trait;
use blake2::{Blake2s256, Digest};
use ethers::types::{Address, H256, U256, U64};
use eyre::Result;
use state_reconstruct_fetcher::{
Expand All @@ -17,13 +16,13 @@ use state_reconstruct_storage::{
bytecode,
types::{SnapshotFactoryDependency, SnapshotStorageLog},
};
use state_reconstruct_utils::{derive_final_address_for_params, h256_to_u256, unpack_block_info};
use tokio::sync::mpsc;

use super::Processor;
use crate::util::{h256_to_u256, unpack_block_info};

pub const DEFAULT_DB_PATH: &str = "snapshot_db";
pub const SNAPSHOT_HEADER_FILE_NAME: &str = "snapshot-header.json";
pub const SNAPSHOT_HEADER_FILE_NAME: &str = "snapshot_header.json";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why? I bet I'll confuse the name when following some old doc, sooner or later...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every other snapshot file name uses _ over - as a separator. I don't think we should deviate from that

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK...

pub const SNAPSHOT_FACTORY_DEPS_FILE_NAME_SUFFIX: &str = "factory_deps.proto.gzip";

pub struct SnapshotBuilder {
Expand Down Expand Up @@ -235,17 +234,6 @@ fn reconstruct_genesis_state(database: &mut SnapshotDatabase, path: &str) -> Res
Ok(())
}

fn derive_final_address_for_params(address: &Address, key: &U256) -> [u8; 32] {
    // Build the 64-byte hash preimage: bytes 0..12 are zero padding,
    // 12..32 hold the 20-byte account address, 32..64 the storage key
    // serialized big-endian.
    let mut buffer = [0u8; 64];
    buffer[12..32].copy_from_slice(&address.0);
    key.to_big_endian(&mut buffer[32..64]);

    // Blake2s-256 over the preimage produces the 32-byte derived key.
    let mut result = [0u8; 32];
    result.copy_from_slice(Blake2s256::digest(buffer).as_slice());

    result
}

#[cfg(test)]
mod tests {
use std::fs;
Expand Down
13 changes: 1 addition & 12 deletions src/processor/tree/tree_wrapper.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::{collections::HashMap, fs, num::NonZeroU32, path::Path, str::FromStr, sync::Arc};

use blake2::{Blake2s256, Digest};
use ethers::types::{Address, H256, U256, U64};
use eyre::Result;
use state_reconstruct_fetcher::{
Expand All @@ -10,6 +9,7 @@ use state_reconstruct_fetcher::{
use state_reconstruct_storage::{
reconstruction::ReconstructionDatabase, types::SnapshotStorageLogsChunk, PackingType,
};
use state_reconstruct_utils::derive_final_address_for_params;
use thiserror::Error;
use tokio::sync::{
mpsc::{self, Receiver},
Expand Down Expand Up @@ -326,14 +326,3 @@ fn reconstruct_genesis_state<D: Database>(

Ok(())
}

fn derive_final_address_for_params(address: &Address, key: &U256) -> [u8; 32] {
    // 64-byte preimage layout: [0..12] zero padding, [12..32] account
    // address, [32..64] storage key in big-endian byte order.
    let mut buffer = [0u8; 64];
    buffer[12..32].copy_from_slice(&address.0);
    key.to_big_endian(&mut buffer[32..64]);

    // Hash the preimage with Blake2s-256; the digest is exactly 32 bytes.
    let mut result = [0u8; 32];
    result.copy_from_slice(Blake2s256::digest(buffer).as_slice());

    result
}
1 change: 1 addition & 0 deletions state-reconstruct-storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ prost = "0.12.4"
rocksdb = "0.21.0"
thiserror = "1.0.50"
zkevm_opcode_defs = { git = "https://github.com/matter-labs/era-zkevm_opcode_defs.git" }
state-reconstruct-utils = { path = "../state-reconstruct-utils" }

[build-dependencies]
prost-build = "0.12.4"
61 changes: 59 additions & 2 deletions state-reconstruct-storage/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ use std::{
};

use bytes::BytesMut;
use ethers::types::{H256, U256, U64};
use ethers::types::{Address, H256, U256, U64};
use eyre::Result;
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use prost::Message;
use serde::{Deserialize, Serialize};
use serde_repr::{Deserialize_repr, Serialize_repr};
use state_reconstruct_utils::derive_final_address_for_params;

use super::bytecode;

Expand Down Expand Up @@ -73,6 +74,27 @@ pub trait Proto {
}
}

/// Deserialization support for the legacy (`Version0`) gzip-compressed
/// protobuf snapshot format.
pub trait LegacyProto {
    /// The generated protobuf message type this struct is decoded from.
    type ProtoStruct: Message + Default;

    /// Convert an already-decoded legacy protobuf message into [`Self`].
    fn from_legacy_proto(proto: Self::ProtoStruct) -> Result<Self>
    where
        Self: Sized;

    /// Decode a slice of gzip-compressed bytes into [`Self`].
    fn decode_legacy(bytes: &[u8]) -> Result<Self>
    where
        Self: Sized,
    {
        // Fully decompress the gzip payload before handing it to prost.
        let mut decompressed = Vec::new();
        GzDecoder::new(bytes).read_to_end(&mut decompressed)?;

        Self::from_legacy_proto(Self::ProtoStruct::decode(decompressed.as_slice())?)
    }
}

/// Version of snapshot influencing the format of data stored in GCS.
#[derive(Clone, Default, Debug, Serialize_repr, Deserialize_repr)]
#[repr(u16)]
Expand Down Expand Up @@ -140,6 +162,20 @@ impl Proto for SnapshotStorageLogsChunk {
}
}

impl LegacyProto for SnapshotStorageLogsChunk {
    type ProtoStruct = protobuf::SnapshotStorageLogsChunk;

    /// Build a chunk by converting every contained storage log from its
    /// legacy protobuf representation, failing on the first bad entry.
    fn from_legacy_proto(proto: Self::ProtoStruct) -> Result<Self> {
        let mut storage_logs = Vec::with_capacity(proto.storage_logs.len());
        for log in proto.storage_logs {
            storage_logs.push(SnapshotStorageLog::from_legacy_proto(log)?);
        }

        Ok(Self { storage_logs })
    }
}

// "most recent" for each key together with info when the key was first used
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct SnapshotStorageLog {
Expand Down Expand Up @@ -169,7 +205,28 @@ impl Proto for SnapshotStorageLog {
fn from_proto(proto: Self::ProtoStruct) -> Result<Self> {
let value_bytes: [u8; 32] = proto.storage_value().try_into()?;
Ok(Self {
key: U256::from_big_endian(proto.hashed_key()),
key: StorageKey::from_big_endian(proto.hashed_key()),
value: StorageValue::from(&value_bytes),
l1_batch_number_of_initial_write: proto.l1_batch_number_of_initial_write().into(),
enumeration_index: proto.enumeration_index(),
})
}
}

impl LegacyProto for SnapshotStorageLog {
type ProtoStruct = protobuf::SnapshotStorageLog;

fn from_legacy_proto(proto: Self::ProtoStruct) -> Result<Self> {
let address_bytes: [u8; 20] = proto.account_address().try_into()?;
let address = Address::from(address_bytes);
let storage_key = StorageKey::from_big_endian(proto.storage_key());
let hashed_key = StorageKey::from_little_endian(&derive_final_address_for_params(
&address,
&storage_key,
));
let value_bytes: [u8; 32] = proto.storage_value().try_into()?;
Ok(Self {
key: hashed_key,
value: StorageValue::from(&value_bytes),
l1_batch_number_of_initial_write: proto.l1_batch_number_of_initial_write().into(),
enumeration_index: proto.enumeration_index(),
Expand Down
16 changes: 16 additions & 0 deletions state-reconstruct-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "state-reconstruct-utils"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
blake2 = "0.10.6"
ethers = "2.0.14"
primitive-types = "0.12.2"
serde = "1.0.204"
serde_json = "1.0.122"
zksync_storage = { git = "https://github.com/matter-labs/zksync-era.git" }

[build-dependencies]
File renamed without changes.
13 changes: 13 additions & 0 deletions src/util/mod.rs → state-reconstruct-utils/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use blake2::{Blake2s256, Digest};
use ethers::types::Address;
use primitive_types::{H256, U256};

pub mod json;
Expand All @@ -14,3 +16,14 @@ pub fn unpack_block_info(info: U256) -> (u64, u64) {
let block_timestamp = (info % SYSTEM_BLOCK_INFO_BLOCK_NUMBER_MULTIPLIER).as_u64();
(block_number, block_timestamp)
}

/// Derive the 32-byte hashed storage key for an (account address, storage key)
/// pair by Blake2s-256-hashing a fixed 64-byte preimage.
pub fn derive_final_address_for_params(address: &Address, key: &U256) -> [u8; 32] {
    // Preimage layout: [0..12] zero padding, [12..32] the 20-byte address,
    // [32..64] the storage key serialized big-endian.
    let mut preimage = [0u8; 64];
    preimage[12..32].copy_from_slice(&address.0);
    key.to_big_endian(&mut preimage[32..64]);

    // Blake2s-256 always emits exactly 32 bytes, so this copy cannot panic.
    let mut hashed = [0u8; 32];
    hashed.copy_from_slice(Blake2s256::digest(preimage).as_slice());

    hashed
}