diff --git a/Cargo.lock b/Cargo.lock index d169b91ea..ac94b86c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.7" @@ -506,6 +517,30 @@ dependencies = [ "serde_with 1.14.0", ] +[[package]] +name = "borsh" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f58b559fd6448c6e2fd0adb5720cd98a2506594cafa4737ff98c396f3e82f667" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aadb5b6ccbd078890f6d7003694e33816e6b784358f18e15e7e6d9f065a57cd" +dependencies = [ + "once_cell", + "proc-macro-crate 3.1.0", + "proc-macro2", + "quote", + "syn 2.0.48", + "syn_derive", +] + [[package]] name = "bumpalo" version = "3.14.0" @@ -518,6 +553,39 @@ version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" +[[package]] +name = "byte-unit" +version = "5.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ac19bdf0b2665407c39d82dbc937e951e7e2001609f0fb32edd0af45a2d63e" +dependencies = [ + "rust_decimal", + "serde", + "utf8-width", +] + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -562,6 +630,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chrono" version = "0.4.33" @@ -1574,6 +1648,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] name = "hashbrown" @@ -1581,7 +1658,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038" dependencies = [ - "ahash", + "ahash 0.8.7", ] [[package]] @@ -1590,7 +1667,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash", + "ahash 0.8.7", "allocator-api2", ] @@ -2177,7 +2254,7 @@ version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5" dependencies = [ - "ahash", + "ahash 0.8.7", "metrics-macros", "portable-atomic", ] @@ -2812,6 +2889,29 @@ dependencies = [ "toml_edit 0.21.1", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.20+deprecated" @@ -2847,6 +2947,26 @@ dependencies = [ "unarray", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quanta" version = "0.11.1" @@ -2994,6 +3114,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.11.24" @@ -3130,6 +3259,35 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "rkyv" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rlp" version = "0.5.2" @@ -3208,6 +3366,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e666a5496a0b2186dbcd0ff6106e29e093c15591bde62c20d3842007c6978a09" +[[package]] +name = "rust_decimal" +version = "1.34.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39449a79f45e8da28c57c341891b69a183044b29518bb8f86dbac9df60bb7df" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -3427,6 +3601,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "sec1" version = "0.7.3" @@ -3706,6 +3886,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "siphasher" version = "0.3.11" @@ -3830,7 +4016,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d84b0a3c3739e220d94b3239fd69fb1f74bc36e16643423bd99de3b43c21bfbd" dependencies = [ - "ahash", + "ahash 0.8.7", "atoi", "bigdecimal", "byteorder", @@ -4029,6 +4215,7 @@ dependencies = [ "anyhow", "async-trait", "binary_macros", + "byte-unit", "chrono", "clap", "const-hex", @@ -4169,6 +4356,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "sync_wrapper" version = "0.1.2" @@ -4669,12 +4868,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf8-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" + [[package]] name = "valuable" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 74a901bfb..0e11dd412 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ default-run = "stratus" # stdx anyhow = "1.0.79" async-trait = "0.1.77" +byte-unit = "5.1.4" chrono = "0.4.31" clap = { version = "4.4.18", features = ["derive", "env"] } const_format = "0.2.32" diff --git a/src/eth/storage/csv/csv_exporter.rs b/src/eth/storage/csv/csv_exporter.rs index 68f50272a..62a9ae85a 100644 --- a/src/eth/storage/csv/csv_exporter.rs +++ b/src/eth/storage/csv/csv_exporter.rs @@ -3,6 +3,8 @@ use std::fs::File; use anyhow::Context; use anyhow::Ok; +use byte_unit::Byte; +use byte_unit::Unit; use itertools::Itertools; use crate::eth::primitives::Account; @@ -14,7 +16,7 @@ use crate::eth::primitives::TransactionMined; // Constants // ----------------------------------------------------------------------------- -const ACCOUNT_FILE: &str = "data/accounts"; +const ACCOUNTS_FILE: &str = "data/accounts"; const ACCOUNTS_HEADERS: [&str; 10] = [ "id", @@ -64,10 +66,10 @@ pub struct CsvExporter { staged_blocks: Vec, accounts_csv: csv::Writer, - accounts_id: usize, + accounts_id: LastId, transactions_csv: csv::Writer, - transactions_id: usize, + transactions_id: LastId, } impl CsvExporter { @@ -77,16 +79,14 @@ impl CsvExporter { staged_blocks: Vec::new(), staged_accounts: Vec::new(), - accounts_csv: csv_writer(ACCOUNT_FILE, BlockNumber::ZERO, &ACCOUNTS_HEADERS)?, - accounts_id: 0, + accounts_csv: csv_writer(ACCOUNTS_FILE, BlockNumber::ZERO, &ACCOUNTS_HEADERS)?, + accounts_id: LastId::new_zero(ACCOUNTS_FILE), transactions_csv: csv_writer(TRANSACTIONS_FILE, number, &TRANSACTIONS_HEADERS)?, - transactions_id: read_csv_last_id(TRANSACTIONS_FILE)?, + transactions_id: LastId::new(TRANSACTIONS_FILE)?, }) } -} -impl CsvExporter { // ------------------------------------------------------------------------- // Stagers // ------------------------------------------------------------------------- @@ -111,19 +111,24 @@ impl CsvExporter { let accounts = self.staged_accounts.drain(..).collect_vec(); self.export_accounts(accounts)?; - // export blocks + // export block parts let blocks = self.staged_blocks.drain(..).collect_vec(); for block in blocks { self.export_transactions(block.transactions)?; } + + // flush + self.transactions_csv.flush()?; + self.transactions_id.save()?; + Ok(()) } fn export_accounts(&mut self, accounts: Vec) -> anyhow::Result<()> { for account in accounts { - self.accounts_id += 1; + self.accounts_id.value += 1; let row = [ - self.accounts_id.to_string(), // id + self.accounts_id.value.to_string(), // id account.address.to_string(), // address account.bytecode.map(|x| x.to_string()).unwrap_or_default(), // bytecode account.balance.to_string(), // latest_balance @@ -142,9 +147,9 @@ impl CsvExporter { fn export_transactions(&mut self, transactions: Vec) -> anyhow::Result<()> { for tx in transactions { - self.transactions_id += 1; + self.transactions_id.value += 1; let row = [ - self.transactions_id.to_string(), // id + self.transactions_id.value.to_string(), // id tx.input.hash.to_string(), // hash tx.input.from.to_string(), // signer_address tx.input.nonce.to_string(), // nonce @@ -167,8 +172,6 @@ impl CsvExporter { ]; self.transactions_csv.write_record(row).context("failed to write csv transaction")?; } - self.transactions_csv.flush()?; - write_csv_last_id(TRANSACTIONS_FILE, self.transactions_id)?; Ok(()) } } @@ -177,13 +180,47 @@ impl CsvExporter { // Helpers // ----------------------------------------------------------------------------- +struct LastId { + file: String, + value: usize, +} + +impl LastId { + /// Creates a new instance with default zero value. + fn new_zero(base_path: &'static str) -> Self { + let file = format!("{}-last-id.txt", base_path); + Self { file, value: 0 } + } + + /// Creates a new instance from an existing saved file or assumes default value when file does not exist. + fn new(base_path: &'static str) -> anyhow::Result { + let mut id = Self::new_zero(base_path); + + // when file exist, read value from file + if fs::metadata(&id.file).is_ok() { + let content = fs::read_to_string(&id.file).context("failed to read last_id file")?; + id.value = content.parse().context("failed to parse last_id file content")?; + } + + Ok(id) + } + + /// Saves the current value to file. + fn save(&self) -> anyhow::Result<()> { + fs::write(&self.file, self.value.to_string()).context("failed to write last_id file")?; + Ok(()) + } +} + /// Creates a new CSV writer at the specified path. If the file exists, it will overwrite it. fn csv_writer(base_path: &'static str, number: BlockNumber, headers: &[&'static str]) -> anyhow::Result> { let path = format!("{}-{}.csv", base_path, number); + let buffer_size = Byte::from_u64_with_unit(4, Unit::MiB).unwrap(); let mut writer = csv::WriterBuilder::new() .has_headers(true) .delimiter(b'\t') .quote_style(csv::QuoteStyle::Always) + .buffer_capacity(buffer_size.as_u64() as usize) .from_path(path) .context("failed to create csv writer")?; @@ -192,27 +229,6 @@ fn csv_writer(base_path: &'static str, number: BlockNumber, headers: &[&'static Ok(writer) } -/// Reads the last id saved to a CSV file. -fn read_csv_last_id(base_path: &'static str) -> anyhow::Result { - let file = format!("{}-last-id.txt", base_path); - - // when file does not exist, assume 0 - if fs::metadata(file.clone()).is_err() { - return Ok(0); - } - - // when file exists, read the last id from file - let content = fs::read_to_string(file).context("failed to read last_id file")?; - let id = content.parse().context("failed to parse last_id file content")?; - Ok(id) -} - -fn write_csv_last_id(base_path: &'static str, id: usize) -> anyhow::Result<()> { - let file = format!("{}-last-id.txt", base_path); - fs::write(file, id.to_string()).context("failed to write last_id file")?; - Ok(()) -} - /// Returns the current date formatted for the CSV file. fn now() -> String { let now = chrono::Utc::now();