From 4fc58a0812425064c3b288c660d6cf32239f85bc Mon Sep 17 00:00:00 2001 From: carneiro-cw <156914855+carneiro-cw@users.noreply.github.com> Date: Tue, 17 Dec 2024 01:25:27 -0300 Subject: [PATCH] enha: rework rocksdb configuration (#1871) * enha: rework rocksdb configuration --- Cargo.lock | 5 + Cargo.toml | 4 +- src/eth/storage/permanent/rocks/mod.rs | 4 +- .../storage/permanent/rocks/rocks_config.rs | 153 ++++-------------- src/eth/storage/permanent/rocks/rocks_db.rs | 2 +- .../permanent/rocks/rocks_permanent.rs | 6 - .../storage/permanent/rocks/rocks_state.rs | 29 +--- 7 files changed, 50 insertions(+), 153 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34b9005a5..287576cac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -526,6 +526,7 @@ checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ "funty", "radium", + "serde", "tap", "wyz", ] @@ -1744,6 +1745,7 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash 0.8.11", "allocator-api2", + "serde", ] [[package]] @@ -1785,6 +1787,9 @@ name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +dependencies = [ + "serde", +] [[package]] name = "hex-literal" diff --git a/Cargo.toml b/Cargo.toml index efb72fa0c..293cb1bb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -132,10 +132,10 @@ indicatif = "=0.17.8" # ------------------------------------------------------------------------------ [target.'cfg(not(all(target_arch = "aarch64", target_os = "linux")))'.dependencies] -revm = { version = "=9.0.0", features = ["asm-keccak"] } +revm = { version = "=9.0.0", features = ["asm-keccak", "serde"] } [target.'cfg(all(target_arch = "aarch64", target_os = "linux"))'.dependencies] -revm = { version = "=9.0.0" } +revm = { version = "=9.0.0", features = ["serde"]} [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = { version = "=0.6", optional = true } diff --git a/src/eth/storage/permanent/rocks/mod.rs b/src/eth/storage/permanent/rocks/mod.rs index e868b02dc..214324b0d 100644 --- a/src/eth/storage/permanent/rocks/mod.rs +++ b/src/eth/storage/permanent/rocks/mod.rs @@ -7,7 +7,7 @@ pub use rocks_state::RocksStorageState; mod rocks_permanent; /// State handler for DB and column families. -mod rocks_state; +pub mod rocks_state; /// CFs versionated by value variant. mod cf_versions; @@ -19,7 +19,7 @@ mod rocks_cf; mod rocks_config; /// Functionalities related to the whole database. -mod rocks_db; +pub mod rocks_db; /// All types to be serialized and desserialized in the db. 
pub mod types; diff --git a/src/eth/storage/permanent/rocks/rocks_config.rs b/src/eth/storage/permanent/rocks/rocks_config.rs index 25a6d5218..4916215ef 100644 --- a/src/eth/storage/permanent/rocks/rocks_config.rs +++ b/src/eth/storage/permanent/rocks/rocks_config.rs @@ -2,14 +2,6 @@ use rocksdb::BlockBasedOptions; use rocksdb::Cache; use rocksdb::Options; -const GIGABYTE: usize = 1024 * 1024 * 1024; -const MEGABYTE: usize = 1024 * 1024; -const KILOBYTE: usize = 1024; - -const GIGABYTE_U64: u64 = 1024 * 1024 * 1024; -const MEGABYTE_U64: u64 = 1024 * 1024; - -#[derive(Debug, Clone, Copy)] pub enum CacheSetting { /// Enabled cache with the given size in bytes Enabled(usize), @@ -18,8 +10,7 @@ pub enum CacheSetting { #[derive(Debug, Clone, Copy)] pub enum DbConfig { - LargeSSTFiles, - FastWriteSST, + OptimizedPointLookUp, Default, } @@ -30,7 +21,7 @@ impl Default for DbConfig { } impl DbConfig { - pub fn to_options(self, cache_setting: CacheSetting) -> Options { + pub fn to_options(self, cache_setting: CacheSetting, prefix_len: Option) -> Options { let mut opts = Options::default(); let mut block_based_options = BlockBasedOptions::default(); @@ -38,131 +29,51 @@ impl DbConfig { opts.create_missing_column_families(true); opts.increase_parallelism(16); - // NOTE: As per the rocks db wiki: "The overhead of statistics is usually small but non-negligible. We usually observe an overhead of 5%-10%." + block_based_options.set_pin_l0_filter_and_index_blocks_in_cache(true); + block_based_options.set_cache_index_and_filter_blocks(true); + block_based_options.set_bloom_filter(15.5, false); + + // due to the nature of our application enabling rocks metrics decreases point lookup performance by 5x. #[cfg(feature = "metrics")] { opts.enable_statistics(); opts.set_statistics_level(rocksdb::statistics::StatsLevel::ExceptTimeForMutex); } - match self { - DbConfig::LargeSSTFiles => { - // Set the compaction style to Level Compaction - opts.set_compaction_style(rocksdb::DBCompactionStyle::Level); - - // Configure the size of SST files at each level - opts.set_target_file_size_base(512 * MEGABYTE_U64); - - // Increase the file size multiplier to expand file size at upper levels - opts.set_target_file_size_multiplier(2); // Each level grows in file size quicker - - // Reduce the number of L0 files that trigger compaction, increasing frequency - opts.set_level_zero_file_num_compaction_trigger(2); - - // Reduce thresholds for slowing and stopping writes, which forces more frequent compaction - opts.set_level_zero_slowdown_writes_trigger(10); - opts.set_level_zero_stop_writes_trigger(20); - - // Increase the max bytes for L1 to allow more data before triggering compaction - opts.set_max_bytes_for_level_base(2 * GIGABYTE_U64); - - // Increase the level multiplier to aggressively increase space at each level - opts.set_max_bytes_for_level_multiplier(8.0); // Exponential growth of levels is more pronounced - - // Configure block size to optimize for larger blocks, improving sequential read performance - block_based_options.set_block_size(128 * KILOBYTE); - - // Increase the number of write buffers to delay flushing, optimizing CPU usage for compaction - opts.set_max_write_buffer_number(5); - opts.set_write_buffer_size(128 * MEGABYTE); // 128MB per write buffer - - // Keep a higher number of open files to accommodate more files being produced by aggressive compaction - opts.set_max_open_files(20_000); - - // Apply more aggressive compression settings, if I/O and CPU permit - opts.set_compression_per_level(&[ - 
rocksdb::DBCompressionType::Lz4, - rocksdb::DBCompressionType::Zstd, // Use Zstd for higher compression from L1 onwards - ]); - } - DbConfig::FastWriteSST => { - // Continue using Level Compaction due to its effective use of I/O and CPU for writes - opts.set_compaction_style(rocksdb::DBCompactionStyle::Level); - - // Increase initial SST file sizes to reduce the frequency of writes to disk - opts.set_target_file_size_base(512 * MEGABYTE_U64); // Starting at 512MB for L1 - - // Minimize the file size multiplier to control the growth of file sizes at upper levels - opts.set_target_file_size_multiplier(1); // Minimal increase in file size at upper levels - - // Increase triggers for write slowdown and stop to maximize buffer before I/O actions - opts.set_level_zero_file_num_compaction_trigger(100); // Slow down writes at 100 L0 files - opts.set_level_zero_stop_writes_trigger(200); // Stop writes at 200 L0 files - - // Expand the maximum bytes for base level to further delay the need for compaction-related I/O - opts.set_max_bytes_for_level_base(2048 * MEGABYTE_U64); - - // Use a higher level multiplier to increase space exponentially at higher levels - opts.set_max_bytes_for_level_multiplier(10.0); - - // Opt for larger block sizes to decrease the number of read and write operations to disk - block_based_options.set_block_size(512 * KILOBYTE); // 512KB blocks + if let Some(prefix_len) = prefix_len { + let transform = rocksdb::SliceTransform::create_fixed_prefix(prefix_len); + block_based_options.set_index_type(rocksdb::BlockBasedIndexType::HashSearch); + opts.set_memtable_prefix_bloom_ratio(0.15); + opts.set_prefix_extractor(transform); + } - // Maximize the use of write buffers to extend the time data stays in memory before flushing - opts.set_max_write_buffer_number(16); - opts.set_write_buffer_size(GIGABYTE); // 1GB per write buffer + if let CacheSetting::Enabled(cache_size) = cache_setting { + let block_cache = Cache::new_lru_cache(cache_size / 2); + let row_cache = Cache::new_lru_cache(cache_size / 2); - // Allow a very high number of open files to minimize the overhead of opening and closing files - opts.set_max_open_files(20_000); + opts.set_row_cache(&row_cache); + block_based_options.set_block_cache(&block_cache); + } - // Choose compression that balances CPU use and effective storage reduction - opts.set_compression_per_level(&[rocksdb::DBCompressionType::Lz4, rocksdb::DBCompressionType::Zstd]); + match self { + DbConfig::OptimizedPointLookUp => { + block_based_options.set_data_block_hash_ratio(0.3); + block_based_options.set_data_block_index_type(rocksdb::DataBlockIndexType::BinaryAndHash); - // Enable settings that make full use of CPU to handle more data in memory and process compaction - opts.set_allow_concurrent_memtable_write(true); - opts.set_enable_write_thread_adaptive_yield(true); + opts.set_use_direct_reads(true); + opts.set_memtable_whole_key_filtering(true); + opts.set_compression_type(rocksdb::DBCompressionType::None); } DbConfig::Default => { - block_based_options.set_ribbon_filter(15.5); // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter - - opts.set_allow_concurrent_memtable_write(true); - opts.set_enable_write_thread_adaptive_yield(true); - - let transform = rocksdb::SliceTransform::create_fixed_prefix(10); - opts.set_prefix_extractor(transform); - opts.set_memtable_prefix_bloom_ratio(0.2); - - // Enable a size-tiered compaction style, which is good for workloads with a high rate of updates and overwrites - 
opts.set_compaction_style(rocksdb::DBCompactionStyle::Universal); - - let mut universal_compact_options = rocksdb::UniversalCompactOptions::default(); - universal_compact_options.set_size_ratio(10); - universal_compact_options.set_min_merge_width(2); - universal_compact_options.set_max_merge_width(6); - universal_compact_options.set_max_size_amplification_percent(50); - universal_compact_options.set_compression_size_percent(-1); - universal_compact_options.set_stop_style(rocksdb::UniversalCompactionStopStyle::Total); - opts.set_universal_compaction_options(&universal_compact_options); - - let pt_opts = rocksdb::PlainTableFactoryOptions { - user_key_length: 0, - bloom_bits_per_key: 10, - hash_table_ratio: 0.75, - index_sparseness: 8, - encoding_type: rocksdb::KeyEncodingType::Plain, // Default encoding - full_scan_mode: false, // Optimized for point lookups rather than full scans - huge_page_tlb_size: 0, // Not using huge pages - store_index_in_file: false, // Store index in memory for faster access - }; - opts.set_plain_table_factory(&pt_opts); + opts.set_compression_type(rocksdb::DBCompressionType::Lz4); + opts.set_bottommost_compression_type(rocksdb::DBCompressionType::Zstd); + opts.set_bottommost_compression_options(-14, 32767, 0, 16 * 1024, true); // mostly defaults except max_dict_bytes + opts.set_bottommost_zstd_max_train_bytes(1600 * 1024, true); } } - if let CacheSetting::Enabled(cache_size) = cache_setting { - let cache = Cache::new_lru_cache(cache_size); - block_based_options.set_block_cache(&cache); - block_based_options.set_cache_index_and_filter_blocks(true); - } + opts.set_block_based_table_factory(&block_based_options); + opts } } diff --git a/src/eth/storage/permanent/rocks/rocks_db.rs b/src/eth/storage/permanent/rocks/rocks_db.rs index cf9f10c4a..0a843058a 100644 --- a/src/eth/storage/permanent/rocks/rocks_db.rs +++ b/src/eth/storage/permanent/rocks/rocks_db.rs @@ -25,7 +25,7 @@ pub fn create_or_open_db(path: impl AsRef, cf_configs: &HashMap<&'static s let cf_config_iter = cf_configs.iter().map(|(name, opts)| (*name, opts.clone())); tracing::debug!("generating options for column families"); - let db_opts = DbConfig::Default.to_options(CacheSetting::Disabled); + let db_opts = DbConfig::Default.to_options(CacheSetting::Disabled, None); if !path.exists() { tracing::warn!(?path, "RocksDB at path doesn't exist, creating a new one there instead"); diff --git a/src/eth/storage/permanent/rocks/rocks_permanent.rs b/src/eth/storage/permanent/rocks/rocks_permanent.rs index 90b6e7c60..6eb40ab3d 100644 --- a/src/eth/storage/permanent/rocks/rocks_permanent.rs +++ b/src/eth/storage/permanent/rocks/rocks_permanent.rs @@ -125,12 +125,6 @@ impl PermanentStorage for RocksPermanentStorage { } fn save_block(&self, block: Block) -> anyhow::Result<()> { - #[cfg(feature = "metrics")] - { - self.state.export_metrics().inspect_err(|e| { - tracing::error!(reason = ?e, "failed to export metrics in RocksPermanent"); - })?; - } self.state.save_block(block).inspect_err(|e| { tracing::error!(reason = ?e, "failed to save block in RocksPermanent"); }) diff --git a/src/eth/storage/permanent/rocks/rocks_state.rs b/src/eth/storage/permanent/rocks/rocks_state.rs index 9c5fa32f7..1581226c7 100644 --- a/src/eth/storage/permanent/rocks/rocks_state.rs +++ b/src/eth/storage/permanent/rocks/rocks_state.rs @@ -25,7 +25,6 @@ use super::cf_versions::CfAccountsHistoryValue; use super::cf_versions::CfAccountsValue; use super::cf_versions::CfBlocksByHashValue; use super::cf_versions::CfBlocksByNumberValue; -use 
super::cf_versions::CfLogsValue; use super::cf_versions::CfTransactionsValue; use super::rocks_cf::RocksCfRef; use super::rocks_config::CacheSetting; @@ -35,7 +34,6 @@ use super::types::AccountRocksdb; use super::types::AddressRocksdb; use super::types::BlockNumberRocksdb; use super::types::HashRocksdb; -use super::types::IndexRocksdb; use super::types::SlotIndexRocksdb; use super::types::SlotValueRocksdb; use crate::eth::primitives::Account; @@ -66,7 +64,7 @@ cfg_if::cfg_if! { } } -fn generate_cf_options_map(cache_multiplier: Option) -> HashMap<&'static str, Options> { +pub fn generate_cf_options_map(cache_multiplier: Option) -> HashMap<&'static str, Options> { let cache_multiplier = cache_multiplier.unwrap_or(1.0); // multiplies the given size in GBs by the cache multiplier @@ -77,14 +75,13 @@ fn generate_cf_options_map(cache_multiplier: Option) -> HashMap<&'static st }; hmap! { - "accounts" => DbConfig::Default.to_options(cached_in_gigs_and_multiplied(15)), - "accounts_history" => DbConfig::FastWriteSST.to_options(CacheSetting::Disabled), - "account_slots" => DbConfig::Default.to_options(cached_in_gigs_and_multiplied(45)), - "account_slots_history" => DbConfig::FastWriteSST.to_options(CacheSetting::Disabled), - "transactions" => DbConfig::LargeSSTFiles.to_options(CacheSetting::Disabled), - "blocks_by_number" => DbConfig::LargeSSTFiles.to_options(CacheSetting::Disabled), - "blocks_by_hash" => DbConfig::LargeSSTFiles.to_options(CacheSetting::Disabled), - "logs" => DbConfig::LargeSSTFiles.to_options(CacheSetting::Disabled), + "accounts" => DbConfig::OptimizedPointLookUp.to_options(cached_in_gigs_and_multiplied(15), None), + "accounts_history" => DbConfig::Default.to_options(CacheSetting::Disabled, Some(20)), + "account_slots" => DbConfig::OptimizedPointLookUp.to_options(cached_in_gigs_and_multiplied(45), Some(20)), + "account_slots_history" => DbConfig::Default.to_options(CacheSetting::Disabled, Some(52)), + "transactions" => DbConfig::Default.to_options(CacheSetting::Disabled, None), + "blocks_by_number" => DbConfig::Default.to_options(CacheSetting::Disabled, None), + "blocks_by_hash" => DbConfig::Default.to_options(CacheSetting::Disabled, None) } } @@ -117,7 +114,6 @@ pub struct RocksStorageState { pub transactions: RocksCfRef, pub blocks_by_number: RocksCfRef, blocks_by_hash: RocksCfRef, - logs: RocksCfRef<(HashRocksdb, IndexRocksdb), CfLogsValue>, /// Last collected stats for a histogram #[cfg(feature = "metrics")] prev_stats: Mutex>, @@ -156,7 +152,6 @@ impl RocksStorageState { transactions: new_cf_ref(&db, "transactions", &cf_options_map)?, blocks_by_number: new_cf_ref(&db, "blocks_by_number", &cf_options_map)?, blocks_by_hash: new_cf_ref(&db, "blocks_by_hash", &cf_options_map)?, - logs: new_cf_ref(&db, "logs", &cf_options_map)?, #[cfg(feature = "metrics")] prev_stats: Mutex::default(), #[cfg(feature = "metrics")] @@ -200,7 +195,6 @@ impl RocksStorageState { self.transactions.clear()?; self.blocks_by_number.clear()?; self.blocks_by_hash.clear()?; - self.logs.clear()?; Ok(()) } @@ -432,16 +426,11 @@ impl RocksStorageState { let account_changes = block.compact_account_changes(); let mut txs_batch = vec![]; - let mut logs_batch = vec![]; for transaction in block.transactions.iter().cloned() { txs_batch.push((transaction.input.hash.into(), transaction.block_number.into())); - for log in transaction.logs { - logs_batch.push(((transaction.input.hash.into(), log.log_index.into()), transaction.block_number.into())); - } } self.transactions.prepare_batch_insertion(txs_batch, batch)?; - 
self.logs.prepare_batch_insertion(logs_batch, batch)?; let number = block.number(); let block_hash = block.hash(); @@ -521,7 +510,6 @@ impl RocksStorageState { self.transactions.clear().context("when clearing transactions")?; self.blocks_by_hash.clear().context("when clearing blocks_by_hash")?; self.blocks_by_number.clear().context("when clearing blocks_by_number")?; - self.logs.clear().context("when clearing logs")?; Ok(()) } } @@ -587,7 +575,6 @@ impl RocksStorageState { self.accounts_history.export_metrics(); self.blocks_by_hash.export_metrics(); self.blocks_by_number.export_metrics(); - self.logs.export_metrics(); self.transactions.export_metrics(); Ok(()) }