From 6a2fcd82e5f4746932e1802e76e1d4031e475615 Mon Sep 17 00:00:00 2001 From: renancloudwalk <53792026+renancloudwalk@users.noreply.github.com> Date: Wed, 24 Apr 2024 15:43:38 -0300 Subject: [PATCH] Rocksdb advanced tuning (#686) * chore: add FastWriteSST mode * chore: strategically use Zstd for lower levels * chore: add missing tweaks * fix: Lz4 import * lint --- src/eth/storage/rocks/rocks_state.rs | 4 +-- src/eth/storage/rocks_db.rs | 46 +++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/eth/storage/rocks/rocks_state.rs b/src/eth/storage/rocks/rocks_state.rs index 639642ead..941e8e97c 100644 --- a/src/eth/storage/rocks/rocks_state.rs +++ b/src/eth/storage/rocks/rocks_state.rs @@ -57,9 +57,9 @@ impl Default for RocksStorageState { let state = Self { accounts: Arc::new(RocksDb::new("./data/accounts.rocksdb", DbConfig::Default).unwrap()), - accounts_history: Arc::new(RocksDb::new("./data/accounts_history.rocksdb", DbConfig::LargeSSTFiles).unwrap()), + accounts_history: Arc::new(RocksDb::new("./data/accounts_history.rocksdb", DbConfig::FastWriteSST).unwrap()), account_slots: Arc::new(RocksDb::new("./data/account_slots.rocksdb", DbConfig::Default).unwrap()), - account_slots_history: Arc::new(RocksDb::new("./data/account_slots_history.rocksdb", DbConfig::LargeSSTFiles).unwrap()), + account_slots_history: Arc::new(RocksDb::new("./data/account_slots_history.rocksdb", DbConfig::FastWriteSST).unwrap()), transactions: Arc::new(RocksDb::new("./data/transactions.rocksdb", DbConfig::LargeSSTFiles).unwrap()), blocks_by_number: Arc::new(RocksDb::new("./data/blocks_by_number.rocksdb", DbConfig::LargeSSTFiles).unwrap()), blocks_by_hash: Arc::new(RocksDb::new("./data/blocks_by_hash.rocksdb", DbConfig::LargeSSTFiles).unwrap()), //XXX this is not needed we can afford to have blocks_by_hash pointing into blocks_by_number diff --git a/src/eth/storage/rocks_db.rs b/src/eth/storage/rocks_db.rs index 2b3f6cc69..eb9e17cd9 100644 --- 
a/src/eth/storage/rocks_db.rs +++ b/src/eth/storage/rocks_db.rs @@ -17,6 +17,7 @@ use serde::Serialize; pub enum DbConfig { LargeSSTFiles, + FastWriteSST, Default, } @@ -40,7 +41,7 @@ impl Deserialize<'de> + std::hash::Hash + Eq, V: Seriali opts.set_compaction_style(rocksdb::DBCompactionStyle::Level); // Configure the size of SST files at each level - opts.set_target_file_size_base(64 * 1024 * 1024); // Starting at 64MB for L1 + opts.set_target_file_size_base(512 * 1024 * 1024); // Increase the file size multiplier to expand file size at upper levels opts.set_target_file_size_multiplier(2); // Each level grows in file size quicker @@ -53,7 +54,7 @@ impl Deserialize<'de> + std::hash::Hash + Eq, V: Seriali opts.set_level_zero_stop_writes_trigger(20); // Increase the max bytes for L1 to allow more data before triggering compaction - opts.set_max_bytes_for_level_base(512 * 1024 * 1024); // Setting it to 512MB + opts.set_max_bytes_for_level_base(2048 * 1024 * 1024); // Increase the level multiplier to aggressively increase space at each level opts.set_max_bytes_for_level_multiplier(8.0); // Exponential growth of levels is more pronounced @@ -66,14 +67,51 @@ impl Deserialize<'de> + std::hash::Hash + Eq, V: Seriali opts.set_write_buffer_size(128 * 1024 * 1024); // 128MB per write buffer // Keep a higher number of open files to accommodate more files being produced by aggressive compaction - opts.set_max_open_files(2000); + opts.set_max_open_files(20000); // Apply more aggressive compression settings, if I/O and CPU permit opts.set_compression_per_level(&[ - rocksdb::DBCompressionType::None, // No compression for L0 + rocksdb::DBCompressionType::Lz4, rocksdb::DBCompressionType::Zstd, // Use Zstd for higher compression from L1 onwards ]); } + DbConfig::FastWriteSST => { + // Continue using Level Compaction due to its effective use of I/O and CPU for writes + opts.set_compaction_style(rocksdb::DBCompactionStyle::Level); + + // Increase initial SST file sizes to reduce the 
frequency of writes to disk + opts.set_target_file_size_base(512 * 1024 * 1024); // Starting at 512MB for L1 + + // Minimize the file size multiplier to control the growth of file sizes at upper levels + opts.set_target_file_size_multiplier(1); // Minimal increase in file size at upper levels + + // Raise the L0 compaction and write-stop triggers to maximize buffering before I/O actions + opts.set_level_zero_file_num_compaction_trigger(100); // Trigger L0 compaction at 100 L0 files + opts.set_level_zero_stop_writes_trigger(200); // Stop writes at 200 L0 files + + // Expand the maximum bytes for base level to further delay the need for compaction-related I/O + opts.set_max_bytes_for_level_base(2048 * 1024 * 1024); + + // Use a higher level multiplier to increase space exponentially at higher levels + opts.set_max_bytes_for_level_multiplier(10.0); + + // Opt for larger block sizes to decrease the number of read and write operations to disk + block_based_options.set_block_size(512 * 1024); // 512KB blocks + + // Maximize the use of write buffers to extend the time data stays in memory before flushing + opts.set_max_write_buffer_number(16); + opts.set_write_buffer_size(1024 * 1024 * 1024); // 1GB per write buffer + + // Allow a very high number of open files to minimize the overhead of opening and closing files + opts.set_max_open_files(20000); + + // Choose compression that balances CPU use and effective storage reduction + opts.set_compression_per_level(&[rocksdb::DBCompressionType::Lz4, rocksdb::DBCompressionType::Zstd]); + + // Enable settings that make full use of CPU to handle more data in memory and process compaction + opts.set_allow_concurrent_memtable_write(true); + opts.set_enable_write_thread_adaptive_yield(true); + } DbConfig::Default => { block_based_options.set_ribbon_filter(15.5); // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter