From 57572d59c8b21d303663ac5338aed597138a5247 Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" Date: Wed, 3 Apr 2024 09:19:01 -0500 Subject: [PATCH] add scan_index for improving index generation (#524) * add scan_index for improving index generation * pr feedback * rework some stuff from pr feedback * get rid of redundant if * deal with rent correctly --- accounts-db/src/accounts_db.rs | 107 +++++++++++++++++++++---------- accounts-db/src/accounts_file.rs | 10 ++- accounts-db/src/append_vec.rs | 69 ++++++++++++++++++-- 3 files changed, 144 insertions(+), 42 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 3e9eaa1445..76e3d76a29 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -8642,8 +8642,6 @@ impl AccountsDb { if accounts.next().is_none() { return SlotIndexGenerationInfo::default(); } - let accounts = storage.accounts.account_iter(); - let secondary = !self.account_indexes.is_empty(); let mut rent_paying_accounts_by_partition = Vec::default(); @@ -8652,46 +8650,87 @@ impl AccountsDb { let mut amount_to_top_off_rent = 0; let mut stored_size_alive = 0; - let items = accounts.map(|stored_account| { - stored_size_alive += stored_account.stored_size(); - let pubkey = stored_account.pubkey(); - if secondary { + let (dirty_pubkeys, insert_time_us, mut generate_index_results) = if !secondary { + let mut items_local = Vec::default(); + storage.accounts.scan_index(|info| { + stored_size_alive += info.stored_size_aligned; + if info.index_info.lamports > 0 { + accounts_data_len += info.index_info.data_len; + } + items_local.push(info.index_info); + }); + let items = items_local.into_iter().map(|info| { + if let Some(amount_to_top_off_rent_this_account) = Self::stats_for_rent_payers( + &info.pubkey, + info.lamports, + info.data_len as usize, + info.rent_epoch, + info.executable, + rent_collector, + ) { + amount_to_top_off_rent += amount_to_top_off_rent_this_account; + 
num_accounts_rent_paying += 1; + // remember this rent-paying account pubkey + rent_paying_accounts_by_partition.push(info.pubkey); + } + + ( + info.pubkey, + AccountInfo::new( + StorageLocation::AppendVec(store_id, info.offset), // will never be cached + info.lamports, + ), + ) + }); + self.accounts_index + .insert_new_if_missing_into_primary_index( + slot, + storage.approx_stored_count(), + items, + ) + } else { + let accounts = storage.accounts.account_iter(); + let items = accounts.map(|stored_account| { + stored_size_alive += stored_account.stored_size(); + let pubkey = stored_account.pubkey(); self.accounts_index.update_secondary_indexes( pubkey, &stored_account, &self.account_indexes, ); - } - if !stored_account.is_zero_lamport() { - accounts_data_len += stored_account.data().len() as u64; - } - - if let Some(amount_to_top_off_rent_this_account) = Self::stats_for_rent_payers( - pubkey, - stored_account.lamports(), - stored_account.data().len(), - stored_account.rent_epoch(), - stored_account.executable(), - rent_collector, - ) { - amount_to_top_off_rent += amount_to_top_off_rent_this_account; - num_accounts_rent_paying += 1; - // remember this rent-paying account pubkey - rent_paying_accounts_by_partition.push(*pubkey); - } + if !stored_account.is_zero_lamport() { + accounts_data_len += stored_account.data().len() as u64; + } - ( - *pubkey, - AccountInfo::new( - StorageLocation::AppendVec(store_id, stored_account.offset()), // will never be cached + if let Some(amount_to_top_off_rent_this_account) = Self::stats_for_rent_payers( + pubkey, stored_account.lamports(), - ), - ) - }); + stored_account.data().len(), + stored_account.rent_epoch(), + stored_account.executable(), + rent_collector, + ) { + amount_to_top_off_rent += amount_to_top_off_rent_this_account; + num_accounts_rent_paying += 1; + // remember this rent-paying account pubkey + rent_paying_accounts_by_partition.push(*pubkey); + } - let (dirty_pubkeys, insert_time_us, mut generate_index_results) = 
self - .accounts_index - .insert_new_if_missing_into_primary_index(slot, storage.approx_stored_count(), items); + ( + *pubkey, + AccountInfo::new( + StorageLocation::AppendVec(store_id, stored_account.offset()), // will never be cached + stored_account.lamports(), + ), + ) + }); + self.accounts_index + .insert_new_if_missing_into_primary_index( + slot, + storage.approx_stored_count(), + items, + ) + }; if let Some(duplicates_this_slot) = std::mem::take(&mut generate_index_results.duplicates) { // there were duplicate pubkeys in this same slot diff --git a/accounts-db/src/accounts_file.rs b/accounts-db/src/accounts_file.rs index 72f0373d95..f8a1e5cce8 100644 --- a/accounts-db/src/accounts_file.rs +++ b/accounts-db/src/accounts_file.rs @@ -6,7 +6,7 @@ use { }, accounts_db::AccountsFileId, accounts_hash::AccountHash, - append_vec::{AppendVec, AppendVecError}, + append_vec::{AppendVec, AppendVecError, IndexInfo}, storable_accounts::StorableAccounts, tiered_storage::{ error::TieredStorageError, hot::HOT_FORMAT, index::IndexOffset, TieredStorage, @@ -180,6 +180,14 @@ impl AccountsFile { AccountsFileIter::new(self) } + /// iterate over all entries to put in index + pub(crate) fn scan_index(&self, callback: impl FnMut(IndexInfo)) { + match self { + Self::AppendVec(av) => av.scan_index(callback), + Self::TieredStorage(_ts) => unimplemented!(), + } + } + /// iterate over all pubkeys pub(crate) fn scan_pubkeys(&self, callback: impl FnMut(&Pubkey)) { match self { diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 4b63b8c0e0..d0f9bde0d0 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -197,6 +197,26 @@ impl<'append_vec> ReadableAccount for AppendVecStoredAccountMeta<'append_vec> { } } +/// info from an entry useful for building an index +pub(crate) struct IndexInfo { + /// size of entry, aligned to next u64 + /// This matches the return of `get_account` + pub stored_size_aligned: usize, + /// info on the entry + pub 
index_info: IndexInfoInner, +} + +/// info from an entry useful for building an index +pub(crate) struct IndexInfoInner { + /// offset to this entry + pub offset: usize, + pub pubkey: Pubkey, + pub lamports: u64, + pub rent_epoch: Epoch, + pub executable: bool, + pub data_len: u64, +} + /// offsets to help navigate the persisted format of `AppendVec` #[derive(Debug)] struct AccountOffsets { @@ -204,6 +224,8 @@ struct AccountOffsets { offset_to_end_of_data: usize, /// offset to the next account. This will be aligned. next_account_offset: usize, + /// # of bytes (aligned) to store this account, including variable sized data + stored_size_aligned: usize, } /// A thread-safe, file-backed block of memory used to store `Account` instances. Append operations @@ -598,17 +620,50 @@ impl AppendVec { /// the next account is then aligned on a 64 bit boundary. /// With these helpers, we can skip over reading some of the data depending on what the caller wants. fn next_account_offset(start_offset: usize, stored_meta: &StoredMeta) -> AccountOffsets { - let start_of_data = start_offset - + std::mem::size_of::<StoredMeta>() - + std::mem::size_of::<AccountMeta>() - + std::mem::size_of::<AccountHash>(); - let aligned_data_len = u64_align!(stored_meta.data_len as usize); - let next_account_offset = start_of_data + aligned_data_len; - let offset_to_end_of_data = start_of_data + stored_meta.data_len as usize; + let stored_size_unaligned = STORE_META_OVERHEAD + stored_meta.data_len as usize; + let stored_size_aligned = u64_align!(stored_size_unaligned); + let offset_to_end_of_data = start_offset + stored_size_unaligned; + let next_account_offset = start_offset + stored_size_aligned; AccountOffsets { next_account_offset, offset_to_end_of_data, + stored_size_aligned, + } + } + + /// Iterate over all accounts and call `callback` with `IndexInfo` for each. + /// This fn can help generate an index of the data in this storage.
+ pub(crate) fn scan_index(&self, mut callback: impl FnMut(IndexInfo)) { + let mut offset = 0; + loop { + let Some((stored_meta, next)) = self.get_type::<StoredMeta>(offset) else { + // eof + break; + }; + let Some((account_meta, _)) = self.get_type::<AccountMeta>(next) else { + // eof + break; + }; + let next = Self::next_account_offset(offset, stored_meta); + if next.offset_to_end_of_data > self.len() { + // data doesn't fit, so don't include this account + break; + } + callback(IndexInfo { + index_info: { + IndexInfoInner { + pubkey: stored_meta.pubkey, + lamports: account_meta.lamports, + offset, + data_len: stored_meta.data_len, + executable: account_meta.executable, + rent_epoch: account_meta.rent_epoch, + } + }, + stored_size_aligned: next.stored_size_aligned, + }); + offset = next.next_account_offset; } }