Skip to content

Commit

Permalink
add scan_index for improving index generation (#524)
Browse files Browse the repository at this point in the history
* add scan_index for improving index generation

* pr feedback

* rework some stuff from pr feedback

* get rid of redundant if

* deal with rent correctly
  • Loading branch information
jeffwashington authored Apr 3, 2024
1 parent 0168e0a commit 57572d5
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 42 deletions.
107 changes: 73 additions & 34 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8642,8 +8642,6 @@ impl AccountsDb {
if accounts.next().is_none() {
return SlotIndexGenerationInfo::default();
}
let accounts = storage.accounts.account_iter();

let secondary = !self.account_indexes.is_empty();

let mut rent_paying_accounts_by_partition = Vec::default();
Expand All @@ -8652,46 +8650,87 @@ impl AccountsDb {
let mut amount_to_top_off_rent = 0;
let mut stored_size_alive = 0;

let items = accounts.map(|stored_account| {
stored_size_alive += stored_account.stored_size();
let pubkey = stored_account.pubkey();
if secondary {
let (dirty_pubkeys, insert_time_us, mut generate_index_results) = if !secondary {
let mut items_local = Vec::default();
storage.accounts.scan_index(|info| {
stored_size_alive += info.stored_size_aligned;
if info.index_info.lamports > 0 {
accounts_data_len += info.index_info.data_len;
}
items_local.push(info.index_info);
});
let items = items_local.into_iter().map(|info| {
if let Some(amount_to_top_off_rent_this_account) = Self::stats_for_rent_payers(
&info.pubkey,
info.lamports,
info.data_len as usize,
info.rent_epoch,
info.executable,
rent_collector,
) {
amount_to_top_off_rent += amount_to_top_off_rent_this_account;
num_accounts_rent_paying += 1;
// remember this rent-paying account pubkey
rent_paying_accounts_by_partition.push(info.pubkey);
}

(
info.pubkey,
AccountInfo::new(
StorageLocation::AppendVec(store_id, info.offset), // will never be cached
info.lamports,
),
)
});
self.accounts_index
.insert_new_if_missing_into_primary_index(
slot,
storage.approx_stored_count(),
items,
)
} else {
let accounts = storage.accounts.account_iter();
let items = accounts.map(|stored_account| {
stored_size_alive += stored_account.stored_size();
let pubkey = stored_account.pubkey();
self.accounts_index.update_secondary_indexes(
pubkey,
&stored_account,
&self.account_indexes,
);
}
if !stored_account.is_zero_lamport() {
accounts_data_len += stored_account.data().len() as u64;
}

if let Some(amount_to_top_off_rent_this_account) = Self::stats_for_rent_payers(
pubkey,
stored_account.lamports(),
stored_account.data().len(),
stored_account.rent_epoch(),
stored_account.executable(),
rent_collector,
) {
amount_to_top_off_rent += amount_to_top_off_rent_this_account;
num_accounts_rent_paying += 1;
// remember this rent-paying account pubkey
rent_paying_accounts_by_partition.push(*pubkey);
}
if !stored_account.is_zero_lamport() {
accounts_data_len += stored_account.data().len() as u64;
}

(
*pubkey,
AccountInfo::new(
StorageLocation::AppendVec(store_id, stored_account.offset()), // will never be cached
if let Some(amount_to_top_off_rent_this_account) = Self::stats_for_rent_payers(
pubkey,
stored_account.lamports(),
),
)
});
stored_account.data().len(),
stored_account.rent_epoch(),
stored_account.executable(),
rent_collector,
) {
amount_to_top_off_rent += amount_to_top_off_rent_this_account;
num_accounts_rent_paying += 1;
// remember this rent-paying account pubkey
rent_paying_accounts_by_partition.push(*pubkey);
}

let (dirty_pubkeys, insert_time_us, mut generate_index_results) = self
.accounts_index
.insert_new_if_missing_into_primary_index(slot, storage.approx_stored_count(), items);
(
*pubkey,
AccountInfo::new(
StorageLocation::AppendVec(store_id, stored_account.offset()), // will never be cached
stored_account.lamports(),
),
)
});
self.accounts_index
.insert_new_if_missing_into_primary_index(
slot,
storage.approx_stored_count(),
items,
)
};

if let Some(duplicates_this_slot) = std::mem::take(&mut generate_index_results.duplicates) {
// there were duplicate pubkeys in this same slot
Expand Down
10 changes: 9 additions & 1 deletion accounts-db/src/accounts_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use {
},
accounts_db::AccountsFileId,
accounts_hash::AccountHash,
append_vec::{AppendVec, AppendVecError},
append_vec::{AppendVec, AppendVecError, IndexInfo},
storable_accounts::StorableAccounts,
tiered_storage::{
error::TieredStorageError, hot::HOT_FORMAT, index::IndexOffset, TieredStorage,
Expand Down Expand Up @@ -180,6 +180,14 @@ impl AccountsFile {
AccountsFileIter::new(self)
}

/// Visit every entry stored in this file, handing `callback` the `IndexInfo`
/// needed to put that entry in the index.
///
/// For the `AppendVec` variant the call is forwarded to that variant's own
/// `scan_index`, which takes ownership of `callback`.
///
/// # Panics
///
/// Panics through `unimplemented!()` when `self` is the `TieredStorage` variant.
pub(crate) fn scan_index(&self, callback: impl FnMut(IndexInfo)) {
    match self {
        Self::TieredStorage(_) => unimplemented!(),
        Self::AppendVec(append_vec) => append_vec.scan_index(callback),
    }
}

/// iterate over all pubkeys
pub(crate) fn scan_pubkeys(&self, callback: impl FnMut(&Pubkey)) {
match self {
Expand Down
69 changes: 62 additions & 7 deletions accounts-db/src/append_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,35 @@ impl<'append_vec> ReadableAccount for AppendVecStoredAccountMeta<'append_vec> {
}
}

/// info from an entry useful for building an index
///
/// One value of this type is passed to the `scan_index` callback per entry:
/// the aligned on-disk size of the entry plus the per-account fields
/// (`IndexInfoInner`) that an index needs.
pub(crate) struct IndexInfo {
/// size of entry, aligned to next u64
/// This matches the return of `get_account`
pub stored_size_aligned: usize,
/// info on the entry
/// (offset, pubkey, lamports, rent epoch, executable flag and data length)
pub index_info: IndexInfoInner,
}

/// info from an entry useful for building an index
///
/// This is the per-account part of `IndexInfo`. `AppendVec::scan_index` fills it
/// from the entry's `StoredMeta` and `AccountMeta` headers.
pub(crate) struct IndexInfoInner {
/// offset to this entry
/// (the offset at which the entry's `StoredMeta` was read)
pub offset: usize,
/// pubkey of the account, copied from the entry's `StoredMeta`
pub pubkey: Pubkey,
/// lamports of the account, copied from the entry's `AccountMeta`
pub lamports: u64,
/// rent epoch of the account, copied from the entry's `AccountMeta`
pub rent_epoch: Epoch,
/// executable flag of the account, copied from the entry's `AccountMeta`
pub executable: bool,
/// length of the account's data, copied from the entry's `StoredMeta`.
/// The data itself is not part of this struct.
pub data_len: u64,
}

/// offsets to help navigate the persisted format of `AppendVec`
///
/// Produced by `next_account_offset` for a single entry. Both offsets use the
/// same base as the entry's own start offset.
#[derive(Debug)]
struct AccountOffsets {
/// offset to the end of the &[u8] data
/// This is the unaligned end of the entry; callers compare it against the
/// storage length to check that the whole entry fits.
offset_to_end_of_data: usize,
/// offset to the next account. This will be aligned.
/// Equal to the entry's start offset plus `stored_size_aligned`.
next_account_offset: usize,
/// # of bytes (aligned) to store this account, including variable sized data
stored_size_aligned: usize,
}

/// A thread-safe, file-backed block of memory used to store `Account` instances. Append operations
Expand Down Expand Up @@ -598,17 +620,50 @@ impl AppendVec {
/// the next account is then aligned on a 64 bit boundary.
/// With these helpers, we can skip over reading some of the data depending on what the caller wants.
///
/// For the entry that begins at `start_offset` and is described by `stored_meta`, returns:
/// - `stored_size_aligned`: `STORE_META_OVERHEAD + stored_meta.data_len`, rounded up by `u64_align!`
/// - `offset_to_end_of_data`: `start_offset` plus the unaligned stored size
/// - `next_account_offset`: `start_offset` plus the aligned stored size
///
/// Everything is derived from the single `STORE_META_OVERHEAD`-based size so the three
/// values cannot drift apart; no separate per-header `size_of` sum is computed.
fn next_account_offset(start_offset: usize, stored_meta: &StoredMeta) -> AccountOffsets {
    // bytes occupied by the fixed-size headers plus the variable-length data, before padding
    let stored_size_unaligned = STORE_META_OVERHEAD + stored_meta.data_len as usize;
    // same size after padding, which is the stride to the next entry
    let stored_size_aligned = u64_align!(stored_size_unaligned);
    let offset_to_end_of_data = start_offset + stored_size_unaligned;
    let next_account_offset = start_offset + stored_size_aligned;

    AccountOffsets {
        next_account_offset,
        offset_to_end_of_data,
        stored_size_aligned,
    }
}

/// Walk the stored entries in order, starting at offset 0, and invoke `callback`
/// once per entry with the `IndexInfo` describing it.
///
/// Only the `StoredMeta` and `AccountMeta` headers of each entry are read here;
/// the account data is not passed to `callback`.
/// The walk stops at the first entry whose `StoredMeta` or `AccountMeta` cannot be
/// read, or whose data would end past `self.len()`; that entry is not reported.
pub(crate) fn scan_index(&self, mut callback: impl FnMut(IndexInfo)) {
    let mut offset = 0;
    // a failed `StoredMeta` read means eof
    while let Some((stored_meta, account_meta_offset)) = self.get_type::<StoredMeta>(offset) {
        let Some((account_meta, _)) = self.get_type::<AccountMeta>(account_meta_offset) else {
            // eof
            break;
        };
        let offsets = Self::next_account_offset(offset, stored_meta);
        if offsets.offset_to_end_of_data > self.len() {
            // data doesn't fit, so don't include this account
            break;
        }
        let index_info = IndexInfoInner {
            offset,
            pubkey: stored_meta.pubkey,
            lamports: account_meta.lamports,
            rent_epoch: account_meta.rent_epoch,
            executable: account_meta.executable,
            data_len: stored_meta.data_len,
        };
        callback(IndexInfo {
            stored_size_aligned: offsets.stored_size_aligned,
            index_info,
        });
        // advance to the aligned start of the following entry
        offset = offsets.next_account_offset;
    }
}

Expand Down

0 comments on commit 57572d5

Please sign in to comment.