
Commit

update
XiangpengHao committed Nov 21, 2024
1 parent c06451f commit f095e5d
Showing 6 changed files with 257 additions and 124 deletions.
3 changes: 0 additions & 3 deletions bench/scan.rs
@@ -36,9 +36,6 @@ impl ShumaiBench for TestBench {
unique_values.insert(k);
}

#[cfg(feature = "stats")]
println!("{}", self.index.stats());

Some(serde_json::json!({
"unique_values": unique_values.len(),
}))
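The feature-gated stats print is simply dropped from the bench. Since stats() is no longer behind the `stats` feature (see the src/lib.rs hunk below), the same output could now be produced unconditionally; a one-line sketch in the bench's own context, where self.index is the tree under measurement (name taken from the removed lines):

    // No #[cfg(feature = "stats")] gate needed anymore.
    println!("{}", self.index.stats());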
2 changes: 1 addition & 1 deletion src/base_node.rs
@@ -299,7 +299,7 @@ impl BaseNode {
(version & 0b10) == 0b10
}

pub(crate) fn get_count(&self) -> usize {
pub(crate) fn value_count(&self) -> usize {
self.meta.count as usize
}

7 changes: 2 additions & 5 deletions src/lib.rs
@@ -16,7 +16,6 @@ mod utils;

mod range_scan;

#[cfg(feature = "stats")]
mod stats;

#[cfg(test)]
@@ -72,7 +71,7 @@ impl Allocator for DefaultAllocator {

pub struct U64Congee<
V: Clone + From<usize> + Into<usize>,
A: Allocator + Clone + 'static = DefaultAllocator,
A: Allocator + Clone + Send + 'static = DefaultAllocator,
> {
inner: RawCongee<8, A>,
pt_val: PhantomData<V>,
@@ -130,7 +129,7 @@ impl<V: Clone + From<usize> + Into<usize>> U64Congee<V> {
pub struct Congee<
K: Clone + From<usize>,
V: Clone + From<usize>,
A: Allocator + Clone + 'static = DefaultAllocator,
A: Allocator + Clone + Send + 'static = DefaultAllocator,
> where
usize: From<K>,
usize: From<V>,
@@ -391,8 +390,6 @@ where
}

/// Display the internal node statistics
#[cfg(feature = "stats")]
#[cfg_attr(docsrs, doc(cfg(feature = "stats")))]
pub fn stats(&self) -> stats::NodeStats {
self.inner.stats()
}
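With the cfg gate gone, Congee::stats() is compiled unconditionally and NodeStats keeps its Display impl. A minimal usage sketch, not part of this commit; it assumes a Congee<usize, usize> over the default allocator can be built with Congee::default(), which this diff does not show:

    use congee::Congee;

    fn main() {
        // Assumption: Default is implemented for Congee with DefaultAllocator.
        let tree: Congee<usize, usize> = Congee::default();
        // stats() takes no epoch guard and returns a Display-able NodeStats.
        println!("{}", tree.stats());
    }

The other change in this file tightens the allocator parameter A to also require Send, matching the Allocator + Clone + Send bound on the stats impl in src/stats.rs below.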
235 changes: 151 additions & 84 deletions src/stats.rs
@@ -1,54 +1,74 @@
use std::{fmt::Display, ptr::NonNull};
use std::{collections::HashMap, fmt::Display, ptr::NonNull};

use crate::{
base_node::{BaseNode, NodeType},
node_256::Node256,
node_ptr::PtrType,
tree::RawCongee,
tree::{CongeeVisitor, RawCongee},
Allocator,
};

#[derive(Default, Debug, serde::Serialize)]
pub struct NodeStats(Vec<LevelStats>);
#[cfg_attr(feature = "stats", derive(serde::Serialize))]
#[derive(Default, Debug, Clone)]
pub struct NodeStats {
levels: HashMap<usize, LevelStats>,
}

impl Display for NodeStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn calc_load_factor(n: (usize, usize), scale: usize) -> f64 {
if n.0 == 0 {
fn calc_load_factor(n: &NodeInfo, scale: usize) -> f64 {
if n.node_count == 0 {
return 0.0;
}
(n.1 as f64) / (n.0 as f64 * scale as f64)
(n.value_count as f64) / (n.node_count as f64 * scale as f64)
}

let mut total_node = 0;
let mut levels = self.levels.values().collect::<Vec<_>>();
levels.sort_by_key(|l| l.level);

let mut node_count = 0;
let mut total_f = 0.0;
let mut memory_size = 0;
let mut value_count = 0;

for l in self.0.iter() {
total_f += l.n4.1 as f64 / 4.0;
total_f += l.n16.1 as f64 / 16.0;
total_f += l.n48.1 as f64 / 48.0;
total_f += l.n256.1 as f64 / 256.0;
for l in levels.iter() {
total_f += l.n4.value_count as f64 / 4.0;
total_f += l.n16.value_count as f64 / 16.0;
total_f += l.n48.value_count as f64 / 48.0;
total_f += l.n256.value_count as f64 / 256.0;

total_node += l.total_nodes();
node_count += l.node_count();
memory_size += l.memory_size();
value_count += l.value_count();

writeln!(
f,
"Level: {} --- || N4: {:8}, {:8.2} || N16: {:8}, {:8.2} || N48: {:8}, {:8.2} || N256: {:8}, {:8.2} ||",
l.level,
l.n4.0,
calc_load_factor(l.n4, 4),
l.n16.0,
calc_load_factor(l.n16, 16),
l.n48.0,
calc_load_factor(l.n48, 48),
l.n256.0,
calc_load_factor(l.n256, 256),
l.n4.node_count,
calc_load_factor(&l.n4, 4),
l.n16.node_count,
calc_load_factor(&l.n16, 16),
l.n48.node_count,
calc_load_factor(&l.n48, 48),
l.n256.node_count,
calc_load_factor(&l.n256, 256),
)?;
}

let load_factor = total_f / (total_node as f64);
writeln!(
f,
"Overall node count: {}, value count: {}",
node_count, value_count
)?;

let last_level = levels.last().unwrap();
writeln!(
f,
"Last level node: {}, value count: {}",
last_level.node_count(),
last_level.value_count(),
)?;

let load_factor = total_f / (node_count as f64);
if load_factor < 0.5 {
writeln!(f, "Load factor: {load_factor:.2} (too low)")?;
} else {
@@ -61,88 +81,135 @@ impl Display for NodeStats {
}
}

#[derive(Debug, serde::Serialize, Clone)]
#[cfg_attr(feature = "stats", derive(serde::Serialize))]
#[derive(Debug, Clone, Default)]
struct NodeInfo {
node_count: usize,
value_count: usize,
}

#[cfg_attr(feature = "stats", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct LevelStats {
level: usize,
n4: (usize, usize), // (node count, leaf count)
n16: (usize, usize),
n48: (usize, usize),
n256: (usize, usize),
n4: NodeInfo, // (node count, leaf count)
n16: NodeInfo,
n48: NodeInfo,
n256: NodeInfo,
}

impl LevelStats {
fn new_level(level: usize) -> Self {
Self {
level,
n4: (0, 0),
n16: (0, 0),
n48: (0, 0),
n256: (0, 0),
n4: NodeInfo::default(),
n16: NodeInfo::default(),
n48: NodeInfo::default(),
n256: NodeInfo::default(),
}
}

fn memory_size(&self) -> usize {
self.n4.0 * NodeType::N4.node_layout().size()
+ self.n16.0 * NodeType::N16.node_layout().size()
+ self.n48.0 * NodeType::N48.node_layout().size()
+ self.n256.0 * NodeType::N256.node_layout().size()
self.n4.node_count * NodeType::N4.node_layout().size()
+ self.n16.node_count * NodeType::N16.node_layout().size()
+ self.n48.node_count * NodeType::N48.node_layout().size()
+ self.n256.node_count * NodeType::N256.node_layout().size()
}

fn node_count(&self) -> usize {
self.n4.node_count + self.n16.node_count + self.n48.node_count + self.n256.node_count
}

fn total_nodes(&self) -> usize {
self.n4.0 + self.n16.0 + self.n48.0 + self.n256.0
fn value_count(&self) -> usize {
self.n4.value_count + self.n16.value_count + self.n48.value_count + self.n256.value_count
}
}

impl<const K_LEN: usize, A: Allocator + Clone> RawCongee<K_LEN, A> {
/// Returns the node stats for the tree.
pub fn stats(&self) -> NodeStats {
let mut node_stats = NodeStats::default();
struct StatsVisitor {
node_stats: NodeStats,
}

let mut sub_nodes = vec![(0, 0, unsafe {
std::mem::transmute::<NonNull<Node256>, NonNull<BaseNode>>(self.root)
})];
impl<const K_LEN: usize> CongeeVisitor<K_LEN> for StatsVisitor {
fn pre_visit_sub_node(&mut self, node: NonNull<BaseNode>) {
let node = BaseNode::read_lock(node).unwrap();
let tree_level = node.as_ref().prefix().len();

while let Some((level, key_level, node)) = sub_nodes.pop() {
let node = BaseNode::read_lock(node).unwrap();
if !self.node_stats.levels.contains_key(&tree_level) {

(GitHub Actions / check, failure on line 137 in src/stats.rs: usage of `contains_key` followed by `insert` on a `HashMap`)
self.node_stats
.levels
.insert(tree_level, LevelStats::new_level(tree_level));
}

if node_stats.0.len() <= level {
node_stats.0.push(LevelStats::new_level(level));
match node.as_ref().get_type() {
crate::base_node::NodeType::N4 => {
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n4
.node_count += 1;
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n4
.value_count += node.as_ref().value_count();
}

match node.as_ref().get_type() {
crate::base_node::NodeType::N4 => {
node_stats.0[level].n4.0 += 1;
node_stats.0[level].n4.1 += node.as_ref().get_count();
}
crate::base_node::NodeType::N16 => {
node_stats.0[level].n16.0 += 1;
node_stats.0[level].n16.1 += node.as_ref().get_count();
}
crate::base_node::NodeType::N48 => {
node_stats.0[level].n48.0 += 1;
node_stats.0[level].n48.1 += node.as_ref().get_count();
}
crate::base_node::NodeType::N256 => {
node_stats.0[level].n256.0 += 1;
node_stats.0[level].n256.1 += node.as_ref().get_count();
}
crate::base_node::NodeType::N16 => {
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n16
.node_count += 1;
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n16
.value_count += node.as_ref().value_count();
}

let children = node.as_ref().get_children(0, 255);
for (_k, n) in children {
match n.downcast::<K_LEN>(level) {
PtrType::Payload(_) => {}
PtrType::SubNode(sub_node) => {
let child_node = BaseNode::read_lock(sub_node).unwrap();
sub_nodes.push((
level + 1,
key_level + 1 + child_node.as_ref().prefix().len(),
sub_node,
));
}
}
crate::base_node::NodeType::N48 => {
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n48
.node_count += 1;
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n48
.value_count += node.as_ref().value_count();
}
crate::base_node::NodeType::N256 => {
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n256
.node_count += 1;
self.node_stats
.levels
.get_mut(&tree_level)
.unwrap()
.n256
.value_count += node.as_ref().value_count();
}
}
node_stats
}
}

impl<const K_LEN: usize, A: Allocator + Clone + Send> RawCongee<K_LEN, A> {
/// Returns the node stats for the tree.
pub fn stats(&self) -> NodeStats {
let mut visitor = StatsVisitor {
node_stats: NodeStats::default(),
};

self.dfs_visitor_slow(&mut visitor).unwrap();

return visitor.node_stats;

(GitHub Actions / check, failure on line 213 in src/stats.rs: unneeded `return` statement)
}
}
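Stats collection now rides on the tree's own depth-first visitor instead of the previous hand-rolled stack of (level, key_level, node) tuples. A minimal sketch of another visitor reusing the same hook, under the assumption that pre_visit_sub_node is the only method a CongeeVisitor must implement (other hooks, if any, are not shown in this diff); NodeCounter is an illustrative name:

    use std::ptr::NonNull;

    use crate::{base_node::BaseNode, tree::CongeeVisitor};

    // Counts every sub-node the depth-first traversal touches, regardless of type.
    struct NodeCounter {
        nodes: usize,
    }

    impl<const K_LEN: usize> CongeeVisitor<K_LEN> for NodeCounter {
        fn pre_visit_sub_node(&mut self, _node: NonNull<BaseNode>) {
            self.nodes += 1;
        }
    }

    // Usage mirrors RawCongee::stats():
    //   let mut counter = NodeCounter { nodes: 0 };
    //   tree.dfs_visitor_slow(&mut counter).unwrap();

The two CI failures flagged above are clippy lints rather than logic errors: the contains_key-then-insert pair can be collapsed to levels.entry(tree_level).or_insert_with(|| LevelStats::new_level(tree_level)), and the trailing return visitor.node_stats; only needs the return keyword removed.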
12 changes: 8 additions & 4 deletions src/tests/tree.rs
@@ -23,13 +23,14 @@ fn small_insert() {

#[test]
fn test_sparse_keys() {
let key_cnt = 100_000;
let key_cnt = 30_000;
let tree = RawCongee::default();
let mut keys = Vec::<usize>::with_capacity(key_cnt);

let guard = crossbeam_epoch::pin();
let mut rng = StdRng::seed_from_u64(12);
for _i in 0..key_cnt {
let k = thread_rng().gen::<usize>() & 0x7fff_ffff_ffff_ffff;
let k = rng.gen::<usize>() & 0x7fff_ffff_ffff_ffff;
keys.push(k);

let key: [u8; 8] = k.to_be_bytes();
@@ -56,13 +57,12 @@ fn test_sparse_keys() {
assert_eq!(v, *i);
}

#[cfg(feature = "stats")]
println!("{}", tree.stats());
}

use rand::prelude::StdRng;
use rand::seq::SliceRandom;
use rand::{thread_rng, Rng, SeedableRng};
use rand::{Rng, SeedableRng};

#[test]
fn test_concurrent_insert() {
@@ -105,6 +105,8 @@ fn test_concurrent_insert() {
let val = tree.get(&key, &guard).unwrap();
assert_eq!(val, *v);
}

assert_eq!(tree.value_count(&guard), key_space.len());
}

#[cfg(all(feature = "shuttle", test))]
@@ -192,6 +194,8 @@ fn test_concurrent_insert_read() {
assert_eq!(val, *v);
}

assert_eq!(tree.value_count(&guard), key_space.len());

drop(guard);
drop(tree);
}
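test_sparse_keys now draws keys from a fixed-seed StdRng instead of thread_rng(), so the generated key set, and therefore any failure, is reproducible from run to run; the key count also drops from 100_000 to 30_000. A standalone sketch of the seeding pattern, not part of this commit:

    use rand::prelude::StdRng;
    use rand::{Rng, SeedableRng};

    fn main() {
        // Same seed as the test, so the sequence is identical on every run.
        let mut rng = StdRng::seed_from_u64(12);
        // Clear the top bit, matching the mask used in test_sparse_keys.
        let k = rng.gen::<usize>() & 0x7fff_ffff_ffff_ffff;
        println!("first sparse key: {k}");
    }

The concurrent tests additionally assert tree.value_count(&guard) against the number of distinct keys, checking that exactly one value per key survives the concurrent inserts.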
