Skip to content

Commit

Permalink
better perf with fewer boundary checks
Browse files (browse the repository at this point in the history)
  • Loading branch information
XiangpengHao committed Oct 7, 2024
1 parent 5a1d660 commit 02f12b5
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 68 deletions.
4 changes: 2 additions & 2 deletions bench/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,15 +217,15 @@ fn main() {
IndexType::Flurry => {
let mut test_bench = TestBench {
index: flurry::HashMap::new(),
initial_cnt: 50_000_000,
initial_cnt: 100_000_000,
};
let result = shumai::run(&mut test_bench, c, repeat);
result.write_json().unwrap();
}
IndexType::ART => {
let mut test_bench = TestBench {
index: Art::default(),
initial_cnt: 50_000_000,
initial_cnt: 100_000_000,
};
let result = shumai::run(&mut test_bench, c, repeat);
result.write_json().unwrap();
Expand Down
6 changes: 5 additions & 1 deletion src/base_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,11 @@ impl BaseNode {
}

pub(crate) fn prefix(&self) -> &[u8] {
self.meta.prefix[..self.meta.prefix_cnt as usize].as_ref()
unsafe {
self.meta
.prefix
.get_unchecked(..self.meta.prefix_cnt as usize)
}
}

pub(crate) fn insert_grow<CurT: Node, BiggerT: Node, A: Allocator + Send + Clone + 'static>(
Expand Down
45 changes: 12 additions & 33 deletions src/node_16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,41 +64,20 @@ impl Node16 {
}

fn get_child_pos(&self, key: u8) -> Option<usize> {
#[cfg(all(target_feature = "sse2", not(miri)))]
unsafe {
self.get_child_pos_sse2(key)
}

#[cfg(any(not(target_feature = "sse2"), miri))]
self.get_child_pos_linear(key)
}

#[cfg(any(not(target_feature = "sse2"), miri))]
fn get_child_pos_linear(&self, key: u8) -> Option<usize> {
for i in 0..self.base.meta.count {
if self.keys[i as usize] == Self::flip_sign(key) {
return Some(i as usize);
// TODO(xiangpeng): check that this code is being auto-vectorized
let target = Self::flip_sign(key);
for (i, k) in self
.keys
.iter()
.enumerate()
.take(self.base.meta.count as usize)
{
if *k == target {
return Some(i);
}
}
None
}

#[cfg(target_feature = "sse2")]
unsafe fn get_child_pos_sse2(&self, key: u8) -> Option<usize> {
use std::arch::x86_64::{
__m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8,
};
let cmp = _mm_cmpeq_epi8(
_mm_set1_epi8(Self::flip_sign(key) as i8),
_mm_loadu_si128(&self.keys as *const [u8; 16] as *const __m128i),
);
let bit_field = _mm_movemask_epi8(cmp) & ((1 << self.base.meta.count) - 1);
if bit_field > 0 {
Some(Self::ctz(bit_field as u16) as usize)
} else {
None
}
}
}

pub(crate) struct Node16Iter<'a> {
Expand Down Expand Up @@ -223,8 +202,8 @@ impl Node for Node16 {

fn get_child(&self, key: u8) -> Option<NodePtr> {
let pos = self.get_child_pos(key)?;
let child = self.children[pos];
Some(child)
let child = unsafe { self.children.get_unchecked(pos) };
Some(*child)
}

#[cfg(feature = "db_extension")]
Expand Down
18 changes: 4 additions & 14 deletions src/node_256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ impl Node256 {
fn get_mask(&self, key: usize) -> bool {
let idx = key / 8;
let bit = key % 8;
let key_mask = self.key_mask[idx];
key_mask & (1 << bit) != 0
let key_mask = unsafe { self.key_mask.get_unchecked(idx) };
*key_mask & (1 << bit) != 0
}
}

Expand Down Expand Up @@ -118,18 +118,8 @@ impl Node for Node256 {

fn get_child(&self, key: u8) -> Option<NodePtr> {
if self.get_mask(key as usize) {
let child = self.children[key as usize];

#[cfg(all(target_feature = "sse2", not(miri)))]
{
let ptr = child.as_ptr();
use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
unsafe {
_mm_prefetch(ptr as *const i8, _MM_HINT_T0);
}
}

Some(child)
let child = unsafe { self.children.get_unchecked(key as usize) };
Some(*child)
} else {
None
}
Expand Down
12 changes: 8 additions & 4 deletions src/node_4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,14 @@ impl Node for Node4 {
}

fn get_child(&self, key: u8) -> Option<NodePtr> {
for i in 0..self.base.meta.count {
if self.keys[i as usize] == key {
let child = self.children[i as usize];
return Some(child);
for (k, c) in self
.keys
.iter()
.zip(self.children.iter())
.take(self.base.meta.count as usize)
{
if *k == key {
return Some(*c);
}
}
None
Expand Down
17 changes: 4 additions & 13 deletions src/node_48.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,21 +115,12 @@ impl Node for Node48 {
}

fn get_child(&self, key: u8) -> Option<NodePtr> {
if self.child_idx[key as usize] == EMPTY_MARKER {
let pos = unsafe { self.child_idx.get_unchecked(key as usize) };
if *pos == EMPTY_MARKER {
None
} else {
let child = self.children[self.child_idx[key as usize] as usize];

#[cfg(all(target_feature = "sse2", not(miri)))]
{
let ptr = child.as_ptr();
use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
unsafe {
_mm_prefetch(ptr as *const i8, _MM_HINT_T0);
}
}

Some(child)
let child = unsafe { self.children.get_unchecked(*pos as usize) };
Some(*child)
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ impl<T: RawKey, A: Allocator + Clone + Send> RawTree<T, A> {
return None;
}

let child_node = node.as_ref().get_child(key.as_bytes()[level as usize]);
let child_node = node
.as_ref()
.get_child(unsafe { *key.as_bytes().get_unchecked(level as usize) });
if node.check_version().is_err() {
continue 'outer;
}
Expand Down

0 comments on commit 02f12b5

Please sign in to comment.