diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c2bdbd..7bd9a0d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,10 +55,10 @@ jobs: - uses: actions-rs/toolchain@v1 name: Setup toolchain with: - toolchain: stable + toolchain: nightly override: true components: rust-src - - run: sudo apt install libnuma-dev llvm-dev + - run: sudo apt install llvm-dev - run: cargo test - run: > env ASAN_OPTIONS="detect_odr_violation=0" RUSTFLAGS="-Z sanitizer=address" @@ -76,10 +76,9 @@ jobs: - uses: actions-rs/toolchain@v1 name: Setup toolchain with: - toolchain: stable + toolchain: nightly override: true components: rust-src - - run: sudo apt install libnuma-dev - run: cargo test --features "shuttle" shuttle_concurrent_insert_read --release -- --nocapture - run: cargo test --features "shuttle" shuttle_insert_only --release -- --nocapture @@ -96,7 +95,6 @@ jobs: toolchain: stable override: true components: rust-src - - run: sudo apt install libnuma-dev - run: env SHUMAI_FILTER="ART" SHUMAI_THREAD=4 cargo bench --bench basic - uses: actions/upload-artifact@v3 with: diff --git a/Cargo.toml b/Cargo.toml index 79f2613..69a913f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "congee" -version = "0.3.0" +version = "0.3.1" edition = "2021" description = "A Rust implementation of ART-OLC concurrent adaptive radix tree." 
keywords = ["ART", "adaptive-radix-tree", "concurrent"] @@ -25,6 +25,7 @@ flurry = "0.5.1" mimalloc = { version = "0.1.43", default-features = false } selfsimilar = "0.1.0" shuttle = "0.7.1" +ahash = "0.8.11" [[bench]] name = "basic" diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..b174281 --- /dev/null +++ b/bench/README.md @@ -0,0 +1,77 @@ +## Benchmark Guide + +### Multi-thread performance of Congee + +```bash +env SHUMAI_FILTER="ReadOnly-ART" RUSTFLAGS='-C target-cpu=native' cargo bench --bench basic +``` + + +### Single thread Congee vs std::HashMap + +The hash-map side of this benchmark uses `ahash::AHashMap`, as it performs much better than `std::collections::HashMap`. + +```bash +env SHUMAI_FILTER="single-thread" RUSTFLAGS='-C target-cpu=native' cargo bench --bench basic +``` + +Example output: +``` +============================================================ +Loading data... +finished in 1.95s +============================================================ +Running benchmark for 3 seconds with 1 threads: basic-single-thread-ART +Iteration 0 finished------------------ +11555348 + +Iteration 1 finished------------------ +11300362 + +Iteration 2 finished------------------ +11287117 + +Benchmark results saved to file: target/benchmark/2024-10-07/10-57-basic-single-thread-ART.json +============================================================ +Loading data... +finished in 5.52s +============================================================ +Running benchmark for 3 seconds with 1 threads: basic-single-thread-SingleHashMap +Iteration 0 finished------------------ +14818107 + +Iteration 1 finished------------------ +14582199 + +Iteration 2 finished------------------ +14339184 + +Benchmark results saved to file: target/benchmark/2024-10-07/10-57-basic-single-thread-SingleHashMap.json +``` + +### Collect perf metrics + +Simply add `--features "perf"` to the cargo bench command. 
+ +```bash +env SHUMAI_FILTER="single-thread" RUSTFLAGS='-C target-cpu=native' cargo bench --bench basic --features "perf" +``` + +Example output: +```json +{ + "name": "perf", + "value": { + "branch_miss": 41634853, + "branches": 1213790237, + "cache_miss": 344751417, + "cache_reference": 621479438, + "context_switch": 0, + "cpu_migration": 0, + "cycles": 32342888548, + "inst": 11316270133, + "page_faults": 0, + "stalled_cycles_frontend": 376732948 + } +} +``` diff --git a/bench/basic.rs b/bench/basic.rs index 546e911..e764702 100644 --- a/bench/basic.rs +++ b/bench/basic.rs @@ -2,7 +2,7 @@ use congee::Art; use rand::{thread_rng, Rng}; use serde::{Deserialize, Serialize}; use shumai::{config, ShumaiBench}; -use std::fmt::Display; +use std::{cell::UnsafeCell, fmt::Display}; use mimalloc::MiMalloc; @@ -25,6 +25,7 @@ impl Display for Workload { #[derive(Serialize, Clone, Copy, Debug, Deserialize)] pub enum IndexType { + SingleHashMap, Flurry, ART, } @@ -109,6 +110,71 @@ impl DBIndex for Art { } } +/// A single-threaded hash map. +/// This benchmark wants to answer the question: +/// How does Congee perform compared to the best single-threaded hash map? +/// +/// HashMap is being used as the foundation of HashJoin: +/// https://github.com/apache/datafusion/blob/main/datafusion/physical-plan/src/joins/utils.rs#L123 +/// +/// But is there a better way to do it? 
+struct SingleThreadHashMap { + map: UnsafeCell<ahash::AHashMap<usize, usize>>, // only allow single thread access +} + +impl SingleThreadHashMap { + fn new(cap: usize) -> Self { + Self { + map: UnsafeCell::new(ahash::AHashMap::with_capacity(cap)), + } + } +} + +unsafe impl Send for SingleThreadHashMap {} +unsafe impl Sync for SingleThreadHashMap {} /* SAFETY: both impls rely on the benchmark driving this map from exactly one thread (the SingleHashMap arm in main() panics otherwise); the type itself does not enforce it. */ + +impl DBIndex for SingleThreadHashMap { /* every unsafe block below dereferences the UnsafeCell and relies on that single-threaded use */ + type Guard<'a> = (); + + fn pin(&self) -> Self::Guard<'_> { + () + } + fn insert(&self, key: usize, v: usize, _guard: &Self::Guard<'_>) { + unsafe { + (*self.map.get()).insert(key, v); + } + } + + fn get(&self, key: &usize, _guard: &Self::Guard<'_>) -> Option<usize> { + unsafe { (*self.map.get()).get(key).cloned() } + } + + fn update<'a>( + &'a self, + key: &usize, + new: usize, + _guard: &Self::Guard<'a>, + ) -> Option<(usize, Option<usize>)> { + unsafe { + (*self.map.get()) + .entry(*key) + .and_modify(|v| *v = new) + .or_insert(new); + } + Some((*key, Some(*key))) /* NOTE(review): the second tuple slot carries the key, not the value that was replaced, and a missing key is inserted rather than reported - confirm against the DBIndex::update contract */ + } + + fn scan<'a>( + &'a self, + _low_key: &usize, + _high_key: &usize, + _results: &mut [(usize, usize)], + _guard: &Self::Guard<'a>, + ) -> usize { + unimplemented!("SingleThreadHashMap can't scan") + } +} + impl DBIndex for flurry::HashMap { type Guard<'a> = flurry::Guard<'a>; @@ -222,6 +288,18 @@ fn main() { let result = shumai::run(&mut test_bench, c, repeat); result.write_json().unwrap(); } + IndexType::SingleHashMap => { + if c.threads.len() != 1 || c.threads[0] != 1 { /* length is checked first so an empty thread list reaches this panic instead of an index out-of-bounds */ + panic!("SingleHashMap only support single thread!"); + } + let initial_cnt = 100_000_000; + let mut test_bench = TestBench { + index: SingleThreadHashMap::new(initial_cnt), + initial_cnt, + }; + let result = shumai::run(&mut test_bench, c, repeat); + result.write_json().unwrap(); + } IndexType::ART => { let mut test_bench = TestBench { index: Art::default(), diff --git a/bench/benchmark.toml b/bench/benchmark.toml index 6c33d38..4e529ee 100644 --- a/bench/benchmark.toml +++ b/bench/benchmark.toml @@ -2,9 +2,17 @@ name = "basic" threads = [1, 2, 4, 8, 32] time = 3 -workload = 
["ReadOnly", "InsertOnly", "UpdateOnly", "ScanOnly"] +workload = ["ReadOnly", "InsertOnly", "UpdateOnly", "ScanOnly"] index_type = ["Flurry", "ART"] +[[Basic]] +name = "single-thread" +threads = [1] +time = 3 +workload = ["ReadOnly"] +index_type = ["ART", "SingleHashMap"] + + [[Scan]] name = "scan" threads = [1, 2, 4, 8]