From dde46823a87f358eee5a928c27e2731f9a53d616 Mon Sep 17 00:00:00 2001 From: Edward Amsden Date: Wed, 1 Feb 2023 18:22:15 -0600 Subject: [PATCH 1/5] Add non-copying version of murmur3_32 that reads directly from a byte buffer --- src/murmur3_32.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/murmur3_32.rs b/src/murmur3_32.rs index 465db1b..466d16a 100644 --- a/src/murmur3_32.rs +++ b/src/murmur3_32.rs @@ -7,6 +7,7 @@ // modified, or distributed except according to those terms. use std::io::{Read, Result}; +use std::cmp::min; use crate::read_bytes; @@ -60,6 +61,55 @@ pub fn murmur3_32(source: &mut T, seed: u32) -> Result { } } +/// Use the 32 bit variant of murmur3 to hash [u8] without copying the buffer. +/// +/// # Example +/// +/// ``` +/// use murmur3::murmur3_32::nocopy; +/// let hash_result = murmur3_32_nocopy("hello world".as_bytes(), 0) +/// ``` +pub fn murmur3_32_nocopy(source: &[u8], seed: u32) -> u32 { + let mut buffer = source; + let mut processed = 0; + let mut state = seed; + loop { + match min(buffer.len(), 4) { + 0 => return finish(state, processed), + 1 => { + processed += 1; + let k: u32 = buffer[0] as u32; + state ^= calc_k(k); + return finish(state, processed); + }, + 2 => { + processed += 2; + let k: u32 = ((buffer[1] as u32) << 8) | (buffer [0] as u32); + state ^= calc_k(k); + return finish(state, processed); + }, + 3 => { + processed += 3; + let k: u32 = + ((buffer[2] as u32) << 16) | ((buffer [1] as u32) << 8) | (buffer[0] as u32); + state ^= calc_k(k); + return finish(state, processed); + }, + 4 => { + processed += 4; + let k: u32 = + ((buffer[3] as u32) << 24) | ((buffer[2] as u32) << 16) | + ((buffer[1] as u32) << 8) | (buffer[0] as u32); + state ^= calc_k(k); + state = state.rotate_left(R2); + state = (state.wrapping_mul(M)).wrapping_add(N); + buffer = &buffer[4..]; + }, + _ => unreachable!() + }; + }; +} + fn finish(state: u32, processed: u32) -> u32 { let mut hash = state; hash ^= processed as u32; From 9ab57eb1b70f0b25f9e17e76fb8a4863df87052a Mon Sep 17 00:00:00 2001 From: Edward Amsden Date: Mon, 13 Feb 2023 19:46:27 -0600 Subject: [PATCH 2/5] cargo fmt --- src/murmur3_32.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/murmur3_32.rs b/src/murmur3_32.rs index 466d16a..d6ec749 100644 --- a/src/murmur3_32.rs +++ b/src/murmur3_32.rs @@ -6,8 +6,8 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. -use std::io::{Read, Result}; use std::cmp::min; +use std::io::{Read, Result}; use crate::read_bytes; @@ -64,7 +64,7 @@ pub fn murmur3_32(source: &mut T, seed: u32) -> Result { /// Use the 32 bit variant of murmur3 to hash [u8] without copying the buffer. /// /// # Example -/// +/// /// ``` /// use murmur3::murmur3_32::nocopy; /// let hash_result = murmur3_32_nocopy("hello world".as_bytes(), 0) @@ -81,33 +81,34 @@ pub fn murmur3_32_nocopy(source: &[u8], seed: u32) -> u32 { let k: u32 = buffer[0] as u32; state ^= calc_k(k); return finish(state, processed); - }, + } 2 => { processed += 2; - let k: u32 = ((buffer[1] as u32) << 8) | (buffer [0] as u32); - state ^= calc_k(k); + let k: u32 = ((buffer[1] as u32) << 8) | (buffer[0] as u32); + state ^= calc_k(k); return finish(state, processed); - }, + } 3 => { processed += 3; let k: u32 = - ((buffer[2] as u32) << 16) | ((buffer [1] as u32) << 8) | (buffer[0] as u32); + ((buffer[2] as u32) << 16) | ((buffer[1] as u32) << 8) | (buffer[0] as u32); state ^= calc_k(k); return finish(state, processed); - }, + } 4 => { processed += 4; - let k: u32 = - ((buffer[3] as u32) << 24) | ((buffer[2] as u32) << 16) | - ((buffer[1] as u32) << 8) | (buffer[0] as u32); - state ^= calc_k(k); + let k: u32 = ((buffer[3] as u32) << 24) + | ((buffer[2] as u32) << 16) + | ((buffer[1] as u32) << 8) + | (buffer[0] as u32); + state ^= calc_k(k); state = state.rotate_left(R2); state = (state.wrapping_mul(M)).wrapping_add(N); buffer = &buffer[4..]; - }, - _ => unreachable!() + } + _ => unreachable!(), }; - }; + } } fn finish(state: u32, processed: u32) -> u32 { From c6f8a948ae6887ea21f51a8ca771084c2b3907fb Mon Sep 17 00:00:00 2001 From: Edward Amsden Date: Tue, 14 Feb 2023 14:05:59 -0600 Subject: [PATCH 3/5] Add murmur3_32_of_slice benchmark --- benches/bench.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/benches/bench.rs b/benches/bench.rs index 42d53a0..3d7259f 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,5 +1,4 @@ #![feature(test)] - extern crate murmur3_sys; extern crate test; @@ -23,6 +22,17 @@ fn bench_32(b: &mut Bencher) { }); } +#[bench] +fn bench_32_slice(b: &mut Bencher) { + let string: &[u8] = + test::black_box(b"Lorem ipsum dolor sit amet, consectetur adipisicing elit"); + b.bytes = string.len() as u64; + b.iter(|| { + let tmp = &string[0..string.len()]; + murmur3_32_of_slice(tmp, 0); + }); +} + #[bench] fn bench_c_32(b: &mut Bencher) { let string: &[u8] = From f54779deab7c75a45a88cbd5a6141b3125644ae7 Mon Sep 17 00:00:00 2001 From: Edward Amsden Date: Tue, 14 Feb 2023 14:06:32 -0600 Subject: [PATCH 4/5] Fix clippy error --- src/murmur3_32.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/murmur3_32.rs b/src/murmur3_32.rs index d6ec749..e0a3368 100644 --- a/src/murmur3_32.rs +++ b/src/murmur3_32.rs @@ -69,7 +69,7 @@ pub fn murmur3_32(source: &mut T, seed: u32) -> Result { /// use murmur3::murmur3_32::nocopy; /// let hash_result = murmur3_32_nocopy("hello world".as_bytes(), 0) /// ``` -pub fn murmur3_32_nocopy(source: &[u8], seed: u32) -> u32 { +pub fn murmur3_32_of_slice(source: &[u8], seed: u32) -> u32 { let mut buffer = source; let mut processed = 0; let mut state = seed; @@ -113,7 +113,7 @@ pub fn murmur3_32_nocopy(source: &[u8], seed: u32) -> u32 { fn finish(state: u32, processed: u32) -> u32 { let mut hash = state; - hash ^= processed as u32; + hash ^= processed; hash ^= hash.wrapping_shr(R1); hash = hash.wrapping_mul(C1); hash ^= hash.wrapping_shr(R2); From 7878a0fbecf465720891b625f1a1fc3e19b6a960 Mon Sep 17 00:00:00 2001 From: Edward Amsden Date: Thu, 16 Feb 2023 05:12:52 -0600 Subject: [PATCH 5/5] tests for murmur3_32_of_slice --- src/murmur3_32.rs | 4 ++-- tests/quickcheck.rs | 16 +++++++++++++++- tests/test.rs | 7 +++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/murmur3_32.rs b/src/murmur3_32.rs index e0a3368..9a97924 100644 --- a/src/murmur3_32.rs +++ b/src/murmur3_32.rs @@ -66,8 +66,8 @@ pub fn murmur3_32(source: &mut T, seed: u32) -> Result { /// # Example /// /// ``` -/// use murmur3::murmur3_32::nocopy; -/// let hash_result = murmur3_32_nocopy("hello world".as_bytes(), 0) +/// use murmur3::murmur3_32_of_slice; +/// let hash_result = murmur3_32_of_slice("hello world".as_bytes(), 0); /// ``` pub fn murmur3_32_of_slice(source: &[u8], seed: u32) -> u32 { let mut buffer = source; diff --git a/tests/quickcheck.rs b/tests/quickcheck.rs index 560282d..c72d897 100644 --- a/tests/quickcheck.rs +++ b/tests/quickcheck.rs @@ -13,7 +13,7 @@ extern crate murmur3_sys; use std::io::Cursor; -use murmur3::murmur3_32; +use murmur3::{murmur3_32, murmur3_32_of_slice}; use murmur3_sys::MurmurHash3_x86_32; use murmur3::murmur3_x86_128; @@ -36,6 +36,20 @@ quickcheck! { } } +quickcheck! { + fn quickcheck_32_slice(input:(u32, Vec)) -> bool{ + let seed = input.0; + let xs = input.1; + let mut output: [u8; 4] = [0; 4]; + unsafe { + MurmurHash3_x86_32(xs.as_ptr() as _, xs.len() as i32, seed, output.as_mut_ptr() as _) + }; + let output = u32::from_le_bytes(output); + let output2 = murmur3_32_of_slice(&xs[..], seed); + output == output2 + } +} + quickcheck! { fn quickcheck_x86_128(input:(u32, Vec)) -> bool { let seed = input.0; diff --git a/tests/test.rs b/tests/test.rs index 7dbad10..e2a596c 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -368,6 +368,13 @@ fn test_static_strings() { test.string ); + assert_eq!( + murmur3::murmur3_32_of_slice(test.string.as_bytes(), 0), + test.hash_32, + "Failed 32_of_slice on string {}", + test.string, + ); + let mut string = String::new(); str_as_chained_cursor(test.string) .read_to_string(&mut string)