From 17b961d780633c3774262187a41425019d78a3db Mon Sep 17 00:00:00 2001 From: zackcam Date: Wed, 4 Dec 2024 01:14:04 +0000 Subject: [PATCH] Updating defrag and tests to use cursors and make test more robust by getting hits Signed-off-by: zackcam --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- src/bloom/data_type.rs | 13 +- src/bloom/utils.rs | 94 ++++++----- src/wrapper/bloom_callback.rs | 279 +++++++++++++-------------------- src/wrapper/defrag.rs | 41 +++++ src/wrapper/mod.rs | 1 + tests/test_bloom_defrag.py | 199 +++++++++++++++++------ tests/test_bloom_metrics.py | 8 +- tests/test_save_and_restore.py | 13 +- 10 files changed, 365 insertions(+), 287 deletions(-) create mode 100644 src/wrapper/defrag.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a73862c..da72f7a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,4 +100,4 @@ jobs: - name: Update module path run: echo "MODULE_PATH=$(realpath target/release/libvalkey_bloom.so)" >> $GITHUB_ENV - name: Run integration tests - run: python -m pytest --cache-clear -v "tests/" + run: python -m pytest --cache-clear -v "tests/" -m "not skip_for_asan" diff --git a/Cargo.toml b/Cargo.toml index abe8e79..431cd18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ homepage = "https://github.com/valkey-io/valkey-bloom" valkey-module = "0.1.2" valkey-module-macros = "0" linkme = "0" -bloomfilter = { path = "../rust-bloom-filter", features = ["serde"] } +bloomfilter = { version = "3.0.1", features = ["serde"] } lazy_static = "1.4.0" bit-vec = "0.8.0" libc = "0.2" diff --git a/src/bloom/data_type.rs b/src/bloom/data_type.rs index c0693d7..1d33dfa 100644 --- a/src/bloom/data_type.rs +++ b/src/bloom/data_type.rs @@ -104,18 +104,13 @@ impl ValkeyDataType for BloomFilterType { (FIXED_SIP_KEY_ONE_A, FIXED_SIP_KEY_ONE_B), (FIXED_SIP_KEY_TWO_A, FIXED_SIP_KEY_TWO_B), ]; - let filter = BloomFilter::from_existing( - bitmap.as_ref(), - number_of_bits, - number_of_hash_functions as u32, - sip_keys, - num_items as u32, - capacity as u32, - ); + let filter = + BloomFilter::from_existing(bitmap.as_ref(), num_items as u32, capacity as u32); filters.push(Box::new(filter)); } BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( - mem::size_of::(), + mem::size_of::() + + (filters.capacity() * std::mem::size_of::>()), std::sync::atomic::Ordering::Relaxed, ); BLOOM_NUM_OBJECTS.fetch_add(1, std::sync::atomic::Ordering::Relaxed); diff --git a/src/bloom/utils.rs b/src/bloom/utils.rs index 39bde73..3a91bd6 100644 --- a/src/bloom/utils.rs +++ b/src/bloom/utils.rs @@ -6,6 +6,7 @@ use crate::{ metrics, }; use bloomfilter; +use bloomfilter::{deserialize, serialize}; use serde::{Deserialize, Serialize}; use std::{mem, os::raw::c_void, sync::atomic::Ordering}; use valkey_module::{logging, raw}; @@ -86,14 +87,15 @@ impl BloomFilterType { return Err(BloomError::ExceedsMaxBloomSize); } metrics::BLOOM_NUM_OBJECTS.fetch_add(1, Ordering::Relaxed); - metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( - mem::size_of::(), - std::sync::atomic::Ordering::Relaxed, - ); // Create the bloom filter and add to the main BloomFilter object. let bloom: Box = Box::new(BloomFilter::new(fp_rate, capacity)); let filters = vec![bloom]; + metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( + mem::size_of::() + + (filters.capacity() * std::mem::size_of::>()), + std::sync::atomic::Ordering::Relaxed, + ); let bloom = BloomFilterType { expansion, fp_rate, @@ -104,16 +106,17 @@ impl BloomFilterType { /// Create a new BloomFilterType object from an existing one. pub fn create_copy_from(from_bf: &BloomFilterType) -> BloomFilterType { - let mut filters: Vec> = Vec::with_capacity(from_bf.filters.len()); + let mut filters: Vec> = Vec::with_capacity(from_bf.filters.capacity()); metrics::BLOOM_NUM_OBJECTS.fetch_add(1, Ordering::Relaxed); - metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( - mem::size_of::(), - std::sync::atomic::Ordering::Relaxed, - ); for filter in &from_bf.filters { let new_filter = Box::new(BloomFilter::create_copy_from(filter)); filters.push(new_filter); } + metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( + mem::size_of::() + + (filters.capacity() * std::mem::size_of::>()), + std::sync::atomic::Ordering::Relaxed, + ); BloomFilterType { expansion: from_bf.expansion, fp_rate: from_bf.fp_rate, @@ -208,10 +211,19 @@ impl BloomFilterType { return Err(BloomError::ExceedsMaxBloomSize); } let mut new_filter = Box::new(BloomFilter::new(new_fp_rate, new_capacity)); + let capacity_before = self.filters.capacity(); // Add item. new_filter.set(item); new_filter.num_items += 1; self.filters.push(new_filter); + // If we went over capacity and scaled the vec out we need to update the memory usage by the new capacity + if capacity_before != self.filters.capacity() { + metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( + (self.filters.capacity() - capacity_before) + * std::mem::size_of::>(), + std::sync::atomic::Ordering::Relaxed, + ); + } metrics::BLOOM_NUM_ITEMS_ACROSS_OBJECTS .fetch_add(1, std::sync::atomic::Ordering::Relaxed); @@ -286,7 +298,8 @@ impl BloomFilterType { // add bloom filter type metrics. metrics::BLOOM_NUM_OBJECTS.fetch_add(1, std::sync::atomic::Ordering::Relaxed); metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( - mem::size_of::(), + mem::size_of::() + + (item.filters.capacity() * std::mem::size_of::>()), std::sync::atomic::Ordering::Relaxed, ); // add bloom filter metrics. @@ -321,11 +334,25 @@ impl BloomFilterType { /// well within the u32::MAX limit. #[derive(Serialize, Deserialize)] pub struct BloomFilter { + #[serde( + serialize_with = "serialize", + deserialize_with = "deserialize_boxed_bloom" + )] pub bloom: Box>, pub num_items: u32, pub capacity: u32, } +use bloomfilter::Bloom; +use serde::Deserializer; + +pub fn deserialize_boxed_bloom<'de, D>(deserializer: D) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + deserialize(deserializer).map(Box::new) +} + impl BloomFilter { /// Instantiate empty BloomFilter object. pub fn new(fp_rate: f64, capacity: u32) -> BloomFilter { @@ -333,7 +360,8 @@ impl BloomFilter { capacity as usize, fp_rate, &configs::FIXED_SEED, - ); + ) + .unwrap(); let fltr = BloomFilter { bloom: Box::new(bloom), num_items: 0, @@ -349,20 +377,8 @@ impl BloomFilter { } /// Create a new BloomFilter from dumped information (RDB load). - pub fn from_existing( - bitmap: &[u8], - number_of_bits: u64, - number_of_hash_functions: u32, - sip_keys: [(u64, u64); 2], - num_items: u32, - capacity: u32, - ) -> BloomFilter { - let bloom = bloomfilter::Bloom::from_existing( - bitmap, - number_of_bits, - number_of_hash_functions, - sip_keys, - ); + pub fn from_existing(bitmap: &[u8], num_items: u32, capacity: u32) -> BloomFilter { + let bloom = bloomfilter::Bloom::from_slice(bitmap).unwrap(); let fltr = BloomFilter { bloom: Box::new(bloom), num_items, @@ -382,7 +398,7 @@ impl BloomFilter { pub fn number_of_bytes(&self) -> usize { std::mem::size_of::() + std::mem::size_of::>() - + (self.bloom.number_of_bits() / 8) as usize + + (self.bloom.len() / 8) as usize } /// Caculates the number of bytes that the bloom filter will require to be allocated. @@ -418,21 +434,15 @@ impl BloomFilter { /// Create a new BloomFilter from an existing BloomFilter object (COPY command). pub fn create_copy_from(bf: &BloomFilter) -> BloomFilter { - BloomFilter::from_existing( - &bf.bloom.bitmap(), - bf.bloom.number_of_bits(), - bf.bloom.number_of_hash_functions(), - bf.bloom.sip_keys(), - bf.num_items, - bf.capacity, - ) + BloomFilter::from_existing(bf.bloom.as_slice(), bf.num_items, bf.capacity) } } impl Drop for BloomFilterType { fn drop(&mut self) { metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_sub( - std::mem::size_of::(), + std::mem::size_of::() + + (self.filters.capacity() * std::mem::size_of::>()), std::sync::atomic::Ordering::Relaxed, ); metrics::BLOOM_NUM_OBJECTS.fetch_sub(1, Ordering::Relaxed); @@ -457,6 +467,7 @@ mod tests { use crate::configs::{ FIXED_SIP_KEY_ONE_A, FIXED_SIP_KEY_ONE_B, FIXED_SIP_KEY_TWO_A, FIXED_SIP_KEY_TWO_B, }; + use configs::FIXED_SEED; use rand::{distributions::Alphanumeric, Rng}; /// Returns random string with specified number of characters. @@ -573,8 +584,8 @@ mod tests { .filters .iter() .any( - |filter| (filter.bloom.sip_keys() == restore_filter.bloom.sip_keys()) - && (restore_filter.bloom.sip_keys() == expected_sip_keys) + |filter| (filter.bloom.seed() == restore_filter.bloom.seed()) + && (restore_filter.bloom.seed() == FIXED_SEED) ))); assert!(restored_bloom_filter_type .filters @@ -590,7 +601,7 @@ mod tests { .all(|restore_filter| original_bloom_filter_type .filters .iter() - .any(|filter| filter.bloom.bitmap() == restore_filter.bloom.bitmap()))); + .any(|filter| filter.bloom.as_slice() == restore_filter.bloom.as_slice()))); let (error_count, _) = check_items_exist( restored_bloom_filter_type, 1, @@ -739,11 +750,8 @@ mod tests { fn test_sip_keys() { // The value of sip keys generated by the sip_keys with fixed seed should be equal to the constant in configs.rs let test_bloom_filter = BloomFilter::new(0.5_f64, 1000_u32); - let test_sip_keys = test_bloom_filter.bloom.sip_keys(); - assert_eq!(test_sip_keys[0].0, FIXED_SIP_KEY_ONE_A); - assert_eq!(test_sip_keys[0].1, FIXED_SIP_KEY_ONE_B); - assert_eq!(test_sip_keys[1].0, FIXED_SIP_KEY_TWO_A); - assert_eq!(test_sip_keys[1].1, FIXED_SIP_KEY_TWO_B); + let test_sip_keys = test_bloom_filter.bloom.seed(); + assert!(test_sip_keys == FIXED_SEED); } #[test] diff --git a/src/wrapper/bloom_callback.rs b/src/wrapper/bloom_callback.rs index 480774b..06236da 100644 --- a/src/wrapper/bloom_callback.rs +++ b/src/wrapper/bloom_callback.rs @@ -3,13 +3,11 @@ use crate::bloom::data_type::ValkeyDataType; use crate::bloom::utils::BloomFilter; use crate::bloom::utils::BloomFilterType; use crate::configs; -use bit_vec::BitVec; use bloomfilter::Bloom; use lazy_static::lazy_static; use std::ffi::CString; use std::mem; use std::os::raw::{c_char, c_int, c_void}; -use std::ptr; use std::ptr::null_mut; use std::sync::atomic::Ordering; use std::sync::Mutex; @@ -18,6 +16,8 @@ use valkey_module::logging::{log_io_error, ValkeyLogLevel}; use valkey_module::raw; use valkey_module::{RedisModuleDefragCtx, RedisModuleString}; +use super::defrag::Defrag; + // Note: methods in this mod are for the bloom module data type callbacks. // The reason they are unsafe is because the callback methods are expected to be // "unsafe extern C" based on the Rust module API definition @@ -32,13 +32,13 @@ pub unsafe extern "C" fn bloom_rdb_save(rdb: *mut raw::RedisModuleIO, value: *mu let mut filter_list_iter = filter_list.iter().peekable(); while let Some(filter) = filter_list_iter.next() { let bloom = &filter.bloom; - let bitmap = bloom.bitmap(); + let bitmap = bloom.as_slice(); raw::RedisModule_SaveStringBuffer.unwrap()( rdb, bitmap.as_ptr().cast::(), bitmap.len(), ); - raw::save_unsigned(rdb, bloom.number_of_bits()); + raw::save_unsigned(rdb, bloom.len()); raw::save_unsigned(rdb, bloom.number_of_hash_functions() as u64); raw::save_unsigned(rdb, filter.capacity as u64); if filter_list_iter.peek().is_none() { @@ -136,142 +136,96 @@ pub unsafe extern "C" fn bloom_free_effort( curr_item.free_effort() } -// /// # Safety -// /// Raw handler for the Bloom object's defrag callback. -// pub unsafe extern "C" fn bloom_defrag( -// _defrag_ctx: *mut RedisModuleDefragCtx, -// _from_key: *mut RedisModuleString, -// value: *mut *mut c_void, -// ) -> i32 { -// if !configs::BLOOM_DEFRAG.load(Ordering::Relaxed) { -// return 0; -// } -// let bloom_filter_type: &mut BloomFilterType = &mut *(*value).cast::(); - -// let num_filts = bloom_filter_type.filters.len(); - -// for _ in 0..num_filts { -// let bloom_filter_box = bloom_filter_type.filters.remove(0); -// let bloom_filter = Box::into_raw(bloom_filter_box); -// logging::log_warning(format!("Before Address: {:p}", bloom_filter)); -// let defrag_result = unsafe { -// raw::RedisModule_DefragAlloc.unwrap()( -// core::ptr::null_mut(), -// bloom_filter as *mut c_void, -// ) -// }; -// let mut defragged_filter = { -// if !defrag_result.is_null() { -// Box::from_raw(defrag_result as *mut BloomFilter) -// } else { -// Box::from_raw(bloom_filter) -// } -// }; -// logging::log_warning(format!("After Address: {:p}", defragged_filter)); -// // let test = Box::leak(defragged_filter.bloom); -// // let tes = Box::into_raw(test); -// // let inner_bloom = mem::replace( -// // &mut defragged_filter.bloom, -// // Box::new(bloomfilter::Bloom::new(1, 1)), -// // ); -// // let inner_bloom = mem::replace( -// // &mut defragged_filter.bloom, -// // Box::from_raw(ptr::null::>() as *mut bloomfilter::Bloom<[u8]>), -// // ); -// let inner_bloom = mem::take(&mut defragged_filter.bloom); -// let inner_bloom_ptr = Box::into_raw(inner_bloom); -// logging::log_warning(format!("Before bloom Address: {:p}", inner_bloom_ptr)); -// let defragged_inner_bloom = raw::RedisModule_DefragAlloc.unwrap()( -// core::ptr::null_mut(), -// inner_bloom_ptr as *mut c_void, -// ); -// defragged_filter.bloom = { -// if !defrag_result.is_null() { -// Box::from_raw(defragged_inner_bloom as *mut bloomfilter::Bloom<[u8]>) -// } else { -// Box::from_raw(inner_bloom_ptr) -// } -// }; -// logging::log_warning(format!("After bloom Address: {:p}", defragged_filter.bloom)); -// bloom_filter_type.filters.push(defragged_filter); -// } -// let val = unsafe { raw::RedisModule_DefragAlloc.unwrap()(core::ptr::null_mut(), *value) }; -// if !val.is_null() { -// *value = val; -// } -// 0 -// } - +/// Lazy static for a default temporary bloom that gets swapped during defrag. lazy_static! { static ref DEFRAG_BLOOM_FILTER: Mutex>>> = - Mutex::new(Some(Box::new(Bloom::<[u8]>::new(1, 1)))); - static ref DEFRAG_VEC: Mutex>> = Mutex::new(Some(Vec::new())); + Mutex::new(Some(Box::new(Bloom::<[u8]>::new(1, 1).unwrap()))); } -fn external_vec_defrag(mut vec: Vec) -> Vec { - let clonev = vec.clone(); +/// Defragments a vector of bytes. This function is designed to be used as a callback. +/// +/// This function takes ownership of a `Vec`, attempts to defragment it using an external +/// defragmentation mechanism, and returns a new `Vec` that may have been defragmented. +/// +/// # Arguments +/// +/// * `vec` - A `Vec` to be defragmented. +/// +/// # Returns +/// +/// Returns a new `Vec` that may have been defragmented. If defragmentation was successful, +/// the returned vector will use the newly allocated memory. If defragmentation failed or was +/// not necessary, the original vector's memory will be used. +fn external_vec_defrag(mut vec: Vec) -> Vec { + let defrag = Defrag::new(core::ptr::null_mut()); let len = vec.len(); let capacity = vec.capacity(); - // let ptr: *mut u32 = vec.as_mut_ptr(); let vec_ptr = Box::into_raw(vec.into_boxed_slice()) as *mut c_void; - logging::log_warning(format!("Before vec_ptr start Address: {:p}", vec_ptr)); - - let defragged_filters_ptr = - unsafe { raw::RedisModule_DefragAlloc.unwrap()(core::ptr::null_mut(), vec_ptr) }; - logging::log_warning(format!( - "After hmmm vec Address: {:p}", - defragged_filters_ptr - )); + let defragged_filters_ptr = unsafe { defrag.alloc(vec_ptr) }; if !defragged_filters_ptr.is_null() { - unsafe { Vec::from_raw_parts(defragged_filters_ptr as *mut u32, len, capacity) } + unsafe { Vec::from_raw_parts(defragged_filters_ptr as *mut u8, len, capacity) } } else { - unsafe { Vec::from_raw_parts(vec_ptr as *mut u32, len, capacity) } + unsafe { Vec::from_raw_parts(vec_ptr as *mut u8, len, capacity) } } - // unsafe { Vec::from_raw_parts(defragged_filters_ptr as *mut u32, len, capacity) } -} - -fn external_bitvec_defrag(bit_vec: BitVec) -> BitVec { - // let ptr: *mut BitVec = Box::into_raw(Box::new(bit_vec)); - // logging::log_warning(format!("Before bloom bit_vec Address: {:p}", ptr)); - // let defrag_result = - // unsafe { raw::RedisModule_DefragAlloc.unwrap()(core::ptr::null_mut(), ptr as *mut c_void) }; - // let mut defragged_filter = unsafe { Box::from_raw(defrag_result as *mut BitVec) }; - // logging::log_warning(format!("After bloom bit_vec Address: {:p}", defragged_filter)); - // *defragged_filter - bit_vec } /// # Safety /// Raw handler for the Bloom object's defrag callback. +/// +/// There are a few different structures we will be defragging we will explain them top down then afterwards state the order in which +/// we will defrag. Starting from the top which is passed in as the variable named value. We have the BloomFilterType this BloomFilterType +/// contains a vec of BloomFilters. These BloomFilters then each have a Bloom object. Finally each of these Bloom objects have a Vec. +/// +/// This order of defragmention is as follows (1 to 3 is in a loop for the number of filters): +/// 1. BloomFilter within the BloomFilterType +/// 2. Bloom objects within each BloomFilter +/// 3. Vec within each Bloom object +/// 4. Vec of BloomFilters in the BloomFilterType +/// 5. The BloomFilterType itself +/// +/// We use a cursor to track the current filter of BloomFilterType that we are defragging. This cursor will start at 0 +/// if we finished all the filters the last time we defragged this object or if we havent defragged it before. We will determine +/// that we have spent to much time on defragging this specific object from the should_stop_defrag() method. If we didn't defrag +/// all the filters then we set the cursor so we know where to start from the next time we defrag and return a 1 to show we didn't +/// finish. +/// +/// # Arguments +/// +/// * `defrag_ctx` - A raw pointer to the defragmentation context. +/// * `_from_key` - A raw pointer to the Redis module string (unused in this function). +/// * `value` - A mutable raw pointer to a raw pointer representing the BloomFilterType to be defragmented. +/// +/// # Returns +/// +/// Returns an `i32` where: +/// * 0 indicates successful complete defragmentation. +/// * 1 indicates incomplete defragmentation (not all filters were defragged). pub unsafe extern "C" fn bloom_defrag( - _defrag_ctx: *mut RedisModuleDefragCtx, + defrag_ctx: *mut RedisModuleDefragCtx, _from_key: *mut RedisModuleString, value: *mut *mut c_void, ) -> i32 { - // logging::log_warning(format!("After here 0")); + // If defrag is disabled we will just exit straight away + if !configs::BLOOM_DEFRAG.load(Ordering::Relaxed) { + return 0; + } - let bloom_filter_type: &mut BloomFilterType = &mut *(*value).cast::(); + // Get the cursor for the BloomFilterType otherwise start the cursor at 0 + let mut cursor: u64 = 0; + let defrag = Defrag::new(defrag_ctx); + defrag.curserget(&mut cursor); - let num_filts = bloom_filter_type.filters.len(); + // Convert pointer to BloomFilterType so we can operate on it. + let bloom_filter_type: &mut BloomFilterType = &mut *(*value).cast::(); - logging::log_warning(format!( - "defrag in box Address: {:p}", - bloom_filter_type.filters.as_ptr() - )); + let num_filters = bloom_filter_type.filters.len(); - for _ in 0..num_filts { - let bloom_filter_box = bloom_filter_type.filters.remove(0); + // While we are within a timeframe decided from should_stop_defrag and not over the number of filters defrag the next filter + while defrag.should_stop_defrag() == 0 && cursor < num_filters.try_into().unwrap() { + // Remove the current filter, unbox it, and attempt to defragment. + let bloom_filter_box = bloom_filter_type.filters.remove(cursor.try_into().unwrap()); let bloom_filter = Box::into_raw(bloom_filter_box); - - let defrag_result = unsafe { - raw::RedisModule_DefragAlloc.unwrap()( - core::ptr::null_mut(), - bloom_filter as *mut c_void, - ) - }; - - logging::log_warning(format!("Before Vec start Address: {:p}", defrag_result)); - + let defrag_result = defrag.alloc(bloom_filter as *mut c_void); let mut defragged_filter = { if !defrag_result.is_null() { Box::from_raw(defrag_result as *mut BloomFilter) @@ -279,80 +233,69 @@ pub unsafe extern "C" fn bloom_defrag( Box::from_raw(bloom_filter) } }; - let mut defrag_b = DEFRAG_BLOOM_FILTER.lock().unwrap(); + // Swap the Bloom object with a temporary one for defragmentation + let mut temporary_bloom = DEFRAG_BLOOM_FILTER.lock().unwrap(); let inner_bloom = mem::replace( &mut defragged_filter.bloom, - defrag_b.take().expect("We expect default to exist"), + temporary_bloom.take().expect("We expect default to exist"), ); + // Convert the inner_bloom into the correct type and then try to defragment it let inner_bloom_ptr = Box::into_raw(inner_bloom); - let defragged_inner_bloom = raw::RedisModule_DefragAlloc.unwrap()( - core::ptr::null_mut(), - inner_bloom_ptr as *mut c_void, - ); - logging::log_warning(format!("defrag in box Address: {:p}", defragged_filter)); + let defragged_inner_bloom = defrag.alloc(inner_bloom_ptr as *mut c_void); + // Defragment the Vec within the Bloom object using the external callback if !defragged_inner_bloom.is_null() { - let inner_bloom = mem::replace( - &mut defragged_filter.bloom, - Box::from_raw(defragged_inner_bloom as *mut bloomfilter::Bloom<[u8]>), - ); - *defrag_b = Some(inner_bloom); // Resetting the original static + let mut inner_bloom = + unsafe { Box::from_raw(defragged_inner_bloom as *mut bloomfilter::Bloom<[u8]>) }; + let external_bloom = + inner_bloom.realloc_large_heap_allocated_objects(external_vec_defrag); + let old_bloom = mem::replace(&mut defragged_filter.bloom, Box::new(external_bloom)); + *temporary_bloom = Some(old_bloom); // Reset the original static } else { - let inner_bloom = - mem::replace(&mut defragged_filter.bloom, Box::from_raw(inner_bloom_ptr)); - *defrag_b = Some(inner_bloom); // Resetting the original static + let mut inner_bloom = unsafe { Box::from_raw(inner_bloom_ptr) }; + let external_bloom = + inner_bloom.realloc_large_heap_allocated_objects(external_vec_defrag); + let old_bloom = mem::replace(&mut defragged_filter.bloom, Box::new(external_bloom)); + *temporary_bloom = Some(old_bloom); // Reset the original static } - // let inner_bloom = mem::replace( - // &mut defragged_filter.bloom, - // Box::from_raw(defragged_inner_bloom as *mut bloomfilter::Bloom<[u8]>), - // ); - // *defrag_b = Some(inner_bloom); // Resetting the original static - - // logging::log_warning(format!("1bloom filter len: {}", bloom_filter_type.filters.len())); - // let mut defrag_v = DEFRAG_VEC.lock().unwrap(); - // let placeholder = defrag_v.take().unwrap(); - // defragged_filter - // .bloom - // .defrag_no(external_bitvec_defrag, external_vec_defrag); - // // *defrag_v = Some(newplaceholder); // Resetting the original static - // logging::log_warning(format!("After bloom Address: {:p}", defragged_filter.bloom)); - - // logging::log_warning(format!("2bloom filter len: {}", bloom_filter_type.filters.len())); - defragged_filter - .bloom - .defrag_no(external_bitvec_defrag, external_vec_defrag); - bloom_filter_type.filters.push(defragged_filter); + // Reinsert the defragmented filter and increment the cursor + bloom_filter_type + .filters + .insert(cursor.try_into().unwrap(), defragged_filter); + cursor += 1; } + // Save the cursor for where we will start defragmenting from next time + defrag.curserset(cursor); + // If not all filters were looked at, return 1 to indicate incomplete defragmentation + if cursor < (num_filters).try_into().unwrap() { + return 1; + } + // Defragment the Vec of filters itself let filters_vec = mem::take(&mut bloom_filter_type.filters); let filters_ptr = Box::into_raw(filters_vec.into_boxed_slice()) as *mut c_void; - // logging::log_warning(format!("Before Vec start Address: {:p}", filters_ptr)); - - let defragged_filters_ptr = - unsafe { raw::RedisModule_DefragAlloc.unwrap()(core::ptr::null_mut(), filters_ptr) }; - logging::log_warning(format!( - "After Vec start Address: {:p} \n\n\n", - defragged_filters_ptr - )); + let defragged_filters_ptr = defrag.alloc(filters_ptr); if !defragged_filters_ptr.is_null() { bloom_filter_type.filters = unsafe { Vec::from_raw_parts( defragged_filters_ptr as *mut Box, - num_filts, - num_filts, + num_filters, + num_filters, ) }; } else { bloom_filter_type.filters = unsafe { - Vec::from_raw_parts(filters_ptr as *mut Box, num_filts, num_filts) + Vec::from_raw_parts( + filters_ptr as *mut Box, + num_filters, + num_filters, + ) }; } - // logging::log_warning(format!("After here last")); - - let val = unsafe { raw::RedisModule_DefragAlloc.unwrap()(core::ptr::null_mut(), *value) }; + // Finally, attempt to defragment the BloomFilterType itself + let val = defrag.alloc(*value); if !val.is_null() { *value = val; } - logging::log_warning("After here super last"); - + // Return 0 to indicate successful complete defragmentation 0 } diff --git a/src/wrapper/defrag.rs b/src/wrapper/defrag.rs new file mode 100644 index 0000000..6f65ea4 --- /dev/null +++ b/src/wrapper/defrag.rs @@ -0,0 +1,41 @@ +use std::os::raw::{c_ulong, c_void}; + +use valkey_module::raw; + +pub struct Defrag { + pub defrag_ctx: *mut raw::RedisModuleDefragCtx, +} + +impl Defrag { + pub const fn new(defrag_ctx: *mut raw::RedisModuleDefragCtx) -> Self { + Self { defrag_ctx } + } + + /// # Safety + /// + /// This function should not be called before the horsemen are ready. + pub unsafe fn alloc(&self, ptr: *mut c_void) -> *mut c_void { + unsafe { raw::RedisModule_DefragAlloc.unwrap()(self.defrag_ctx, ptr) } + } + + /// # Safety + /// + /// This function should not be called before the horsemen are ready. + pub unsafe fn curserset(&self, cursor: u64) -> i32 { + unsafe { raw::RedisModule_DefragCursorSet.unwrap()(self.defrag_ctx, cursor) } + } + + /// # Safety + /// + /// This function should not be called before the horsemen are ready. + pub unsafe fn curserget(&self, cursor: *mut u64) -> i32 { + unsafe { raw::RedisModule_DefragCursorGet.unwrap()(self.defrag_ctx, cursor) } + } + + /// # Safety + /// + /// This function should not be called before the horsemen are ready. + pub unsafe fn should_stop_defrag(&self) -> i32 { + unsafe { raw::RedisModule_DefragShouldStop.unwrap()(self.defrag_ctx) } + } +} diff --git a/src/wrapper/mod.rs b/src/wrapper/mod.rs index 09023e2..d198382 100644 --- a/src/wrapper/mod.rs +++ b/src/wrapper/mod.rs @@ -1 +1,2 @@ pub mod bloom_callback; +pub mod defrag; diff --git a/tests/test_bloom_defrag.py b/tests/test_bloom_defrag.py index 358448b..62728f0 100644 --- a/tests/test_bloom_defrag.py +++ b/tests/test_bloom_defrag.py @@ -2,75 +2,176 @@ from valkeytests.valkey_test_case import ValkeyAction from valkey_bloom_test_case import ValkeyBloomTestCaseBase from valkeytests.conftest import resource_port_tracker +from util.waiters import * +import pytest +@pytest.mark.skip_for_asan(reason="These tests are skipped due to not being able to set activedefrag to yes when valkey server is an ASAN build") class TestBloomDefrag(ValkeyBloomTestCaseBase): - def get_custom_args(self): - args = super().get_custom_args() - # args.update({'activedefrag': 'yes'}) - - args.update({'activedefrag': 'yes', 'active-defrag-threshold-lower': '0', 'active-defrag-ignore-bytes': '1'}) - return args - - def test_bloom_defrag(self): - stats = self.parse_valkey_stats() - defrag_hits = int(stats.get('active_defrag_hits', 0)) - defrag_misses = int(stats.get('active_defrag_misses', 0)) - assert defrag_hits == 0 - assert defrag_misses == 0 - mem_info = self.client.execute_command('INFO MEMORY ') - print(mem_info) - print("\n\n\n\n") - - self.client.execute_command(command) + def test_bloom_defrag(self): + # Set defragmentation thresholds + self.client.config_set('activedefrag', 'no') + self.client.config_set('active-defrag-ignore-bytes', '1') + self.client.config_set('active-defrag-threshold-lower', '2') + + # Set a lower maxmemory + max_memory = 20 * 1024 * 1024 + self.client.config_set('maxmemory', str(max_memory)) - # setting max_memory through config set + # Initial stats + stats = self.parse_valkey_info("STATS") + initial_defrag_hits = int(stats.get('active_defrag_hits', 0)) + initial_defrag_misses = int(stats.get('active_defrag_misses', 0)) - # defrag_misses = self.client.execute_command('INFO STATS')["active_defrag_misses"] - # assert defrag_misses == 0 - scale_names = [f'scale_{i}' for i in range(1, 1000)] - # Loop through the scale names and execute the command + # Create fragmentation by inserting and deleting data + scale_names = [f'scale_{i}' for i in range(1, 2000)] + # Insert data for scale in scale_names: - command = f'bf.insert {scale} CAPACITY 1 EXPANSION 1 ITEMS 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17' + command = f'bf.insert {scale} CAPACITY 1 EXPANSION 2 ITEMS ' + ' '.join(str(i) for i in range(1, 100)) self.client.execute_command(command) - time.sleep(1) - mem_info = self.client.execute_command('INFO MEMORY ') - stats = self.parse_valkey_stats() - defrag_hits = int(stats.get('active_defrag_hits', 0)) - defrag_misses = int(stats.get('active_defrag_misses', 0)) - print(f"Active defrag hits: {defrag_hits}") - print(f"Active defrag misses: {defrag_misses}") - print(mem_info) + # Delete every other item to create fragmentation + for scale in scale_names[::2]: + self.client.execute_command(f'DEL {scale}') + # Add a wait due to lazy delete where if we call info to early we wont get the correct memory info + time.sleep(5) + # Will probably want to check a field in this instead of printing once we understand more + print("Memory info after insertions and deletions:") + print(self.client.execute_command('INFO MEMORY')) + memory_info_non_defragged = self.parse_valkey_info("MEMORY") - # unlink_count = int(len(scale_names) * 0.8) + # Enable defragmentation and defrag items. + self.client.config_set('activedefrag', 'yes') + self.wait_for_defrag(initial_defrag_hits, initial_defrag_misses) - # for scale in scale_names[:unlink_count]: - # self.client.execute_command(f'DEL {scale}') + # Will probably want to check a field in this instead of printing once we understand more + print("Memory info after first defragmentation:") + memory_info_after_defrag = self.parse_valkey_info("MEMORY") + first_defrag_stats = self.parse_valkey_info("STATS") + first_defrag_hits = int(first_defrag_stats.get('active_defrag_hits', 0)) + first_defrag_misses = int(first_defrag_stats.get('active_defrag_misses', 0)) - # mem_info = self.client.execute_command('INFO MEMORY ') - # print("\n\n\n\n") + # Assertion we got hits and misses when defragging + assert first_defrag_hits > initial_defrag_hits and first_defrag_misses > initial_defrag_misses + assert float(memory_info_after_defrag.get('allocator_frag_ratio', 0)) < float(memory_info_non_defragged.get('allocator_frag_ratio', 0)) + # Check that items we added still exist in the respective bloom objects + counter = 1 + for scale in scale_names[1::2]: + command = f'bf.exists {scale} {str(counter)}' + assert self.client.execute_command(command) == 1 + counter += 1 + if counter >= 100: + counter = 1 - # print(mem_info) - assert defrag_hits == 0 - assert defrag_misses == 0 + self.client.bgsave() + self.server.wait_for_save_done() + self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True) + assert self.server.is_alive() + wait_for_equal(lambda: self.server.is_rdb_done_loading(), True) - def parse_valkey_stats(self): - mem_info = self.client.execute_command('INFO STATS \n\n\n') + # Set config as we had before saving and restarting + self.client.config_set('activedefrag', 'yes') + self.client.config_set('active-defrag-ignore-bytes', '1') + self.client.config_set('active-defrag-threshold-lower', '2') + self.client.config_set('maxmemory', str(max_memory)) - # Split the string into lines - lines = mem_info.decode('utf-8').split('\r\n') - - # Create a dictionary to store the key-value pairs + # Defrag items again and make sure no crashes happen + self.wait_for_defrag(first_defrag_hits, first_defrag_misses) + + final_stats = self.parse_valkey_info("STATS") + final_defrag_hits = int(final_stats.get('active_defrag_hits', 0)) + final_defrag_misses = int(final_stats.get('active_defrag_misses', 0)) + assert final_defrag_hits > initial_defrag_hits or final_defrag_misses > initial_defrag_misses, "No defragmentation occurred after RDB load" + # Check that items we added still exist in the respective bloom objects + counter = 1 + for scale in scale_names[1::2]: + command = f'bf.exists {scale} {str(counter)}' + assert self.client.execute_command(command) == 1 + counter += 1 + if counter >= 100: + counter = 1 + + def parse_valkey_info(self, section): + mem_info = self.client.execute_command('INFO ' + section) + print (mem_info) + lines = mem_info.decode('utf-8').split('\r\n') stats_dict = {} - - # Parse each line for line in lines: if ':' in line: key, value = line.split(':', 1) stats_dict[key.strip()] = value.strip() + return stats_dict + + def wait_for_defrag(self, initial_hits, initial_misses): + max_wait_time = 20 + start_time = time.time() + + while time.time() - start_time < max_wait_time: + time.sleep(10) + stats = self.parse_valkey_info("STATS") + defrag_hits = int(stats.get('active_defrag_hits', 0)) + defrag_misses = int(stats.get('active_defrag_misses', 0)) + + if defrag_hits > initial_hits or defrag_misses > initial_misses: + return + + + def test_bloom_defrag_non_scale(self): + # Set defragmentation thresholds + self.client.config_set('activedefrag', 'no') + self.client.config_set('active-defrag-ignore-bytes', '1') + self.client.config_set('active-defrag-threshold-lower', '2') - return stats_dict \ No newline at end of file + # Set a lower maxmemory + max_memory = 20 * 1024 * 1024 + self.client.config_set('maxmemory', str(max_memory)) + + # Initial stats + stats = self.parse_valkey_info("STATS") + initial_defrag_hits = int(stats.get('active_defrag_hits', 0)) + initial_defrag_misses = int(stats.get('active_defrag_misses', 0)) + + + # Create fragmentation by inserting and deleting data + scale_names = [f'scale_{i}' for i in range(1, 2000)] + # Insert data + for scale in scale_names: + command = f'bf.insert {scale} CAPACITY 200 EXPANSION 2 ITEMS ' + ' '.join(str(i) for i in range(1, 100)) + self.client.execute_command(command) + + # Delete every other item to create fragmentation + for scale in scale_names[::2]: + self.client.execute_command(f'DEL {scale}') + # Add a wait due to lazy delete where if we call info to early we wont get the correct memory info + time.sleep(5) + # Will probably want to check a field in this instead of printing once we understand more + print("Memory info after insertions and deletions:") + print(self.client.execute_command('INFO MEMORY')) + memory_info_non_defragged = self.parse_valkey_info("MEMORY") + + # Enable defragmentation and defrag items. + self.client.config_set('activedefrag', 'yes') + self.wait_for_defrag(initial_defrag_hits, initial_defrag_misses) + + # Will probably want to check a field in this instead of printing once we understand more + print("Memory info after first defragmentation:") + memory_info_after_defrag = self.parse_valkey_info("MEMORY") + + first_defrag_stats = self.parse_valkey_info("STATS") + first_defrag_hits = int(first_defrag_stats.get('active_defrag_hits', 0)) + first_defrag_misses = int(first_defrag_stats.get('active_defrag_misses', 0)) + + # Assertion we got hits and misses when defragging + assert first_defrag_hits > initial_defrag_hits and first_defrag_misses > initial_defrag_misses + assert float(memory_info_after_defrag.get('allocator_frag_ratio', 0)) < float(memory_info_non_defragged.get('allocator_frag_ratio', 0)) + # Check that items we added still exist in the respective bloom objects + counter = 1 + for scale in scale_names[1::2]: + command = f'bf.exists {scale} {str(counter)}' + assert self.client.execute_command(command) == 1 + counter += 1 + if counter >= 100: + counter = 1 diff --git a/tests/test_bloom_metrics.py b/tests/test_bloom_metrics.py index 4f7afb1..a5bf5ac 100644 --- a/tests/test_bloom_metrics.py +++ b/tests/test_bloom_metrics.py @@ -135,8 +135,6 @@ def test_save_and_restore_metrics(self): # Compare original and loaded scaled bloomfilter infos new_client = self.server.get_new_client() - restored_info_obj = new_client.execute_command('BF.INFO key1') - for i in range(1, len(original_info_obj), 2): - assert original_info_obj[i] == restored_info_obj[i] - - self.verify_bloom_metrics(new_client.execute_command("INFO bf"), original_info_obj[3] + DEFAULT_BLOOM_FILTER_SIZE, 2, 3, 7501, 21000 + DEFAULT_BLOOM_FILTER_CAPACITY) + # When we scale out in the original it scales the vec by a factor of four. However when we load from an rdb we create an exact sized vec, this means the last + # two 8 byte sized allocations for a vec are no longer in memory so we have 16 less bytes in memory now. Figure out if we want to do this + self.verify_bloom_metrics(new_client.execute_command("INFO bf"), original_info_obj[3] + DEFAULT_BLOOM_FILTER_SIZE - 16, 2, 3, 7501, 21000 + DEFAULT_BLOOM_FILTER_CAPACITY) diff --git a/tests/test_save_and_restore.py b/tests/test_save_and_restore.py index 398c97a..bfdee08 100644 --- a/tests/test_save_and_restore.py +++ b/tests/test_save_and_restore.py @@ -15,12 +15,11 @@ def test_basic_save_and_restore(self): bf_info_result_1 = client.execute_command('BF.INFO testSave') assert(len(bf_info_result_1)) != 0 curr_item_count_1 = client.info_obj().num_keys() - + # Keep the server running for 1 second more to have a larger uptime. + time.sleep(1) # save rdb, restart sever client.bgsave() self.server.wait_for_save_done() - # Keep the server running for 1 second more to have a larger uptime. - time.sleep(1) uptime_in_sec_1 = self.client.info_obj().uptime_in_secs() self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True) uptime_in_sec_2 = self.client.info_obj().uptime_in_secs() @@ -36,14 +35,6 @@ def test_basic_save_and_restore(self): bf_info_result_2 = client.execute_command('BF.INFO testSave') assert bf_info_result_2 == bf_info_result_1 - def get_custom_args(self): - args = super().get_custom_args() - # args.update({'activedefrag': 'yes'}) - - args.update({'activedefrag': 'yes', 'active-defrag-threshold-lower': '0', 'active-defrag-ignore-bytes': '1'}) - return args - - def test_basic_save_many(self): client = self.server.get_new_client() count = 500