-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Updating defragmentation to defrag both bloomfiltertype and bloomfiler structs #24
Changes from all commits
f5c4cfa
9b59338
05d3e75
5285813
29ae5a3
7de8717
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,9 @@ | ||
use crate::bloom::utils::BloomFilter; | ||
use crate::bloom::utils::BloomFilterType; | ||
use crate::configs; | ||
use crate::metrics::BLOOM_NUM_OBJECTS; | ||
use crate::metrics::BLOOM_OBJECT_TOTAL_MEMORY_BYTES; | ||
use crate::wrapper::bloom_callback; | ||
use crate::wrapper::digest::Digest; | ||
use crate::MODULE_NAME; | ||
use std::mem; | ||
use std::os::raw::c_int; | ||
use valkey_module::native_types::ValkeyType; | ||
use valkey_module::{logging, raw}; | ||
|
@@ -71,14 +68,18 @@ impl ValkeyDataType for BloomFilterType { | |
let Ok(fp_rate) = raw::load_double(rdb) else { | ||
return None; | ||
}; | ||
|
||
let Ok(tightening_ratio) = raw::load_double(rdb) else { | ||
return None; | ||
}; | ||
let mut filters: Vec<BloomFilter> = Vec::with_capacity(num_filters as usize); | ||
let Ok(is_seed_random_u64) = raw::load_unsigned(rdb) else { | ||
return None; | ||
}; | ||
let is_seed_random = is_seed_random_u64 == 1; | ||
// We start off with capacity as 1 to match the same expansion of the vector that would have occurred during bloom | ||
// object creation and scaling as a result of BF.* operations. | ||
let mut filters = Vec::with_capacity(1); | ||
|
||
for i in 0..num_filters { | ||
let Ok(bitmap) = raw::load_string_buffer(rdb) else { | ||
return None; | ||
|
@@ -114,20 +115,17 @@ impl ValkeyDataType for BloomFilterType { | |
logging::log_warning("Failed to restore bloom object: Object in fixed seed mode, but seed does not match FIXED_SEED."); | ||
return None; | ||
} | ||
filters.push(filter); | ||
filters.push(Box::new(filter)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this to avoid re-assigning the ownership of filter variable? I don't understand the reason for it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we put the filter in the box this allows us to take it out later and have ownership which means we can update the pointers address to actually change it. Without putting it into the box we couldn't actually change the addresses associated with it. Documentation for box |
||
} | ||
BLOOM_OBJECT_TOTAL_MEMORY_BYTES.fetch_add( | ||
mem::size_of::<BloomFilterType>(), | ||
std::sync::atomic::Ordering::Relaxed, | ||
); | ||
BLOOM_NUM_OBJECTS.fetch_add(1, std::sync::atomic::Ordering::Relaxed); | ||
|
||
let item = BloomFilterType { | ||
expansion: expansion as u32, | ||
fp_rate, | ||
tightening_ratio, | ||
is_seed_random, | ||
filters, | ||
}; | ||
item.bloom_filter_type_incr_metrics_on_new_create(); | ||
Some(item) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
use lazy_static::lazy_static; | ||
use std::sync::atomic::AtomicBool; | ||
use std::sync::atomic::AtomicI64; | ||
use std::sync::atomic::{AtomicBool, AtomicI64}; | ||
|
||
/// Configurations | ||
pub const BLOOM_CAPACITY_DEFAULT: i64 = 100000; | ||
|
@@ -17,6 +16,7 @@ pub const BLOOM_FP_RATE_MAX: f64 = 1.0; | |
|
||
pub const BLOOM_USE_RANDOM_SEED_DEFAULT: bool = true; | ||
|
||
pub const BLOOM_DEFRAG_DEAFULT: bool = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How will a user/operator change this value? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The user can add this to the start up arguments: |
||
// Max Memory usage allowed per bloom filter within a bloom object (64MB). | ||
// Beyond this threshold, a bloom object is classified as large and is exempt from defrag operations. | ||
// Also, write operations that result in bloom object allocation larger than this size will be rejected. | ||
|
@@ -30,6 +30,7 @@ lazy_static! { | |
pub static ref BLOOM_MEMORY_LIMIT_PER_FILTER: AtomicI64 = | ||
AtomicI64::new(BLOOM_MEMORY_LIMIT_PER_FILTER_DEFAULT); | ||
pub static ref BLOOM_USE_RANDOM_SEED: AtomicBool = AtomicBool::default(); | ||
pub static ref BLOOM_DEFRAG: AtomicBool = AtomicBool::new(BLOOM_DEFRAG_DEAFULT); | ||
} | ||
|
||
/// Constants | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add a short documentation explaining why we use this -
Vec::with_capacity(1)
here? I think I had made this comment earlier