Skip to content

Commit

Permalink
add some assertions to make code safe
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove committed Jun 3, 2024
1 parent 33c7fee commit 3964fb0
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 7 deletions.
7 changes: 5 additions & 2 deletions core/src/execution/datafusion/spark_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,13 @@ pub(crate) fn spark_compatible_murmur3_hash<T: AsRef<[u8]>>(data: T, seed: u32)
let len = data.len();
let len_aligned = len - len % 4;

if len == 0 {
panic!("cannot hash empty slice");
}

// safety:
// avoid bounds checking in performance-critical code.
// all operations are garenteed to be safe
// all operations are guaranteed to be safe
unsafe {
let mut h1 = hash_bytes_by_int(
std::slice::from_raw_parts(data.get_unchecked(0), len_aligned),
Expand Down Expand Up @@ -690,7 +694,6 @@ mod tests {
}

#[test]
#[ignore] // thread caused non-unwinding panic. aborting.
fn test_str() {
let input = vec![
"hello", "bar", "", "😁", "天地", "a", "ab", "abc", "abcd", "abcde",
Expand Down
6 changes: 3 additions & 3 deletions core/src/execution/shuffle/row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ impl Default for SparkUnsafeRow {
}

impl SparkUnsafeRow {
fn new(schema: &[DataType]) -> Self {
fn new(schema: &Vec<DataType>) -> Self {
Self {
row_addr: -1,
row_size: -1,
Expand Down Expand Up @@ -1046,7 +1046,7 @@ pub(crate) fn append_columns(
row_sizes_ptr: *mut jint,
row_start: usize,
row_end: usize,
schema: &[DataType],
schema: &Vec<DataType>,
column_idx: usize,
builder: &mut Box<dyn ArrayBuilder>,
prefer_dictionary_ratio: f64,
Expand Down Expand Up @@ -3283,7 +3283,7 @@ pub fn process_sorted_row_partition(
batch_size: usize,
row_addresses_ptr: *mut jlong,
row_sizes_ptr: *mut jint,
schema: &[DataType],
schema: &Vec<DataType>,
output_path: String,
prefer_dictionary_ratio: f64,
checksum_enabled: bool,
Expand Down
2 changes: 1 addition & 1 deletion core/src/execution/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ where
pos += 1;
}
} else {
assert!(pos < self.len(), "rdxsort pos out of range");
unsafe {
ptr::copy_nonoverlapping(
bucket.as_ptr(),
Expand Down Expand Up @@ -193,7 +194,6 @@ mod tests {
}

#[test]
#[ignore] // thread caused non-unwinding panic. aborting.
fn test_rdxsort() {
let mut v = vec![
pack_pointer(1, 0),
Expand Down
3 changes: 2 additions & 1 deletion core/src/parquet/util/hash_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ const MURMUR_R: i32 = 47;
unsafe fn murmur_hash2_64a(data_bytes: &[u8], seed: u64) -> u64 {
let len = data_bytes.len();
let len_64 = (len / 8) * 8;
assert!(len_64 > 0);
assert!(len_64 <= len);
let data_bytes_64 =
std::slice::from_raw_parts(&data_bytes[0..len_64] as *const [u8] as *const u64, len / 8);

Expand Down Expand Up @@ -135,7 +137,6 @@ mod tests {
use super::*;

#[test]
#[ignore] // thread caused non-unwinding panic. aborting.
fn test_murmur2_64a() {
unsafe {
let result = murmur_hash2_64a(b"hello", 123);
Expand Down

0 comments on commit 3964fb0

Please sign in to comment.