From 3964fb0d14983b54a0faf76d1e4f0e27f76dbba5 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 3 Jun 2024 13:13:49 -0600 Subject: [PATCH] add some assertions to make code safe --- core/src/execution/datafusion/spark_hash.rs | 7 +++++-- core/src/execution/shuffle/row.rs | 6 +++--- core/src/execution/sort.rs | 2 +- core/src/parquet/util/hash_util.rs | 3 ++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/core/src/execution/datafusion/spark_hash.rs b/core/src/execution/datafusion/spark_hash.rs index 09b748c87..0c87baa32 100644 --- a/core/src/execution/datafusion/spark_hash.rs +++ b/core/src/execution/datafusion/spark_hash.rs @@ -82,9 +82,13 @@ pub(crate) fn spark_compatible_murmur3_hash<T: AsRef<[u8]>>(data: T, seed: u32) let len = data.len(); let len_aligned = len - len % 4; + if len == 0 { + panic!("cannot hash empty slice"); + } + // safety: // avoid boundary checking in performance critical codes. - // all operations are garenteed to be safe + // all operations are guaranteed to be safe unsafe { let mut h1 = hash_bytes_by_int( std::slice::from_raw_parts(data.get_unchecked(0), len_aligned), @@ -690,7 +694,6 @@ mod tests { } #[test] - #[ignore] // thread caused non-unwinding panic. aborting. 
fn test_str() { let input = vec![ "hello", "bar", "", "😁", "天地", "a", "ab", "abc", "abcd", "abcde", diff --git a/core/src/execution/shuffle/row.rs b/core/src/execution/shuffle/row.rs index 2d1312c16..419ef9b4b 100644 --- a/core/src/execution/shuffle/row.rs +++ b/core/src/execution/shuffle/row.rs @@ -207,7 +207,7 @@ impl Default for SparkUnsafeRow { } impl SparkUnsafeRow { - fn new(schema: &[DataType]) -> Self { + fn new(schema: &Vec<DataType>) -> Self { Self { row_addr: -1, row_size: -1, @@ -1046,7 +1046,7 @@ pub(crate) fn append_columns( row_sizes_ptr: *mut jint, row_start: usize, row_end: usize, - schema: &[DataType], + schema: &Vec<DataType>, column_idx: usize, builder: &mut Box<dyn ArrayBuilder>, prefer_dictionary_ratio: f64, @@ -3283,7 +3283,7 @@ pub fn process_sorted_row_partition( batch_size: usize, row_addresses_ptr: *mut jlong, row_sizes_ptr: *mut jint, - schema: &[DataType], + schema: &Vec<DataType>, output_path: String, prefer_dictionary_ratio: f64, checksum_enabled: bool, diff --git a/core/src/execution/sort.rs b/core/src/execution/sort.rs index 4f88327be..ab779057c 100644 --- a/core/src/execution/sort.rs +++ b/core/src/execution/sort.rs @@ -159,6 +159,7 @@ where pos += 1; } } else { + assert!(pos < self.len(), "rdxsort pos out of range"); unsafe { ptr::copy_nonoverlapping( bucket.as_ptr(), @@ -193,7 +194,6 @@ mod tests { } #[test] - #[ignore] // thread caused non-unwinding panic. aborting. 
fn test_rdxsort() { let mut v = vec![ pack_pointer(1, 0), diff --git a/core/src/parquet/util/hash_util.rs b/core/src/parquet/util/hash_util.rs index 0343913f7..29dbd3780 100644 --- a/core/src/parquet/util/hash_util.rs +++ b/core/src/parquet/util/hash_util.rs @@ -45,6 +45,8 @@ const MURMUR_R: i32 = 47; unsafe fn murmur_hash2_64a(data_bytes: &[u8], seed: u64) -> u64 { let len = data_bytes.len(); let len_64 = (len / 8) * 8; + assert!(len_64 > 0); + assert!(len_64 <= len); let data_bytes_64 = std::slice::from_raw_parts(&data_bytes[0..len_64] as *const [u8] as *const u64, len / 8); @@ -135,7 +137,6 @@ mod tests { use super::*; #[test] - #[ignore] // thread caused non-unwinding panic. aborting. fn test_murmur2_64a() { unsafe { let result = murmur_hash2_64a(b"hello", 123);