Skip to content

Commit

Permalink
remove unsafe read_num_bytes_u32 and TODO comments for more unsafe co…
Browse files Browse the repository at this point in the history
…de that needs to be fixed
  • Loading branch information
andygrove committed Jun 9, 2024
1 parent b1ab750 commit 039bafe
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 26 deletions.
25 changes: 2 additions & 23 deletions core/src/common/bit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,6 @@ pub fn read_num_bytes_u64(size: usize, src: &[u8]) -> u64 {
trailing_bits(v, size * 8)
}

/// u32 specific version of read_num_bytes!
/// This is faster than read_num_bytes! because this method avoids buffer copies.
#[inline]
pub fn read_num_bytes_u32(size: usize, src: &[u8]) -> u32 {
debug_assert!(size <= src.len());
if unlikely(src.len() < 4) {
return read_num_bytes!(u32, size, src);
}
let in_ptr = src as *const [u8] as *const u8 as *const u32;
let v = unsafe { in_ptr.read_unaligned() };
trailing_bits(v as u64, size * 8) as u32
}

/// Similar to the `read_num_bytes` but read nums from bytes in big-endian order
/// This is used to read bytes from Java's OutputStream which writes bytes in big-endian
Expand Down Expand Up @@ -795,6 +783,7 @@ impl BitReader {
}

let in_buf = &self.buffer.as_slice()[self.byte_offset..];
// TODO this is unsafe
let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
while total - i >= 32 {
in_ptr = unpack32(in_ptr, dst, num_bits);
Expand Down Expand Up @@ -836,6 +825,7 @@ impl BitReader {

unsafe {
let in_buf = &self.buffer.as_slice()[self.byte_offset..];
// TODO this is unsafe
let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
// FIXME assert!(memory::is_ptr_aligned(in_ptr));
if size_of::<T>() == 4 {
Expand Down Expand Up @@ -1017,17 +1007,6 @@ mod tests {
}
}

#[test]
fn test_read_num_bytes_u32() {
let buffer: Vec<u8> = vec![0, 1, 2, 3];
for size in 0..buffer.len() {
assert_eq!(
read_num_bytes_u32(size, &buffer),
read_num_bytes!(u32, size, &buffer),
);
}
}

#[test]
fn test_ceil() {
assert_eq!(ceil(0, 1), 0);
Expand Down
2 changes: 1 addition & 1 deletion core/src/parquet/mutable_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ impl ParquetMutableVector {
// We need to update offset buffer for binary.
if Self::is_binary_type(&self.arrow_type) {
let mut offset = self.num_values * 4;
let prev_offset_value = bit::read_num_bytes_u32(4, &self.value_buffer[offset..]);
let prev_offset_value = read_num_bytes!(u32, 4, &self.value_buffer[offset..]);
offset += 4;
(0..n).for_each(|_| {
bit::memcpy_value(&prev_offset_value, 4, &mut self.value_buffer[offset..]);
Expand Down
4 changes: 2 additions & 2 deletions core/src/parquet/read/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use parquet::schema::types::ColumnDescPtr;

use super::values::Decoder;
use crate::{
common::bit::{self, read_num_bytes_u32, BitReader},
common::bit::{self, BitReader},
parquet::ParquetMutableVector,
unlikely,
};
Expand Down Expand Up @@ -89,7 +89,7 @@ impl LevelDecoder {
0
} else if self.need_length {
let u32_size = mem::size_of::<u32>();
let data_size = read_num_bytes_u32(u32_size, page_data.as_slice()) as usize;
let data_size = read_num_bytes!(u32, u32_size, page_data.as_slice()) as usize;
self.bit_reader = Some(BitReader::new(page_data.slice(u32_size), data_size));
u32_size + data_size
} else {
Expand Down
1 change: 1 addition & 0 deletions core/src/parquet/read/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ macro_rules! make_int_variant_impl {

while num - i >= 32 {
unsafe {
// TODO this is unsafe
let in_slice = std::slice::from_raw_parts(in_ptr, 32);

for n in 0..32 {
Expand Down

0 comments on commit 039bafe

Please sign in to comment.