From 35b3585b2e2b8d5f21b437102dc007f8d5fcf6de Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 9 Jun 2024 09:04:49 -0600 Subject: [PATCH] fix crc32 --- core/src/parquet/util/hash_util.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/core/src/parquet/util/hash_util.rs b/core/src/parquet/util/hash_util.rs index bf226494b..8a5d903c6 100644 --- a/core/src/parquet/util/hash_util.rs +++ b/core/src/parquet/util/hash_util.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use byteorder::{LittleEndian, ReadBytesExt}; + fn hash_(data: &[u8], seed: u32) -> u32 { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe { @@ -106,16 +108,12 @@ unsafe fn crc32_hash(bytes: &[u8], seed: u32) -> u32 { let num_words = num_bytes / u32_num_bytes; num_bytes %= u32_num_bytes; - let bytes_u32: &[u32] = std::slice::from_raw_parts( - &bytes[0..num_words * u32_num_bytes] as *const [u8] as *const u32, - num_words, - ); - let mut offset = 0; let mut hash = seed; - while offset < num_words { - hash = _mm_crc32_u32(hash, bytes_u32[offset]); - offset += 1; + for _ in 0..num_words { + let mut buf = &bytes[offset..offset + 4]; + hash = _mm_crc32_u32(hash, buf.read_u32::<LittleEndian>().unwrap()); + offset += 4; } offset = num_words * u32_num_bytes;