From 0b70677fd17709f8dc3a6a00f4d73518d9cf9762 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 13:31:26 -0600 Subject: [PATCH 1/8] add miri workflow --- .github/workflows/miri.yml | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/miri.yml diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml new file mode 100644 index 000000000..78a80046b --- /dev/null +++ b/.github/workflows/miri.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Run Miri Safety Checks + +on: + push: + paths-ignore: + - "doc/**" + - "docs/**" + - "**.md" + pull_request: + paths-ignore: + - "doc/**" + - "docs/**" + - "**.md" + # manual trigger + # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow + workflow_dispatch: + +jobs: + miri: + name: "Miri" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Miri + run: | + rustup toolchain install nightly --component miri + rustup override set nightly + cargo miri setup + - name: Test with Miri + run: MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test From abf91b4814edb4eed7387f3b75000ef44d006c98 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 13:34:37 -0600 Subject: [PATCH 2/8] fix --- .github/workflows/miri.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index 78a80046b..384a672b7 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -44,4 +44,6 @@ jobs: rustup override set nightly cargo miri setup - name: Test with Miri - run: MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test + run: | + cd core + MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test From 5faf16762b3285c124885e10f0ca2647e578e2ef Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 13:48:33 -0600 Subject: [PATCH 3/8] ignore some tests when miri is enabled --- core/src/errors.rs | 9 +++++++++ core/src/execution/datafusion/spark_hash.rs | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/core/src/errors.rs b/core/src/errors.rs index b6f4d0889..352f934ef 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -586,6 +586,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] pub fn error_from_panic() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -604,6 +605,7 @@ mod tests { // Verify that functions that return an object are handled correctly. This is basically // a test of the "happy path". #[test] + #[cfg_attr(miri, ignore)] pub fn object_result() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -621,6 +623,7 @@ mod tests { // Verify that functions that return an native time are handled correctly. This is basically // a test of the "happy path". #[test] + #[cfg_attr(miri, ignore)] pub fn jlong_result() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -637,6 +640,7 @@ mod tests { // Verify that functions that return an array can handle throwing exceptions. The test // causes an exception by dividing by zero. #[test] + #[cfg_attr(miri, ignore)] pub fn jlong_panic_exception() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -657,6 +661,7 @@ mod tests { // Verify that functions that return an native time are handled correctly. This is basically // a test of the "happy path". #[test] + #[cfg_attr(miri, ignore)] pub fn jlong_result_ok() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -673,6 +678,7 @@ mod tests { // Verify that functions that return an native time are handled correctly. This is basically // a test of the "happy path". #[test] + #[cfg_attr(miri, ignore)] pub fn jlong_result_err() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -693,6 +699,7 @@ mod tests { // Verify that functions that return an array are handled correctly. This is basically // a test of the "happy path". #[test] + #[cfg_attr(miri, ignore)] pub fn jint_array_result() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -713,6 +720,7 @@ mod tests { // Verify that functions that return an array can handle throwing exceptions. The test // causes an exception by dividing by zero. #[test] + #[cfg_attr(miri, ignore)] pub fn jint_array_panic_exception() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -736,6 +744,7 @@ mod tests { /// See [`object_panic_exception`] for a test which involves generating a panic and verifying /// that the resulting stack trace includes the offending call. #[test] + #[cfg_attr(miri, ignore)] pub fn stacktrace_string() { // Setup: Start with a backtrace that includes all of the expected scenarios, including // cases where the file and location are not provided as part of the backtrace capture diff --git a/core/src/execution/datafusion/spark_hash.rs b/core/src/execution/datafusion/spark_hash.rs index 1f0658aec..dea5b0f50 100644 --- a/core/src/execution/datafusion/spark_hash.rs +++ b/core/src/execution/datafusion/spark_hash.rs @@ -90,7 +90,7 @@ pub(crate) fn spark_compatible_murmur3_hash>(data: T, seed: u32) unsafe { let mut h1 = if len_aligned > 0 { hash_bytes_by_int( - std::slice::from_raw_parts(data.get_unchecked(0), len_aligned), + &data[0..len_aligned], seed, ) } else { From dff2e4720edd1ad1b727d7ca163803c7ce51eae3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 15:51:35 -0600 Subject: [PATCH 4/8] fix one safety warning and ignore some slow tests --- core/Cargo.toml | 1 + core/src/execution/datafusion/expressions/cast.rs | 2 ++ core/src/execution/datafusion/expressions/xxhash64.rs | 1 + core/src/execution/datafusion/shuffle_writer.rs | 1 + core/src/execution/kernels/temporal.rs | 3 +++ core/src/execution/sort.rs | 3 ++- 6 files changed, 10 insertions(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index c3e924a44..2c450aa39 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -97,6 +97,7 @@ twox-hash = "1.6.3" [features] default = [] +nightly = [] [profile.release] debug = true diff --git a/core/src/execution/datafusion/expressions/cast.rs b/core/src/execution/datafusion/expressions/cast.rs index 9e3205cef..154ff28b5 100644 --- a/core/src/execution/datafusion/expressions/cast.rs +++ b/core/src/execution/datafusion/expressions/cast.rs @@ -1646,6 +1646,7 @@ mod tests { use super::*; #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri fn timestamp_parser_test() { // write for all formats assert_eq!( @@ -1683,6 +1684,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri fn test_cast_string_to_timestamp() { let array: ArrayRef = Arc::new(StringArray::from(vec![ Some("2020-01-01T12:34:56.123456"), diff --git a/core/src/execution/datafusion/expressions/xxhash64.rs b/core/src/execution/datafusion/expressions/xxhash64.rs index 94b9e04ba..508cfe59b 100644 --- a/core/src/execution/datafusion/expressions/xxhash64.rs +++ b/core/src/execution/datafusion/expressions/xxhash64.rs @@ -162,6 +162,7 @@ mod test { use twox_hash::XxHash64; #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri fn test_xxhash64_random() { let mut rng = rand::thread_rng(); for len in 0..128 { diff --git a/core/src/execution/datafusion/shuffle_writer.rs b/core/src/execution/datafusion/shuffle_writer.rs index 5afc9a53e..637cb6d51 100644 --- a/core/src/execution/datafusion/shuffle_writer.rs +++ b/core/src/execution/datafusion/shuffle_writer.rs @@ -1447,6 +1447,7 @@ mod test { } #[test] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `ZSTD_createCCtx` fn test_insert_larger_batch() { let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)])); let mut b = StringBuilder::new(); diff --git a/core/src/execution/kernels/temporal.rs b/core/src/execution/kernels/temporal.rs index 1868c6fe5..f5256a196 100644 --- a/core/src/execution/kernels/temporal.rs +++ b/core/src/execution/kernels/temporal.rs @@ -824,6 +824,7 @@ mod tests { use std::sync::Arc; #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri fn test_date_trunc() { let size = 1000; let mut vec: Vec = Vec::with_capacity(size); @@ -962,6 +963,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri fn test_timestamp_trunc() { let size = 1000; let mut vec: Vec = Vec::with_capacity(size); @@ -998,6 +1000,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri // This test only verifies that the various input array types work. Actually correctness to // ensure this produces the same results as spark is verified in the JVM tests fn test_timestamp_trunc_array_fmt_dyn() { diff --git a/core/src/execution/sort.rs b/core/src/execution/sort.rs index eeeb11d5b..6848001d7 100644 --- a/core/src/execution/sort.rs +++ b/core/src/execution/sort.rs @@ -165,7 +165,8 @@ where // because they are defined as Vec> ptr::copy_nonoverlapping( bucket.as_ptr(), - self.get_unchecked_mut(pos), + //self.get_unchecked_mut(pos), + self.as_mut_ptr().add(pos), bucket.len(), ); } From 87cd6c8392b84e61382f143816965f1411a48f65 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 15:51:49 -0600 Subject: [PATCH 5/8] fmt --- core/src/execution/datafusion/spark_hash.rs | 5 +---- core/src/execution/kernels/temporal.rs | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/core/src/execution/datafusion/spark_hash.rs b/core/src/execution/datafusion/spark_hash.rs index dea5b0f50..15bded3a8 100644 --- a/core/src/execution/datafusion/spark_hash.rs +++ b/core/src/execution/datafusion/spark_hash.rs @@ -89,10 +89,7 @@ pub(crate) fn spark_compatible_murmur3_hash>(data: T, seed: u32) // data is &[u8] so we do not need to check for proper alignment unsafe { let mut h1 = if len_aligned > 0 { - hash_bytes_by_int( - &data[0..len_aligned], - seed, - ) + hash_bytes_by_int(&data[0..len_aligned], seed) } else { seed as i32 }; diff --git a/core/src/execution/kernels/temporal.rs b/core/src/execution/kernels/temporal.rs index f5256a196..ba3ffeddb 100644 --- a/core/src/execution/kernels/temporal.rs +++ b/core/src/execution/kernels/temporal.rs @@ -1001,8 +1001,8 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] // test takes too long with miri - // This test only verifies that the various input array types work. Actually correctness to - // ensure this produces the same results as spark is verified in the JVM tests + // This test only verifies that the various input array types work. Actually correctness to + // ensure this produces the same results as spark is verified in the JVM tests fn test_timestamp_trunc_array_fmt_dyn() { let size = 10; let formats = [ From c7f34aba117f3b76aa51d05f43d15df9884f4a73 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 15:57:29 -0600 Subject: [PATCH 6/8] prepare for review --- core/src/execution/kernels/temporal.rs | 7 ++++--- core/src/execution/sort.rs | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/execution/kernels/temporal.rs b/core/src/execution/kernels/temporal.rs index ba3ffeddb..9cf35af1a 100644 --- a/core/src/execution/kernels/temporal.rs +++ b/core/src/execution/kernels/temporal.rs @@ -1000,9 +1000,10 @@ mod tests { } #[test] - #[cfg_attr(miri, ignore)] // test takes too long with miri - // This test only verifies that the various input array types work. Actually correctness to - // ensure this produces the same results as spark is verified in the JVM tests + // test takes too long with miri + #[cfg_attr(miri, ignore)] + // This test only verifies that the various input array types work. Actually correctness to + // ensure this produces the same results as spark is verified in the JVM tests fn test_timestamp_trunc_array_fmt_dyn() { let size = 10; let formats = [ diff --git a/core/src/execution/sort.rs b/core/src/execution/sort.rs index 6848001d7..b8687652c 100644 --- a/core/src/execution/sort.rs +++ b/core/src/execution/sort.rs @@ -165,7 +165,6 @@ where // because they are defined as Vec> ptr::copy_nonoverlapping( bucket.as_ptr(), - //self.get_unchecked_mut(pos), self.as_mut_ptr().add(pos), bucket.len(), ); From 2cac6ef25c67b120ad8d311f1cb6c0d0346c66e4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 5 Jul 2024 16:03:20 -0600 Subject: [PATCH 7/8] add comments --- core/src/errors.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/core/src/errors.rs b/core/src/errors.rs index 352f934ef..09875bd9f 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -586,7 +586,7 @@ mod tests { } #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn error_from_panic() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -605,7 +605,7 @@ mod tests { // Verify that functions that return an object are handled correctly. This is basically // a test of the "happy path". #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn object_result() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -623,7 +623,7 @@ mod tests { // Verify that functions that return an native time are handled correctly. This is basically // a test of the "happy path". #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn jlong_result() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -640,7 +640,7 @@ mod tests { // Verify that functions that return an array can handle throwing exceptions. The test // causes an exception by dividing by zero. #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn jlong_panic_exception() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -661,7 +661,7 @@ mod tests { // Verify that functions that return an native time are handled correctly. This is basically // a test of the "happy path". #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn jlong_result_ok() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -678,7 +678,7 @@ mod tests { // Verify that functions that return an native time are handled correctly. This is basically // a test of the "happy path". #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn jlong_result_err() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -699,7 +699,7 @@ mod tests { // Verify that functions that return an array are handled correctly. This is basically // a test of the "happy path". #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn jint_array_result() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -720,7 +720,7 @@ mod tests { // Verify that functions that return an array can handle throwing exceptions. The test // causes an exception by dividing by zero. #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn jint_array_panic_exception() { let _guard = attach_current_thread(); let mut env = jvm().get_env().unwrap(); @@ -744,7 +744,7 @@ mod tests { /// See [`object_panic_exception`] for a test which involves generating a panic and verifying /// that the resulting stack trace includes the offending call. #[test] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen` pub fn stacktrace_string() { // Setup: Start with a backtrace that includes all of the expected scenarios, including // cases where the file and location are not provided as part of the backtrace capture From 48fe6c15102fbef88bbd12d6c64aa9a212db73b9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 10 Jul 2024 16:18:19 -0600 Subject: [PATCH 8/8] update to reflect change in directory name --- .github/workflows/miri.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index 384a672b7..a07ecc35e 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -45,5 +45,5 @@ jobs: cargo miri setup - name: Test with Miri run: | - cd core + cd native MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test