diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 460ae2f8594c0..63f687324b0ae 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -4,6 +4,12 @@ inputs: editable: description: Whether to build pandas in editable mode (default true) default: true + meson_args: + description: Extra flags to pass to meson + required: false + cflags_adds: + description: Items to append to the CFLAGS variable + required: false runs: using: composite steps: @@ -24,11 +30,12 @@ runs: - name: Build Pandas run: | + export CFLAGS="$CFLAGS ${{ inputs.cflags_adds }}" if [[ ${{ inputs.editable }} == "true" ]]; then - pip install -e . --no-build-isolation -v --no-deps \ + pip install -e . --no-build-isolation -v --no-deps ${{ inputs.meson_args }} \ --config-settings=setup-args="--werror" else - pip install . --no-build-isolation -v --no-deps \ + pip install . --no-build-isolation -v --no-deps ${{ inputs.meson_args }} \ --config-settings=setup-args="--werror" fi shell: bash -el {0} diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index fd7c3587f2254..b4778b74df335 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -1,9 +1,16 @@ name: Run tests and report results +inputs: + preload: + description: Preload arguments for sanitizer + required: false + asan_options: + description: Arguments for Address Sanitizer (ASAN) + required: false runs: using: composite steps: - name: Test - run: ci/run_tests.sh + run: ${{ inputs.asan_options }} ${{ inputs.preload }} ci/run_tests.sh shell: bash -el {0} - name: Publish test results diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 88d705dbd9251..57f9893d36044 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -96,6 +96,14 @@ jobs: - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" + - name: "ASAN / UBSAN" + env_file: actions-311-sanitizers.yaml + pattern: "not slow and not network and not single_cpu and not skip_ubsan" + asan_options: "ASAN_OPTIONS=detect_leaks=0" + preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) + meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" + cflags_adds: -fno-sanitize-recover=all + pytest_workers: -1 # disable pytest-xdist as it swallows stderr from ASAN fail-fast: false name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }} env: @@ -105,7 +113,7 @@ jobs: PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} - PYTEST_WORKERS: 'auto' + PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} # Clipboard tests QT_QPA_PLATFORM: offscreen @@ -174,16 +182,25 @@ jobs: - name: Build Pandas id: build uses: ./.github/actions/build_pandas + with: + meson_args: ${{ matrix.meson_args }} + cflags_adds: ${{ matrix.cflags_adds }} - name: Test (not single_cpu) uses: ./.github/actions/run-tests if: ${{ matrix.name != 'Pypy' }} + with: + preload: ${{ matrix.preload }} + asan_options: ${{ matrix.asan_options }} env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} - name: Test (single_cpu) uses: ./.github/actions/run-tests + with: + preload: ${{ matrix.preload }} + asan_options: ${{ matrix.asan_options }} env: PATTERN: 'single_cpu' PYTEST_WORKERS: 0 diff --git a/ci/deps/actions-311-sanitizers.yaml b/ci/deps/actions-311-sanitizers.yaml new file mode 100644 index 0000000000000..dcd381066b0ea --- /dev/null +++ b/ci/deps/actions-311-sanitizers.yaml @@ -0,0 +1,32 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.11 + + # build dependencies + - versioneer[toml] + - cython>=0.29.33 + - meson[ninja]=1.2.1 + - meson-python=0.13.1 + + # test dependencies + - pytest>=7.3.2 + - pytest-cov + - pytest-xdist>=2.2.0 + - pytest-localserver>=0.7.1 + - pytest-qt>=4.2.0 + - boto3 + - hypothesis>=6.46.1 + - pyqt>=5.15.9 + + # required dependencies + - python-dateutil + - numpy<2 + - pytz + + # pandas dependencies + - pip + + - pip: + - "tzdata>=2022.7" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e1abd0344e356..f64d6a886fe9f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3206,6 +3206,7 @@ def test_from_out_of_bounds_ns_datetime( assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype + @pytest.mark.skip_ubsan def test_out_of_s_bounds_datetime64(self, constructor): scalar = np.datetime64(np.iinfo(np.int64).max, "D") result = constructor(scalar) @@ -3241,6 +3242,7 @@ def test_from_out_of_bounds_ns_timedelta( assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype + @pytest.mark.skip_ubsan @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) def test_out_of_s_bounds_timedelta64(self, constructor, cls): scalar = cls(np.iinfo(np.int64).max, "D") diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py index bf572609f3d37..1bdbef6d50c4c 100644 --- a/pandas/tests/groupby/test_cumulative.py +++ b/pandas/tests/groupby/test_cumulative.py @@ -60,6 +60,7 @@ def test_groupby_cumprod(): tm.assert_series_equal(actual, expected) +@pytest.mark.skip_ubsan def test_groupby_cumprod_overflow(): # GH#37493 if we overflow we return garbage consistent with numpy df = DataFrame({"key": ["b"] * 4, "value": 100_000}) diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py index 4b23774ee2d5b..6069c23936297 100644 --- a/pandas/tests/io/parser/common/test_float.py +++ b/pandas/tests/io/parser/common/test_float.py @@ -40,7 +40,14 @@ def test_scientific_no_exponent(all_parsers_all_precisions): tm.assert_frame_equal(df_roundtrip, df) -@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999]) +@pytest.mark.parametrize( + "neg_exp", + [ + -617, + -100000, + pytest.param(-99999999999999999, marks=pytest.mark.skip_ubsan), + ], +) def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): # GH#38753 parser, precision = all_parsers_all_precisions @@ -51,6 +58,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): tm.assert_frame_equal(result, expected) +@pytest.mark.skip_ubsan @xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999]) def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request): diff --git a/pandas/tests/scalar/timedelta/methods/test_round.py b/pandas/tests/scalar/timedelta/methods/test_round.py index 4beb39510413c..e54adb27d126b 100644 --- a/pandas/tests/scalar/timedelta/methods/test_round.py +++ b/pandas/tests/scalar/timedelta/methods/test_round.py @@ -61,6 +61,7 @@ def test_round_invalid(self): with pytest.raises(ValueError, match=msg): t1.round(freq) + @pytest.mark.skip_ubsan def test_round_implementation_bounds(self): # See also: analogous test for Timestamp # GH#38964 @@ -86,6 +87,7 @@ def test_round_implementation_bounds(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.max.round("s") + @pytest.mark.skip_ubsan @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max)) @pytest.mark.parametrize( "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index cc1e91893e308..d2fa0f722ca6f 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -966,6 +966,7 @@ def test_td_op_timedelta_timedeltalike_array(self, op, arr): class TestTimedeltaComparison: + @pytest.mark.skip_ubsan def test_compare_pytimedelta_bounds(self): # GH#49021 don't overflow on comparison with very large pytimedeltas diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index ac605df935226..d4398f66e6f89 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -551,6 +551,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + @pytest.mark.skip_ubsan @pytest.mark.xfail( reason="pd.Timedelta violates the Python hash invariant (GH#44504).", ) diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index 9df0a023730de..af3dee1880d2e 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -25,6 +25,7 @@ class TestTimestampTZLocalize: + @pytest.mark.skip_ubsan def test_tz_localize_pushes_out_of_bounds(self): # GH#12677 # tz_localize that pushes away from the boundary is OK diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 98e4d581dc104..3975f3c46aaa1 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -822,6 +822,7 @@ def test_barely_out_of_bounds(self): with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp("2262-04-11 23:47:16.854775808") + @pytest.mark.skip_ubsan def test_bounds_with_different_units(self): out_of_bounds_dates = ("1677-09-21", "2262-04-12") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 417a56dc074a6..6791ac0340640 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1140,6 +1140,7 @@ def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors): assert ts.unit == "s" assert ts.asm8 == dt + @pytest.mark.skip_ubsan def test_to_datetime_dt64d_out_of_bounds(self, cache): dt64 = np.datetime64(np.iinfo(np.int64).max, "D") diff --git a/pyproject.toml b/pyproject.toml index 6e3424f9a7075..ca19f463edf40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -523,6 +523,7 @@ markers = [ "db: tests requiring a database (mysql or postgres)", "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", + "skip_ubsan: Tests known to fail UBSAN check", ] [tool.mypy] diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index f8535635e12ab..3be6be17d1ee2 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -382,6 +382,7 @@ markers = [ "db: tests requiring a database (mysql or postgres)", "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", + "skip_ubsan: tests known to invoke undefined behavior", ] [tool.mypy]