pandas-dev · mroeschke · Dec 21, 2023 · Sep 11, 2023 · Sep 11, 2023 · Sep 12, 2023
@@ -4,6 +4,12 @@ inputs:
   editable:
     description: Whether to build pandas in editable mode (default true)
     default: true
+  meson_args:
+    description: Extra arguments appended to the pip install command (e.g. --config-settings to forward options to meson)
+    required: false
+  cflags_adds:
+    description: Space-separated flags appended to the CFLAGS environment variable before building
+    required: false
 runs:
   using: composite
   steps:
@@ -24,9 +30,14 @@ runs:
 
     - name: Build Pandas
       run: |
+        # Only touch CFLAGS when extra flags were requested, so callers that
+        # do not set cflags_adds build with an unmodified environment.
+        if [[ -n "${{ inputs.cflags_adds }}" ]]; then
+          export CFLAGS="$CFLAGS ${{ inputs.cflags_adds }}"
+        fi
         if [[ ${{ inputs.editable }} == "true" ]]; then
-          pip install -e . --no-build-isolation -v --no-deps
+          pip install -e . --no-build-isolation -v --no-deps ${{ inputs.meson_args }}
         else
-          pip install . --no-build-isolation -v --no-deps
+          pip install . --no-build-isolation -v --no-deps ${{ inputs.meson_args }}
         fi
       shell: bash -el {0}
@@ -305,6 +305,56 @@ jobs:
       group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl
       cancel-in-progress: true
 
+  # Runs the test suite against a pandas build instrumented with
+  # AddressSanitizer and UndefinedBehaviorSanitizer.
+  ASAN_UBSAN:
+    runs-on: ubuntu-22.04
+    timeout-minutes: 90
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+
+    - name: Set up Conda
+      uses: ./.github/actions/setup-conda
+      with:
+        environment-file: ci/deps/actions-311-numpydev.yaml
+
+    - name: Build Pandas
+      id: build
+      uses: ./.github/actions/build_pandas
+      with:
+        meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined"
+        # Make sanitizer reports fatal instead of letting execution continue.
+        cflags_adds: -fno-sanitize-recover=all
+
+    # `env:` values are passed through verbatim, so `$(...)` is never expanded
+    # there; resolve the ASAN runtime path in a shell step and expose it as an
+    # output for the test steps below.
+    - name: Locate ASAN runtime
+      id: asan
+      run: echo "lib=$(gcc -print-file-name=libasan.so)" >> "$GITHUB_OUTPUT"
+      shell: bash -el {0}
+
+    - name: Test (not single_cpu)
+      uses: ./.github/actions/run-tests
+      env:
+        PATTERN: "not slow and not network and not single_cpu and not known_ub"
+        PYTEST_WORKERS: 'auto'
+        PYTEST_TARGET: 'pandas'
+        ASAN_OPTIONS: detect_leaks=0
+        LD_PRELOAD: ${{ steps.asan.outputs.lib }}
+
+    - name: Test (single_cpu)
+      uses: ./.github/actions/run-tests
+      env:
+        PATTERN: "single_cpu and not known_ub"
+        PYTEST_WORKERS: 0
+        PYTEST_TARGET: 'pandas'
+        ASAN_OPTIONS: detect_leaks=0
+        LD_PRELOAD: ${{ steps.asan.outputs.lib }}
+
   python-dev:
     # This job may or may not run depending on the state of the next
     # unreleased Python version. DO NOT DELETE IT.

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -3197,6 +3197,7 @@ def test_from_out_of_bounds_ns_datetime(
         assert item.asm8.dtype == exp_dtype
         assert dtype == exp_dtype
 
+    @pytest.mark.known_ub
     def test_out_of_s_bounds_datetime64(self, constructor):
         scalar = np.datetime64(np.iinfo(np.int64).max, "D")
         result = constructor(scalar)
@@ -3232,6 +3233,7 @@ def test_from_out_of_bounds_ns_timedelta(
         assert item.asm8.dtype == exp_dtype
         assert dtype == exp_dtype
 
+    @pytest.mark.known_ub
     @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
     def test_out_of_s_bounds_timedelta64(self, constructor, cls):
         scalar = cls(np.iinfo(np.int64).max, "D")

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
@@ -1391,6 +1391,7 @@ def test_groupby_apply_to_series_name():
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.known_ub
 @pytest.mark.parametrize("dropna", [True, False])
 def test_apply_na(dropna):
     # GH#28984

diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py
@@ -60,6 +60,7 @@ def test_groupby_cumprod():
     tm.assert_series_equal(actual, expected)
 
 
+@pytest.mark.known_ub
 def test_groupby_cumprod_overflow():
     # GH#37493 if we overflow we return garbage consistent with numpy
     df = DataFrame({"key": ["b"] * 4, "value": 100_000})

diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py
@@ -40,7 +40,14 @@ def test_scientific_no_exponent(all_parsers_all_precisions):
     tm.assert_frame_equal(df_roundtrip, df)
 
 
-@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999])
+@pytest.mark.parametrize(
+    "neg_exp",
+    [
+        -617,
+        -100000,
+        pytest.param(-99999999999999999, marks=pytest.mark.known_ub),
+    ],
+)
 def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
     # GH#38753
     parser, precision = all_parsers_all_precisions
@@ -51,6 +58,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.known_ub
 @xfail_pyarrow  # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
 @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
 def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):

diff --git a/pandas/tests/scalar/timedelta/methods/test_round.py b/pandas/tests/scalar/timedelta/methods/test_round.py
@@ -61,6 +61,7 @@ def test_round_invalid(self):
             with pytest.raises(ValueError, match=msg):
                 t1.round(freq)
 
+    @pytest.mark.known_ub
     def test_round_implementation_bounds(self):
         # See also: analogous test for Timestamp
         # GH#38964
@@ -86,6 +87,7 @@ def test_round_implementation_bounds(self):
         with pytest.raises(OutOfBoundsTimedelta, match=msg):
             Timedelta.max.round("s")
 
+    @pytest.mark.known_ub
     @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max))
     @pytest.mark.parametrize(
         "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil]

diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -966,6 +966,7 @@ def test_td_op_timedelta_timedeltalike_array(self, op, arr):
 
 
 class TestTimedeltaComparison:
+    @pytest.mark.known_ub
     def test_compare_pytimedelta_bounds(self):
         # GH#49021 don't overflow on comparison with very large pytimedeltas
 

diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -551,6 +551,7 @@ def test_timedelta_hash_equality(self):
         ns_td = Timedelta(1, "ns")
         assert hash(ns_td) != hash(ns_td.to_pytimedelta())
 
+    @pytest.mark.known_ub
     @pytest.mark.xfail(
         reason="pd.Timedelta violates the Python hash invariant (GH#44504).",
     )

diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py
@@ -25,6 +25,7 @@
 
 
 class TestTimestampTZLocalize:
+    @pytest.mark.known_ub
     def test_tz_localize_pushes_out_of_bounds(self):
         # GH#12677
         # tz_localize that pushes away from the boundary is OK

diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -815,6 +815,7 @@ def test_barely_out_of_bounds(self):
         with pytest.raises(OutOfBoundsDatetime, match=msg):
             Timestamp("2262-04-11 23:47:16.854775808")
 
+    @pytest.mark.known_ub
     def test_bounds_with_different_units(self):
         out_of_bounds_dates = ("1677-09-21", "2262-04-12")
 

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -1140,6 +1140,7 @@ def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors):
         assert ts.unit == "s"
         assert ts.asm8 == dt
 
+    @pytest.mark.known_ub
     def test_to_datetime_dt64d_out_of_bounds(self, cache):
         dt64 = np.datetime64(np.iinfo(np.int64).max, "D")
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -524,6 +524,7 @@ markers = [
   "clipboard: mark a pd.read_clipboard test",
   "arm_slow: mark a test as slow for arm64 architecture",
   "arraymanager: mark a test to run with ArrayManager enabled",
+  "known_ub: tests known to invoke undefined behavior",
 ]
 
 [tool.mypy]

diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml
@@ -383,6 +383,7 @@ markers = [
   "clipboard: mark a pd.read_clipboard test",
   "arm_slow: mark a test as slow for arm64 architecture",
   "arraymanager: mark a test to run with ArrayManager enabled",
+  "known_ub: tests known to invoke undefined behavior",
 ]
 
 [tool.mypy]