From 66d83d1297450d5f53fbaee82f055f3aca45e382 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 17:10:59 -0400 Subject: [PATCH 01/43] enable ASAN/UBSAN in pandas CI --- .github/actions/build_pandas/action.yml | 4 ++-- .github/workflows/unit-tests.yml | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 73d7723e2fb49..ba2740ed4a631 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -25,8 +25,8 @@ runs: - name: Build Pandas run: | if [[ ${{ inputs.editable }} == "true" ]]; then - pip install -e . --no-build-isolation -v + pip install -e . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" else - pip install . --no-build-isolation -v + pip install . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" fi shell: bash -el {0} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index f2b426269098b..4c3925b8e8063 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -157,6 +157,8 @@ jobs: - name: Build Pandas id: build uses: ./.github/actions/build_pandas + env: + CFLAGS: "$CFLAGS -fno-sanitize-recover=all" - name: Test (not single_cpu) uses: ./.github/actions/run-tests @@ -164,12 +166,16 @@ jobs: env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} + ASAN_OPTIONS: detect_leaks=0 + LD_PRELOAD: $(gcc -print-file-name=libasan.so) - name: Test (single_cpu) uses: ./.github/actions/run-tests env: PATTERN: 'single_cpu' PYTEST_WORKERS: 0 + ASAN_OPTIONS: detect_leaks=0 + LD_PRELOAD: $(gcc -print-file-name=libasan.so) if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} macos-windows: From 7aa2e7ad38b0e3a8a29d9b405d6dc923db3be402 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 19:44:10 -0400 Subject: [PATCH 02/43] try input --- .github/actions/build_pandas/action.yml | 15 +++++++++++++-- .github/workflows/unit-tests.yml | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index ba2740ed4a631..063cb4c7d3254 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -4,6 +4,9 @@ inputs: editable: description: Whether to build pandas in editable mode (default true) default: true + sanitize: + description: Whether sanitizers should be used or not + default: false runs: using: composite steps: @@ -25,8 +28,16 @@ runs: - name: Build Pandas run: | if [[ ${{ inputs.editable }} == "true" ]]; then - pip install -e . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" + if [[ ${{ inputs.sanitize }} == "true" ]]; then + pip install -e . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" + else + pip install -e . --no-build-isolation -v + fi else - pip install . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" + if [[ ${{ inputs.sanitize }} == "true" ]]; then + pip install . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" + else + pip install . --no-build-isolation -v + fi fi shell: bash -el {0} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 4c3925b8e8063..d42da5ec3f42a 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -159,6 +159,8 @@ jobs: uses: ./.github/actions/build_pandas env: CFLAGS: "$CFLAGS -fno-sanitize-recover=all" + with: + sanitize: true - name: Test (not single_cpu) uses: ./.github/actions/run-tests From a5b380813ff448cec5380d2779f06f633c671169 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 20:00:30 -0400 Subject: [PATCH 03/43] try removing sanitize --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index d42da5ec3f42a..68b65fcae4854 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -160,7 +160,7 @@ jobs: env: CFLAGS: "$CFLAGS -fno-sanitize-recover=all" with: - sanitize: true + sanitize: false - name: Test (not single_cpu) uses: ./.github/actions/run-tests From 7b58c6d245083d04f5bbfc7e3c6a0feb4df102f7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 20:03:22 -0400 Subject: [PATCH 04/43] try no CFLAGS --- .github/workflows/unit-tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 68b65fcae4854..0b2752a9c1e9e 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -157,10 +157,8 @@ jobs: - name: Build Pandas id: build uses: ./.github/actions/build_pandas - env: - CFLAGS: "$CFLAGS -fno-sanitize-recover=all" with: - sanitize: false + sanitize: true - name: Test (not single_cpu) uses: ./.github/actions/run-tests From 18111b01d09f576965bda7e521bfb9ac59c60da0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 20:19:39 -0400 Subject: [PATCH 05/43] try GH string substituion --- .github/workflows/unit-tests.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 0b2752a9c1e9e..9cdefaa2c6df2 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -154,9 +154,17 @@ jobs: with: environment-file: ci/deps/${{ matrix.env_file }} + - name: Set sanitizer flags + run: | + echo "CFLAGS=$CFLAGS -fno-sanitize-recover=all" >> "$GITHUB_ENV" + - name: echo flags + run: | + printf '%s\n' "CFLAGS" - name: Build Pandas id: build uses: ./.github/actions/build_pandas + env: + CFLAGS: "$CFLAGS" with: sanitize: true From 438cdfa2d5e82e636374606e312a85cf7f85d9d4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 21:32:41 -0400 Subject: [PATCH 06/43] change flags in build script --- .github/actions/build_pandas/action.yml | 8 ++++++-- .github/workflows/unit-tests.yml | 8 -------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 063cb4c7d3254..1050369ccece9 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -29,13 +29,17 @@ runs: run: | if [[ ${{ inputs.editable }} == "true" ]]; then if [[ ${{ inputs.sanitize }} == "true" ]]; then - pip install -e . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" + CFLAGS=$CFLAGS -fno-sanitize-recover=all \ + pip install -e . --no-build-isolation -v \ + --config-settings=setup-args="-Db_sanitize=address,undefined" else pip install -e . --no-build-isolation -v fi else if [[ ${{ inputs.sanitize }} == "true" ]]; then - pip install . --no-build-isolation -v --config-settings=setup-args="-Db_sanitize=address,undefined" + CFLAGS=$CFLAGS -fno-sanitize-recover=all \ + pip install . --no-build-isolation -v \ + --config-settings=setup-args="-Db_sanitize=address,undefined" else pip install . --no-build-isolation -v fi diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9cdefaa2c6df2..0b2752a9c1e9e 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -154,17 +154,9 @@ jobs: with: environment-file: ci/deps/${{ matrix.env_file }} - - name: Set sanitizer flags - run: | - echo "CFLAGS=$CFLAGS -fno-sanitize-recover=all" >> "$GITHUB_ENV" - - name: echo flags - run: | - printf '%s\n' "CFLAGS" - name: Build Pandas id: build uses: ./.github/actions/build_pandas - env: - CFLAGS: "$CFLAGS" with: sanitize: true From b18cf9dd61daeb0c4e25e768941a760bb7342d46 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 22:15:26 -0400 Subject: [PATCH 07/43] quotes --- .github/actions/build_pandas/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 1050369ccece9..6a38ebc1c5279 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -29,7 +29,7 @@ runs: run: | if [[ ${{ inputs.editable }} == "true" ]]; then if [[ ${{ inputs.sanitize }} == "true" ]]; then - CFLAGS=$CFLAGS -fno-sanitize-recover=all \ + CFLAGS="$CFLAGS -fno-sanitize-recover=all" \ pip install -e . --no-build-isolation -v \ --config-settings=setup-args="-Db_sanitize=address,undefined" else @@ -37,7 +37,7 @@ runs: fi else if [[ ${{ inputs.sanitize }} == "true" ]]; then - CFLAGS=$CFLAGS -fno-sanitize-recover=all \ + CFLAGS="$CFLAGS -fno-sanitize-recover=all" \ pip install . --no-build-isolation -v \ --config-settings=setup-args="-Db_sanitize=address,undefined" else From 69cb6f6cdd944566f8816d873ebce471a15d02fb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 22:36:49 -0400 Subject: [PATCH 08/43] update script run --- .github/actions/run-tests/action.yml | 13 ++++++++++++- .github/workflows/unit-tests.yml | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index fd7c3587f2254..b66adf18a4847 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -1,9 +1,20 @@ name: Run tests and report results +inputs: + sanitize: + description: Whether sanitizers should be used or not + default: false runs: using: composite steps: - name: Test - run: ci/run_tests.sh + run: | + if [[ ${{ inputs.sanitize }} == "true" ]]; then + ASAN_OPTIONS=detect_leaks=0 \ + LD_PRELOAD=$(gcc -print-file-name=libasan.so) \ + ci/run_tests.sh + else + ci/run_tests.sh + fi shell: bash -el {0} - name: Publish test results diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 0b2752a9c1e9e..a219cf0aacc9d 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -163,11 +163,11 @@ jobs: - name: Test (not single_cpu) uses: ./.github/actions/run-tests if: ${{ matrix.name != 'Pypy' }} + with: + sanitize: true env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} - ASAN_OPTIONS: detect_leaks=0 - LD_PRELOAD: $(gcc -print-file-name=libasan.so) - name: Test (single_cpu) uses: ./.github/actions/run-tests From 6f5fb1180b92e7ecd0c96e7fb4291ccfc4ad5748 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Sep 2023 22:38:22 -0400 Subject: [PATCH 09/43] single_cpu updates --- .github/workflows/unit-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a219cf0aacc9d..907a549e7b191 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -171,11 +171,11 @@ jobs: - name: Test (single_cpu) uses: ./.github/actions/run-tests + with: + sanitize: true env: PATTERN: 'single_cpu' PYTEST_WORKERS: 0 - ASAN_OPTIONS: detect_leaks=0 - LD_PRELOAD: $(gcc -print-file-name=libasan.so) if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} macos-windows: From 663d6d44c1aeb08e55fd4595217837f35dd3fe95 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 13 Sep 2023 20:44:28 -0400 Subject: [PATCH 10/43] asan checks for datetime funcs --- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 3 ++- pandas/_libs/tslibs/np_datetime.pyx | 2 ++ pandas/_libs/tslibs/offsets.pyx | 9 +++++++++ pandas/_libs/tslibs/period.pyx | 10 ++++++++++ pandas/_libs/tslibs/strptime.pyx | 2 ++ pandas/_libs/tslibs/timestamps.pyx | 1 + 7 files changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 20a18cf56779f..14b5515f6c635 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -94,8 +94,8 @@ def _test_parse_iso8601(ts: str): obj = _TSObject() string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) + check_dts_bounds(&obj.dts, NPY_FR_ns) obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) - check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 45c4d7809fe7a..025b67c195e2c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -397,6 +397,7 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 + check_dts_bounds(&obj.dts, reso) obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) if obj.tzinfo is not None and not is_utc(obj.tzinfo): @@ -404,7 +405,6 @@ cdef _TSObject convert_datetime_to_tsobject( pps = periods_per_second(reso) obj.value -= int(offset.total_seconds() * pps) - check_dts_bounds(&obj.dts, reso) check_overflows(obj, reso) return obj @@ -434,6 +434,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, datetime dt Py_ssize_t pos + check_dts_bounds(&dts, reso) value = npy_datetimestruct_to_datetime(reso, &dts) obj.dts = dts obj.tzinfo = timezone(timedelta(minutes=tzoffset)) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index c3ee68e14a8d4..84f606767ddb9 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -267,6 +267,7 @@ cdef int64_t pydatetime_to_dt64(datetime val, Note we are assuming that the datetime object is timezone-naive. """ pydatetime_to_dtstruct(val, dts) + check_dts_bounds(dts, reso) return npy_datetimestruct_to_datetime(reso, dts) @@ -282,6 +283,7 @@ cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=NPY_FR_ns ): pydate_to_dtstruct(val, dts) + check_dts_bounds(dts, reso) return npy_datetimestruct_to_datetime(reso, dts) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2b6f84844ff49..8e84b441d2cf7 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -63,6 +63,7 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + check_dts_bounds, get_unit_from_dtype, import_pandas_datetime, npy_datetimestruct, @@ -3093,6 +3094,8 @@ cdef class SemiMonthOffset(SingleConstructorOffset): days_in_month = get_days_in_month(dts.year, dts.month) dts.day = min(to_day, days_in_month) + with gil: + check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val @@ -4762,6 +4765,8 @@ cdef ndarray shift_quarters( dts.month = month_add_months(dts, modby * n - months_since) dts.day = get_day_of_month(&dts, day_opt) + with gil: + check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val @@ -4820,6 +4825,8 @@ def shift_months( dts.month = month_add_months(dts, months) dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + with gil: + check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val @@ -4846,6 +4853,8 @@ def shift_months( dts.month = month_add_months(dts, months_to_roll) dts.day = get_day_of_month(&dts, day_opt) + with gil: + check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c37e9cd7ef1f3..3081c54ed49c2 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -721,6 +721,8 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day) noexcept nogil: npy_datetimestruct dts int64_t unix_date + with gil: + check_dts_bounds(&dts, NPY_FR_D) memset(&dts, 0, sizeof(npy_datetimestruct)) dts.year = year dts.month = month @@ -738,6 +740,8 @@ cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) noexcept n cdef: int64_t result + with gil: + check_dts_bounds(dts, NPY_DATETIMEUNIT.NPY_FR_Y) result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) if dts.month > to_end: return result + 1 @@ -795,14 +799,20 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogi return dts_to_qtr_ordinal(dts, fmonth) elif freq_group == FR_WK: + with gil: + check_dts_bounds(dts, NPY_FR_D) unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) return unix_date_to_week(unix_date, freq - FR_WK) elif freq == FR_BUS: + with gil: + check_dts_bounds(dts, NPY_FR_D) unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) return DtoB(dts, 0, unix_date) unit = freq_group_code_to_npy_unit(freq) + with gil: + check_dts_bounds(dts, unit) return npy_datetimestruct_to_datetime(unit, dts) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index e6924a4e24dff..4dc1894f759d5 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -325,6 +325,7 @@ def array_strptime( if string_to_dts_succeeded: # No error reported by string_to_dts, pick back up # where we left off + check_dts_bounds(&dts, NPY_FR_ns) value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) if out_local == 1: # Store the out_tzoffset in seconds @@ -513,6 +514,7 @@ def array_strptime( dts.us = us dts.ps = ns * 1000 + check_dts_bounds(&dts, NPY_FR_ns) iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) check_dts_bounds(&dts) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 944a2b0e97382..ec1e845b6f4ec 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -581,6 +581,7 @@ cdef class _Timestamp(ABCTimestamp): if own_tz is not None and not is_utc(own_tz): pydatetime_to_dtstruct(self, &dts) + check_dts_bounds(&dts, self._creso) val = npy_datetimestruct_to_datetime(self._creso, &dts) + self.nanosecond else: val = self._value From 466056d1e2c348d45b7e3edd30a5640309bfe1ce Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Sep 2023 08:01:05 -0400 Subject: [PATCH 11/43] try smaller config --- .github/workflows/unit-tests.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 907a549e7b191..f74aaef8c0729 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -77,6 +77,11 @@ jobs: env_file: actions-311-numpydev.yaml pattern: "not slow and not network and not single_cpu" test_args: "-W error::DeprecationWarning -W error::FutureWarning" + - name: "ASAN/UBSAN" + env_file: actions-311-numpydev.yaml + pattern: "not slow and not network and not single_cpu" + test_args: "-W error::DeprecationWarning -W error::FutureWarning" + sanitize: true - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" @@ -90,6 +95,7 @@ jobs: PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} + SANITIZE: ${{ matrix.sanitize || false }} PYTEST_WORKERS: 'auto' PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} concurrency: @@ -158,13 +164,13 @@ jobs: id: build uses: ./.github/actions/build_pandas with: - sanitize: true + sanitize: ${{ env.SANITIZE }} - name: Test (not single_cpu) uses: ./.github/actions/run-tests if: ${{ matrix.name != 'Pypy' }} with: - sanitize: true + sanitize: ${{ env.SANITIZE }} env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} From d4074ca90ce1f0ea1b316dd9f6e2e243edae269e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Sep 2023 15:06:54 -0400 Subject: [PATCH 12/43] checkpoint --- .../vendored/numpy/datetime/np_datetime.h | 3 +- .../src/vendored/numpy/datetime/np_datetime.c | 59 ++++++++++++++++--- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h b/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h index 6b5135f559482..83138e0dc679e 100644 --- a/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h +++ b/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h @@ -67,7 +67,8 @@ static const npy_datetimestruct _M_MAX_DTS = { PyObject *extract_utc_offset(PyObject *obj); npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts); + const npy_datetimestruct *dts, + bool *overflow); void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, npy_datetimestruct *result); diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 49016f79de5b9..e2101557faa93 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -22,6 +22,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API +#include #include #include @@ -304,17 +305,42 @@ PyObject *extract_utc_offset(PyObject *obj) { /* * Converts a datetime from a datetimestruct to a datetime based * on a metadata unit. The date is assumed to be valid. + * + * In case of an overflow, the library returns -1 and sets the + * overflow argument to true */ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts) { + const npy_datetimestruct *dts, + bool *overflow) { npy_datetime ret; if (base == NPY_FR_Y) { /* Truncate to the year */ + if (dts->year < INT64_MIN + 1970) { + *overflow = true; + return -1; + } ret = dts->year - 1970; } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); + /* Truncate to the month */ + if (dts->year < INT64_MIN + 1970) { + *overflow = true; + return -1; + } + const npy_int64 years = dts->year - 1970; + + if ((years > INT64_MAX / 12) || (years < INT64_MIN / 12)) { + *overflow = true; + return -1; + } + const npy_int64 months = years * 12; + + if (dts->month == INT32_MIN) { + *overflow = true; + return -1; + } + + ret = months + dts->month - 1; } else { /* Otherwise calculate the number of days to start */ npy_int64 days = get_datetimestruct_days(dts); @@ -325,15 +351,34 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, if (days >= 0) { ret = days / 7; } else { - ret = (days - 6) / 7; + if (days < INT64_MIN + 6) { + *overflow = true; + return -1; + } + ret = (days - 6) / 7; } break; case NPY_FR_D: ret = days; break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; + case NPY_FR_h: { + if ((days > INT64_MAX / 24) || (days < INT64_MIN / 24)) { + *overflow = true; + return -1; + } + npy_int64 hours = days * 24; + + if ( + ((dts->hour > 0) && (hours > INT64_MAX - dts->hour)) + || ((dts->hour < 0) && (hours < INT64_MIN - dts->hour)) + ) { + *overflow = true; + return -1; + } + + ret = hours + dts->hour; + break; + } case NPY_FR_m: ret = (days * 24 + dts->hour) * 60 + dts->min; break; From e303ba11fde1642f01077d6bd6efed342ac40db2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 27 Oct 2023 08:42:44 -0400 Subject: [PATCH 13/43] bool fixup --- .../_libs/include/pandas/vendored/numpy/datetime/np_datetime.h | 3 +-- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h b/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h index ca05ff5c11b11..e4e90a7ea24cf 100644 --- a/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h +++ b/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h @@ -67,8 +67,7 @@ static const npy_datetimestruct _M_MAX_DTS = { PyObject *extract_utc_offset(PyObject *obj); npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts, - bool *overflow); + const npy_datetimestruct *dts); void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, npy_datetimestruct *result); diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index a0da03268604c..e3ad6fab5bdd8 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -23,7 +23,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #endif // NPY_NO_DEPRECATED_API #include -#include #include "pandas/vendored/numpy/datetime/np_datetime.h" #include From 46d1034c46ef66ab79b38740f768870eecddc310 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 15 Nov 2023 21:54:35 -0500 Subject: [PATCH 14/43] reverts --- pandas/_libs/tslibs/offsets.pyx | 9 --------- pandas/_libs/tslibs/period.pyx | 10 ---------- pandas/_libs/tslibs/timestamps.pyx | 1 - 3 files changed, 20 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index ba63947e01306..7f3a72178a359 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -68,7 +68,6 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, - check_dts_bounds, get_unit_from_dtype, import_pandas_datetime, npy_datetimestruct, @@ -3119,8 +3118,6 @@ cdef class SemiMonthOffset(SingleConstructorOffset): days_in_month = get_days_in_month(dts.year, dts.month) dts.day = min(to_day, days_in_month) - with gil: - check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val @@ -4939,8 +4936,6 @@ cdef ndarray shift_quarters( dts.month = month_add_months(dts, modby * n - months_since) dts.day = get_day_of_month(&dts, day_opt) - with gil: - check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val @@ -4999,8 +4994,6 @@ def shift_months( dts.month = month_add_months(dts, months) dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) - with gil: - check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val @@ -5027,8 +5020,6 @@ def shift_months( dts.month = month_add_months(dts, months_to_roll) dts.day = get_day_of_month(&dts, day_opt) - with gil: - check_dts_bounds(&dts, reso) res_val = npy_datetimestruct_to_datetime(reso, &dts) # Analogous to: out[i] = res_val diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 6e10d5c49c882..318e018689a78 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -730,8 +730,6 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day) noexcept nogil: npy_datetimestruct dts int64_t unix_date - with gil: - check_dts_bounds(&dts, NPY_FR_D) memset(&dts, 0, sizeof(npy_datetimestruct)) dts.year = year dts.month = month @@ -749,8 +747,6 @@ cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) noexcept n cdef: int64_t result - with gil: - check_dts_bounds(dts, NPY_DATETIMEUNIT.NPY_FR_Y) result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) if dts.month > to_end: return result + 1 @@ -808,20 +804,14 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogi return dts_to_qtr_ordinal(dts, fmonth) elif freq_group == FR_WK: - with gil: - check_dts_bounds(dts, NPY_FR_D) unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) return unix_date_to_week(unix_date, freq - FR_WK) elif freq == FR_BUS: - with gil: - check_dts_bounds(dts, NPY_FR_D) unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) return DtoB(dts, 0, unix_date) unit = freq_group_code_to_npy_unit(freq) - with gil: - check_dts_bounds(dts, unit) return npy_datetimestruct_to_datetime(unit, dts) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c67141fcaf2ab..56a6885d4a9e0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -580,7 +580,6 @@ cdef class _Timestamp(ABCTimestamp): if own_tz is not None and not is_utc(own_tz): pydatetime_to_dtstruct(self, &dts) - check_dts_bounds(&dts, self._creso) val = npy_datetimestruct_to_datetime(self._creso, &dts) + self.nanosecond else: val = self._value From 929c731d9698adce401e6554b20052a247c9255f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 15:30:35 -0800 Subject: [PATCH 15/43] known UB marker --- .github/workflows/unit-tests.yml | 2 +- pandas/tests/io/parser/common/test_float.py | 10 +++++++++- pyproject.toml | 1 + scripts/tests/data/deps_minimum.toml | 1 + 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 0ade3b3fd6d34..76b72f74d7090 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -83,7 +83,7 @@ jobs: test_args: "-W error::DeprecationWarning -W error::FutureWarning" - name: "ASAN/UBSAN" env_file: actions-311-numpydev.yaml - pattern: "not slow and not network and not single_cpu" + pattern: "not slow and not network and not single_cpu and not known_ub" test_args: "-W error::DeprecationWarning -W error::FutureWarning" sanitize: true - name: "Pyarrow Nightly" diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py index 4b23774ee2d5b..6ed32427bebf9 100644 --- a/pandas/tests/io/parser/common/test_float.py +++ b/pandas/tests/io/parser/common/test_float.py @@ -40,7 +40,14 @@ def test_scientific_no_exponent(all_parsers_all_precisions): tm.assert_frame_equal(df_roundtrip, df) -@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999]) +@pytest.mark.parametrize( + "neg_exp", + [ + -617, + -100000, + pytest.param(-99999999999999999, marks=pytest.mark.known_ub), + ], +) def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): # GH#38753 parser, precision = all_parsers_all_precisions @@ -51,6 +58,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): tm.assert_frame_equal(result, expected) +@pytest.mark.known_ub @xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999]) def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request): diff --git a/pyproject.toml b/pyproject.toml index 8ebd70762b2a5..d191af219ed8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -518,6 +518,7 @@ markers = [ "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", + "known_ub: tests known to invoke undefined behavior", ] [tool.mypy] diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index c74ad3d17a4a9..546dc99fc10d4 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -383,6 +383,7 @@ markers = [ "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", + "known_ub: tests that trigger known undefined behavior", ] [tool.mypy] From 6483e07a1aa2545dd189cc59ac6bd438b833f629 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 16:16:26 -0800 Subject: [PATCH 16/43] Finished marking tests with known UB --- pandas/tests/groupby/test_apply.py | 1 + pandas/tests/groupby/test_cumulative.py | 1 + pandas/tests/scalar/timedelta/methods/test_round.py | 2 ++ pandas/tests/scalar/timedelta/test_arithmetic.py | 1 + pandas/tests/scalar/timedelta/test_timedelta.py | 1 + pandas/tests/scalar/timestamp/methods/test_tz_localize.py | 1 + pandas/tests/scalar/timestamp/test_constructors.py | 1 + pandas/tests/tools/test_to_datetime.py | 1 + 8 files changed, 9 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 60b386adb664a..827d8a52dd977 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1391,6 +1391,7 @@ def test_groupby_apply_to_series_name(): tm.assert_series_equal(result, expected) +@pytest.mark.known_ub @pytest.mark.parametrize("dropna", [True, False]) def test_apply_na(dropna): # GH#28984 diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py index bf572609f3d37..ab5aeee18ba7b 100644 --- a/pandas/tests/groupby/test_cumulative.py +++ b/pandas/tests/groupby/test_cumulative.py @@ -60,6 +60,7 @@ def test_groupby_cumprod(): tm.assert_series_equal(actual, expected) +@pytest.mark.known_ub def test_groupby_cumprod_overflow(): # GH#37493 if we overflow we return garbage consistent with numpy df = DataFrame({"key": ["b"] * 4, "value": 100_000}) diff --git a/pandas/tests/scalar/timedelta/methods/test_round.py b/pandas/tests/scalar/timedelta/methods/test_round.py index 4beb39510413c..524a0b5736cee 100644 --- a/pandas/tests/scalar/timedelta/methods/test_round.py +++ b/pandas/tests/scalar/timedelta/methods/test_round.py @@ -61,6 +61,7 @@ def test_round_invalid(self): with pytest.raises(ValueError, match=msg): t1.round(freq) + @pytest.mark.known_ub def test_round_implementation_bounds(self): # See also: analogous test for Timestamp # GH#38964 @@ -86,6 +87,7 @@ def test_round_implementation_bounds(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.max.round("s") + @pytest.mark.known_ub @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max)) @pytest.mark.parametrize( "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 363ae1fa9c644..3e578db6723a7 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -966,6 +966,7 @@ def test_td_op_timedelta_timedeltalike_array(self, op, arr): class TestTimedeltaComparison: + @pytest.mark.known_ub def test_compare_pytimedelta_bounds(self): # GH#49021 don't overflow on comparison with very large pytimedeltas diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index ac605df935226..94b5a7821c20c 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -551,6 +551,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + @pytest.mark.known_ub @pytest.mark.xfail( reason="pd.Timedelta violates the Python hash invariant (GH#44504).", ) diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index 9df0a023730de..8169aa1035cf4 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -25,6 +25,7 @@ class TestTimestampTZLocalize: + @pytest.mark.known_ub def test_tz_localize_pushes_out_of_bounds(self): # GH#12677 # tz_localize that pushes away from the boundary is OK diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 91314a497b1fb..c5f1694802930 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -815,6 +815,7 @@ def test_barely_out_of_bounds(self): with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp("2262-04-11 23:47:16.854775808") + @pytest.mark.known_ub def test_bounds_with_different_units(self): out_of_bounds_dates = ("1677-09-21", "2262-04-12") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 8139fe52c7037..627e148610348 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1140,6 +1140,7 @@ def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors): assert ts.unit == "s" assert ts.asm8 == dt + @pytest.mark.known_ub def test_to_datetime_dt64d_out_of_bounds(self, cache): dt64 = np.datetime64(np.iinfo(np.int64).max, "D") From b87a2102397ba3763703009382e63c111995d756 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 21:14:30 -0800 Subject: [PATCH 17/43] dedicated CI job --- .github/workflows/unit-tests.yml | 47 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9a871ffdb5d12..9d2a97df7114c 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -93,11 +93,6 @@ jobs: env_file: actions-311-numpydev.yaml pattern: "not slow and not network and not single_cpu" test_args: "-W error::DeprecationWarning -W error::FutureWarning" - - name: "ASAN/UBSAN" - env_file: actions-311-numpydev.yaml - pattern: "not slow and not network and not single_cpu and not known_ub" - test_args: "-W error::DeprecationWarning -W error::FutureWarning" - sanitize: true - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" @@ -110,7 +105,6 @@ jobs: PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} - SANITIZE: ${{ matrix.sanitize || false }} PYTEST_WORKERS: 'auto' PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} # Clipboard tests @@ -180,22 +174,16 @@ jobs: - name: Build Pandas id: build uses: ./.github/actions/build_pandas - with: - sanitize: ${{ env.SANITIZE }} - name: Test (not single_cpu) uses: ./.github/actions/run-tests if: ${{ matrix.name != 'Pypy' }} - with: - sanitize: ${{ env.SANITIZE }} env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} - name: Test (single_cpu) uses: ./.github/actions/run-tests - with: - sanitize: true env: PATTERN: 'single_cpu' PYTEST_WORKERS: 0 @@ -317,6 +305,41 @@ jobs: group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl cancel-in-progress: true + ASAN/UBSAN: + runs-on: ubuntu-22.04 + timeout-minutes: 90 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Conda + uses: ./.github/actions/setup-conda + with: + environment-file: ci/deps/actions-311-numpydev.yaml + + - name: Build Pandas + id: build + uses: ./.github/actions/build_pandas + with: + sanitize: true + + - name: Test (not single_cpu) + uses: ./.github/actions/run-tests + with: + sanitize: true + env: + PATTERN: "not slow and not network and not single_cpu and not known_ub" + + - name: Test (single_cpu) + uses: ./.github/actions/run-tests + with: + sanitize: true + env: + PATTERN: 'single_cpu' + PYTEST_WORKERS: 0 + python-dev: # This job may or may not run depending on the state of the next # unreleased Python version. DO NOT DELETE IT. From 46ec0237896fb1a074ed438985b216ce5c5d95b6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 21:26:21 -0800 Subject: [PATCH 18/43] identifier fix --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9d2a97df7114c..91780c80e5997 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -305,7 +305,7 @@ jobs: group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl cancel-in-progress: true - ASAN/UBSAN: + ASAN_UBSAN: runs-on: ubuntu-22.04 timeout-minutes: 90 steps: From 8695dca20815a21b9db00d255bac7d07655e3e9f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 21:47:57 -0800 Subject: [PATCH 19/43] fixes --- .github/workflows/unit-tests.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 91780c80e5997..65cb0f1389a67 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -331,14 +331,17 @@ jobs: sanitize: true env: PATTERN: "not slow and not network and not single_cpu and not known_ub" + PYTEST_WORKERS: 'auto' + PYTEST_TARGET: 'pandas' - name: Test (single_cpu) uses: ./.github/actions/run-tests with: sanitize: true env: - PATTERN: 'single_cpu' + PATTERN: 'single_cpu and not known_ub' PYTEST_WORKERS: 0 + PYTEST_TARGET: 'pandas' python-dev: # This job may or may not run depending on the state of the next From 05319ae53d5822fca6c1f0edc9b316c58c0f38f2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 22:28:28 -0800 Subject: [PATCH 20/43] more test skip --- pandas/tests/frame/test_constructors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 84189f8149d81..f13a492305659 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3197,6 +3197,7 @@ def test_from_out_of_bounds_ns_datetime( assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype + @pytest.mark.known_ub def test_out_of_s_bounds_datetime64(self, constructor): scalar = np.datetime64(np.iinfo(np.int64).max, "D") result = constructor(scalar) @@ -3232,6 +3233,7 @@ def test_from_out_of_bounds_ns_timedelta( assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype + @pytest.mark.known_ub @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) def test_out_of_s_bounds_timedelta64(self, constructor, cls): scalar = cls(np.iinfo(np.int64).max, "D") From 6d76a5733897a071aec5b36c7d2901f98d4a1c51 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 23:24:35 -0800 Subject: [PATCH 21/43] try quotes --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 65cb0f1389a67..5711096b55034 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -339,7 +339,7 @@ jobs: with: sanitize: true env: - PATTERN: 'single_cpu and not known_ub' + PATTERN: "single_cpu and not known_ub" PYTEST_WORKERS: 0 PYTEST_TARGET: 'pandas' From f5dd440058883c39f289836c314a7cce00d5a16a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 23:33:43 -0800 Subject: [PATCH 22/43] simplify ci --- .github/actions/build_pandas/action.yml | 22 +++++----------------- .github/actions/run-tests/action.yml | 13 +------------ .github/workflows/unit-tests.yml | 12 +++++++----- 3 files changed, 13 insertions(+), 34 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 273a31bf36e03..db3494db4d35b 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -4,9 +4,9 @@ inputs: editable: description: Whether to build pandas in editable mode (default true) default: true - sanitize: - description: Whether sanitizers should be used or not - default: false + meson_args: + description: Extra flags to pass to meson + required: false runs: using: composite steps: @@ -28,20 +28,8 @@ runs: - name: Build Pandas run: | if [[ ${{ inputs.editable }} == "true" ]]; then - if [[ ${{ inputs.sanitize }} == "true" ]]; then - CFLAGS="$CFLAGS -fno-sanitize-recover=all" \ - pip install -e . --no-build-isolation -v \ - --config-settings=setup-args="-Db_sanitize=address,undefined" --no-deps - else - pip install -e . --no-build-isolation -v --no-deps - fi + pip install -e . --no-build-isolation -v --no-deps ${{ inputs.meson_args }} else - if [[ ${{ inputs.sanitize }} == "true" ]]; then - CFLAGS="$CFLAGS -fno-sanitize-recover=all" \ - pip install . --no-build-isolation -v \ - --config-settings=setup-args="-Db_sanitize=address,undefined" --no-deps - else - pip install . --no-build-isolation -v --no-deps - fi + pip install . --no-build-isolation -v --no-deps ${{ inputs.meson_args }} fi shell: bash -el {0} diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index b66adf18a4847..fd7c3587f2254 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -1,20 +1,9 @@ name: Run tests and report results -inputs: - sanitize: - description: Whether sanitizers should be used or not - default: false runs: using: composite steps: - name: Test - run: | - if [[ ${{ inputs.sanitize }} == "true" ]]; then - ASAN_OPTIONS=detect_leaks=0 \ - LD_PRELOAD=$(gcc -print-file-name=libasan.so) \ - ci/run_tests.sh - else - ci/run_tests.sh - fi + run: ci/run_tests.sh shell: bash -el {0} - name: Publish test results diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 5711096b55034..cf558fed7f2b4 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -323,25 +323,27 @@ jobs: id: build uses: ./.github/actions/build_pandas with: - sanitize: true + meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" + env: + CFLAGS="$CFLAGS -fno-sanitize-recover=all" - name: Test (not single_cpu) uses: ./.github/actions/run-tests - with: - sanitize: true env: PATTERN: "not slow and not network and not single_cpu and not known_ub" PYTEST_WORKERS: 'auto' PYTEST_TARGET: 'pandas' + ASAN_OPTIONS: detect_leaks=0 + LD_PRELOAD: $(gcc -print-file-name=libasan.so) - name: Test (single_cpu) uses: ./.github/actions/run-tests - with: - sanitize: true env: PATTERN: "single_cpu and not known_ub" PYTEST_WORKERS: 0 PYTEST_TARGET: 'pandas' + ASAN_OPTIONS: detect_leaks=0 + LD_PRELOAD: $(gcc -print-file-name=libasan.so) python-dev: # This job may or may not run depending on the state of the next From 12aa1d11346fdc7916b158c09681675f4f708d50 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 23:43:41 -0800 Subject: [PATCH 23/43] try CFLAGS --- .github/actions/build_pandas/action.yml | 4 ++++ .github/workflows/unit-tests.yml | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index db3494db4d35b..7b3ddd79263d3 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -7,6 +7,9 @@ inputs: meson_args: description: Extra flags to pass to meson required: false + cflags_adds: + description: Items to append to the CFLAGS variable + required: false runs: using: composite steps: @@ -27,6 +30,7 @@ runs: - name: Build Pandas run: | + export CFLAGS="$CFLAGS ${{ inputs.cflags_adds }}" if [[ ${{ inputs.editable }} == "true" ]]; then pip install -e . --no-build-isolation -v --no-deps ${{ inputs.meson_args }} else diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index cf558fed7f2b4..2d292faefeac1 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -324,8 +324,7 @@ jobs: uses: ./.github/actions/build_pandas with: meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" - env: - CFLAGS="$CFLAGS -fno-sanitize-recover=all" + cflags_adds: -fno-sanitize-recover=all - name: Test (not single_cpu) uses: ./.github/actions/run-tests From 628d1c2754773a92531f0df0268742f6a9136d16 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Dec 2023 00:34:50 -0800 Subject: [PATCH 24/43] preload args --- .github/actions/run-tests/action.yml | 6 +++++- .github/workflows/unit-tests.yml | 10 ++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index fd7c3587f2254..02a400ab23263 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -1,9 +1,13 @@ name: Run tests and report results +inputs: + preload: + description: Preload arguments for sanitizer + required: false runs: using: composite steps: - name: Test - run: ci/run_tests.sh + run: ${{ inputs.preload }} ci/run_tests.sh shell: bash -el {0} - name: Publish test results diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 2d292faefeac1..c5197ec745b94 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -328,21 +328,23 @@ jobs: - name: Test (not single_cpu) uses: ./.github/actions/run-tests + with: + preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) env: PATTERN: "not slow and not network and not single_cpu and not known_ub" PYTEST_WORKERS: 'auto' PYTEST_TARGET: 'pandas' - ASAN_OPTIONS: detect_leaks=0 - LD_PRELOAD: $(gcc -print-file-name=libasan.so) + ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working - name: Test (single_cpu) uses: ./.github/actions/run-tests + with: + preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) env: PATTERN: "single_cpu and not known_ub" PYTEST_WORKERS: 0 PYTEST_TARGET: 'pandas' - ASAN_OPTIONS: detect_leaks=0 - LD_PRELOAD: $(gcc -print-file-name=libasan.so) + ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working python-dev: # This job may or may not run depending on the state of the next From 1de633ee28ab2e419574c99e449957d7a7aa00ba Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Dec 2023 10:02:56 -0800 Subject: [PATCH 25/43] skip single_cpu tests --- .github/workflows/unit-tests.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index c5197ec745b94..43f3877ae0940 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -336,15 +336,16 @@ jobs: PYTEST_TARGET: 'pandas' ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working - - name: Test (single_cpu) - uses: ./.github/actions/run-tests - with: - preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) - env: - PATTERN: "single_cpu and not known_ub" - PYTEST_WORKERS: 0 - PYTEST_TARGET: 'pandas' - ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working + # Skipping single CPU tests for now - see pytest GH issue 641 for error + #- name: Test (single_cpu) + # uses: ./.github/actions/run-tests + # with: + # preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) + # env: + # PATTERN: "single_cpu and not known_ub" + # PYTEST_WORKERS: 0 + # PYTEST_TARGET: 'pandas' + # ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working python-dev: # This job may or may not run depending on the state of the next From 3e295c56a4c3233fe2b9d525f3e94931d5ad3e83 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 2 Dec 2023 10:04:48 -0800 Subject: [PATCH 26/43] wording --- scripts/tests/data/deps_minimum.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index 3230359f0decf..c5eed77928271 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -383,7 +383,7 @@ markers = [ "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", - "known_ub: tests that trigger known undefined behavior", + "known_ub: tests known to invoke undefined behavior", ] [tool.mypy] From d5809b82ca74537c4b2f39e260c7ccd31d4c12d3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 4 Dec 2023 16:36:48 -0800 Subject: [PATCH 27/43] removed unneeded marker --- pandas/tests/groupby/test_apply.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 827d8a52dd977..60b386adb664a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1391,7 +1391,6 @@ def test_groupby_apply_to_series_name(): tm.assert_series_equal(result, expected) -@pytest.mark.known_ub @pytest.mark.parametrize("dropna", [True, False]) def test_apply_na(dropna): # GH#28984 From 6266422371022ba227ae01634c40090e26635784 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 4 Dec 2023 16:43:05 -0800 Subject: [PATCH 28/43] float set implementations --- .../_libs/include/pandas/vendored/klib/khash_python.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 5a933b45d9e21..78528e27296fe 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -130,13 +130,23 @@ static inline khuint32_t kh_float32_hash_func(float val) { KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, \ kh_floats_hash_equal) +#define KHASH_SET_INIT_FLOAT64(name) \ + KHASH_INIT(name, khfloat64_t, char, 0, kh_float64_hash_func, \ + kh_floats_hash_equal) + KHASH_MAP_INIT_FLOAT64(float64, size_t) +KHASH_SET_INIT_FLOAT64(float64_set) #define KHASH_MAP_INIT_FLOAT32(name, khval_t) \ KHASH_INIT(name, khfloat32_t, khval_t, 1, kh_float32_hash_func, \ kh_floats_hash_equal) +#define KHASH_SET_INIT_FLOAT32(name) \ + KHASH_INIT(name, khfloat32_t, char, 0, kh_float32_hash_func, \ + kh_floats_hash_equal) + KHASH_MAP_INIT_FLOAT32(float32, size_t) +KHASH_SET_INIT_FLOAT32(float32_set) static inline khint32_t kh_complex128_hash_func(khcomplex128_t val) { return kh_float64_hash_func(val.real) ^ kh_float64_hash_func(val.imag); From b68a533a7f642a9dd2de16e29cd0266a928effe8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 4 Dec 2023 17:07:50 -0800 Subject: [PATCH 29/43] Revert "float set implementations" This reverts commit 6266422371022ba227ae01634c40090e26635784. --- .../_libs/include/pandas/vendored/klib/khash_python.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 78528e27296fe..5a933b45d9e21 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -130,23 +130,13 @@ static inline khuint32_t kh_float32_hash_func(float val) { KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, \ kh_floats_hash_equal) -#define KHASH_SET_INIT_FLOAT64(name) \ - KHASH_INIT(name, khfloat64_t, char, 0, kh_float64_hash_func, \ - kh_floats_hash_equal) - KHASH_MAP_INIT_FLOAT64(float64, size_t) -KHASH_SET_INIT_FLOAT64(float64_set) #define KHASH_MAP_INIT_FLOAT32(name, khval_t) \ KHASH_INIT(name, khfloat32_t, khval_t, 1, kh_float32_hash_func, \ kh_floats_hash_equal) -#define KHASH_SET_INIT_FLOAT32(name) \ - KHASH_INIT(name, khfloat32_t, char, 0, kh_float32_hash_func, \ - kh_floats_hash_equal) - KHASH_MAP_INIT_FLOAT32(float32, size_t) -KHASH_SET_INIT_FLOAT32(float32_set) static inline khint32_t kh_complex128_hash_func(khcomplex128_t val) { return kh_float64_hash_func(val.real) ^ kh_float64_hash_func(val.imag); From a03ad1e84f52646a8f0ef4144fa591d45263b03c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 17:17:16 -0500 Subject: [PATCH 30/43] change marker name --- .github/workflows/unit-tests.yml | 4 ++-- pandas/tests/frame/test_constructors.py | 4 ++-- pandas/tests/groupby/test_cumulative.py | 2 +- pandas/tests/io/parser/common/test_float.py | 4 ++-- pandas/tests/scalar/timedelta/methods/test_round.py | 4 ++-- pandas/tests/scalar/timedelta/test_arithmetic.py | 2 +- pandas/tests/scalar/timedelta/test_timedelta.py | 2 +- pandas/tests/scalar/timestamp/methods/test_tz_localize.py | 2 +- pandas/tests/scalar/timestamp/test_constructors.py | 2 +- pandas/tests/tools/test_to_datetime.py | 2 +- pyproject.toml | 2 +- scripts/tests/data/deps_minimum.toml | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 7367d57cc8066..8d49a8a3f8e50 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -331,7 +331,7 @@ jobs: with: preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) env: - PATTERN: "not slow and not network and not single_cpu and not known_ub" + PATTERN: "not slow and not network and not single_cpu and not skip_ubsan" PYTEST_WORKERS: 'auto' PYTEST_TARGET: 'pandas' ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working @@ -342,7 +342,7 @@ jobs: # with: # preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) # env: - # PATTERN: "single_cpu and not known_ub" + # PATTERN: "single_cpu and not skip_ubsan" # PYTEST_WORKERS: 0 # PYTEST_TARGET: 'pandas' # ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index aebeff88730b9..f64d6a886fe9f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3206,7 +3206,7 @@ def test_from_out_of_bounds_ns_datetime( assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype - @pytest.mark.known_ub + @pytest.mark.skip_ubsan def test_out_of_s_bounds_datetime64(self, constructor): scalar = np.datetime64(np.iinfo(np.int64).max, "D") result = constructor(scalar) @@ -3242,7 +3242,7 @@ def test_from_out_of_bounds_ns_timedelta( assert item.asm8.dtype == exp_dtype assert dtype == exp_dtype - @pytest.mark.known_ub + @pytest.mark.skip_ubsan @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) def test_out_of_s_bounds_timedelta64(self, constructor, cls): scalar = cls(np.iinfo(np.int64).max, "D") diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py index ab5aeee18ba7b..1bdbef6d50c4c 100644 --- a/pandas/tests/groupby/test_cumulative.py +++ b/pandas/tests/groupby/test_cumulative.py @@ -60,7 +60,7 @@ def test_groupby_cumprod(): tm.assert_series_equal(actual, expected) -@pytest.mark.known_ub +@pytest.mark.skip_ubsan def test_groupby_cumprod_overflow(): # GH#37493 if we overflow we return garbage consistent with numpy df = DataFrame({"key": ["b"] * 4, "value": 100_000}) diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py index 6ed32427bebf9..6069c23936297 100644 --- a/pandas/tests/io/parser/common/test_float.py +++ b/pandas/tests/io/parser/common/test_float.py @@ -45,7 +45,7 @@ def test_scientific_no_exponent(all_parsers_all_precisions): [ -617, -100000, - pytest.param(-99999999999999999, marks=pytest.mark.known_ub), + pytest.param(-99999999999999999, marks=pytest.mark.skip_ubsan), ], ) def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): @@ -58,7 +58,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): tm.assert_frame_equal(result, expected) -@pytest.mark.known_ub +@pytest.mark.skip_ubsan @xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999]) def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request): diff --git a/pandas/tests/scalar/timedelta/methods/test_round.py b/pandas/tests/scalar/timedelta/methods/test_round.py index 524a0b5736cee..e54adb27d126b 100644 --- a/pandas/tests/scalar/timedelta/methods/test_round.py +++ b/pandas/tests/scalar/timedelta/methods/test_round.py @@ -61,7 +61,7 @@ def test_round_invalid(self): with pytest.raises(ValueError, match=msg): t1.round(freq) - @pytest.mark.known_ub + @pytest.mark.skip_ubsan def test_round_implementation_bounds(self): # See also: analogous test for Timestamp # GH#38964 @@ -87,7 +87,7 @@ def test_round_implementation_bounds(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.max.round("s") - @pytest.mark.known_ub + @pytest.mark.skip_ubsan @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max)) @pytest.mark.parametrize( "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 3e578db6723a7..a5d786cfba427 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -966,7 +966,7 @@ def test_td_op_timedelta_timedeltalike_array(self, op, arr): class TestTimedeltaComparison: - @pytest.mark.known_ub + @pytest.mark.skip_ubsan def test_compare_pytimedelta_bounds(self): # GH#49021 don't overflow on comparison with very large pytimedeltas diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 94b5a7821c20c..d4398f66e6f89 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -551,7 +551,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) - @pytest.mark.known_ub + @pytest.mark.skip_ubsan @pytest.mark.xfail( reason="pd.Timedelta violates the Python hash invariant (GH#44504).", ) diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index 8169aa1035cf4..af3dee1880d2e 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -25,7 +25,7 @@ class TestTimestampTZLocalize: - @pytest.mark.known_ub + @pytest.mark.skip_ubsan def test_tz_localize_pushes_out_of_bounds(self): # GH#12677 # tz_localize that pushes away from the boundary is OK diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index d229d59335476..3975f3c46aaa1 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -822,7 +822,7 @@ def test_barely_out_of_bounds(self): with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp("2262-04-11 23:47:16.854775808") - @pytest.mark.known_ub + @pytest.mark.skip_ubsan def test_bounds_with_different_units(self): out_of_bounds_dates = ("1677-09-21", "2262-04-12") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ed0af5c2144e8..368a50022b1b7 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1140,7 +1140,7 @@ def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors): assert ts.unit == "s" assert ts.asm8 == dt - @pytest.mark.known_ub + @pytest.mark.skip_ubsan def test_to_datetime_dt64d_out_of_bounds(self, cache): dt64 = np.datetime64(np.iinfo(np.int64).max, "D") diff --git a/pyproject.toml b/pyproject.toml index 0365f21830f2e..ea6c801f241ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -524,7 +524,7 @@ markers = [ "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", - "known_ub: tests known to invoke undefined behavior", + "skip_ubsan: Tests known to fail UBSAN check", ] [tool.mypy] diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index c5eed77928271..968b3b170f38a 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -383,7 +383,7 @@ markers = [ "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", - "known_ub: tests known to invoke undefined behavior", + "skip_ubsan: tests known to invoke undefined behavior", ] [tool.mypy] From 656edb1d2d75cc77cd4f7a6c45fef15fc9814e57 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 17:21:58 -0500 Subject: [PATCH 31/43] dedicated actions file --- .github/workflows/unit-tests.yml | 21 ++++++++++---------- ci/deps/actions-311-sanitizers.yaml | 30 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 11 deletions(-) create mode 100644 ci/deps/actions-311-sanitizers.yaml diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 8d49a8a3f8e50..d585d61e2f082 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -317,7 +317,7 @@ jobs: - name: Set up Conda uses: ./.github/actions/setup-conda with: - environment-file: ci/deps/actions-311-numpydev.yaml + environment-file: ci/deps/actions-311-sanitizers.yaml - name: Build Pandas id: build @@ -336,16 +336,15 @@ jobs: PYTEST_TARGET: 'pandas' ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working - # Skipping single CPU tests for now - see pytest GH issue 641 for error - #- name: Test (single_cpu) - # uses: ./.github/actions/run-tests - # with: - # preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) - # env: - # PATTERN: "single_cpu and not skip_ubsan" - # PYTEST_WORKERS: 0 - # PYTEST_TARGET: 'pandas' - # ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working + - name: Test (single_cpu) + uses: ./.github/actions/run-tests + with: + preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) + env: + PATTERN: "single_cpu and not skip_ubsan" + PYTEST_WORKERS: 0 + PYTEST_TARGET: 'pandas' + ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working python-dev: # This job may or may not run depending on the state of the next diff --git a/ci/deps/actions-311-sanitizers.yaml b/ci/deps/actions-311-sanitizers.yaml new file mode 100644 index 0000000000000..4744d622fc687 --- /dev/null +++ b/ci/deps/actions-311-sanitizers.yaml @@ -0,0 +1,30 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.11 + + # build dependencies + - versioneer[toml] + - cython>=0.29.33 + - meson[ninja]=1.2.1 + - meson-python=0.13.1 + + # test dependencies + - pytest>=7.3.2 + - pytest-cov + - pytest-xdist>=2.2.0 + - pytest-localserver>=0.7.1 + - pytest-qt>=4.2.0 + - boto3 + + # required dependencies + - python-dateutil + - numpy<2 + - pytz + + # pandas dependencies + - pip + + - pip: + - "tzdata>=2022.7" From 2aabda1e4fa96e187fbb0acd3649e296d370f307 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 17:29:22 -0500 Subject: [PATCH 32/43] consolidated into matrix --- .github/actions/run-tests/action.yml | 5 ++- .github/workflows/unit-tests.yml | 57 ++++++++-------------------- 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index 02a400ab23263..b4778b74df335 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -3,11 +3,14 @@ inputs: preload: description: Preload arguments for sanitizer required: false + asan_options: + description: Arguments for Address Sanitizer (ASAN) + required: false runs: using: composite steps: - name: Test - run: ${{ inputs.preload }} ci/run_tests.sh + run: ${{ inputs.asan_options }} ${{ inputs.preload }} ci/run_tests.sh shell: bash -el {0} - name: Publish test results diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index d585d61e2f082..5b26bdb669744 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -96,6 +96,13 @@ jobs: - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" + - name: "ASAN / UBSAN" + env_file: actions-311-sanitizers.yaml + pattern: "not slow and not network and not single_cpu and not skip_ubsan" + asan_options="detect_leaks=0" + preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) + meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" + cflags_adds: -fno-sanitize-recover=all fail-fast: false name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }} env: @@ -174,16 +181,25 @@ jobs: - name: Build Pandas id: build uses: ./.github/actions/build_pandas + with: + meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" + cflags_adds: -fno-sanitize-recover=all - name: Test (not single_cpu) uses: ./.github/actions/run-tests if: ${{ matrix.name != 'Pypy' }} + with: + preload: ${{ matrix.preload }} + asan_options: ${{ matrix.asan_options }} env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} - name: Test (single_cpu) uses: ./.github/actions/run-tests + with: + preload: ${{ matrix.preload }} + asan_options: ${{ matrix.asan_options }} env: PATTERN: 'single_cpu' PYTEST_WORKERS: 0 @@ -305,47 +321,6 @@ jobs: group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl cancel-in-progress: true - ASAN_UBSAN: - runs-on: ubuntu-22.04 - timeout-minutes: 90 - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Conda - uses: ./.github/actions/setup-conda - with: - environment-file: ci/deps/actions-311-sanitizers.yaml - - - name: Build Pandas - id: build - uses: ./.github/actions/build_pandas - with: - meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" - cflags_adds: -fno-sanitize-recover=all - - - name: Test (not single_cpu) - uses: ./.github/actions/run-tests - with: - preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) - env: - PATTERN: "not slow and not network and not single_cpu and not skip_ubsan" - PYTEST_WORKERS: 'auto' - PYTEST_TARGET: 'pandas' - ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working - - - name: Test (single_cpu) - uses: ./.github/actions/run-tests - with: - preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) - env: - PATTERN: "single_cpu and not skip_ubsan" - PYTEST_WORKERS: 0 - PYTEST_TARGET: 'pandas' - ASAN_OPTIONS: detect_leaks=0 # leak detection in Python not yet working - python-dev: # This job may or may not run depending on the state of the next # unreleased Python version. DO NOT DELETE IT. From 3056e5f682b194feadc78f5c7f96008c6ca363af Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 17:41:04 -0500 Subject: [PATCH 33/43] fixup --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 5b26bdb669744..5d3e407246420 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -99,7 +99,7 @@ jobs: - name: "ASAN / UBSAN" env_file: actions-311-sanitizers.yaml pattern: "not slow and not network and not single_cpu and not skip_ubsan" - asan_options="detect_leaks=0" + asan_options: "ASAN_OPTIONS=detect_leaks=0" preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" cflags_adds: -fno-sanitize-recover=all From 89b2b80086859f46dd0d7f2f4f1bb40031a4e754 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 18:00:40 -0500 Subject: [PATCH 34/43] typos --- .github/workflows/unit-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 5d3e407246420..829c1b6b18063 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -182,8 +182,8 @@ jobs: id: build uses: ./.github/actions/build_pandas with: - meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" - cflags_adds: -fno-sanitize-recover=all + meson_args: ${{ matrix.meson_args }} + cflags_adds: ${{ matrix.cflags_adds }} - name: Test (not single_cpu) uses: ./.github/actions/run-tests From d591b78ddd98e2f8212e35a604e2d76bbb49caca Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 20:42:28 -0500 Subject: [PATCH 35/43] fixups --- .github/workflows/unit-tests.yml | 2 +- ci/deps/actions-311-sanitizers.yaml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 829c1b6b18063..6854f7a924066 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -190,7 +190,7 @@ jobs: if: ${{ matrix.name != 'Pypy' }} with: preload: ${{ matrix.preload }} - asan_options: ${{ matrix.asan_options }} + asan_options: ${{ matrix.asan_options }} env: # Set pattern to not single_cpu if not already set PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} diff --git a/ci/deps/actions-311-sanitizers.yaml b/ci/deps/actions-311-sanitizers.yaml index 4744d622fc687..18e7a360a56c9 100644 --- a/ci/deps/actions-311-sanitizers.yaml +++ b/ci/deps/actions-311-sanitizers.yaml @@ -6,7 +6,7 @@ dependencies: # build dependencies - versioneer[toml] - - cython>=0.29.33 + - cython>=0.29.33 - meson[ninja]=1.2.1 - meson-python=0.13.1 @@ -17,6 +17,7 @@ dependencies: - pytest-localserver>=0.7.1 - pytest-qt>=4.2.0 - boto3 + - hypothesis>=6.46.1 # required dependencies - python-dateutil From 644206637ea54b82d468d45e79ddeae96fb5013d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Dec 2023 22:40:31 -0500 Subject: [PATCH 36/43] add qt? --- ci/deps/actions-311-sanitizers.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/deps/actions-311-sanitizers.yaml b/ci/deps/actions-311-sanitizers.yaml index 18e7a360a56c9..dcd381066b0ea 100644 --- a/ci/deps/actions-311-sanitizers.yaml +++ b/ci/deps/actions-311-sanitizers.yaml @@ -18,6 +18,7 @@ dependencies: - pytest-qt>=4.2.0 - boto3 - hypothesis>=6.46.1 + - pyqt>=5.15.9 # required dependencies - python-dateutil From 02bf20daaa306ce84214e56173abb14f9923f4a4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 19 Dec 2023 15:40:20 -0500 Subject: [PATCH 37/43] intentional UB with verbose --- .github/workflows/unit-tests.yml | 1 + pandas/_libs/src/vendored/ujson/python/objToJSON.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 6854f7a924066..4c5839a035261 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -103,6 +103,7 @@ jobs: preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" cflags_adds: -fno-sanitize-recover=all + test_args: "-v" fail-fast: false name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }} env: diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8bba95dd456de..98b8a44949d60 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -2062,5 +2062,8 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, encoder->free(ret); } + int myvar = INT_MAX; + printf("This value should overflow: %d\n", myvar + 1); + return newobj; } From 01070f35dad1bcc7f5345976fb2f58a5c3575b25 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Dec 2023 18:00:32 -0500 Subject: [PATCH 38/43] disable pytest-xdist --- .github/workflows/unit-tests.yml | 4 ++-- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 4c5839a035261..57f9893d36044 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -103,7 +103,7 @@ jobs: preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so) meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined" cflags_adds: -fno-sanitize-recover=all - test_args: "-v" + pytest_workers: -1 # disable pytest-xdist as it swallows stderr from ASAN fail-fast: false name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }} env: @@ -113,7 +113,7 @@ jobs: PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} - PYTEST_WORKERS: 'auto' + PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} # Clipboard tests QT_QPA_PLATFORM: offscreen diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 98b8a44949d60..a54c6bccab43a 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -2062,8 +2062,8 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, encoder->free(ret); } - int myvar = INT_MAX; - printf("This value should overflow: %d\n", myvar + 1); + int someints[2] = {0, 1}; + printf("the OOB value is: %d\n", someints[2]); return newobj; } From 57ed2863ea68686abb617d29a580cf166079ac61 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Dec 2023 18:12:00 -0500 Subject: [PATCH 39/43] original issue --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index a54c6bccab43a..98b8a44949d60 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -2062,8 +2062,8 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, encoder->free(ret); } - int someints[2] = {0, 1}; - printf("the OOB value is: %d\n", someints[2]); + int myvar = INT_MAX; + printf("This value should overflow: %d\n", myvar + 1); return newobj; } From 677da0e4bce131d6992c320920885eded2a5d796 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Dec 2023 18:45:37 -0500 Subject: [PATCH 40/43] remove UB --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 98b8a44949d60..8bba95dd456de 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -2062,8 +2062,5 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, encoder->free(ret); } - int myvar = INT_MAX; - printf("This value should overflow: %d\n", myvar + 1); - return newobj; } From af0150a8ec5058219a3628bbfb950bca21fceb8c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Dec 2023 20:57:31 -0500 Subject: [PATCH 41/43] Revert "remove UB" This reverts commit 677da0e4bce131d6992c320920885eded2a5d796. --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8bba95dd456de..98b8a44949d60 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -2062,5 +2062,8 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, encoder->free(ret); } + int myvar = INT_MAX; + printf("This value should overflow: %d\n", myvar + 1); + return newobj; } From 4647f123a3b477bb64f5261f90b1a8874dae712e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Dec 2023 22:11:08 -0500 Subject: [PATCH 42/43] merge fixup --- pyproject.toml | 1 - scripts/tests/data/deps_minimum.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index edee426e20b2a..ca19f463edf40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -523,7 +523,6 @@ markers = [ "db: tests requiring a database (mysql or postgres)", "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", - "arraymanager: mark a test to run with ArrayManager enabled", "skip_ubsan: Tests known to fail UBSAN check", ] diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index 968b3b170f38a..3be6be17d1ee2 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -382,7 +382,6 @@ markers = [ "db: tests requiring a database (mysql or postgres)", "clipboard: mark a pd.read_clipboard test", "arm_slow: mark a test as slow for arm64 architecture", - "arraymanager: mark a test to run with ArrayManager enabled", "skip_ubsan: tests known to invoke undefined behavior", ] From cba79f61ce80d791540b469145af306e014c8d33 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 20 Dec 2023 23:40:27 -0500 Subject: [PATCH 43/43] remove UB --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 98b8a44949d60..8bba95dd456de 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -2062,8 +2062,5 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, encoder->free(ret); } - int myvar = INT_MAX; - printf("This value should overflow: %d\n", myvar + 1); - return newobj; }