From 33f1610ee1fb3aa9b3dc3bcae22a7748dfae35fd Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 30 Nov 2023 20:58:26 -0800 Subject: [PATCH 01/13] Add Werror to CI --- .github/workflows/unit-tests.yml | 6 +++--- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 30397632a0af6..ffcd2ae32c09c 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -254,7 +254,7 @@ jobs: python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true" python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 - python -m pip install --no-cache-dir --no-build-isolation -e . + python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir export PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml @@ -292,7 +292,7 @@ jobs: . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 - python -m pip install --no-cache-dir --no-build-isolation -e . + python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir - name: Run Tests @@ -365,7 +365,7 @@ jobs: python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov - python -m pip install -ve . --no-build-isolation --no-index --no-deps + python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror" python -m pip list - name: Run Tests diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 6271791fe201e..980a29564c597 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1227,6 +1227,8 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, const int type_num = PyArray_TYPE(labels); PyArray_Descr *dtype = PyArray_DESCR(labels); + int this_int_is_not_used_and_warnings_should_be_errors; + for (npy_intp i = 0; i < num; i++) { item = PyArray_GETITEM(labels, dataptr); if (!item) { From 62513b1fa71d50f4400210ca8b690f8d19a49e9d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 30 Nov 2023 22:44:40 -0800 Subject: [PATCH 02/13] Add to build action --- .github/actions/build_pandas/action.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 3ee10efaaf96f..460ae2f8594c0 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -25,8 +25,10 @@ runs: - name: Build Pandas run: | if [[ ${{ inputs.editable }} == "true" ]]; then - pip install -e . --no-build-isolation -v --no-deps + pip install -e . --no-build-isolation -v --no-deps \ + --config-settings=setup-args="--werror" else - pip install . --no-build-isolation -v --no-deps + pip install . --no-build-isolation -v --no-deps \ + --config-settings=setup-args="--werror" fi shell: bash -el {0} From b477c32e9cc73a4b94084099b797cae2b592c6b9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 08:08:16 -0800 Subject: [PATCH 03/13] CircleCI Werror --- .circleci/setup_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/setup_env.sh b/.circleci/setup_env.sh index 4f81acb6d2099..eef4db1191a9a 100755 --- a/.circleci/setup_env.sh +++ b/.circleci/setup_env.sh @@ -55,6 +55,6 @@ if pip show pandas 1>/dev/null; then fi echo "Install pandas" -python -m pip install --no-build-isolation -ve . +python -m pip install --no-build-isolation -ve . --config-settings=setup-args="--werror" echo "done" From acce2f067755e27f5a9ea39cb2adabd30009bfc2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 08:18:10 -0800 Subject: [PATCH 04/13] Removed artificial failure --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 980a29564c597..6271791fe201e 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1227,8 +1227,6 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, const int type_num = PyArray_TYPE(labels); PyArray_Descr *dtype = PyArray_DESCR(labels); - int this_int_is_not_used_and_warnings_should_be_errors; - for (npy_intp i = 0; i < num; i++) { item = PyArray_GETITEM(labels, dataptr); if (!item) { From 5aa4640b0253577305f7d74f354a929515d24e14 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 08:57:16 -0800 Subject: [PATCH 05/13] suppress float -> int cast warning --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 6271791fe201e..613d0a5398bcf 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1250,7 +1250,10 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, i8date = get_long_attr(item, "_value"); } else { if (PyDelta_Check(item)) { - i8date = total_seconds(item) * 1000000000LL; // nanoseconds per second + // TODO(anyone): cast below loses precision if total_seconds return + // value exceeds number of bits that significand can hold + i8date = (int64_t)total_seconds(item) * + 1000000000LL; // nanoseconds per second } else { // datetime.* objects don't follow above rules i8date = PyDateTimeToEpoch(item, NPY_FR_ns); From 45c73cacb94a50f6be9c5aeb23e7662276671c05 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 09:14:47 -0800 Subject: [PATCH 06/13] more warning suppression --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 613d0a5398bcf..3c971e7c790d3 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1491,10 +1491,13 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { } return; } else if (PyDelta_Check(obj)) { - npy_int64 value = - PyObject_HasAttrString(obj, "_value") ? get_long_attr(obj, "_value") - : // pd.Timedelta object or pd.NaT - total_seconds(obj) * 1000000000LL; // nanoseconds per sec + // pd.Timedelta object or pd.NaT should evaluate true here + // fallback to nanoseconds per sec for other objects + // TODO(anyone): cast below loses precision if total_seconds return + // value exceeds number of bits that significand can hold + npy_int64 value = PyObject_HasAttrString(obj, "_value") + ? get_long_attr(obj, "_value") + : (int64_t)total_seconds(obj) * 1000000000LL; if (value == get_nat()) { tc->type = JT_NULL; From 9cb838775c7b5b5c707ae26af824614078d6417b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 15:23:23 -0800 Subject: [PATCH 07/13] macOS warning fixup --- pandas/_libs/include/pandas/datetime/pd_datetime.h | 2 +- pandas/_libs/src/datetime/date_conversions.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/include/pandas/datetime/pd_datetime.h b/pandas/_libs/include/pandas/datetime/pd_datetime.h index 7674fbbe743fe..a51f8cea71513 100644 --- a/pandas/_libs/include/pandas/datetime/pd_datetime.h +++ b/pandas/_libs/include/pandas/datetime/pd_datetime.h @@ -33,7 +33,7 @@ extern "C" { typedef struct { npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); - int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT); + int (*scaleNanosecToUnit)(int64_t *, NPY_DATETIMEUNIT); char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, NPY_DATETIMEUNIT, size_t *); char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *); npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT); diff --git a/pandas/_libs/src/datetime/date_conversions.c b/pandas/_libs/src/datetime/date_conversions.c index 7eaf8aad12f43..99081746b2c97 100644 --- a/pandas/_libs/src/datetime/date_conversions.c +++ b/pandas/_libs/src/datetime/date_conversions.c @@ -20,7 +20,7 @@ The full license is in the LICENSE file, distributed with this software. * * Mutates the provided value directly. Returns 0 on success, non-zero on error. */ -int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { +int scaleNanosecToUnit(int64_t *value, NPY_DATETIMEUNIT unit) { switch (unit) { case NPY_FR_ns: break; From a5952e81049603ea1bdc648474e0d7e2a0245b72 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 15:42:07 -0800 Subject: [PATCH 08/13] declaration fixup --- pandas/_libs/include/pandas/datetime/date_conversions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/include/pandas/datetime/date_conversions.h b/pandas/_libs/include/pandas/datetime/date_conversions.h index 9a4a02ea89b4d..e039991847a62 100644 --- a/pandas/_libs/include/pandas/datetime/date_conversions.h +++ b/pandas/_libs/include/pandas/datetime/date_conversions.h @@ -12,7 +12,7 @@ The full license is in the LICENSE file, distributed with this software. #include // Scales value inplace from nanosecond resolution to unit resolution -int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit); +int scaleNanosecToUnit(int64_t *value, NPY_DATETIMEUNIT unit); // Converts an int64 object representing a date to ISO format // up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z From f7f8fae8fef7d60e862974393687f06e5486c27c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 16:27:27 -0800 Subject: [PATCH 09/13] more macOS fix --- pandas/_libs/src/datetime/pd_datetime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/datetime/pd_datetime.c b/pandas/_libs/src/datetime/pd_datetime.c index 030d734aeab21..606edf1184aad 100644 --- a/pandas/_libs/src/datetime/pd_datetime.c +++ b/pandas/_libs/src/datetime/pd_datetime.c @@ -176,7 +176,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { } } - npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + int64_t npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); if (scaleNanosecToUnit(&npy_dt, base) == -1) { PyErr_Format(PyExc_ValueError, "Call to scaleNanosecToUnit with value %" NPY_DATETIME_FMT From 41db0a79de0467f409d1d7f57e175dd63fb3991d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 16:34:57 -0800 Subject: [PATCH 10/13] more macOS --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8df9028f63fa9..fe126b09afb41 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1236,7 +1236,7 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, } int is_datetimelike = 0; - npy_int64 i8date; + int64_t i8date; NPY_DATETIMEUNIT dateUnit = NPY_FR_ns; if (PyTypeNum_ISDATETIME(type_num)) { is_datetimelike = 1; From 5f13b1505d36270a88274d2d1ab8d37bd83fee05 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 17:11:50 -0800 Subject: [PATCH 11/13] even more macOS --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index fe126b09afb41..b4825784996da 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1502,9 +1502,9 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { // fallback to nanoseconds per sec for other objects // TODO(anyone): cast below loses precision if total_seconds return // value exceeds number of bits that significand can hold - npy_int64 value = PyObject_HasAttrString(obj, "_value") - ? get_long_attr(obj, "_value") - : (int64_t)total_seconds(obj) * 1000000000LL; + int64_t value = PyObject_HasAttrString(obj, "_value") + ? get_long_attr(obj, "_value") + : (int64_t)total_seconds(obj) * 1000000000LL; if (value == get_nat()) { tc->type = JT_NULL; From c31a4aa2eb021f51ef030a55dd371ffb4181e4fe Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Dec 2023 17:22:11 -0800 Subject: [PATCH 12/13] more macOS --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index b4825784996da..2daec844c4640 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1240,7 +1240,7 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, NPY_DATETIMEUNIT dateUnit = NPY_FR_ns; if (PyTypeNum_ISDATETIME(type_num)) { is_datetimelike = 1; - i8date = *(npy_int64 *)dataptr; + i8date = *(int64_t *)dataptr; dateUnit = get_datetime_metadata_from_dtype(dtype).base; } else if (PyDate_Check(item) || PyDelta_Check(item)) { is_datetimelike = 1; @@ -1294,7 +1294,7 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, ret = 0; break; } - snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, i8date); + snprintf(cLabel, size_of_cLabel, "%" PRId64, i8date); len = strlen(cLabel); } } From 04739fb8ab89324e5bfada23c49c80e3e1703766 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 5 Dec 2023 13:55:07 -0800 Subject: [PATCH 13/13] test casting precision --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 8 +++++--- pandas/tests/io/json/test_pandas.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 2daec844c4640..41bd2d23fe8ea 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1252,8 +1252,9 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, if (PyDelta_Check(item)) { // TODO(anyone): cast below loses precision if total_seconds return // value exceeds number of bits that significand can hold - i8date = (int64_t)total_seconds(item) * - 1000000000LL; // nanoseconds per second + // also liable to overflow + i8date = (int64_t)(total_seconds(item) * + 1000000000LL); // nanoseconds per second } else { // datetime.* objects don't follow above rules i8date = PyDateTimeToEpoch(item, NPY_FR_ns); @@ -1502,9 +1503,10 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { // fallback to nanoseconds per sec for other objects // TODO(anyone): cast below loses precision if total_seconds return // value exceeds number of bits that significand can hold + // also liable to overflow int64_t value = PyObject_HasAttrString(obj, "_value") ? get_long_attr(obj, "_value") - : (int64_t)total_seconds(obj) * 1000000000LL; + : (int64_t)(total_seconds(obj) * 1000000000LL); if (value == get_nat()) { tc->type = JT_NULL; diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 428c73c282426..40d3a7a7da4a4 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1144,6 +1144,18 @@ def test_timedelta_to_json(self, as_object, date_format, timedelta_typ): result = ser.to_json(date_format=date_format) assert result == expected + @pytest.mark.parametrize("as_object", [True, False]) + @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta]) + def test_timedelta_to_json_fractional_precision(self, as_object, timedelta_typ): + data = [timedelta_typ(milliseconds=42)] + ser = Series(data, index=data) + if as_object: + ser = ser.astype(object) + + result = ser.to_json() + expected = '{"42":42}' + assert result == expected + def test_default_handler(self): value = object() frame = DataFrame({"a": [7, value]})