Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Werror to CI #56277

Merged
merged 14 commits into from
Dec 6, 2023
2 changes: 1 addition & 1 deletion .circleci/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ if pip show pandas 1>/dev/null; then
fi

echo "Install pandas"
python -m pip install --no-build-isolation -ve .
python -m pip install --no-build-isolation -ve . --config-settings=setup-args="--werror"

echo "done"
6 changes: 4 additions & 2 deletions .github/actions/build_pandas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ runs:
- name: Build Pandas
run: |
if [[ ${{ inputs.editable }} == "true" ]]; then
pip install -e . --no-build-isolation -v --no-deps
pip install -e . --no-build-isolation -v --no-deps \
--config-settings=setup-args="--werror"
else
pip install . --no-build-isolation -v --no-deps
pip install . --no-build-isolation -v --no-deps \
--config-settings=setup-args="--werror"
fi
shell: bash -el {0}
6 changes: 3 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ jobs:
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e .
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir
export PANDAS_CI=1
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
Expand Down Expand Up @@ -292,7 +292,7 @@ jobs:
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e .
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir

- name: Run Tests
Expand Down Expand Up @@ -365,7 +365,7 @@ jobs:
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
python -m pip install -ve . --no-build-isolation --no-index --no-deps
python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror"
python -m pip list

- name: Run Tests
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/include/pandas/datetime/date_conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The full license is in the LICENSE file, distributed with this software.
#include <numpy/ndarraytypes.h>

// Scales value inplace from nanosecond resolution to unit resolution
int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
int scaleNanosecToUnit(int64_t *value, NPY_DATETIMEUNIT unit);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phofl pointed out that on macOS npy_int64 is a typedef for long, whereas int64_t is a typedef for long long. Unfortunately, accessing one through a pointer to the other violates the strict aliasing rule.

For now I just picked int64_t as it was bigger. I'm not sure that this matters too much in practice, but we do freely mix these uses today.


// Converts an int64 object representing a date to ISO format
// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/include/pandas/datetime/pd_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ extern "C" {
typedef struct {
npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT,
const npy_datetimestruct *);
int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT);
int (*scaleNanosecToUnit)(int64_t *, NPY_DATETIMEUNIT);
char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, NPY_DATETIMEUNIT, size_t *);
char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *);
npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT);
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/datetime/date_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The full license is in the LICENSE file, distributed with this software.
*
* Mutates the provided value directly. Returns 0 on success, non-zero on error.
*/
int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) {
int scaleNanosecToUnit(int64_t *value, NPY_DATETIMEUNIT unit) {
switch (unit) {
case NPY_FR_ns:
break;
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/datetime/pd_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
}
}

npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
int64_t npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
if (scaleNanosecToUnit(&npy_dt, base) == -1) {
PyErr_Format(PyExc_ValueError,
"Call to scaleNanosecToUnit with value %" NPY_DATETIME_FMT
Expand Down
24 changes: 16 additions & 8 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -1236,11 +1236,11 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
}

int is_datetimelike = 0;
npy_int64 i8date;
int64_t i8date;
NPY_DATETIMEUNIT dateUnit = NPY_FR_ns;
if (PyTypeNum_ISDATETIME(type_num)) {
is_datetimelike = 1;
i8date = *(npy_int64 *)dataptr;
i8date = *(int64_t *)dataptr;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a safe cast, but I'm leaving it to another PR to fix. The proper way to do this would be via memcpy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @seberg said the cast was fine, when I added this in #56114.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is likely pedantic, but I'm fairly certain this is a strict aliasing violation. Where/when that matters may be up for debate. The memcpy from that comment would seemingly be safer.

dateUnit = get_datetime_metadata_from_dtype(dtype).base;
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
is_datetimelike = 1;
Expand All @@ -1250,7 +1250,11 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
i8date = get_long_attr(item, "_value");
} else {
if (PyDelta_Check(item)) {
i8date = total_seconds(item) * 1000000000LL; // nanoseconds per second
// TODO(anyone): cast below loses precision if total_seconds return
// value exceeds number of bits that significand can hold
// also liable to overflow
i8date = (int64_t)(total_seconds(item) *
1000000000LL); // nanoseconds per second
} else {
// datetime.* objects don't follow above rules
i8date = PyDateTimeToEpoch(item, NPY_FR_ns);
Expand Down Expand Up @@ -1291,7 +1295,7 @@ static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
ret = 0;
break;
}
snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, i8date);
snprintf(cLabel, size_of_cLabel, "%" PRId64, i8date);
len = strlen(cLabel);
}
}
Expand Down Expand Up @@ -1495,10 +1499,14 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
}
return;
} else if (PyDelta_Check(obj)) {
npy_int64 value =
PyObject_HasAttrString(obj, "_value") ? get_long_attr(obj, "_value")
: // pd.Timedelta object or pd.NaT
total_seconds(obj) * 1000000000LL; // nanoseconds per sec
// pd.Timedelta object or pd.NaT should evaluate true here
// fallback to nanoseconds per sec for other objects
// TODO(anyone): cast below loses precision if total_seconds return
// value exceeds number of bits that significand can hold
// also liable to overflow
int64_t value = PyObject_HasAttrString(obj, "_value")
? get_long_attr(obj, "_value")
: (int64_t)(total_seconds(obj) * 1000000000LL);

if (value == get_nat()) {
tc->type = JT_NULL;
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,18 @@ def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
result = ser.to_json(date_format=date_format)
assert result == expected

@pytest.mark.parametrize("as_object", [True, False])
@pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
def test_timedelta_to_json_fractional_precision(self, as_object, timedelta_typ):
data = [timedelta_typ(milliseconds=42)]
ser = Series(data, index=data)
if as_object:
ser = ser.astype(object)

result = ser.to_json()
expected = '{"42":42}'
assert result == expected

def test_default_handler(self):
value = object()
frame = DataFrame({"a": [7, value]})
Expand Down
Loading