From 3e3c7139f45002b0f13139e461396fb2a71a32be Mon Sep 17 00:00:00 2001 From: gupta-paras Date: Thu, 12 Oct 2023 17:53:36 +0530 Subject: [PATCH 1/3] BUG: DataFrame.to_json OverflowError with np.long* dtypes --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/_libs/src/vendored/ujson/python/objToJSON.c | 5 +++++ pandas/tests/io/json/test_ujson.py | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 29a2d5c0b5877..a5e96a512b2df 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -348,6 +348,7 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) +- Bug in :meth:`DataFrame.to_json` OverflowError with np.long* dtypes (:issue:`55403`) - Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) Period diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8c55505f61b51..71a7cfc792b83 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1610,6 +1610,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { PyArray_DescrFromType(NPY_DOUBLE)); tc->type = JT_DOUBLE; return; + } else if (PyArray_IsScalar(obj, LongDouble)) { + PyErr_Format(PyExc_TypeError, + "%R (np.longdouble) is not JSON serializable at the moment", + obj); + goto INVALID; } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { PyErr_Format(PyExc_TypeError, "%R (0d array) is not JSON serializable at the moment", diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index d5f8c5200c4a3..ec075a3669516 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -818,6 +818,13 @@ def test_0d_array(self): with pytest.raises(TypeError, match=msg): ujson.ujson_dumps(np.array(1)) + def test_array_long_double(self): + msg = re.compile( + "1234.5.*\\(np.longdouble\\) is not JSON serializable at the moment" + ) + with pytest.raises(TypeError, match=msg): + ujson.ujson_dumps(np.longdouble(1234.5)) + class TestPandasJSONTests: def test_dataframe(self, orient): From 7a29022938fbd92362c70546c2632a18eca39dbd Mon Sep 17 00:00:00 2001 From: gupta-paras Date: Fri, 13 Oct 2023 22:13:48 +0530 Subject: [PATCH 2/3] BUG: DataFrame.to_json OverflowError with np.long* dtypes #Comment-1 --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 13 ++++++------- pandas/tests/io/json/test_ujson.py | 6 ++++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 71a7cfc792b83..085f0ff00583c 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1610,14 +1610,13 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { PyArray_DescrFromType(NPY_DOUBLE)); tc->type = JT_DOUBLE; return; - } else if (PyArray_IsScalar(obj, LongDouble)) { + } else if (PyArray_CheckScalar(obj)) { + /* This handles all cases of array of zero dimension (numpy.array(1)) OR + unimplemented serializable for numpy scalar type like: numpy.longdouble. + If we plan to handle any other numpy type for serializations, + we need to keep it above this block. */ PyErr_Format(PyExc_TypeError, - "%R (np.longdouble) is not JSON serializable at the moment", - obj); - goto INVALID; - } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { - PyErr_Format(PyExc_TypeError, - "%R (0d array) is not JSON serializable at the moment", + "%R (numpy-scalar) is not JSON serializable at the moment", obj); goto INVALID; } else if (object_is_na_type(obj)) { diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index ec075a3669516..646f6745936bd 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -814,13 +814,15 @@ def test_array_float(self): def test_0d_array(self): # gh-18878 - msg = re.escape("array(1) (0d array) is not JSON serializable at the moment") + msg = re.escape( + "array(1) (numpy-scalar) is not JSON serializable at the moment" + ) with pytest.raises(TypeError, match=msg): ujson.ujson_dumps(np.array(1)) def test_array_long_double(self): msg = re.compile( - "1234.5.*\\(np.longdouble\\) is not JSON serializable at the moment" + "1234.5.* \\(numpy-scalar\\) is not JSON serializable at the moment" ) with pytest.raises(TypeError, match=msg): ujson.ujson_dumps(np.longdouble(1234.5)) From 774519f9b19433b3c08040a9072e0368499de282 Mon Sep 17 00:00:00 2001 From: gupta-paras Date: Sat, 14 Oct 2023 08:26:34 +0530 Subject: [PATCH 3/3] BUG: DataFrame.to_json OverflowError with np.long* dtypes #Comment-2 --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index a5e96a512b2df..e40553353fc20 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -348,8 +348,8 @@ I/O - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) -- Bug in :meth:`DataFrame.to_json` OverflowError with np.long* dtypes (:issue:`55403`) - Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) +- Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) Period ^^^^^^ diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 085f0ff00583c..a4c93f1560a0e 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -1611,10 +1611,6 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { tc->type = JT_DOUBLE; return; } else if (PyArray_CheckScalar(obj)) { - /* This handles all cases of array of zero dimension (numpy.array(1)) OR - unimplemented serializable for numpy scalar type like: numpy.longdouble. - If we plan to handle any other numpy type for serializations, - we need to keep it above this block. */ PyErr_Format(PyExc_TypeError, "%R (numpy-scalar) is not JSON serializable at the moment", obj);