Skip to content

Commit

Permalink
BUG: read_json not handling string dtype when converting to dates (#56195)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Nov 27, 2023
1 parent a29e4f6 commit f6eee83
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ I/O
- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
- Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`)
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`)
- Bug in :meth:`pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`)
Expand Down
11 changes: 9 additions & 2 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import ensure_str
from pandas.core.dtypes.common import (
ensure_str,
is_string_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCIndex

Expand Down Expand Up @@ -1249,7 +1252,7 @@ def _try_convert_data(
if self.dtype_backend is not lib.no_default and not isinstance(data, ABCIndex):
# Fall through for conversion later on
return data, True
elif data.dtype == "object":
elif is_string_dtype(data.dtype):
# try float
try:
data = data.astype("float64")
Expand Down Expand Up @@ -1301,6 +1304,10 @@ def _try_convert_to_date(self, data):
return data, False

new_data = data

if new_data.dtype == "string":
new_data = new_data.astype(object)

if new_data.dtype == "object":
try:
new_data = data.astype("int64")
Expand Down
30 changes: 17 additions & 13 deletions pandas/tests/io/json/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,27 +93,31 @@ def test_read_unsupported_compression_type():
pd.read_json(path, compression="unsupported")


@pytest.mark.parametrize(
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(
compression_only, read_infer, to_infer, compression_to_extension
compression_only, read_infer, to_infer, compression_to_extension, infer_string
):
# see gh-15008
compression = compression_only
with pd.option_context("future.infer_string", infer_string):
# see gh-15008
compression = compression_only

# We'll complete file extension subsequently.
filename = "test."
filename += compression_to_extension[compression]
# We'll complete file extension subsequently.
filename = "test."
filename += compression_to_extension[compression]

df = pd.DataFrame({"A": [1]})
df = pd.DataFrame({"A": [1]})

to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression

with tm.ensure_clean(filename) as path:
df.to_json(path, compression=to_compression)
result = pd.read_json(path, compression=read_compression)
tm.assert_frame_equal(result, df)
with tm.ensure_clean(filename) as path:
df.to_json(path, compression=to_compression)
result = pd.read_json(path, compression=read_compression)
tm.assert_frame_equal(result, df)


def test_to_json_compression_mode(compression):
Expand Down

0 comments on commit f6eee83

Please sign in to comment.