From 13cd37e6115d6dfd47e3556c60766ff44aaa3490 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 23:23:27 +0100 Subject: [PATCH] BUG: hdf can't deal with ea dtype columns --- doc/source/whatsnew/v2.1.4.rst | 1 + pandas/io/pytables.py | 3 +-- pandas/tests/io/pytables/test_round_trip.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 77ce303dc1bfe..0cbf211305d12 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -24,6 +24,7 @@ Bug fixes - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`) - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`) - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) +- Fixed bug in :meth:`DataFrame.to_hdf` raising when columns have ``StringDtype`` (:issue:`55088`) - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`) - Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9e0e3686e4aa2..50611197ad7dd 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -57,7 +57,6 @@ is_bool_dtype, is_complex_dtype, is_list_like, - is_object_dtype, is_string_dtype, needs_i8_conversion, ) @@ -2647,7 +2646,7 @@ class DataIndexableCol(DataCol): is_data_indexable = True def validate_names(self) -> None: - if not is_object_dtype(Index(self.values).dtype): + if not is_string_dtype(Index(self.values).dtype): # TODO: should the message here be more specifically non-str? raise ValueError("cannot have non-object label DataIndexableCol") diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 6c24843f18d0d..baac90e52e962 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -531,3 +531,18 @@ def test_round_trip_equals(tmp_path, setup_path): tm.assert_frame_equal(df, other) assert df.equals(other) assert other.equals(df) + + +def test_infer_string_columns(tmp_path, setup_path): + # GH# + pytest.importorskip("pyarrow") + path = tmp_path / setup_path + with pd.option_context("future.infer_string", True): + df = DataFrame(1, columns=list("ABCD"), index=list(range(10))).set_index( + ["A", "B"] + ) + expected = df.copy() + df.to_hdf(path, key="df", format="table") + + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected)