Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: pytables with non-nano dt64 #55622

Merged
merged 4 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,10 @@ I/O
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`)
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
- Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`)
-

Period
^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def stringify(value):

kind = ensure_decoded(self.kind)
meta = ensure_decoded(self.meta)
if kind in ("datetime64", "datetime"):
if kind == "datetime" or (kind and kind.startswith("datetime64")):
if isinstance(v, (int, float)):
v = stringify(v)
v = ensure_decoded(v)
Expand Down
38 changes: 24 additions & 14 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2152,7 +2152,6 @@ def convert(

val_kind = _ensure_decoded(self.kind)
values = _maybe_convert(values, val_kind, encoding, errors)

kwargs = {}
kwargs["name"] = _ensure_decoded(self.index_name)

Expand Down Expand Up @@ -2577,7 +2576,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
dtype = _ensure_decoded(dtype_name)

# reverse converts
if dtype == "datetime64":
if dtype.startswith("datetime64"):
# recreate with tz if indicated
converted = _set_tz(converted, tz, coerce=True)

Expand Down Expand Up @@ -2870,7 +2869,9 @@ def _get_index_factory(self, attrs):

def f(values, freq=None, tz=None):
# data are already in UTC, localize and convert if tz present
dta = DatetimeArray._simple_new(values.values, freq=freq)
dta = DatetimeArray._simple_new(
values.values, dtype=values.dtype, freq=freq
)
result = DatetimeIndex._simple_new(dta, name=None)
if tz is not None:
result = result.tz_localize("UTC").tz_convert(tz)
Expand Down Expand Up @@ -2961,7 +2962,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
else:
ret = node[start:stop]

if dtype == "datetime64":
if dtype and dtype.startswith("datetime64"):
# reconstruct a timezone if indicated
tz = getattr(attrs, "tz", None)
ret = _set_tz(ret, tz, coerce=True)
Expand Down Expand Up @@ -3170,7 +3171,7 @@ def write_array(

elif lib.is_np_dtype(value.dtype, "M"):
self._handle.create_array(self.group, key, value.view("i8"))
getattr(self.group, key)._v_attrs.value_type = "datetime64"
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
elif isinstance(value.dtype, DatetimeTZDtype):
# store as UTC
# with a zone
Expand All @@ -3185,7 +3186,7 @@ def write_array(
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
# attribute "tz"
node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr]
node._v_attrs.value_type = "datetime64"
node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
elif lib.is_np_dtype(value.dtype, "m"):
self._handle.create_array(self.group, key, value.view("i8"))
getattr(self.group, key)._v_attrs.value_type = "timedelta64"
Expand Down Expand Up @@ -4689,7 +4690,6 @@ def read(
selection = Selection(self, where=where, start=start, stop=stop)
# apply the selection filters & axis orderings
df = self.process_axes(df, selection=selection, columns=columns)

return df


Expand Down Expand Up @@ -4932,11 +4932,12 @@ def _set_tz(
# call below (which returns an ndarray). So we are only non-lossy
# if `tz` matches `values.tz`.
assert values.tz is None or values.tz == tz
if values.tz is not None:
return values

if tz is not None:
if isinstance(values, DatetimeIndex):
name = values.name
values = values.asi8
else:
name = None
values = values.ravel()
Expand Down Expand Up @@ -5019,8 +5020,12 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index
def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:
index: Index | np.ndarray

if kind == "datetime64":
index = DatetimeIndex(data)
if kind.startswith("datetime64"):
if kind == "datetime64":
# created before we stored resolution information
index = DatetimeIndex(data)
else:
index = DatetimeIndex(data.view(kind))
elif kind == "timedelta64":
index = TimedeltaIndex(data)
elif kind == "date":
Expand Down Expand Up @@ -5194,6 +5199,8 @@ def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str
def _get_converter(kind: str, encoding: str, errors: str):
if kind == "datetime64":
return lambda x: np.asarray(x, dtype="M8[ns]")
elif "datetime64" in kind:
return lambda x: np.asarray(x, dtype=kind)
elif kind == "string":
return lambda x: _unconvert_string_array(
x, nan_rep=None, encoding=encoding, errors=errors
Expand All @@ -5203,7 +5210,7 @@ def _get_converter(kind: str, encoding: str, errors: str):


def _need_convert(kind: str) -> bool:
if kind in ("datetime64", "string"):
if kind in ("datetime64", "string") or "datetime64" in kind:
return True
return False

Expand Down Expand Up @@ -5248,7 +5255,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
elif dtype_str.startswith(("int", "uint")):
kind = "integer"
elif dtype_str.startswith("datetime64"):
kind = "datetime64"
kind = dtype_str
elif dtype_str.startswith("timedelta"):
kind = "timedelta64"
elif dtype_str.startswith("bool"):
Expand All @@ -5273,8 +5280,11 @@ def _get_data_and_dtype_name(data: ArrayLike):
if isinstance(data, Categorical):
data = data.codes

# For datetime64tz we need to drop the TZ in tests TODO: why?
dtype_name = data.dtype.name.split("[")[0]
if isinstance(data.dtype, DatetimeTZDtype):
# For datetime64tz we need to drop the TZ in tests TODO: why?
dtype_name = f"datetime64[{data.dtype.unit}]"
else:
dtype_name = data.dtype.name

if data.dtype.kind in "mM":
data = np.asarray(data.view("i8"))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def test_append_raise(setup_path):
"dtype->bytes24,kind->string,shape->(1, 30)] "
"vs current table "
"[name->values_block_1,cname->values_block_1,"
"dtype->datetime64,kind->datetime64,shape->None]"
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
)
with pytest.raises(ValueError, match=msg):
store.append("df", df)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_table_index_incompatible_dtypes(setup_path):

with ensure_clean_store(setup_path) as store:
store.put("frame", df1, format="table")
msg = re.escape("incompatible kind in col [integer - datetime64]")
msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
with pytest.raises(TypeError, match=msg):
store.put("frame", df2, format="table", append=True)

Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,16 +541,22 @@ def test_store_index_name(setup_path):
tm.assert_frame_equal(recons, df)


@pytest.mark.parametrize("tz", [None, "US/Pacific"])
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@pytest.mark.parametrize("table_format", ["table", "fixed"])
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path):
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path, unit, tz):
# GH #13492
idx = Index(
pd.to_datetime([dt.date(2000, 1, 1), dt.date(2000, 1, 2)]),
name="cols\u05d2",
)
idx1 = Index(
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
name="rows\u05d0",
).tz_localize(tz)
idx1 = (
Index(
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
name="rows\u05d0",
)
.as_unit(unit)
.tz_localize(tz)
)
df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

Expand Down
Loading