Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: pytables with non-nano dt64 #55622

Merged
merged 4 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,9 @@ I/O
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
- Bug in :func:`read_hdf` and :meth:`DataFrame.to_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this mention read_hdf/to_hdf?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated+green

- Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`)
-

Period
^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def stringify(value):

kind = ensure_decoded(self.kind)
meta = ensure_decoded(self.meta)
if kind in ("datetime64", "datetime"):
if kind == "datetime" or (kind and kind.startswith("datetime64")):
if isinstance(v, (int, float)):
v = stringify(v)
v = ensure_decoded(v)
Expand Down
38 changes: 24 additions & 14 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2152,7 +2152,6 @@ def convert(

val_kind = _ensure_decoded(self.kind)
values = _maybe_convert(values, val_kind, encoding, errors)

kwargs = {}
kwargs["name"] = _ensure_decoded(self.index_name)

Expand Down Expand Up @@ -2577,7 +2576,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
dtype = _ensure_decoded(dtype_name)

# reverse converts
if dtype == "datetime64":
if dtype.startswith("datetime64"):
# recreate with tz if indicated
converted = _set_tz(converted, tz, coerce=True)

Expand Down Expand Up @@ -2870,7 +2869,9 @@ def _get_index_factory(self, attrs):

def f(values, freq=None, tz=None):
# data are already in UTC, localize and convert if tz present
dta = DatetimeArray._simple_new(values.values, freq=freq)
dta = DatetimeArray._simple_new(
values.values, dtype=values.dtype, freq=freq
)
result = DatetimeIndex._simple_new(dta, name=None)
if tz is not None:
result = result.tz_localize("UTC").tz_convert(tz)
Expand Down Expand Up @@ -2961,7 +2962,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
else:
ret = node[start:stop]

if dtype == "datetime64":
if dtype and dtype.startswith("datetime64"):
# reconstruct a timezone if indicated
tz = getattr(attrs, "tz", None)
ret = _set_tz(ret, tz, coerce=True)
Expand Down Expand Up @@ -3170,7 +3171,7 @@ def write_array(

elif lib.is_np_dtype(value.dtype, "M"):
self._handle.create_array(self.group, key, value.view("i8"))
getattr(self.group, key)._v_attrs.value_type = "datetime64"
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
elif isinstance(value.dtype, DatetimeTZDtype):
# store as UTC
# with a zone
Expand All @@ -3185,7 +3186,7 @@ def write_array(
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
# attribute "tz"
node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr]
node._v_attrs.value_type = "datetime64"
node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
elif lib.is_np_dtype(value.dtype, "m"):
self._handle.create_array(self.group, key, value.view("i8"))
getattr(self.group, key)._v_attrs.value_type = "timedelta64"
Expand Down Expand Up @@ -4689,7 +4690,6 @@ def read(
selection = Selection(self, where=where, start=start, stop=stop)
# apply the selection filters & axis orderings
df = self.process_axes(df, selection=selection, columns=columns)

return df


Expand Down Expand Up @@ -4932,11 +4932,12 @@ def _set_tz(
# call below (which returns an ndarray). So we are only non-lossy
# if `tz` matches `values.tz`.
assert values.tz is None or values.tz == tz
if values.tz is not None:
return values

if tz is not None:
if isinstance(values, DatetimeIndex):
name = values.name
values = values.asi8
else:
name = None
values = values.ravel()
Expand Down Expand Up @@ -5019,8 +5020,12 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index
def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:
index: Index | np.ndarray

if kind == "datetime64":
index = DatetimeIndex(data)
if kind.startswith("datetime64"):
if kind == "datetime64":
# created before we stored resolution information
index = DatetimeIndex(data)
else:
index = DatetimeIndex(data.view(kind))
elif kind == "timedelta64":
index = TimedeltaIndex(data)
elif kind == "date":
Expand Down Expand Up @@ -5194,6 +5199,8 @@ def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str
def _get_converter(kind: str, encoding: str, errors: str):
if kind == "datetime64":
return lambda x: np.asarray(x, dtype="M8[ns]")
elif "datetime64" in kind:
return lambda x: np.asarray(x, dtype=kind)
elif kind == "string":
return lambda x: _unconvert_string_array(
x, nan_rep=None, encoding=encoding, errors=errors
Expand All @@ -5203,7 +5210,7 @@ def _get_converter(kind: str, encoding: str, errors: str):


def _need_convert(kind: str) -> bool:
if kind in ("datetime64", "string"):
if kind in ("datetime64", "string") or "datetime64" in kind:
return True
return False

Expand Down Expand Up @@ -5248,7 +5255,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
elif dtype_str.startswith(("int", "uint")):
kind = "integer"
elif dtype_str.startswith("datetime64"):
kind = "datetime64"
kind = dtype_str
elif dtype_str.startswith("timedelta"):
kind = "timedelta64"
elif dtype_str.startswith("bool"):
Expand All @@ -5273,8 +5280,11 @@ def _get_data_and_dtype_name(data: ArrayLike):
if isinstance(data, Categorical):
data = data.codes

# For datetime64tz we need to drop the TZ in tests TODO: why?
dtype_name = data.dtype.name.split("[")[0]
if isinstance(data.dtype, DatetimeTZDtype):
# For datetime64tz we need to drop the TZ in tests TODO: why?
dtype_name = f"datetime64[{data.dtype.unit}]"
else:
dtype_name = data.dtype.name

if data.dtype.kind in "mM":
data = np.asarray(data.view("i8"))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def test_append_raise(setup_path):
"dtype->bytes24,kind->string,shape->(1, 30)] "
"vs current table "
"[name->values_block_1,cname->values_block_1,"
"dtype->datetime64,kind->datetime64,shape->None]"
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
)
with pytest.raises(ValueError, match=msg):
store.append("df", df)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_table_index_incompatible_dtypes(setup_path):

with ensure_clean_store(setup_path) as store:
store.put("frame", df1, format="table")
msg = re.escape("incompatible kind in col [integer - datetime64]")
msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
with pytest.raises(TypeError, match=msg):
store.put("frame", df2, format="table", append=True)

Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,16 +541,22 @@ def test_store_index_name(setup_path):
tm.assert_frame_equal(recons, df)


@pytest.mark.parametrize("tz", [None, "US/Pacific"])
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@pytest.mark.parametrize("table_format", ["table", "fixed"])
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path):
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path, unit, tz):
# GH #13492
idx = Index(
pd.to_datetime([dt.date(2000, 1, 1), dt.date(2000, 1, 2)]),
name="cols\u05d2",
)
idx1 = Index(
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
name="rows\u05d0",
).tz_localize(tz)
idx1 = (
Index(
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
name="rows\u05d0",
)
.as_unit(unit)
.tz_localize(tz)
)
df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

Expand Down
Loading