Skip to content

Commit

Permalink
DEPR: DatetimeArray/TimedeltaArray.__init__ (pandas-dev#56043)
Browse files Browse the repository at this point in the history
* DEPR: DatetimeArray/TimedeltaArray.__init__

* mypy fixup

* fix PeriodArray test

* update doctest

* remove accidental
  • Loading branch information
jbrockmendel authored and cbpygit committed Jan 2, 2024
1 parent 35bc2d3 commit a4ef147
Show file tree
Hide file tree
Showing 26 changed files with 258 additions and 193 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ Other Deprecations
- Deprecated :func:`pd.core.internals.api.make_block`, use public APIs instead (:issue:`40226`)
- Deprecated :func:`read_gbq` and :meth:`DataFrame.to_gbq`. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`)
- Deprecated :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna`; use :meth:`.DataFrameGroupBy.ffill`, :meth:`.DataFrameGroupBy.bfill` for forward and backward filling or :meth:`.DataFrame.fillna` to fill with a single value (or the Series equivalents) (:issue:`55718`)
- Deprecated :meth:`DatetimeArray.__init__` and :meth:`TimedeltaArray.__init__`, use :func:`array` instead (:issue:`55623`)
- Deprecated :meth:`Index.format`, use ``index.astype(str)`` or ``index.map(formatter)`` instead (:issue:`55413`)
- Deprecated :meth:`Series.ravel`, the underlying array is already 1D, so ravel is not necessary (:issue:`52511`)
- Deprecated :meth:`Series.resample` and :meth:`DataFrame.resample` with a :class:`PeriodIndex` (and the 'convention' keyword), convert to :class:`DatetimeIndex` (with ``.to_timestamp()``) before resampling instead (:issue:`53481`)
Expand Down
11 changes: 5 additions & 6 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,20 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
cls = dtype.construct_array_type()
return cls(arr.view("i8"), dtype=dtype)
elif isinstance(dtype, DatetimeTZDtype):
# error: Incompatible types in assignment (expression has type
# "type[DatetimeArray]", variable has type "type[PeriodArray]")
cls = dtype.construct_array_type() # type: ignore[assignment]
dt_cls = dtype.construct_array_type()
dt64_values = arr.view(f"M8[{dtype.unit}]")
return cls(dt64_values, dtype=dtype)
return dt_cls._simple_new(dt64_values, dtype=dtype)
elif lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
from pandas.core.arrays import DatetimeArray

dt64_values = arr.view(dtype)
return DatetimeArray(dt64_values, dtype=dtype)
return DatetimeArray._simple_new(dt64_values, dtype=dtype)

elif lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
from pandas.core.arrays import TimedeltaArray

td64_values = arr.view(dtype)
return TimedeltaArray(td64_values, dtype=dtype)
return TimedeltaArray._simple_new(td64_values, dtype=dtype)

# error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
# type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
Expand Down
17 changes: 12 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def _unbox_scalar(
Examples
--------
>>> arr = pd.arrays.DatetimeArray(np.array(['1970-01-01'], 'datetime64[ns]'))
>>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
>>> arr._unbox_scalar(arr[0])
numpy.datetime64('1970-01-01T00:00:00.000000000')
"""
Expand Down Expand Up @@ -1409,7 +1409,7 @@ def __add__(self, other):
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray(result)
return TimedeltaArray._from_sequence(result)
return result

def __radd__(self, other):
Expand Down Expand Up @@ -1469,7 +1469,7 @@ def __sub__(self, other):
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray(result)
return TimedeltaArray._from_sequence(result)
return result

def __rsub__(self, other):
Expand All @@ -1488,7 +1488,7 @@ def __rsub__(self, other):
# Avoid down-casting DatetimeIndex
from pandas.core.arrays import DatetimeArray

other = DatetimeArray(other)
other = DatetimeArray._from_sequence(other)
return other - self
elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64:
# GH#19959 datetime - datetime is well-defined as timedelta,
Expand Down Expand Up @@ -1725,7 +1725,7 @@ def _groupby_op(
self = cast("DatetimeArray | TimedeltaArray", self)
new_dtype = f"m8[{self.unit}]"
res_values = res_values.view(new_dtype)
return TimedeltaArray(res_values)
return TimedeltaArray._simple_new(res_values, dtype=res_values.dtype)

res_values = res_values.view(self._ndarray.dtype)
return self._from_backing_data(res_values)
Expand Down Expand Up @@ -1944,6 +1944,13 @@ class TimelikeOps(DatetimeLikeArrayMixin):
def __init__(
self, values, dtype=None, freq=lib.no_default, copy: bool = False
) -> None:
warnings.warn(
# GH#55623
f"{type(self).__name__}.__init__ is deprecated and will be "
"removed in a future version. Use pd.array instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if dtype is not None:
dtype = pandas_dtype(dtype)

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc]
Examples
--------
>>> pd.arrays.DatetimeArray(pd.DatetimeIndex(['2023-01-01', '2023-01-02']),
... freq='D')
>>> pd.arrays.DatetimeArray._from_sequence(
... pd.DatetimeIndex(['2023-01-01', '2023-01-02'], freq='D'))
<DatetimeArray>
['2023-01-01 00:00:00', '2023-01-02 00:00:00']
Length: 2, dtype: datetime64[ns]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
new_parr = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
dta = DatetimeArray(new_data)
dta = DatetimeArray._from_sequence(new_data)

if self.freq.name == "B":
# See if we can retain BDay instead of Day in cases where
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class TimedeltaArray(dtl.TimelikeOps):
Examples
--------
>>> pd.arrays.TimedeltaArray(pd.TimedeltaIndex(['1h', '2h']))
>>> pd.arrays.TimedeltaArray._from_sequence(pd.TimedeltaIndex(['1h', '2h']))
<TimedeltaArray>
['0 days 01:00:00', '0 days 02:00:00']
Length: 2, dtype: timedelta64[ns]
Expand Down Expand Up @@ -709,11 +709,13 @@ def __neg__(self) -> TimedeltaArray:
return type(self)._simple_new(-self._ndarray, dtype=self.dtype, freq=freq)

def __pos__(self) -> TimedeltaArray:
return type(self)(self._ndarray.copy(), freq=self.freq)
return type(self)._simple_new(
self._ndarray.copy(), dtype=self.dtype, freq=self.freq
)

def __abs__(self) -> TimedeltaArray:
# Note: freq is not preserved
return type(self)(np.abs(self._ndarray))
return type(self)._simple_new(np.abs(self._ndarray), dtype=self.dtype)

# ----------------------------------------------------------------
# Conversion Methods - Vectorized analogues of Timedelta methods
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> DatetimeArray:
else:
np_arr = array.to_numpy()

return DatetimeArray(np_arr, dtype=self, copy=False)
return DatetimeArray._from_sequence(np_arr, dtype=self, copy=False)

def __setstate__(self, state) -> None:
# for pickle compat. __get_state__ is defined in the
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,10 @@ def _fast_union(self, other: Self, sort=None) -> Self:
dates = concat_compat([left._values, right_chunk])
# The can_fast_union check ensures that the result.freq
# should match self.freq
dates = type(self._data)(dates, freq=self.freq)
assert isinstance(dates, type(self._data))
# error: Item "ExtensionArray" of "ExtensionArray |
# ndarray[Any, Any]" has no attribute "_freq"
assert dates._freq == self.freq # type: ignore[union-attr]
result = type(self)._simple_new(dates)
return result
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2345,7 +2345,7 @@ def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike:
ts = Timestamp(fill_value).as_unit(dtype.unit)
i8values = np.full(shape, ts._value)
dt64values = i8values.view(f"M8[{dtype.unit}]")
return DatetimeArray(dt64values, dtype=dtype)
return DatetimeArray._simple_new(dt64values, dtype=dtype)

elif is_1d_only_ea_dtype(dtype):
dtype = cast(ExtensionDtype, dtype)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
new_dtype = get_supported_dtype(obj.dtype)
obj = obj.astype(new_dtype)
right = np.broadcast_to(obj, shape)
return DatetimeArray(right)
return DatetimeArray._simple_new(right, dtype=right.dtype)

return Timestamp(obj)

Expand All @@ -563,7 +563,7 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
new_dtype = get_supported_dtype(obj.dtype)
obj = obj.astype(new_dtype)
right = np.broadcast_to(obj, shape)
return TimedeltaArray(right)
return TimedeltaArray._simple_new(right, dtype=right.dtype)

# In particular non-nanosecond timedelta64 needs to be cast to
# nanoseconds, or else we get undesired behavior like
Expand Down
103 changes: 63 additions & 40 deletions pandas/tests/arrays/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ def test_from_sequence_invalid_type(self):
def test_only_1dim_accepted(self):
arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")

with pytest.raises(ValueError, match="Only 1-dimensional"):
# 3-dim, we allow 2D to sneak in for ops purposes GH#29853
DatetimeArray(arr.reshape(2, 2, 1))
depr_msg = "DatetimeArray.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="Only 1-dimensional"):
# 3-dim, we allow 2D to sneak in for ops purposes GH#29853
DatetimeArray(arr.reshape(2, 2, 1))

with pytest.raises(ValueError, match="Only 1-dimensional"):
# 0-dim
DatetimeArray(arr[[0]].squeeze())
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="Only 1-dimensional"):
# 0-dim
DatetimeArray(arr[[0]].squeeze())

def test_freq_validation(self):
# GH#24623 check that invalid instances cannot be created with the
Expand All @@ -36,8 +39,10 @@ def test_freq_validation(self):
"Inferred frequency h from passed values does not "
"conform to passed frequency W-SUN"
)
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, freq="W")
depr_msg = "DatetimeArray.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, freq="W")

@pytest.mark.parametrize(
"meth",
Expand Down Expand Up @@ -72,31 +77,40 @@ def test_from_pandas_array(self):
tm.assert_datetime_array_equal(result, expected)

def test_mismatched_timezone_raises(self):
arr = DatetimeArray(
np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"),
dtype=DatetimeTZDtype(tz="US/Central"),
)
depr_msg = "DatetimeArray.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
arr = DatetimeArray(
np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"),
dtype=DatetimeTZDtype(tz="US/Central"),
)
dtype = DatetimeTZDtype(tz="US/Eastern")
msg = r"dtype=datetime64\[ns.*\] does not match data dtype datetime64\[ns.*\]"
with pytest.raises(TypeError, match=msg):
DatetimeArray(arr, dtype=dtype)
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(TypeError, match=msg):
DatetimeArray(arr, dtype=dtype)

# also with mismatched tzawareness
with pytest.raises(TypeError, match=msg):
DatetimeArray(arr, dtype=np.dtype("M8[ns]"))
with pytest.raises(TypeError, match=msg):
DatetimeArray(arr.tz_localize(None), dtype=arr.dtype)
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(TypeError, match=msg):
DatetimeArray(arr, dtype=np.dtype("M8[ns]"))
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(TypeError, match=msg):
DatetimeArray(arr.tz_localize(None), dtype=arr.dtype)

def test_non_array_raises(self):
with pytest.raises(ValueError, match="list"):
DatetimeArray([1, 2, 3])
depr_msg = "DatetimeArray.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="list"):
DatetimeArray([1, 2, 3])

def test_bool_dtype_raises(self):
arr = np.array([1, 2, 3], dtype="bool")

depr_msg = "DatetimeArray.__init__ is deprecated"
msg = "Unexpected value for 'dtype': 'bool'. Must be"
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr)
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr)

msg = r"dtype bool cannot be converted to datetime64\[ns\]"
with pytest.raises(TypeError, match=msg):
Expand All @@ -109,43 +123,52 @@ def test_bool_dtype_raises(self):
pd.to_datetime(arr)

def test_incorrect_dtype_raises(self):
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
depr_msg = "DatetimeArray.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category")

with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="m8[s]")
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="m8[s]")

with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="M8[D]")
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="M8[D]")

def test_mismatched_values_dtype_units(self):
arr = np.array([1, 2, 3], dtype="M8[s]")
dtype = np.dtype("M8[ns]")
msg = "Values resolution does not match dtype."
depr_msg = "DatetimeArray.__init__ is deprecated"

with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, dtype=dtype)
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, dtype=dtype)

dtype2 = DatetimeTZDtype(tz="UTC", unit="ns")
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, dtype=dtype2)
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, dtype=dtype2)

def test_freq_infer_raises(self):
with pytest.raises(ValueError, match="Frequency inference"):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer")
depr_msg = "DatetimeArray.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
with pytest.raises(ValueError, match="Frequency inference"):
DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer")

def test_copy(self):
data = np.array([1, 2, 3], dtype="M8[ns]")
arr = DatetimeArray(data, copy=False)
arr = DatetimeArray._from_sequence(data, copy=False)
assert arr._ndarray is data

arr = DatetimeArray(data, copy=True)
arr = DatetimeArray._from_sequence(data, copy=True)
assert arr._ndarray is not data

@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_numpy_datetime_unit(self, unit):
data = np.array([1, 2, 3], dtype=f"M8[{unit}]")
arr = DatetimeArray(data)
arr = DatetimeArray._from_sequence(data)
assert arr.unit == unit
assert arr[0].unit == unit

Expand Down Expand Up @@ -210,7 +233,7 @@ def test_from_arrowtest_from_arrow_with_different_units_and_timezones_with_(
dtype = DatetimeTZDtype(unit=pd_unit, tz=pd_tz)

result = dtype.__from_arrow__(arr)
expected = DatetimeArray(
expected = DatetimeArray._from_sequence(
np.array(data, dtype=f"datetime64[{pa_unit}]").astype(f"datetime64[{pd_unit}]"),
dtype=dtype,
)
Expand Down Expand Up @@ -238,7 +261,7 @@ def test_from_arrow_from_empty(unit, tz):
dtype = DatetimeTZDtype(unit=unit, tz=tz)

result = dtype.__from_arrow__(arr)
expected = DatetimeArray(np.array(data, dtype=f"datetime64[{unit}]"))
expected = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]"))
expected = expected.tz_localize(tz=tz)
tm.assert_extension_array_equal(result, expected)

Expand All @@ -254,7 +277,7 @@ def test_from_arrow_from_integers():
dtype = DatetimeTZDtype(unit="ns", tz="UTC")

result = dtype.__from_arrow__(arr)
expected = DatetimeArray(np.array(data, dtype="datetime64[ns]"))
expected = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]"))
expected = expected.tz_localize("UTC")
tm.assert_extension_array_equal(result, expected)

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def test_array_copy():
),
(
np.array([1, 2], dtype="M8[ns]"),
DatetimeArray(np.array([1, 2], dtype="M8[ns]")),
DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
),
(
np.array([1, 2], dtype="M8[us]"),
Expand Down Expand Up @@ -327,11 +327,11 @@ def test_array_copy():
),
(
np.array([1, 2], dtype="m8[ns]"),
TimedeltaArray(np.array([1, 2], dtype="m8[ns]")),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
),
(
np.array([1, 2], dtype="m8[us]"),
TimedeltaArray(np.array([1, 2], dtype="m8[us]")),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
),
# integer
([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
Expand Down
Loading

0 comments on commit a4ef147

Please sign in to comment.