
Commit

Merge branch 'main' into FIX-date_range-inclusive
PiotrekB416 authored Oct 31, 2023
2 parents dcb4707 + b0a0c68 commit a3f2097
Showing 11 changed files with 66 additions and 38 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
@@ -69,7 +69,7 @@ jobs:
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write (warnings)"
- name: "Copy-on-Write 3.11 (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
@@ -98,7 +98,7 @@ jobs:
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
cancel-in-progress: true

services:
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.3.rst
@@ -21,6 +21,7 @@ Fixed regressions

Bug fixes
~~~~~~~~~
- Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`)
- Fixed bug in :meth:`DatetimeArray._generate_range` where ``inclusive`` argument behavior did not match interval notation (:issue:`55293`,:issue:`46331`)
-

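A minimal illustration of the two entries above (not part of the commit; assumes current pandas 2.x semantics, with made-up dates):

    import pandas as pd

    # With the fix, `inclusive` follows interval notation: "right" drops the start point.
    idx = pd.date_range("2023-01-01", "2023-01-04", freq="D", inclusive="right")
    # DatetimeIndex(['2023-01-02', '2023-01-03', '2023-01-04'], dtype='datetime64[ns]', freq='D')

    # DatetimeIndex.diff returns element-wise differences instead of raising TypeError.
    dti = pd.to_datetime([10, 20, 30], unit="s")
    dti.diff(1)
    # TimedeltaIndex([NaT, '0 days 00:00:10', '0 days 00:00:10'], dtype='timedelta64[ns]')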
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -330,6 +330,7 @@ Datetimelike
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
- Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
-

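A rough sketch of the new v2.2.0 entry, assuming the constructor behaviour the tests later in this diff assert (the example value is illustrative):

    import pandas as pd

    # 2999-01-01 overflows datetime64[ns] but fits in "us", so with the fix
    # this no longer raises OutOfBoundsDatetime.
    ser = pd.Series(["2999-01-01"], dtype="datetime64[us, UTC]")
    ser.dtype  # datetime64[us, UTC]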
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyi
@@ -29,5 +29,5 @@ def array_to_datetime(
# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
) -> npt.NDArray[np.int64]: ...
32 changes: 17 additions & 15 deletions pandas/_libs/tslib.pyx
@@ -519,7 +519,12 @@ cpdef array_to_datetime(
iresult[i] = _ts.value

tz = _ts.tzinfo
if tz is not None:
if _ts.value == NPY_NAT:
# e.g. "NaT" string or empty string, we do not consider
# this as either tzaware or tznaive. See
# test_to_datetime_with_empty_str_utc_false_format_mixed
pass
elif tz is not None:
# dateutil timezone objects cannot be hashed, so
# store the UTC offsets in seconds instead
nsecs = tz.utcoffset(None).total_seconds()
@@ -610,7 +615,6 @@ cdef _array_to_datetime_object(
# 1) NaT or NaT-like values
# 2) datetime strings, which we return as datetime.datetime
# 3) special strings - "now" & "today"
unique_timezones = set()
for i in range(n):
# Analogous to: val = values[i]
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
@@ -640,7 +644,6 @@
tzinfo=tsobj.tzinfo,
fold=tsobj.fold,
)
unique_timezones.add(tsobj.tzinfo)

except (ValueError, OverflowError) as ex:
ex.args = (f"{ex}, at position {i}", )
@@ -658,20 +661,19 @@

cnp.PyArray_MultiIter_NEXT(mi)

if len(unique_timezones) > 1:
warnings.warn(
"In a future version of pandas, parsing datetimes with mixed time "
"zones will raise an error unless `utc=True`. "
"Please specify `utc=True` to opt in to the new behaviour "
"and silence this warning. To create a `Series` with mixed offsets and "
"`object` dtype, please use `apply` and `datetime.datetime.strptime`",
FutureWarning,
stacklevel=find_stack_level(),
)
warnings.warn(
"In a future version of pandas, parsing datetimes with mixed time "
"zones will raise an error unless `utc=True`. "
"Please specify `utc=True` to opt in to the new behaviour "
"and silence this warning. To create a `Series` with mixed offsets and "
"`object` dtype, please use `apply` and `datetime.datetime.strptime`",
FutureWarning,
stacklevel=find_stack_level(),
)
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz):
def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)
@@ -707,7 +709,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
else:
# datetime64, tznaive pydatetime, int, float
ts = ts.tz_localize(tz)
ts = ts.as_unit("ns")
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value

# Analogous to: result[i] = ival
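A sketch of the user-visible effect of these tslib changes, assuming the pandas 2.x deprecation path for mixed timezones (the example strings are illustrative, not from the commit):

    import pandas as pd

    # A tz-aware value plus an empty string is no longer treated as "mixed",
    # so the result stays tz-aware (the empty string becomes NaT).
    pd.to_datetime(["2020-01-01 00:00+00:00", ""], format="mixed")

    # Genuinely mixed offsets without utc=True still fall back to object dtype
    # and emit the FutureWarning shown in the diff.
    pd.to_datetime(["2020-01-01 00:00+01:00", "2020-01-01 00:00+02:00"], format="mixed")

    # Passing utc=True converts everything to UTC and avoids the warning.
    pd.to_datetime(["2020-01-01 00:00+01:00", "2020-01-01 00:00+02:00"],
                   format="mixed", utc=True)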
19 changes: 11 additions & 8 deletions pandas/core/arrays/datetimes.py
@@ -355,7 +355,7 @@ def _from_sequence_not_strict(
# DatetimeTZDtype
unit = dtype.unit

subarr, tz, inferred_freq = _sequence_to_dt64ns(
subarr, tz, inferred_freq = _sequence_to_dt64(
data,
copy=copy,
tz=tz,
@@ -2173,7 +2173,7 @@ def std(
# Constructor Helpers


def _sequence_to_dt64ns(
def _sequence_to_dt64(
data,
*,
copy: bool = False,
@@ -2199,7 +2199,8 @@ def _sequence_to_dt64ns(
Returns
-------
result : numpy.ndarray
The sequence converted to a numpy array with dtype ``datetime64[ns]``.
The sequence converted to a numpy array with dtype ``datetime64[unit]``.
Where `unit` is "ns" unless specified otherwise by `out_unit`.
tz : tzinfo or None
Either the user-provided tzinfo or one inferred from the data.
inferred_freq : Tick or None
@@ -2222,9 +2223,9 @@
data, copy = maybe_convert_dtype(data, copy, tz=tz)
data_dtype = getattr(data, "dtype", None)

out_dtype = DT64NS_DTYPE
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")
if out_unit is None:
out_unit = "ns"
out_dtype = np.dtype(f"M8[{out_unit}]")

if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
@@ -2235,8 +2236,10 @@
elif tz is not None and ambiguous == "raise":
# TODO: yearfirst/dayfirst/etc?
obj_data = np.asarray(data, dtype=object)
i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
return i8data.view(DT64NS_DTYPE), tz, None
i8data = tslib.array_to_datetime_with_tz(
obj_data, tz, abbrev_to_npy_unit(out_unit)
)
return i8data.view(out_dtype), tz, None
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
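Public-facing effect of the helper rename and the out_unit plumbing, sketched from the constructor tests later in this diff (values are illustrative): object input with a tz-aware, non-nanosecond dtype is converted at the requested unit instead of being forced through datetime64[ns].

    import numpy as np
    import pandas as pd

    vals = np.array([pd.Timestamp("2999-01-01"), "2999-01-02 03:04:05.678910"],
                    dtype=object)
    idx = pd.DatetimeIndex(vals, dtype="M8[us, US/Pacific]")
    idx.dtype  # datetime64[us, US/Pacific]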
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
@@ -6991,7 +6991,7 @@ def infer_objects(self, copy: bool = True) -> Index:
return result

@final
def diff(self, periods: int = 1) -> Self:
def diff(self, periods: int = 1) -> Index:
"""
Computes the difference between consecutive values in the Index object.
@@ -7017,7 +7017,7 @@ def diff(self, periods: int = 1) -> Self:
Index([nan, 10.0, 10.0, 10.0, 10.0], dtype='float64')
"""
return self._constructor(self.to_series().diff(periods))
return Index(self.to_series().diff(periods))

@final
def round(self, decimals: int = 0) -> Self:
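Loosening the annotation from Self to Index, and building the result with Index(...) instead of self._constructor, is what fixes GH 55080: differencing does not generally preserve the index type, and for a DatetimeIndex the differences are timedeltas. A small sketch, assuming current pandas behaviour:

    import pandas as pd

    dti = pd.DatetimeIndex(["2023-01-01", "2023-01-02", "2023-01-04"])
    dti.diff(1)
    # TimedeltaIndex([NaT, '1 days', '2 days'], dtype='timedelta64[ns]')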
11 changes: 7 additions & 4 deletions pandas/tests/indexes/datetimes/test_constructors.py
@@ -1013,16 +1013,19 @@ def test_dti_convert_datetime_list(self, tzstr):
dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
tm.assert_index_equal(dr, dr2)

def test_dti_constructor_with_non_nano_dtype(self):
# GH#55756
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_dti_constructor_with_non_nano_dtype(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
result = DatetimeIndex(vals, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
expected = DatetimeIndex(exp_arr, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz)
tm.assert_index_equal(result, expected)

result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
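To spell out the test comment about the bare integer (an illustrative sketch, not part of the commit): an integer in an object sequence is read as nanoseconds since the epoch, so 2500 ns is floored to 2 microseconds when the requested unit is "us".

    import numpy as np
    import pandas as pd

    vals = np.array([pd.Timestamp("2999-01-01"), "2999-01-02 03:04:05.678910", 2500],
                    dtype=object)
    idx = pd.DatetimeIndex(vals, dtype="M8[us, UTC]")
    idx[-1]  # Timestamp('1970-01-01 00:00:00.000002+0000', tz='UTC')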
8 changes: 8 additions & 0 deletions pandas/tests/indexes/test_datetimelike.py
@@ -159,3 +159,11 @@ def test_where_cast_str(self, simple_index):

result = index.where(mask, ["foo"])
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_diff(self, unit):
# GH 55080
dti = pd.to_datetime([10, 20, 30], unit=unit).as_unit(unit)
result = dti.diff(1)
expected = pd.TimedeltaIndex([pd.NaT, 10, 10], unit=unit).as_unit(unit)
tm.assert_index_equal(result, expected)
11 changes: 7 additions & 4 deletions pandas/tests/series/methods/test_astype.py
@@ -107,18 +107,21 @@ def test_astype_dict_like(self, dtype_class):


class TestAstype:
def test_astype_object_to_dt64_non_nano(self):
# GH#55756
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_astype_object_to_dt64_non_nano(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
ser = Series(vals, dtype=object)
result = ser.astype(dtype)

exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
expected = Series(exp_arr, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz)
tm.assert_series_equal(result, expected)

def test_astype_mixed_object_to_dt64tz(self):
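The Series counterpart goes through astype and lands on the same values (a sketch, assuming the behaviour the parametrized test asserts):

    import pandas as pd

    ser = pd.Series([pd.Timestamp("2999-01-01"), "2999-01-02 03:04:05.678910", 2500],
                    dtype=object)
    ser.astype("M8[us, UTC]").dtype  # datetime64[us, UTC]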
11 changes: 9 additions & 2 deletions pandas/tests/tools/test_to_datetime.py
@@ -3675,10 +3675,17 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):

def test_to_datetime_with_empty_str_utc_false_format_mixed():
# GH 50887
result = to_datetime(["2020-01-01 00:00+00:00", ""], format="mixed")
expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype=object)
vals = ["2020-01-01 00:00+00:00", ""]
result = to_datetime(vals, format="mixed")
expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[ns, UTC]")
tm.assert_index_equal(result, expected)

# Check that a couple of other similar paths work the same way
alt = to_datetime(vals)
tm.assert_index_equal(alt, expected)
alt2 = DatetimeIndex(vals)
tm.assert_index_equal(alt2, expected)


def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed():
# GH 50887
