Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into ci/debug
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Oct 31, 2023
2 parents 4dc96ad + f04da3c commit d1373df
Show file tree
Hide file tree
Showing 23 changed files with 178 additions and 62 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write (warnings)"
- name: "Copy-on-Write 3.11 (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
Expand Down Expand Up @@ -98,7 +98,7 @@ jobs:
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
cancel-in-progress: true

services:
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Fixed regressions

Bug fixes
~~~~~~~~~
-
- Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`)
-

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ Datetimelike
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
-

Timedelta
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ def array_to_datetime(
dayfirst: bool = ...,
yearfirst: bool = ...,
utc: bool = ...,
creso: int = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...

# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
) -> npt.NDArray[np.int64]: ...
25 changes: 15 additions & 10 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ from pandas._libs.tslibs.conversion cimport (
get_datetime64_nanos,
parse_pydatetime,
)
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
Expand Down Expand Up @@ -277,6 +278,7 @@ def array_with_unit_to_datetime(
result, tz = array_to_datetime(
values.astype(object, copy=False),
errors=errors,
creso=NPY_FR_ns,
)
return result, tz

Expand Down Expand Up @@ -408,6 +410,7 @@ cpdef array_to_datetime(
bint dayfirst=False,
bint yearfirst=False,
bint utc=False,
NPY_DATETIMEUNIT creso=NPY_FR_ns,
):
"""
Converts a 1D array of date-like values to a numpy array of either:
Expand All @@ -434,6 +437,7 @@ cpdef array_to_datetime(
yearfirst parsing behavior when encountering datetime strings
utc : bool, default False
indicator whether the dates should be UTC
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
Returns
-------
Expand All @@ -457,13 +461,14 @@ cpdef array_to_datetime(
set out_tzoffset_vals = set()
tzinfo tz_out = None
cnp.flatiter it = cnp.PyArray_IterNew(values)
NPY_DATETIMEUNIT creso = NPY_FR_ns
DatetimeParseState state = DatetimeParseState()
str reso_str

# specify error conditions
assert is_raise or is_ignore or is_coerce

result = np.empty((<object>values).shape, dtype="M8[ns]")
reso_str = npy_unit_to_abbrev(creso)
result = np.empty((<object>values).shape, dtype=f"M8[{reso_str}]")
iresult = result.view("i8").ravel()

for i in range(n):
Expand All @@ -480,11 +485,11 @@ cpdef array_to_datetime(
iresult[i] = parse_pydatetime(val, &dts, creso=creso)

elif PyDate_Check(val):
iresult[i] = pydate_to_dt64(val, &dts)
check_dts_bounds(&dts)
iresult[i] = pydate_to_dt64(val, &dts, reso=creso)
check_dts_bounds(&dts, creso)

elif is_datetime64_object(val):
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
iresult[i] = get_datetime64_nanos(val, creso)

elif is_integer_object(val) or is_float_object(val):
# these must be ns unit by-definition
Expand All @@ -493,23 +498,23 @@ cpdef array_to_datetime(
iresult[i] = NPY_NAT
else:
# we now need to parse this as if unit='ns'
iresult[i] = cast_from_unit(val, "ns")
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)

elif isinstance(val, str):
# string
if type(val) is not str:
# GH#32264 np.str_ object
val = str(val)

if parse_today_now(val, &iresult[i], utc):
if parse_today_now(val, &iresult[i], utc, creso):
# We can't _quite_ dispatch this to convert_str_to_tsobject
# bc there isn't a nice way to pass "utc"
continue

_ts = convert_str_to_tsobject(
val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst
)
_ts.ensure_reso(NPY_FR_ns, val)
_ts.ensure_reso(creso, val)

iresult[i] = _ts.value

Expand Down Expand Up @@ -666,7 +671,7 @@ cdef _array_to_datetime_object(
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz):
def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)
Expand Down Expand Up @@ -702,7 +707,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
else:
# datetime64, tznaive pydatetime, int, float
ts = ts.tz_localize(tz)
ts = ts.as_unit("ns")
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value

# Analogous to: result[i] = ival
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1

cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*)
cpdef (int64_t, int) precision_from_unit(
NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
)

cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ DT64NS_DTYPE: np.dtype
TD64NS_DTYPE: np.dtype

def precision_from_unit(
unit: str,
in_reso: int, # NPY_DATETIMEUNIT
) -> tuple[int, int]: ... # (int64_t, _)
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
25 changes: 16 additions & 9 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ cdef int64_t cast_from_unit(
cdef:
int64_t m
int p
NPY_DATETIMEUNIT in_reso

if unit in ["Y", "M"]:
if is_float_object(ts) and not ts.is_integer():
Expand All @@ -123,7 +124,14 @@ cdef int64_t cast_from_unit(
dt64obj = np.datetime64(ts, unit)
return get_datetime64_nanos(dt64obj, out_reso)

m, p = precision_from_unit(unit, out_reso)
in_reso = abbrev_to_npy_unit(unit)
if out_reso < in_reso and in_reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# We will end up rounding (always *down*), so don't need the fractional
# part of `ts`.
m, _ = precision_from_unit(out_reso, in_reso)
return (<int64_t>ts) // m

m, p = precision_from_unit(in_reso, out_reso)

# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
Expand All @@ -146,8 +154,8 @@ cdef int64_t cast_from_unit(
) from err


cpdef inline (int64_t, int) precision_from_unit(
str unit,
cpdef (int64_t, int) precision_from_unit(
NPY_DATETIMEUNIT in_reso,
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
):
"""
Expand All @@ -163,25 +171,24 @@ cpdef inline (int64_t, int) precision_from_unit(
int64_t m
int64_t multiplier
int p
NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit)

if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
reso = NPY_DATETIMEUNIT.NPY_FR_ns
if reso == NPY_DATETIMEUNIT.NPY_FR_Y:
if in_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
in_reso = NPY_DATETIMEUNIT.NPY_FR_ns
if in_reso == NPY_DATETIMEUNIT.NPY_FR_Y:
# each 400 years we have 97 leap years, for an average of 97/400=.2425
# extra days each year. We get 31556952 by writing
# 3600*24*365.2425=31556952
multiplier = periods_per_second(out_reso)
m = multiplier * 31556952
elif reso == NPY_DATETIMEUNIT.NPY_FR_M:
elif in_reso == NPY_DATETIMEUNIT.NPY_FR_M:
# 2629746 comes from dividing the "Y" case by 12.
multiplier = periods_per_second(out_reso)
m = multiplier * 2629746
else:
# Careful: if get_conversion_factor raises, the exception does
# not propagate, instead we get a warning about an ignored exception.
# https://github.com/pandas-dev/pandas/pull/51483#discussion_r1115198951
m = get_conversion_factor(reso, out_reso)
m = get_conversion_factor(in_reso, out_reso)

p = <int>log10(m) # number of digits in 'm' minus 1
return m, p
Expand Down
25 changes: 23 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4616,7 +4616,28 @@ _lite_rule_alias = {
"ns": "ns",
}

_dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s", "me", "qe"}
_dont_uppercase = {
"h",
"bh",
"cbh",
"MS",
"ms",
"s",
"me",
"qe",
"qe-dec",
"qe-jan",
"qe-feb",
"qe-mar",
"qe-apr",
"qe-may",
"qe-jun",
"qe-jul",
"qe-aug",
"qe-sep",
"qe-oct",
"qe-nov",
}


INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
Expand All @@ -4635,7 +4656,7 @@ def _get_offset(name: str) -> BaseOffset:
--------
_get_offset('EOM') --> BMonthEnd(1)
"""
if name not in _dont_uppercase:
if name.lower() not in _dont_uppercase:
name = name.upper()
name = _lite_rule_alias.get(name, name)
name = _lite_rule_alias.get(name.lower(), name)
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/strptime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ from cpython.datetime cimport (
)
from numpy cimport int64_t

from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT

cdef bint parse_today_now(str val, int64_t* iresult, bint utc)

cdef bint parse_today_now(str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso)


cdef class DatetimeParseState:
Expand Down
19 changes: 12 additions & 7 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,27 @@ def _test_format_is_iso(f: str) -> bool:
return format_is_iso(f)


cdef bint parse_today_now(str val, int64_t* iresult, bint utc):
cdef bint parse_today_now(
str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso
):
# We delay this check for as long as possible
# because it catches relatively rare cases
cdef:
_Timestamp ts

# Multiply by 1000 to convert to nanos, since these methods naturally have
# microsecond resolution
if val == "now":
if utc:
iresult[0] = Timestamp.utcnow()._value * 1000
ts = <_Timestamp>Timestamp.utcnow()
iresult[0] = ts._as_creso(creso)._value
else:
# GH#18705 make sure to_datetime("now") matches Timestamp("now")
# Note using Timestamp.now() is faster than Timestamp("now")
iresult[0] = Timestamp.now()._value * 1000
ts = <_Timestamp>Timestamp.now()
iresult[0] = ts._as_creso(creso)._value
return True
elif val == "today":
iresult[0] = Timestamp.today()._value * 1000
ts = <_Timestamp>Timestamp.today()
iresult[0] = ts._as_creso(creso)._value
return True
return False

Expand Down Expand Up @@ -363,7 +368,7 @@ def array_strptime(
check_dts_bounds(&dts)
continue

if parse_today_now(val, &iresult[i], utc):
if parse_today_now(val, &iresult[i], utc, NPY_FR_ns):
continue

# Some ISO formats can't be parsed by string_to_dts
Expand Down
4 changes: 1 addition & 3 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -303,18 +303,16 @@ cdef object ensure_td64ns(object ts):
cdef:
NPY_DATETIMEUNIT td64_unit
int64_t td64_value, mult
str unitstr

td64_unit = get_datetime64_unit(ts)
if (
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
unitstr = npy_unit_to_abbrev(td64_unit)

td64_value = cnp.get_timedelta64_value(ts)

mult = precision_from_unit(unitstr)[0]
mult = precision_from_unit(td64_unit)[0]
try:
# NB: cython#1381 this cannot be *=
td64_value = td64_value * mult
Expand Down
Loading

0 comments on commit d1373df

Please sign in to comment.