Skip to content

Commit

Permalink
Merge branch 'main' into ref-maybe_convert_objects
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel committed Dec 13, 2023
2 parents 468bc53 + cbe1b32 commit a6434dd
Show file tree
Hide file tree
Showing 25 changed files with 155 additions and 55 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/docbuild-and-upload.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ jobs:
- name: Build Pandas
uses: ./.github/actions/build_pandas

- name: Test website
run: python -m pytest web/

- name: Build website
run: python web/pandas_web.py web/pandas --target-path=web/build

Expand Down
29 changes: 14 additions & 15 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def array_with_unit_to_datetime(
f"unit='{unit}' not valid with non-numerical val='{val}'"
)

except (ValueError, OutOfBoundsDatetime, TypeError) as err:
except (ValueError, TypeError) as err:
if is_raise:
err.args = (f"{err}, at position {i}",)
raise
Expand Down Expand Up @@ -435,15 +435,15 @@ cpdef array_to_datetime(
Parameters
----------
values : ndarray of object
date-like objects to convert
date-like objects to convert
errors : str, default 'raise'
error behavior when parsing
error behavior when parsing
dayfirst : bool, default False
dayfirst parsing behavior when encountering datetime strings
dayfirst parsing behavior when encountering datetime strings
yearfirst : bool, default False
yearfirst parsing behavior when encountering datetime strings
yearfirst parsing behavior when encountering datetime strings
utc : bool, default False
indicator whether the dates should be UTC
indicator whether the dates should be UTC
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
Set to NPY_FR_GENERIC to infer a resolution.
Expand All @@ -464,7 +464,7 @@ cpdef array_to_datetime(
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_same_offsets
_TSObject _ts
_TSObject tsobj
float tz_offset
set out_tzoffset_vals = set()
tzinfo tz, tz_out = None
Expand Down Expand Up @@ -550,29 +550,28 @@ cpdef array_to_datetime(
creso = state.creso
continue

_ts = convert_str_to_tsobject(
tsobj = convert_str_to_tsobject(
val, None, dayfirst=dayfirst, yearfirst=yearfirst
)

if _ts.value == NPY_NAT:
if tsobj.value == NPY_NAT:
# e.g. "NaT" string or empty string, we do not consider
# this as either tzaware or tznaive. See
# test_to_datetime_with_empty_str_utc_false_format_mixed
# We also do not update resolution inference based on this,
# see test_infer_with_nat_int_float_str
iresult[i] = _ts.value
iresult[i] = tsobj.value
continue

item_reso = _ts.creso
item_reso = tsobj.creso
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

_ts.ensure_reso(creso, val)

iresult[i] = _ts.value
tsobj.ensure_reso(creso, val)
iresult[i] = tsobj.value

tz = _ts.tzinfo
tz = tsobj.tzinfo
if tz is not None:
# dateutil timezone objects cannot be hashed, so
# store the UTC offsets in seconds instead
Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ from cpython.datetime cimport (
import_datetime()

from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.dtypes cimport (
abbrev_to_npy_unit,
get_supported_reso,
Expand Down Expand Up @@ -492,7 +491,7 @@ cdef _TSObject convert_datetime_to_tsobject(
pydatetime_to_dtstruct(ts, &obj.dts)
obj.tzinfo = ts.tzinfo

if isinstance(ts, ABCTimestamp):
if isinstance(ts, _Timestamp):
obj.dts.ps = ts.nanosecond * 1000

if nanos:
Expand Down Expand Up @@ -766,7 +765,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
"""
if tz is None:
return dt
elif isinstance(dt, ABCTimestamp):
elif isinstance(dt, _Timestamp):
return dt.tz_localize(tz)
return _localize_pydatetime(dt, tz)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ OFFSET_TO_PERIOD_FREQSTR: dict[str, str]

def periods_per_day(reso: int = ...) -> int: ...
def periods_per_second(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str) -> int: ...
def abbrev_to_npy_unit(abbrev: str | None) -> int: ...
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...

class PeriodDtypeBase:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ cdef int64_t get_conversion_factor(
):
raise ValueError("unit-less resolutions are not supported")
if from_unit > to_unit:
raise ValueError
raise ValueError("from_unit must be <= to_unit")

if from_unit == to_unit:
return 1
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -319,14 +319,14 @@ def array_strptime(
Py_ssize_t i, n = len(values)
npy_datetimestruct dts
int64_t[::1] iresult
object val, tz
object val
bint seen_datetime_offset = False
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint is_same_offsets
set out_tzoffset_vals = set()
tzinfo tz_out = None
tzinfo tz, tz_out = None
bint iso_format = format_is_iso(fmt)
NPY_DATETIMEUNIT out_bestunit, item_reso
int out_local = 0, out_tzoffset = 0
Expand Down Expand Up @@ -484,7 +484,7 @@ def array_strptime(
tz = None
out_tzoffset_vals.add("naive")

except (ValueError, OutOfBoundsDatetime) as ex:
except ValueError as ex:
ex.args = (
f"{str(ex)}, at position {i}. You might want to try:\n"
" - passing `format` if your strings have a consistent format;\n"
Expand Down Expand Up @@ -1084,7 +1084,7 @@ cdef tzinfo parse_timezone_directive(str z):
cdef:
int hours, minutes, seconds, pad_number, microseconds
int total_minutes
object gmtoff_remainder, gmtoff_remainder_padding
str gmtoff_remainder, gmtoff_remainder_padding

if z == "Z":
return timezone(timedelta(0))
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,9 @@ cdef int64_t parse_timedelta_string(str ts) except? -1:
"""

cdef:
unicode c
str c
bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0
object current_unit = None
str current_unit = None
int64_t result = 0, m = 0, r
list number = [], frac = [], unit = []

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,7 +1453,7 @@ class NumpyEADtype(ExtensionDtype):

def __init__(self, dtype: npt.DTypeLike | NumpyEADtype | None) -> None:
if isinstance(dtype, NumpyEADtype):
# make constructor univalent
# make constructor idempotent
dtype = dtype.numpy_dtype
self._dtype = np.dtype(dtype)

Expand Down
9 changes: 4 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
sanitize_array,
sanitize_masked_array,
)
Expand Down Expand Up @@ -8784,11 +8783,11 @@ def combine_first(self, other: DataFrame) -> DataFrame:
"""
from pandas.core.computation import expressions

def combiner(x, y):
mask = extract_array(isna(x))
def combiner(x: Series, y: Series):
mask = x.isna()._values

x_values = extract_array(x, extract_numpy=True)
y_values = extract_array(y, extract_numpy=True)
x_values = x._values
y_values = y._values

# If the column y in other DataFrame is not in first DataFrame,
# just return y_values.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ def coerce(values):
values = to_numeric(values, errors=errors)

# prevent overflow in case of int8 or int16
if is_integer_dtype(values):
if is_integer_dtype(values.dtype):
values = values.astype("int64", copy=False)
return values

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sas/sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series:
----------
sas_datetimes : {Series, Sequence[float]}
Dates or datetimes in SAS
unit : {str}
unit : {'d', 's'}
"d" if the floats represent dates, "s" for datetimes
Returns
Expand Down
3 changes: 0 additions & 3 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
stata_epoch: Final = datetime(1960, 1, 1)


# TODO: Add typing. As of January 2020 it is not possible to type this function since
# mypy doesn't understand that a Series and an int can be combined using mathematical
# operations. (+, -).
def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
"""
Convert from SIF to datetime. https://www.stata.com/help.cgi?datetime
Expand Down
4 changes: 2 additions & 2 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1495,7 +1495,7 @@ def _is_ts_plot(self) -> bool:
return not self.x_compat and self.use_index and self._use_dynamic_x()

@final
def _use_dynamic_x(self):
def _use_dynamic_x(self) -> bool:
return use_dynamic_x(self._get_ax(0), self.data)

def _make_plot(self, fig: Figure) -> None:
Expand Down Expand Up @@ -1537,7 +1537,7 @@ def _make_plot(self, fig: Figure) -> None:
errors = self._get_errorbars(label=label, index=i)
kwds = dict(kwds, **errors)

label = pprint_thing(label) # .encode('utf-8')
label = pprint_thing(label)
label = self._mark_right_label(label, index=i)
kwds["label"] = label

Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,8 @@ def test_dt64arr_addsub_intlike(
self, request, dtype, index_or_series_or_array, freq, tz_naive_fixture
):
# GH#19959, GH#19123, GH#19012
# GH#55860 use index_or_series_or_array instead of box_with_array
# bc DataFrame alignment makes it inapplicable
tz = tz_naive_fixture

if freq is None:
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ def test_addition_ops(self):
tdi + Index([1, 2, 3], dtype=np.int64)

# this is a union!
# FIXME: don't leave commented-out
# pytest.raises(TypeError, lambda : Index([1,2,3]) + tdi)

result = tdi + dti # name will be reset
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/numpy_/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_constructor_from_string():
assert result == expected


def test_dtype_univalent(any_numpy_dtype):
def test_dtype_idempotent(any_numpy_dtype):
dtype = NumpyEADtype(any_numpy_dtype)

result = NumpyEADtype(dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ def test_min_max_numpy(method, box, dtype, request, arrow_string_storage):
assert result == expected


def test_fillna_args(dtype, request, arrow_string_storage):
def test_fillna_args(dtype, arrow_string_storage):
# GH 37987

arr = pd.array(["a", pd.NA], dtype=dtype)
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,7 @@ def test_take_fill_raises(self, fill_value, arr1d):
arr1d.take([0, 1], allow_fill=True, fill_value=fill_value)

def test_take_fill(self, arr1d):
np.arange(10, dtype="i8") * 24 * 3600 * 10**9

arr = arr1d # self.array_cls(data, freq="D")
arr = arr1d

result = arr.take([-1, 1], allow_fill=True, fill_value=None)
assert result[0] is NaT
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ def test_dti_tz_constructors(self, tzstr):
for other in [idx2, idx3, idx4]:
tm.assert_index_equal(idx1, other)

def test_dti_construction_univalent(self, unit):
def test_dti_construction_idempotent(self, unit):
rng = date_range(
"03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern", unit=unit
)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_data_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

skip_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@skip_pyarrow
@xfail_pyarrow
def test_read_data_list(all_parsers):
parser = all_parsers
kwargs = {"index_col": 0}
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import pandas as pd
import pandas._testing as tm

from pandas.io.sas.sas7bdat import SAS7BDATReader


@pytest.fixture
def dirpath(datapath):
Expand Down Expand Up @@ -127,8 +129,6 @@ def test_encoding_options(datapath):
pass
tm.assert_frame_equal(df1, df2)

from pandas.io.sas.sas7bdat import SAS7BDATReader

with contextlib.closing(SAS7BDATReader(fname, convert_header_text=False)) as rdr:
df3 = rdr.read()
for x, y in zip(df1.columns, df3.columns):
Expand Down Expand Up @@ -189,10 +189,9 @@ def test_date_time(datapath):
fname, parse_dates=["Date1", "Date2", "DateTime", "DateTimeHi", "Taiw"]
)
# GH 19732: Timestamps imported from sas will incur floating point errors
# 2023-11-16 we don't know the correct "expected" result bc we do not have
# access to SAS to read the sas7bdat file. We are really just testing
# that we are "close". This only seems to be an issue near the
# implementation bounds.
# See GH#56014 for discussion of the correct "expected" results
# We are really just testing that we are "close". This only seems to be
# an issue near the implementation bounds.

df[df.columns[3]] = df.iloc[:, 3].dt.round("us")
df0["Date1"] = df0["Date1"].astype("M8[s]")
Expand Down Expand Up @@ -271,6 +270,7 @@ def test_max_sas_date(datapath):
# NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999
# but this is read as 29DEC9999:23:59:59.998993 by a buggy
# sas7bdat module
# See also GH#56014 for discussion of the correct "expected" results.
fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat")
df = pd.read_sas(fname, encoding="iso-8859-1")

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def test_read_dta2(self, datapath):
# datapath("io", "data", "stata", "stata2_113.dta")
# )

# FIXME: don't leave commented-out
# buggy test because of the NaT comparison on certain platforms
# Format 113 test fails since it does not support tc and tC formats
# tm.assert_frame_equal(parsed_113, expected)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3107,7 +3107,7 @@ class TestDatetimeParsingWrappers:
("Thu Sep 25 2003", datetime(2003, 9, 25)),
("Sep 25 2003", datetime(2003, 9, 25)),
("January 1 2014", datetime(2014, 1, 1)),
# GHE10537
# GH#10537
("2014-06", datetime(2014, 6, 1)),
("06-2014", datetime(2014, 6, 1)),
("2014-6", datetime(2014, 6, 1)),
Expand Down
Loading

0 comments on commit a6434dd

Please sign in to comment.