Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/pandas-dev/pandas into json…
Browse files Browse the repository at this point in the history
…_overflow
  • Loading branch information
gupta-paras committed Oct 12, 2023
2 parents dd3cc48 + c9a98f0 commit 99da098
Show file tree
Hide file tree
Showing 31 changed files with 181 additions and 127 deletions.
14 changes: 7 additions & 7 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -896,9 +896,9 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
:class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
:class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
:class:`~pandas.tseries.offsets.YearEnd`, ``'Y'``, "calendar year end"
:class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin"
:class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end"
:class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin"
:class:`~pandas.tseries.offsets.YearBegin`, ``'YS'``, "calendar year begin"
:class:`~pandas.tseries.offsets.BYearEnd`, ``'BY'``, "business year end"
:class:`~pandas.tseries.offsets.BYearBegin`, ``'BYS'``, "business year begin"
:class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year"
:class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday"
:class:`~pandas.tseries.offsets.BusinessHour`, ``'bh'``, "business hour"
Expand Down Expand Up @@ -1259,9 +1259,9 @@ frequencies. We will refer to these aliases as *offset aliases*.
"QS", "quarter start frequency"
"BQS", "business quarter start frequency"
"Y", "year end frequency"
"BA, BY", "business year end frequency"
"AS, YS", "year start frequency"
"BAS, BYS", "business year start frequency"
"BY", "business year end frequency"
"YS", "year start frequency"
"BYS", "business year start frequency"
"h", "hourly frequency"
"bh", "business hour frequency"
"cbh", "custom business hour frequency"
Expand Down Expand Up @@ -1692,7 +1692,7 @@ the end of the interval.
.. warning::

The default values for ``label`` and ``closed`` are '**left**' for all
frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BA', 'BQ', and 'W'
frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W'
which all have a default of 'right'.

This might unintendedly lead to looking ahead, where the value for a later
Expand Down
20 changes: 16 additions & 4 deletions doc/source/whatsnew/v0.20.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -886,11 +886,23 @@ This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622

This is *unchanged* from prior versions, but shown for illustration purposes:

.. ipython:: python
.. code-block:: python
df = pd.DataFrame(np.arange(6), columns=['value'],
index=pd.MultiIndex.from_product([list('BA'), range(3)]))
df
In [81]: df = pd.DataFrame(np.arange(6), columns=['value'],
....: index=pd.MultiIndex.from_product([list('BA'), range(3)]))
....:
In [82]: df
Out[82]:
value
B 0 0
1 1
2 2
A 0 3
1 4
2 5
[6 rows x 1 columns]
.. code-block:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ Other
^^^^^
- Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`)
- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
- Bug in rendering ``inf`` values inside a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`)
- Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`)
-

Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,3 @@ def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
25 changes: 0 additions & 25 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -255,31 +255,6 @@ cdef bint checknull_with_nat_and_na(object obj):
return checknull_with_nat(obj) or obj is C_NA


@cython.wraparound(False)
@cython.boundscheck(False)
def is_float_nan(values: ndarray) -> ndarray:
"""
True for elements which correspond to a float nan

Returns
-------
ndarray[bool]
"""
cdef:
ndarray[uint8_t] result
Py_ssize_t i, N
object val

N = len(values)
result = np.zeros(N, dtype=np.uint8)

for i in range(N):
val = values[i]
if util.is_nan(val):
result[i] = True
return result.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def is_numeric_na(values: ndarray) -> ndarray:
Expand Down
44 changes: 40 additions & 4 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
"BQS": "Q",
"QS": "Q",
"BQ": "Q",
"BA": "Y",
"AS": "Y",
"BAS": "Y",
"MS": "M",
"D": "D",
"B": "B",
Expand All @@ -205,9 +202,9 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
"ns": "ns",
"h": "h",
"Q": "Q",
"Y": "Y",
"W": "W",
"ME": "M",
"Y": "Y",
"BY": "Y",
"YS": "Y",
"BYS": "Y",
Expand Down Expand Up @@ -244,6 +241,45 @@ DEPR_ABBREVS: dict[str, str]= {
"A-SEP": "Y-SEP",
"A-OCT": "Y-OCT",
"A-NOV": "Y-NOV",
"BA": "BY",
"BA-DEC": "BY-DEC",
"BA-JAN": "BY-JAN",
"BA-FEB": "BY-FEB",
"BA-MAR": "BY-MAR",
"BA-APR": "BY-APR",
"BA-MAY": "BY-MAY",
"BA-JUN": "BY-JUN",
"BA-JUL": "BY-JUL",
"BA-AUG": "BY-AUG",
"BA-SEP": "BY-SEP",
"BA-OCT": "BY-OCT",
"BA-NOV": "BY-NOV",
"AS": "YS",
"AS-DEC": "YS-DEC",
"AS-JAN": "YS-JAN",
"AS-FEB": "YS-FEB",
"AS-MAR": "YS-MAR",
"AS-APR": "YS-APR",
"AS-MAY": "YS-MAY",
"AS-JUN": "YS-JUN",
"AS-JUL": "YS-JUL",
"AS-AUG": "YS-AUG",
"AS-SEP": "YS-SEP",
"AS-OCT": "YS-OCT",
"AS-NOV": "YS-NOV",
"BAS": "BYS",
"BAS-DEC": "BYS-DEC",
"BAS-JAN": "BYS-JAN",
"BAS-FEB": "BYS-FEB",
"BAS-MAR": "BYS-MAR",
"BAS-APR": "BYS-APR",
"BAS-MAY": "BYS-MAY",
"BAS-JUN": "BYS-JUN",
"BAS-JUL": "BYS-JUL",
"BAS-AUG": "BYS-AUG",
"BAS-SEP": "BYS-SEP",
"BAS-OCT": "BYS-OCT",
"BAS-NOV": "BYS-NOV",
"H": "h",
"BH": "bh",
"CBH": "cbh",
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,8 @@ def get_start_end_field(
# month of year. Other offsets use month, startingMonth as ending
# month of year.

if (freqstr[0:2] in ["MS", "QS", "AS"]) or (
freqstr[1:3] in ["MS", "QS", "AS"]):
if (freqstr[0:2] in ["MS", "QS", "YS"]) or (
freqstr[1:3] in ["MS", "QS", "YS"]):
end_month = 12 if month_kw == 1 else month_kw - 1
start_month = month_kw
else:
Expand Down
21 changes: 9 additions & 12 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2414,7 +2414,7 @@ cdef class BYearEnd(YearOffset):

_outputName = "BusinessYearEnd"
_default_month = 12
_prefix = "BA"
_prefix = "BY"
_day_opt = "business_end"


Expand Down Expand Up @@ -2453,7 +2453,7 @@ cdef class BYearBegin(YearOffset):

_outputName = "BusinessYearBegin"
_default_month = 1
_prefix = "BAS"
_prefix = "BYS"
_day_opt = "business_start"


Expand Down Expand Up @@ -2552,7 +2552,7 @@ cdef class YearBegin(YearOffset):
"""

_default_month = 1
_prefix = "AS"
_prefix = "YS"
_day_opt = "start"


Expand Down Expand Up @@ -4540,10 +4540,10 @@ CDay = CustomBusinessDay
prefix_mapping = {
offset._prefix: offset
for offset in [
YearBegin, # 'AS'
YearBegin, # 'YS'
YearEnd, # 'Y'
BYearBegin, # 'BAS'
BYearEnd, # 'BA'
BYearBegin, # 'BYS'
BYearEnd, # 'BY'
BusinessDay, # 'B'
BusinessMonthBegin, # 'BMS'
BusinessMonthEnd, # 'BM'
Expand Down Expand Up @@ -4584,12 +4584,9 @@ _lite_rule_alias = {
"Q": "Q-DEC",

"Y": "Y-DEC", # YearEnd(month=12),
"AS": "AS-JAN", # YearBegin(month=1),
"YS": "AS-JAN",
"BA": "BA-DEC", # BYearEnd(month=12),
"BY": "BA-DEC",
"BAS": "BAS-JAN", # BYearBegin(month=1),
"BYS": "BAS-JAN",
"YS": "YS-JAN", # YearBegin(month=1),
"BY": "BY-DEC", # BYearEnd(month=12),
"BYS": "BYS-JAN", # BYearBegin(month=1),

"Min": "min",
"min": "min",
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2526,7 +2526,7 @@ def _round_temporally(
raise ValueError(f"Must specify a valid frequency: {freq}")
pa_supported_unit = {
"Y": "year",
"AS": "year",
"YS": "year",
"Q": "quarter",
"QS": "quarter",
"M": "month",
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1663,7 +1663,14 @@ def __repr__(self) -> str:
self, self._formatter(), indent_for_name=False
).rstrip(", \n")
class_name = f"<{type(self).__name__}>\n"
return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
footer = self._get_repr_footer()
return f"{class_name}{data}\n{footer}"

def _get_repr_footer(self) -> str:
# GH#24278
if self.ndim > 1:
return f"Shape: {self.shape}, dtype: {self.dtype}"
return f"Length: {len(self)}, dtype: {self.dtype}"

def _repr_2d(self) -> str:
from pandas.io.formats.printing import format_object_summary
Expand All @@ -1679,7 +1686,8 @@ def _repr_2d(self) -> str:
]
data = ",\n".join(lines)
class_name = f"<{type(self).__name__}>"
return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
footer = self._get_repr_footer()
return f"{class_name}\n[\n{data}\n]\n{footer}"

def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2177,7 +2177,7 @@ def _repr_categories(self) -> list[str]:
category_strs = [x.strip() for x in category_strs]
return category_strs

def _repr_categories_info(self) -> str:
def _get_repr_footer(self) -> str:
"""
Returns a string representation of the footer.
"""
Expand Down Expand Up @@ -2229,7 +2229,7 @@ def __repr__(self) -> str:
"""
String representation.
"""
footer = self._repr_categories_info()
footer = self._get_repr_footer()
length = len(self)
max_len = 10
if length > max_len:
Expand Down
13 changes: 2 additions & 11 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
is_datetime_array,
no_default,
)
from pandas._libs.missing import is_float_nan
from pandas._libs.tslibs import (
IncompatibleFrequency,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -1390,16 +1389,8 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str

if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
values = np.asarray(values)
values = lib.maybe_convert_objects(values, safe=True)

result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

# could have nans
mask = is_float_nan(values)
if mask.any():
result_arr = np.array(result)
result_arr[mask] = na_rep
result = result_arr.tolist()
# TODO: why do we need different justify for these cases?
result = trim_front(format_array(values, None, justify="all"))
else:
result = trim_front(format_array(values, None, justify="left"))
return header + result
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,11 +992,11 @@ def date_range(
**Specify a unit**
>>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
>>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s")
DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
'2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
'2817-01-01', '2917-01-01'],
dtype='datetime64[s]', freq='100AS-JAN')
dtype='datetime64[s]', freq='100YS-JAN')
"""
if freq is None and com.any_none(periods, start, end):
freq = "D"
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2101,7 +2101,7 @@ def __init__(
else:
freq = to_offset(freq)

end_types = {"ME", "Y", "Q", "BM", "BA", "BQ", "W"}
end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"}
rule = freq.rule_code
if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
if closed is None:
Expand Down Expand Up @@ -2299,7 +2299,7 @@ def _adjust_bin_edges(

if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in (
"BQ",
"BA",
"BY",
"Q",
"Y",
"W",
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5729,7 +5729,7 @@ def to_timestamp(
2023-01-01 1
2024-01-01 2
2025-01-01 3
Freq: AS-JAN, dtype: int64
Freq: YS-JAN, dtype: int64
Using `freq` which is the offset that the Timestamps will have
Expand Down
Loading

0 comments on commit 99da098

Please sign in to comment.