Skip to content

Commit

Permalink
REF: share Index/Block get_values_for_csv (#55485)
Browse files Browse the repository at this point in the history
* REF: share Index/Block get_values_for_csv

* mypy fixup
  • Loading branch information
jbrockmendel authored Oct 13, 2023
1 parent e1368cf commit abba4e2
Show file tree
Hide file tree
Showing 13 changed files with 158 additions and 148 deletions.
154 changes: 127 additions & 27 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
algos as libalgos,
index as libindex,
lib,
writers,
)
from pandas._libs.internals import BlockValuesRefs
import pandas._libs.join as libjoin
Expand Down Expand Up @@ -97,7 +98,6 @@
is_bool_dtype,
is_ea_or_datetimelike_dtype,
is_float,
is_float_dtype,
is_hashable,
is_integer,
is_iterator,
Expand All @@ -119,6 +119,7 @@
ExtensionDtype,
IntervalDtype,
PeriodDtype,
SparseDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -151,7 +152,9 @@
ArrowExtensionArray,
BaseMaskedArray,
Categorical,
DatetimeArray,
ExtensionArray,
TimedeltaArray,
)
from pandas.core.arrays.string_ import StringArray
from pandas.core.base import (
Expand Down Expand Up @@ -199,7 +202,10 @@
MultiIndex,
Series,
)
from pandas.core.arrays import PeriodArray
from pandas.core.arrays import (
IntervalArray,
PeriodArray,
)

__all__ = ["Index"]

Expand Down Expand Up @@ -1403,7 +1409,7 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str
result = trim_front(formatted)
return header + result

def _format_native_types(
def _get_values_for_csv(
self,
*,
na_rep: str_t = "",
Expand All @@ -1412,30 +1418,14 @@ def _format_native_types(
date_format=None,
quoting=None,
) -> npt.NDArray[np.object_]:
"""
Actually format specific types of the index.
"""
from pandas.io.formats.format import FloatArrayFormatter

if is_float_dtype(self.dtype) and not isinstance(self.dtype, ExtensionDtype):
formatter = FloatArrayFormatter(
self._values,
na_rep=na_rep,
float_format=float_format,
decimal=decimal,
quoting=quoting,
fixed_width=False,
)
return formatter.get_result_as_array()

mask = isna(self)
if self.dtype != object and not quoting:
values = np.asarray(self).astype(str)
else:
values = np.array(self, dtype=object, copy=True)

values[mask] = na_rep
return values
return get_values_for_csv(
self._values,
na_rep=na_rep,
decimal=decimal,
float_format=float_format,
date_format=date_format,
quoting=quoting,
)

def _summary(self, name=None) -> str_t:
"""
Expand Down Expand Up @@ -7629,3 +7619,113 @@ def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
stacklevel=find_stack_level(),
)
return result


def get_values_for_csv(
values: ArrayLike,
*,
date_format,
na_rep: str = "nan",
quoting=None,
float_format=None,
decimal: str = ".",
) -> npt.NDArray[np.object_]:
"""
Convert to types which can be consumed by the standard library's
csv.writer.writerows.
"""
if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
# GH#40754 Convert categorical datetimes to datetime array
values = algos.take_nd(
values.categories._values,
ensure_platform_int(values._codes),
fill_value=na_rep,
)

values = ensure_wrapped_if_datetimelike(values)

if isinstance(values, (DatetimeArray, TimedeltaArray)):
if values.ndim == 1:
result = values._format_native_types(na_rep=na_rep, date_format=date_format)
result = result.astype(object, copy=False)
return result

# GH#21734 Process every column separately, they might have different formats
results_converted = []
for i in range(len(values)):
result = values[i, :]._format_native_types(
na_rep=na_rep, date_format=date_format
)
results_converted.append(result.astype(object, copy=False))
return np.vstack(results_converted)

elif isinstance(values.dtype, PeriodDtype):
# TODO: tests that get here in column path
values = cast("PeriodArray", values)
res = values._format_native_types(na_rep=na_rep, date_format=date_format)
return res

elif isinstance(values.dtype, IntervalDtype):
# TODO: tests that get here in column path
values = cast("IntervalArray", values)
mask = values.isna()
if not quoting:
result = np.asarray(values).astype(str)
else:
result = np.array(values, dtype=object, copy=True)

result[mask] = na_rep
return result

elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
# see GH#13418: no special formatting is desired at the
# output (important for appropriate 'quoting' behaviour),
# so do not pass it through the FloatArrayFormatter
if float_format is None and decimal == ".":
mask = isna(values)

if not quoting:
values = values.astype(str)
else:
values = np.array(values, dtype="object")

values[mask] = na_rep
values = values.astype(object, copy=False)
return values

from pandas.io.formats.format import FloatArrayFormatter

formatter = FloatArrayFormatter(
values,
na_rep=na_rep,
float_format=float_format,
decimal=decimal,
quoting=quoting,
fixed_width=False,
)
res = formatter.get_result_as_array()
res = res.astype(object, copy=False)
return res

elif isinstance(values, ExtensionArray):
mask = isna(values)

new_values = np.asarray(values.astype(object))
new_values[mask] = na_rep
return new_values

else:
mask = isna(values)
itemsize = writers.word_len(na_rep)

if values.dtype != _dtype_obj and not quoting and itemsize:
values = values.astype(str)
if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
# enlarge for the na_rep
values = values.astype(f"<U{itemsize}")
else:
values = np.array(values, dtype="object")

values[mask] = na_rep
values = values.astype(object, copy=False)
return values
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def _format_with_header(
# TODO: not reached in tests 2023-10-11
# matches base class except for whitespace padding and date_format
return header + list(
self._format_native_types(na_rep=na_rep, date_format=date_format)
self._get_values_for_csv(na_rep=na_rep, date_format=date_format)
)

@property
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def _new_DatetimeIndex(cls, d):
"tzinfo",
"dtype",
"to_pydatetime",
"_format_native_types",
"date",
"time",
"timetz",
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1384,15 +1384,15 @@ def _formatter_func(self, tup):
formatter_funcs = [level._formatter_func for level in self.levels]
return tuple(func(val) for func, val in zip(formatter_funcs, tup))

def _format_native_types(
def _get_values_for_csv(
self, *, na_rep: str = "nan", **kwargs
) -> npt.NDArray[np.object_]:
new_levels = []
new_codes = []

# go through the levels and format them
for level, level_codes in zip(self.levels, self.codes):
level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
# add nan values, if there are any
mask = level_codes == -1
if mask.any():
Expand All @@ -1408,7 +1408,7 @@ def _format_native_types(

if len(new_levels) == 1:
# a single-level multi-index
return Index(new_levels[0].take(new_codes[0]))._format_native_types()
return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
else:
# reconstruct the multi-index
mi = MultiIndex(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _new_PeriodIndex(cls, **d):
PeriodArray,
wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
@inherit_names(["is_leap_year"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
"""
Immutable ndarray holding ordinal values indicating regular periods in time.
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
"sum",
"std",
"median",
"_format_native_types",
],
TimedeltaArray,
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
Index,
ensure_index,
)
from pandas.core.indexes.base import get_values_for_csv
from pandas.core.internals.base import (
DataManager,
SingleDataManager,
Expand All @@ -79,7 +80,6 @@
ensure_block_shape,
external_values,
extract_pandas_array,
get_values_for_csv,
maybe_coerce_values,
new_block,
)
Expand Down
Loading

0 comments on commit abba4e2

Please sign in to comment.