From 71a3e3c12abd934159f43fc4d925803e063c8c6d Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sat, 9 Dec 2023 13:07:44 -0600 Subject: [PATCH] BUG: `concat` should keep series names unless `ignore_index=True` (#56365) * Keep series names when not ignoring them * Split test into two shorter tests * whatsnew * tolist * Split test for concat on index --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/reshape/concat.py | 22 +++++++------- pandas/tests/reshape/concat/test_concat.py | 35 +++++++++++++++++++--- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 49c28e3dc5d85..ffefb9f41fb56 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -661,6 +661,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`) +- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`) - Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`) - Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`) - Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1bc548de91f01..d46348fff7a02 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -464,7 +464,7 @@ def __init__( # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed if len(ndims) > 1: - objs, sample = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis) + objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis) self.objs = objs @@ -580,7 +580,7 @@ def _sanitize_mixed_ndim( sample: Series | DataFrame, ignore_index: bool, axis: AxisInt, - ) -> tuple[list[Series | DataFrame], Series | DataFrame]: + ) -> list[Series | DataFrame]: # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed @@ -601,19 +601,21 @@ def _sanitize_mixed_ndim( else: name = getattr(obj, "name", None) if ignore_index or name is None: - name = current_column - current_column += 1 - - # doing a row-wise concatenation so need everything - # to line up - if self._is_frame and axis == 1: - name = 0 + if axis == 1: + # doing a row-wise concatenation so need everything + # to line up + name = 0 + else: + # doing a column-wise concatenation so need series + # to have unique names + name = current_column + current_column += 1 obj = sample._constructor({name: obj}, copy=False) new_objs.append(obj) - return new_objs, sample + return new_objs def get_result(self): cons: Callable[..., DataFrame | Series] diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ea0d510d2b8f8..9e34d02091e69 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -267,11 +267,10 @@ def test_with_mixed_tuples(self, sort): # it works concat([df1, df2], sort=sort) - def test_concat_mixed_objs(self): - # concat mixed series/frames + def test_concat_mixed_objs_columns(self): + # Test column-wise concat for mixed series/frames (axis=1) # G2385 - # axis 1 index = date_range("01-Jan-2013", periods=10, freq="h") arr = np.arange(10, dtype="int64") s1 = Series(arr, index=index) @@ -324,13 +323,41 @@ def test_concat_mixed_objs(self): result = concat([s1, df, s2], axis=1, ignore_index=True) tm.assert_frame_equal(result, expected) - # axis 0 + def test_concat_mixed_objs_index(self): + # Test row-wise concat for mixed series/frames with a common name + # GH2385, GH15047 + + index = date_range("01-Jan-2013", periods=10, freq="h") + arr = np.arange(10, dtype="int64") + s1 = Series(arr, index=index) + s2 = Series(arr, index=index) + df = DataFrame(arr.reshape(-1, 1), index=index) + expected = DataFrame( np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] ) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) + def test_concat_mixed_objs_index_names(self): + # Test row-wise concat for mixed series/frames with distinct names + # GH2385, GH15047 + + index = date_range("01-Jan-2013", periods=10, freq="h") + arr = np.arange(10, dtype="int64") + s1 = Series(arr, index=index, name="foo") + s2 = Series(arr, index=index, name="bar") + df = DataFrame(arr.reshape(-1, 1), index=index) + + expected = DataFrame( + np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, + index=index.tolist() * 3, + columns=["foo", 0, "bar"], + ) + result = concat([s1, df, s2]) + tm.assert_frame_equal(result, expected) + + # Rename all series to 0 when ignore_index=True expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) result = concat([s1, df, s2], ignore_index=True) tm.assert_frame_equal(result, expected)