Skip to content

Commit

Permalink
BUG: concat should keep series names unless ignore_index=True (#56365)
Browse files Browse the repository at this point in the history

* Keep series names when not ignoring them

* Split test into two shorter tests

* whatsnew

* tolist

* Split test for concat on index
  • Loading branch information
rob-sil authored Dec 9, 2023
1 parent 23c20de commit 71a3e3c
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 14 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ Groupby/resample/rolling
Reshaping
^^^^^^^^^
- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`)
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
Expand Down
22 changes: 12 additions & 10 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def __init__(
# if we have mixed ndims, then convert to highest ndim
# creating column numbers as needed
if len(ndims) > 1:
objs, sample = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis)
objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis)

self.objs = objs

Expand Down Expand Up @@ -580,7 +580,7 @@ def _sanitize_mixed_ndim(
sample: Series | DataFrame,
ignore_index: bool,
axis: AxisInt,
) -> tuple[list[Series | DataFrame], Series | DataFrame]:
) -> list[Series | DataFrame]:
# if we have mixed ndims, then convert to highest ndim
# creating column numbers as needed

Expand All @@ -601,19 +601,21 @@ def _sanitize_mixed_ndim(
else:
name = getattr(obj, "name", None)
if ignore_index or name is None:
name = current_column
current_column += 1

# doing a row-wise concatenation so need everything
# to line up
if self._is_frame and axis == 1:
name = 0
if axis == 1:
# doing a row-wise concatenation so need everything
# to line up
name = 0
else:
# doing a column-wise concatenation so need series
# to have unique names
name = current_column
current_column += 1

obj = sample._constructor({name: obj}, copy=False)

new_objs.append(obj)

return new_objs, sample
return new_objs

def get_result(self):
cons: Callable[..., DataFrame | Series]
Expand Down
35 changes: 31 additions & 4 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,10 @@ def test_with_mixed_tuples(self, sort):
# it works
concat([df1, df2], sort=sort)

def test_concat_mixed_objs(self):
# concat mixed series/frames
def test_concat_mixed_objs_columns(self):
# Test column-wise concat for mixed series/frames (axis=1)
# GH2385

# axis 1
index = date_range("01-Jan-2013", periods=10, freq="h")
arr = np.arange(10, dtype="int64")
s1 = Series(arr, index=index)
Expand Down Expand Up @@ -324,13 +323,41 @@ def test_concat_mixed_objs(self):
result = concat([s1, df, s2], axis=1, ignore_index=True)
tm.assert_frame_equal(result, expected)

# axis 0
def test_concat_mixed_objs_index(self):
# Test row-wise concat for mixed series/frames with a common name
# GH2385, GH15047

index = date_range("01-Jan-2013", periods=10, freq="h")
arr = np.arange(10, dtype="int64")
s1 = Series(arr, index=index)
s2 = Series(arr, index=index)
df = DataFrame(arr.reshape(-1, 1), index=index)

expected = DataFrame(
np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0]
)
result = concat([s1, df, s2])
tm.assert_frame_equal(result, expected)

def test_concat_mixed_objs_index_names(self):
# Test row-wise concat for mixed series/frames with distinct names
# GH2385, GH15047

index = date_range("01-Jan-2013", periods=10, freq="h")
arr = np.arange(10, dtype="int64")
s1 = Series(arr, index=index, name="foo")
s2 = Series(arr, index=index, name="bar")
df = DataFrame(arr.reshape(-1, 1), index=index)

expected = DataFrame(
np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T,
index=index.tolist() * 3,
columns=["foo", 0, "bar"],
)
result = concat([s1, df, s2])
tm.assert_frame_equal(result, expected)

# Rename all series to 0 when ignore_index=True
expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
result = concat([s1, df, s2], ignore_index=True)
tm.assert_frame_equal(result, expected)
Expand Down

0 comments on commit 71a3e3c

Please sign in to comment.