From 994cab4a10957b066de8d9582a41b77e3697dfd7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 13 Nov 2024 16:02:27 -0500 Subject: [PATCH] BUG: Don't merge Excel cells to a single row with merge_cells=False (#60293) * BUG: Don't merge Excel cells to a single row with merge_cells=False * Cleanup comment * Improve test --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/formats/excel.py | 66 +++++++++------------------ pandas/tests/io/excel/test_writers.py | 46 ++++++++++++++----- 3 files changed, 57 insertions(+), 56 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index de69166b8c196..f2c4f85a50ec3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -702,6 +702,7 @@ I/O - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) +- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) Period ^^^^^^ diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 52b5755558900..6a3e215de3f96 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -48,7 +48,6 @@ CSSWarning, ) from pandas.io.formats.format import get_level_lengths -from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: from pandas._typing import ( @@ -620,9 +619,8 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: return columns = self.columns - level_strs = columns._format_multi( - sparsify=self.merge_cells in {True, "columns"}, include_names=False - ) + merge_columns = self.merge_cells in {True, "columns"} + level_strs = columns._format_multi(sparsify=merge_columns, include_names=False) level_lengths = get_level_lengths(level_strs) coloffset = 0 lnum = 0 @@ -630,51 +628,34 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: if self.index and isinstance(self.df.index, MultiIndex): coloffset = self.df.index.nlevels - 1 - if self.merge_cells in {True, "columns"}: - # Format multi-index as a merged cells. - for lnum, name in enumerate(columns.names): - yield ExcelCell( - row=lnum, - col=coloffset, - val=name, - style=None, - ) + for lnum, name in enumerate(columns.names): + yield ExcelCell( + row=lnum, + col=coloffset, + val=name, + style=None, + ) - for lnum, (spans, levels, level_codes) in enumerate( - zip(level_lengths, columns.levels, columns.codes) - ): - values = levels.take(level_codes) - for i, span_val in spans.items(): - mergestart, mergeend = None, None - if span_val > 1: - mergestart, mergeend = lnum, coloffset + i + span_val - yield CssExcelCell( - row=lnum, - col=coloffset + i + 1, - val=values[i], - style=None, - css_styles=getattr(self.styler, "ctx_columns", None), - css_row=lnum, - css_col=i, - css_converter=self.style_converter, - mergestart=mergestart, - mergeend=mergeend, - ) - else: - # Format in legacy format with dots to indicate levels. - for i, values in enumerate(zip(*level_strs)): - v = ".".join(map(pprint_thing, values)) + for lnum, (spans, levels, level_codes) in enumerate( + zip(level_lengths, columns.levels, columns.codes) + ): + values = levels.take(level_codes) + for i, span_val in spans.items(): + mergestart, mergeend = None, None + if merge_columns and span_val > 1: + mergestart, mergeend = lnum, coloffset + i + span_val yield CssExcelCell( row=lnum, col=coloffset + i + 1, - val=v, + val=values[i], style=None, css_styles=getattr(self.styler, "ctx_columns", None), css_row=lnum, css_col=i, css_converter=self.style_converter, + mergestart=mergestart, + mergeend=mergeend, ) - self.rowcounter = lnum def _format_header_regular(self) -> Iterable[ExcelCell]: @@ -798,11 +779,8 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: # MultiIndex columns require an extra row # with index names (blank if None) for - # unambiguous round-trip, unless not merging, - # in which case the names all go on one row Issue #11328 - if isinstance(self.columns, MultiIndex) and ( - self.merge_cells in {True, "columns"} - ): + # unambiguous round-trip, Issue #11328 + if isinstance(self.columns, MultiIndex): self.rowcounter += 1 # if index labels are not empty go ahead and dump diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 81aa0be24bffc..051aa1f386d92 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -870,27 +870,49 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, tmp_excel): # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self, merge_cells, frame, tmp_excel): + def test_to_excel_multiindex_cols(self, merge_cells, tmp_excel): + # GH#11328 + frame = DataFrame( + { + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": [7, 8, 9], + } + ) arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index - new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)]) + new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1)]) frame.columns = new_cols_index - header = [0, 1] - if not merge_cells: - header = 0 - - # round trip frame.to_excel(tmp_excel, sheet_name="test1", merge_cells=merge_cells) + + # Check round trip + with ExcelFile(tmp_excel) as reader: + result = pd.read_excel( + reader, sheet_name="test1", header=[0, 1], index_col=[0, 1] + ) + tm.assert_frame_equal(result, frame) + + # GH#60274 + # Check with header/index_col None to determine which cells were merged with ExcelFile(tmp_excel) as reader: - df = pd.read_excel( - reader, sheet_name="test1", header=header, index_col=[0, 1] + result = pd.read_excel( + reader, sheet_name="test1", header=None, index_col=None ) + expected = DataFrame( + { + 0: [np.nan, np.nan, "first", 0, 1, 2], + 1: [np.nan, np.nan, "second", 3, 4, 5], + 2: [40.0, 1.0, np.nan, 1.0, 2.0, 3.0], + 3: [np.nan, 2.0, np.nan, 4.0, 5.0, 6.0], + 4: [50.0, 1.0, np.nan, 7.0, 8.0, 9.0], + } + ) if not merge_cells: - fm = frame.columns._format_multi(sparsify=False, include_names=False) - frame.columns = [".".join(map(str, q)) for q in zip(*fm)] - tm.assert_frame_equal(frame, df) + # MultiIndex column value is repeated + expected.loc[0, 3] = 40.0 + tm.assert_frame_equal(result, expected) def test_to_excel_multiindex_dates(self, merge_cells, tmp_excel): # try multiindex with dates