From b6483895ba16eee25f0d0d5f96406a686057cd00 Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Sun, 29 Oct 2023 23:07:37 +0100 Subject: [PATCH 1/8] fixed bug where pd.NA was being cast to NaN during formatting --- pandas/io/formats/format.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6b201f6bf3cfe..669414cf55ea6 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -696,9 +696,8 @@ def _truncate_vertically(self) -> None: assert self.max_rows_fitted is not None row_num = self.max_rows_fitted // 2 if row_num >= 1: - head = self.tr_frame.iloc[:row_num, :] - tail = self.tr_frame.iloc[-row_num:, :] - self.tr_frame = concat((head, tail)) + dropped_indices = self.tr_frame.index[row_num:-row_num] + self.tr_frame = self.tr_frame.drop(dropped_indices) else: row_num = cast(int, self.max_rows) self.tr_frame = self.tr_frame.iloc[:row_num, :] From 1efa677cda475564882a301e74789b668c0beb2c Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Mon, 30 Oct 2023 22:38:39 +0100 Subject: [PATCH 2/8] implementing suggestion by Richard Shadrach --- pandas/io/formats/format.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 669414cf55ea6..70bd92dc083c0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -696,8 +696,9 @@ def _truncate_vertically(self) -> None: assert self.max_rows_fitted is not None row_num = self.max_rows_fitted // 2 if row_num >= 1: - dropped_indices = self.tr_frame.index[row_num:-row_num] - self.tr_frame = self.tr_frame.drop(dropped_indices) + _len = len(self.tr_frame) + rows = np.arange(_len) + self.tr_frame = self.tr_frame.iloc[(rows < row_num) | (rows >= _len - row_num)] else: row_num = cast(int, self.max_rows) self.tr_frame = self.tr_frame.iloc[:row_num, :] From 1c4d435aea04c5c78873d2d652e5044ac26ff6cc Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Tue, 31 Oct 2023 09:05:28 +0100 Subject: [PATCH 3/8] added test passed new test locally --- pandas/tests/io/formats/test_format.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 8901eb99b7612..4a7c8a25a785c 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -171,6 +171,17 @@ def test_repr_truncation(self): with option_context("display.max_colwidth", max_len + 2): assert "..." not in repr(df) + def test_repr_truncation_preserver_na(self): + # https://github.com/pandas-dev/pandas/issues/55630 + with option_context("display.max_rows", 10): + df = DataFrame( + {"A" : [pd.NA for _ in range(100)]} + ) + + r = repr(df) + for row in r.split("\n")[1:-2]: + assert row.endswith("") or row.endswith("...") + def test_max_colwidth_negative_int_raises(self): # Deprecation enforced from: # https://github.com/pandas-dev/pandas/issues/31532 From 331ea63ef38117f23293348061d8142236b898ec Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Tue, 31 Oct 2023 09:08:08 +0100 Subject: [PATCH 4/8] typo --- pandas/tests/io/formats/test_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 4a7c8a25a785c..fc66e9b29bff7 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -171,7 +171,7 @@ def test_repr_truncation(self): with option_context("display.max_colwidth", max_len + 2): assert "..." not in repr(df) - def test_repr_truncation_preserver_na(self): + def test_repr_truncation_preserves_na(self): # https://github.com/pandas-dev/pandas/issues/55630 with option_context("display.max_rows", 10): df = DataFrame( From b23a481f1c8705560fa51794ebe0597b7ee53042 Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Tue, 31 Oct 2023 21:48:37 +0100 Subject: [PATCH 5/8] running pre-commit --- pandas/io/formats/format.py | 4 +++- pandas/tests/io/formats/test_format.py | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 70bd92dc083c0..2408d188aa80d 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -698,7 +698,9 @@ def _truncate_vertically(self) -> None: if row_num >= 1: _len = len(self.tr_frame) rows = np.arange(_len) - self.tr_frame = self.tr_frame.iloc[(rows < row_num) | (rows >= _len - row_num)] + self.tr_frame = self.tr_frame.iloc[ + (rows < row_num) | (rows >= _len - row_num) + ] else: row_num = cast(int, self.max_rows) self.tr_frame = self.tr_frame.iloc[:row_num, :] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index fc66e9b29bff7..71b3e98a820a3 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -174,13 +174,11 @@ def test_repr_truncation(self): def test_repr_truncation_preserves_na(self): # https://github.com/pandas-dev/pandas/issues/55630 with option_context("display.max_rows", 10): - df = DataFrame( - {"A" : [pd.NA for _ in range(100)]} - ) + df = DataFrame({"A": [pd.NA for _ in range(100)]}) r = repr(df) for row in r.split("\n")[1:-2]: - assert row.endswith("") or row.endswith("...") + assert row.endswith(("", "...")) def test_max_colwidth_negative_int_raises(self): # Deprecation enforced from: From 6017dcc70757770628cad260f914bf073165c1e4 Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Wed, 1 Nov 2023 13:33:09 +0100 Subject: [PATCH 6/8] this is more efficient --- pandas/io/formats/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2408d188aa80d..9018b90e0b000 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -15,6 +15,7 @@ from decimal import Decimal from functools import partial from io import StringIO +from itertools import chain import math import re from shutil import get_terminal_size @@ -697,9 +698,8 @@ def _truncate_vertically(self) -> None: row_num = self.max_rows_fitted // 2 if row_num >= 1: _len = len(self.tr_frame) - rows = np.arange(_len) self.tr_frame = self.tr_frame.iloc[ - (rows < row_num) | (rows >= _len - row_num) + chain(range(row_num), range(_len - row_num, _len)) ] else: row_num = cast(int, self.max_rows) From 722abaa517bf379c213ec082dc3ae5316d0f76bd Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Thu, 2 Nov 2023 09:20:59 +0100 Subject: [PATCH 7/8] implemented using numpy --- pandas/io/formats/format.py | 6 ++---- pandas/tests/io/formats/test_format.py | 9 +++------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 9018b90e0b000..8c763cbab41b3 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -15,7 +15,6 @@ from decimal import Decimal from functools import partial from io import StringIO -from itertools import chain import math import re from shutil import get_terminal_size @@ -698,9 +697,8 @@ def _truncate_vertically(self) -> None: row_num = self.max_rows_fitted // 2 if row_num >= 1: _len = len(self.tr_frame) - self.tr_frame = self.tr_frame.iloc[ - chain(range(row_num), range(_len - row_num, _len)) - ] + _slice = np.hstack([np.arange(row_num), np.arange(_len - row_num, _len)]) + self.tr_frame = self.tr_frame.iloc[_slice] else: row_num = cast(int, self.max_rows) self.tr_frame = self.tr_frame.iloc[:row_num, :] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 71b3e98a820a3..fb94f9776f309 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -173,12 +173,9 @@ def test_repr_truncation(self): def test_repr_truncation_preserves_na(self): # https://github.com/pandas-dev/pandas/issues/55630 - with option_context("display.max_rows", 10): - df = DataFrame({"A": [pd.NA for _ in range(100)]}) - - r = repr(df) - for row in r.split("\n")[1:-2]: - assert row.endswith(("", "...")) + df = DataFrame({"a": [pd.NA for _ in range(100)]}) + with option_context("display.max_rows", 2, "display.show_dimensions", False): + assert repr(df) == (" a\n0 \n.. ...\n99 ") def test_max_colwidth_negative_int_raises(self): # Deprecation enforced from: From bf0d1b1647b0b10e6092a601b712a0a6e3d5034a Mon Sep 17 00:00:00 2001 From: Dominique Garmier Date: Fri, 3 Nov 2023 21:02:22 +0100 Subject: [PATCH 8/8] changed test size from 100 to 10 --- pandas/tests/io/formats/test_format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d57adf5c93647..218e371366456 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -173,9 +173,9 @@ def test_repr_truncation(self): def test_repr_truncation_preserves_na(self): # https://github.com/pandas-dev/pandas/issues/55630 - df = DataFrame({"a": [pd.NA for _ in range(100)]}) + df = DataFrame({"a": [pd.NA for _ in range(10)]}) with option_context("display.max_rows", 2, "display.show_dimensions", False): - assert repr(df) == (" a\n0 \n.. ...\n99 ") + assert repr(df) == " a\n0 \n.. ...\n9 " def test_max_colwidth_negative_int_raises(self): # Deprecation enforced from: