From d654823bf3d6b5246467968b588eaa6b4efe1218 Mon Sep 17 00:00:00 2001
From: Flytre <flytre7@gmail.com>
Date: Wed, 29 Nov 2023 10:55:38 -0500
Subject: [PATCH 1/2] Fix GH 55677: callable skiprows with chunksize in python parser

Added support in the python parser for using a callable skiprows together with chunksize, so that the documented API contract is met.

Added a regression test to ensure this bug can be quickly caught in the future if it reappears.

Signed-off-by: Flytre <flytre7@gmail.com>
---
 pandas/io/parsers/python_parser.py      | 21 ++++------
 pandas/tests/io/parser/test_skiprows.py | 56 ++++++++++++++++++-------
 2 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index fae3293414b02..742d9f6defc76 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -1117,18 +1117,15 @@ def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]:
                 new_rows = []
                 try:
                     if rows is not None:
-                        rows_to_skip = 0
-                        if self.skiprows is not None and self.pos is not None:
-                            # Only read additional rows if pos is in skiprows
-                            rows_to_skip = len(
-                                set(self.skiprows) - set(range(self.pos))
-                            )
-
-                        for _ in range(rows + rows_to_skip):
-                            # assert for mypy, data is Iterator[str] or None, would
-                            # error in next
-                            assert self.data is not None
-                            new_rows.append(next(self.data))
+                        row_index = 0
+                        row_ct = 0
+                        offset = self.pos if self.pos is not None else 0
+                        while row_ct < rows:
+                            new_row = next(self.data)
+                            if not self.skipfunc(offset + row_index):
+                                row_ct += 1
+                            row_index += 1
+                            new_rows.append(new_row)
 
                         len_new_rows = len(new_rows)
                         new_rows = self._remove_skipped_rows(new_rows)
diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py
index 9146af3f969e6..6ef66d2567fe1 100644
--- a/pandas/tests/io/parser/test_skiprows.py
+++ b/pandas/tests/io/parser/test_skiprows.py
@@ -99,11 +99,11 @@ def test_skip_rows_blank(all_parsers):
     [
         (
             """id,text,num_lines
-1,"line 11
-line 12",2
-2,"line 21
-line 22",2
-3,"line 31",1""",
+    1,"line 11
+    line 12",2
+    2,"line 21
+    line 22",2
+    3,"line 31",1""",
             {"skiprows": [1]},
             DataFrame(
                 [[2, "line 21\nline 22", 2], [3, "line 31", 1]],
@@ -156,23 +156,23 @@ def test_skip_row_with_quote(all_parsers):
     [
         (
             """id,text,num_lines
-1,"line \n'11' line 12",2
-2,"line \n'21' line 22",2
-3,"line \n'31' line 32",1""",
+    1,"line \n'11' line 12",2
+    2,"line \n'21' line 22",2
+    3,"line \n'31' line 32",1""",
             [[2, "line \n'21' line 22", 2], [3, "line \n'31' line 32", 1]],
         ),
         (
             """id,text,num_lines
-1,"line '11\n' line 12",2
-2,"line '21\n' line 22",2
-3,"line '31\n' line 32",1""",
+    1,"line '11\n' line 12",2
+    2,"line '21\n' line 22",2
+    3,"line '31\n' line 32",1""",
             [[2, "line '21\n' line 22", 2], [3, "line '31\n' line 32", 1]],
         ),
         (
             """id,text,num_lines
-1,"line '11\n' \r\tline 12",2
-2,"line '21\n' \r\tline 22",2
-3,"line '31\n' \r\tline 32",1""",
+    1,"line '11\n' \r\tline 12",2
+    2,"line '21\n' \r\tline 22",2
+    3,"line '31\n' \r\tline 32",1""",
             [[2, "line '21\n' \r\tline 22", 2], [3, "line '31\n' \r\tline 32", 1]],
         ),
     ],
@@ -301,3 +301,31 @@ def test_skip_rows_and_n_rows(all_parsers):
     result = parser.read_csv(StringIO(data), nrows=5, skiprows=[2, 4, 6])
     expected = DataFrame({"a": [1, 3, 5, 7, 8], "b": ["a", "c", "e", "g", "h"]})
     tm.assert_frame_equal(result, expected)
+
+
+@xfail_pyarrow
+def test_skip_rows_with_chunks(all_parsers):
+    # GH 55677: a callable skiprows must be honoured across chunk boundaries
+    data = """col_a
+10
+20
+30
+40
+50
+60
+70
+80
+90
+100
+"""
+    parser = all_parsers
+    reader = parser.read_csv(
+        StringIO(data), skiprows=lambda x: x in [1, 4, 5], chunksize=4
+    )
+    df1 = next(reader)
+    df2 = next(reader)
+
+    tm.assert_frame_equal(
+        df1, DataFrame({"col_a": [20, 30, 60, 70]}, index=[0, 1, 2, 3])
+    )
+    tm.assert_frame_equal(df2, DataFrame({"col_a": [80, 90, 100]}, index=[4, 5, 6]))

From f11d7a824499bd70283ad369eba183a5d76ad8dd Mon Sep 17 00:00:00 2001
From: Flytre <flytre7@gmail.com>
Date: Wed, 29 Nov 2023 11:14:40 -0500
Subject: [PATCH 2/2] Fix GH 55677: add whatsnew entry

Added support in the python parser for using a callable skiprows together with chunksize, so that the documented API contract is met.

Added a regression test to ensure this bug can be quickly caught in the future if it reappears.

Signed-off-by: Flytre <flytre7@gmail.com>
---
 doc/source/whatsnew/v2.2.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index efa4a52993a90..08fa594632259 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -407,6 +407,7 @@ MultiIndex
 
 I/O
 ^^^
+- Bug in :func:`read_csv` where ``engine="python"`` raised a ``TypeError`` when a callable ``skiprows`` and ``chunksize`` were both specified (:issue:`55677`)
 - Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
 - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
 - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)