From bb9e2e96404390bd2d2888e5275ce39b07aa55f5 Mon Sep 17 00:00:00 2001
From: Daniel Weindl
Date: Fri, 3 Nov 2023 07:57:12 +0100
Subject: [PATCH] ENH: Allow passing `read_only`, `data_only` and `keep_links` arguments to openpyxl using `engine_kwargs`

Previously it was not possible to override the default values for
`openpyxl.reader.excel.load_workbook`'s `read_only`, `data_only` and
`keep_links` arguments (see #55027). Now these options can be changed
via `engine_kwargs`.

Closes #55027
---
 doc/source/whatsnew/v2.2.0.rst         |  1 +
 pandas/io/excel/_openpyxl.py           | 10 +++++++---
 pandas/tests/io/excel/test_openpyxl.py | 15 +++++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 26b5705a1f3db6..3893f4653ac1b6 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -81,6 +81,7 @@ Other enhancements
 - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
+- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
 - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
 
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index ca7e84f7d64766..81eec2f06cd9a5 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -567,11 +567,15 @@ def load_workbook(
     ) -> Workbook:
         from openpyxl import load_workbook
 
+        if engine_kwargs is None:
+            engine_kwargs = {}
+
+        engine_kwargs.setdefault("read_only", True)
+        engine_kwargs.setdefault("data_only", True)
+        engine_kwargs.setdefault("keep_links", False)
+
         return load_workbook(
             filepath_or_buffer,
-            read_only=True,
-            data_only=True,
-            keep_links=False,
             **engine_kwargs,
         )
 
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index da94c74f2303ec..c94e229b074b7b 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -130,6 +130,21 @@ def test_engine_kwargs_append_data_only(ext, data_only, expected):
             DataFrame().to_excel(writer, sheet_name="Sheet2")
 
 
+@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
+@pytest.mark.parametrize("kwarg_value", [True, False])
+def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
+    # GH 55027
+    # test that `read_only` and `data_only` can be passed to
+    # `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
+    from pandas.io.excel._openpyxl import OpenpyxlReader
+
+    filename = datapath("io", "data", "excel", "test1" + ext)
+    with contextlib.closing(
+        OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value})
+    ) as reader:
+        assert getattr(reader.book, kwarg_name) == kwarg_value
+
+
 @pytest.mark.parametrize(
     "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
 )