From 548c16e68b8b1cc5b6f3df3bf34c2d19d4b10123 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 18 Dec 2023 00:26:29 +0100 Subject: [PATCH 1/6] DOC: Update docstring for read_excel --- pandas/io/excel/_base.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2884294377ec9..ddf7c3db91d13 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -165,31 +165,12 @@ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", "calamine". Engine compatibility : - - ``xlr`` supports old-style Excel files (.xls). - ``openpyxl`` supports newer Excel file formats. - - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). - - ``pyxlsb`` supports Binary Excel files. - ``calamine`` supports Excel (.xls, .xlsx, .xlsm, .xlsb) and OpenDocument (.ods) file formats. - - .. versionchanged:: 1.2.0 - The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ - now only supports old-style ``.xls`` files. - When ``engine=None``, the following logic will be - used to determine the engine: - - - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), - then `odf <https://pypi.org/project/odfpy/>`_ will be used. - - Otherwise if ``path_or_buffer`` is an xls format, - ``xlrd`` will be used. - - Otherwise if ``path_or_buffer`` is in xlsb format, - ``pyxlsb`` will be used. - - .. versionadded:: 1.3.0 - - Otherwise ``openpyxl`` will be used. - - .. versionchanged:: 1.3.0 - + - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). + - ``pyxlsb`` supports Binary Excel files. + - ``xlrd`` supports old-style Excel files (.xls). converters : dict, default None Dict of functions for converting values in certain columns. 
Keys can either be integers or column labels, values are functions that take one From 0d17d622a006e3e42b792b78e896f93ced30412d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 3 Jan 2024 23:19:01 +0100 Subject: [PATCH 2/6] Update --- pandas/io/excel/_base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index ddf7c3db91d13..fe6d0b7c790ca 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -171,6 +171,13 @@ - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). - ``pyxlsb`` supports Binary Excel files. - ``xlrd`` supports old-style Excel files (.xls). + + When ``engine=None``, the following logic will be used to determine the engine: + + - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), + then `odf <https://pypi.org/project/odfpy/>`_ will be used. + - Otherwise if ``path_or_buffer`` is an xls format, ``xlrd`` will be used. + - Otherwise if ``path_or_buffer`` is in xlsb format, ``pyxlsb`` will be used. converters : dict, default None Dict of functions for converting values in certain columns. Keys can either be integers or column labels, values are functions that take one From 411eda77b8f3ba19dfbf4c4093779f36419c464c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 3 Jan 2024 23:21:20 +0100 Subject: [PATCH 3/6] Update --- pandas/io/excel/_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index e14e126bdd213..0343b21fa7622 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -160,9 +160,8 @@ If converters are specified, they will be applied INSTEAD of dtype conversion. If you use ``None``, it will infer the dtype of each column based on the data. -engine : str, default None +engine : {"xlrd", "openpyxl", "odf", "pyxlsb", "calamine"}, default None If io is not a buffer or path, this must be set to identify io. 
- Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", "calamine". Engine compatibility : - ``openpyxl`` supports newer Excel file formats. From 21d88bfb813a448956a31ec9a032f3f733f2f275 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 3 Jan 2024 23:24:13 +0100 Subject: [PATCH 4/6] Update user guide --- doc/source/user_guide/io.rst | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 6148086452d54..cf61735062d2f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3471,20 +3471,14 @@ saving a ``DataFrame`` to Excel. Generally the semantics are similar to working with :ref:`csv` data. See the :ref:`cookbook` for some advanced strategies. -.. warning:: - - The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading - old-style ``.xls`` files. +.. note:: - Before pandas 1.3.0, the default argument ``engine=None`` to :func:`~pandas.read_excel` - would result in using the ``xlrd`` engine in many cases, including new - Excel 2007+ (``.xlsx``) files. pandas will now default to using the - `openpyxl <https://openpyxl.readthedocs.io/>`__ engine. + When ``engine=None``, the following logic will be used to determine the engine: - It is strongly encouraged to install ``openpyxl`` to read Excel 2007+ - (``.xlsx``) files. - **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** - This is no longer supported, switch to using ``openpyxl`` instead. + - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), + then `odf <https://pypi.org/project/odfpy/>`_ will be used. + - Otherwise if ``path_or_buffer`` is an xls format, ``xlrd`` will be used. + - Otherwise if ``path_or_buffer`` is in xlsb format, ``pyxlsb`` will be used. .. 
_io.excel_reader: From b72d32b693f813c5761077cd7764695da8e664e4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 3 Jan 2024 23:24:56 +0100 Subject: [PATCH 5/6] Update user guide --- doc/source/user_guide/io.rst | 1 + pandas/io/excel/_base.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index cf61735062d2f..b3ad23e0d4104 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3479,6 +3479,7 @@ See the :ref:`cookbook` for some advanced strategies. then `odf <https://pypi.org/project/odfpy/>`_ will be used. - Otherwise if ``path_or_buffer`` is an xls format, ``xlrd`` will be used. - Otherwise if ``path_or_buffer`` is in xlsb format, ``pyxlsb`` will be used. + - Otherwise ``openpyxl`` will be used. .. _io.excel_reader: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 0343b21fa7622..6b787c0f4ca49 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -177,6 +177,7 @@ then `odf <https://pypi.org/project/odfpy/>`_ will be used. - Otherwise if ``path_or_buffer`` is an xls format, ``xlrd`` will be used. - Otherwise if ``path_or_buffer`` is in xlsb format, ``pyxlsb`` will be used. + - Otherwise ``openpyxl`` will be used. converters : dict, default None Dict of functions for converting values in certain columns. Keys can either be integers or column labels, values are functions that take one From c39f1544b69478d1d68652baa39537cca2d87593 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 3 Jan 2024 23:31:50 +0100 Subject: [PATCH 6/6] Fixup --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6b787c0f4ca49..786f719337b84 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -160,7 +160,7 @@ If converters are specified, they will be applied INSTEAD of dtype conversion. If you use ``None``, it will infer the dtype of each column based on the data. 
-engine : {"xlrd", "openpyxl", "odf", "pyxlsb", "calamine"}, default None +engine : {{'openpyxl', 'calamine', 'odf', 'pyxlsb', 'xlrd'}}, default None If io is not a buffer or path, this must be set to identify io. Engine compatibility :