diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ff19ef4eae179..6bee9ef52446f 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -390,6 +390,7 @@ I/O - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) +- Bug in :meth:`pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) - diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 277f64f636731..69b514da32857 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -228,8 +228,10 @@ def _get_cell_string_value(self, cell) -> str: """ from odf.element import Element from odf.namespaces import TEXTNS + from odf.office import Annotation from odf.text import S + office_annotation = Annotation().qname text_s = S().qname value = [] @@ -239,6 +241,8 @@ def _get_cell_string_value(self, cell) -> str: if fragment.qname == text_s: spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) value.append(" " * spaces) + elif fragment.qname == office_annotation: + continue else: # recursive impl needed in case of nested fragments # with multiple spaces diff --git a/pandas/tests/io/data/excel/test_cell_annotation.ods b/pandas/tests/io/data/excel/test_cell_annotation.ods new file mode 100644 index 0000000000000..60246e6c7e4df Binary files /dev/null and b/pandas/tests/io/data/excel/test_cell_annotation.ods differ diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py index 9d49718bec357..46dc751fa499f 100644 --- a/pandas/tests/io/excel/test_odf.py +++ b/pandas/tests/io/excel/test_odf.py @@ -59,3 +59,14 @@ def test_read_unempty_cells(): result = pd.read_excel("test_unempty_cells.ods") tm.assert_frame_equal(result, expected) + + +def test_read_cell_annotation(): + expected = pd.DataFrame( + ["test"], + columns=["Column 1"], + ) + + result = pd.read_excel("test_cell_annotation.ods") + + tm.assert_frame_equal(result, expected)