From 45b937d64f6b7b6971856a47e379c7c87af7e00a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 19:21:54 +0100 Subject: [PATCH] BUG: scatter discarding string columns (#56142) * BUG: scatter discarding string columns * Add test --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/plotting/_matplotlib/core.py | 2 +- pandas/tests/plotting/frame/test_frame.py | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index fbff38aeefc51..d252c19a95d4a 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -546,8 +546,8 @@ Period Plotting ^^^^^^^^ - Bug in :meth:`DataFrame.plot.box` with ``vert=False`` and a matplotlib ``Axes`` created with ``sharey=True`` (:issue:`54941`) +- Bug in :meth:`DataFrame.plot.scatter` discarding string columns (:issue:`56142`) - Bug in :meth:`Series.plot` when reusing an ``ax`` object failing to raise when a ``how`` keyword is passed (:issue:`55953`) -- Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7096c1281a1b6..99314e60b7c00 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -688,7 +688,7 @@ def _compute_plot_data(self): # GH 18755, include object and category type for scatter plot if self._kind == "scatter": - include_type.extend(["object", "category"]) + include_type.extend(["object", "category", "string"]) numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index f1fc1174416ca..2a864abc5ea4a 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -12,6 +12,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from 
pandas.core.dtypes.api import is_list_like import pandas as pd @@ -22,6 +24,7 @@ Series, bdate_range, date_range, + option_context, plotting, ) import pandas._testing as tm @@ -794,13 +797,17 @@ def test_scatterplot_datetime_data(self, x, y): _check_plot_works(df.plot.scatter, x=x, y=y) + @pytest.mark.parametrize( + "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] + ) @pytest.mark.parametrize("x, y", [("a", "b"), (0, 1)]) @pytest.mark.parametrize("b_col", [[2, 3, 4], ["a", "b", "c"]]) - def test_scatterplot_object_data(self, b_col, x, y): + def test_scatterplot_object_data(self, b_col, x, y, infer_string): # GH 18755 - df = DataFrame({"a": ["A", "B", "C"], "b": b_col}) + with option_context("future.infer_string", infer_string): + df = DataFrame({"a": ["A", "B", "C"], "b": b_col}) - _check_plot_works(df.plot.scatter, x=x, y=y) + _check_plot_works(df.plot.scatter, x=x, y=y) @pytest.mark.parametrize("ordered", [True, False]) @pytest.mark.parametrize(