Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: replace na_rep for pd.NA values in to_string #54959

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7340,8 +7340,8 @@ def value_counts(
>>> df
first_name middle_name
0 John Smith
1 Anne <NA>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the usages that were changed should still use <NA> rather than NaN. This should only change if na_rep was specified as something else

Copy link
Contributor Author

@rsm-23 rsm-23 Oct 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The expected value in these tests is <NA>, but according to the docs it should be NaN, since that is the default value for na_rep. Somehow the documented behaviour is not what currently happens.

@mroeschke

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think since the current behavior has been the case for a few releases now, the default of na_rep="NaN" should be changed to na_rep=lib.no_default so that NaN, NA, and None can continue to use their reprs here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mroeschke the problem is that if I set na_rep=lib.no_default, the string representation will not contain any of NaN, NA or None. I have tried setting the default to <NA> and reverted most of the test cases; however, there are still issues in a couple of them.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using na_rep=lib.no_default would mean letting NaN, NA and None fall back to their default string representations, like:

if self.na_rep is lib.no_default:
    ...existing logic
else:
    ...use na_rep

2 John <NA>
1 Anne NaN
2 John NaN
3 Beth Louise

>>> df.value_counts()
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6907,9 +6907,9 @@ def convert_dtypes(
>>> dfn = df.convert_dtypes()
>>> dfn
a b c d e f
0 1 x True h 10 <NA>
1 2 y False i <NA> 100.5
2 3 z <NA> <NA> 20 200.0
0 1 x True h 10 NaN
1 2 y False i NaN 100.5
2 3 z NaN NaN 20 200.0

>>> dfn.dtypes
a Int32
Expand All @@ -6934,7 +6934,7 @@ def convert_dtypes(
>>> s.convert_dtypes()
0 a
1 b
2 <NA>
2 NaN
dtype: string
"""
check_dtype_backend(dtype_backend)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,10 @@ def isocalendar(self) -> DataFrame:
>>> ser.dt.isocalendar()
year week day
0 2009 53 5
1 <NA> <NA> <NA>
1 NaN NaN NaN
>>> ser.dt.isocalendar().week
0 53
1 <NA>
1 NaN
Name: week, dtype: UInt32
"""
return self._get_values().isocalendar().set_index(self._parent.index)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1298,7 +1298,7 @@ def _format(x):
if x is None:
return "None"
elif x is NA:
return str(NA)
return self.na_rep
elif x is NaT or np.isnat(x):
return "NaT"
except (TypeError, ValueError):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/boolean/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

def test_repr():
df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
expected = " A\n0 True\n1 False\n2 <NA>"
expected = " A\n0 True\n1 False\n2 NaN"
assert repr(df) == expected

expected = "0 True\n1 False\n2 <NA>\nName: A, dtype: boolean"
expected = "0 True\n1 False\n2 NaN\nName: A, dtype: boolean"
assert repr(df.A) == expected

expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/floating/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,5 @@ def test_repr_array_long():
def test_frame_repr(data_missing):
df = pd.DataFrame({"A": data_missing})
result = repr(df)
expected = " A\n0 <NA>\n1 0.1"
expected = " A\n0 NaN\n1 0.1"
assert result == expected
2 changes: 1 addition & 1 deletion pandas/tests/arrays/integer/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,5 @@ def test_repr_array_long():
def test_frame_repr(data_missing):
df = pd.DataFrame({"A": data_missing})
result = repr(df)
expected = " A\n0 <NA>\n1 1"
expected = " A\n0 NaN\n1 1"
assert result == expected
10 changes: 2 additions & 8 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,10 @@ def cls(dtype):

def test_repr(dtype):
df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
if dtype.storage == "pyarrow_numpy":
expected = " A\n0 a\n1 NaN\n2 b"
else:
expected = " A\n0 a\n1 <NA>\n2 b"
expected = " A\n0 a\n1 NaN\n2 b"
assert repr(df) == expected

if dtype.storage == "pyarrow_numpy":
expected = "0 a\n1 NaN\n2 b\nName: A, dtype: string"
else:
expected = "0 a\n1 <NA>\n2 b\nName: A, dtype: string"
expected = "0 a\n1 NaN\n2 b\nName: A, dtype: string"
assert repr(df.A) == expected

if dtype.storage == "pyarrow":
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/io/formats/test_to_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,9 @@ def test_nullable_float_to_string(float_ea_dtype):
result = s.to_string()
expected = dedent(
"""\
0 0.0
1 1.0
2 <NA>"""
0 0.0
1 1.0
2 NaN"""
)
assert result == expected

Expand All @@ -297,9 +297,9 @@ def test_nullable_int_to_string(any_int_ea_dtype):
result = s.to_string()
expected = dedent(
"""\
0 0
1 1
2 <NA>"""
0 0
1 1
2 NaN"""
)
assert result == expected

Expand Down