Skip to content

Commit

Permalink
ENH: Preserve attrs in to_dataframe()
Browse files Browse the repository at this point in the history
  • Loading branch information
snowman2 committed May 18, 2021
1 parent 49aa235 commit 505d983
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ New Features
By `Jimmy Westling <https://github.com/illviljan>`_.
- Raise more informative error when decoding time variables with invalid reference dates.
(:issue:`5199`, :pull:`5288`). By `Giacomo Caria <https://github.com/gcaria>`_.
- Preserve attrs in ``to_dataframe()`` (:issue:`5327`). By `Alan Snow <https://github.com/snowman2>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2714,7 +2714,9 @@ def to_dataframe(
ordered_dims = ds._normalize_dim_order(dim_order=dim_order)

df = ds._to_dataframe(ordered_dims)
attrs = df[unique_name].attrs
df.columns = [name if c == unique_name else c for c in df.columns]
df[name].attrs = attrs
return df

def to_series(self) -> pd.Series:
Expand Down
7 changes: 6 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5182,7 +5182,12 @@ def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
for k in columns
]
index = self.coords.to_index([*ordered_dims])
return pd.DataFrame(dict(zip(columns, data)), index=index)
pdf = pd.DataFrame(dict(zip(columns, data)), index=index)
# add attributes to dataframe
pdf.attrs = self.attrs
for column in columns:
pdf[column].attrs = self[column].attrs
return pdf

def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame:
"""Convert this dataset into a pandas.DataFrame.
Expand Down
14 changes: 14 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3681,6 +3681,20 @@ def test_to_dataframe_0length(self):
assert len(actual) == 0
assert_array_equal(actual.index.names, list("ABC"))

def test_to_dataframe__attrs(self):
    """The attrs of a DataArray are found on its column after ``to_dataframe()``."""
    expected_attrs = {"long_name": "Description of data array", "_FillValue": -1}
    data_array = DataArray(
        np.zeros((5, 5)),
        name="test",
        dims=("y", "x"),
        coords={"y": np.arange(1, 6), "x": np.arange(2, 7)},
        # Hand over a copy so the comparison below is by value, not identity.
        attrs=dict(expected_attrs),
    )
    frame = data_array.to_dataframe()
    first_column = frame.columns[0]
    assert frame[first_column].attrs == expected_attrs


def test_to_pandas_name_matches_coordinate(self):
# coordinate with same name as array
arr = DataArray([1, 2, 3], dims="x", name="x")
Expand Down
18 changes: 18 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4205,6 +4205,24 @@ def test_to_and_from_dataframe(self):
expected = pd.DataFrame([[]], index=idx)
assert expected.equals(actual), (expected, actual)

def test_to_dataframe__attrs(self):
    """Dataset-level and variable-level attrs are both found after ``to_dataframe()``."""
    variable_attrs = {"long_name": "Description of data array", "_FillValue": -1}
    dataset_attrs = {"test": "test"}
    # Copies are passed in so the assertions compare by value, not identity.
    dataset = Dataset(
        {"a": ("t", [1], dict(variable_attrs))},
        attrs=dict(dataset_attrs),
    )
    frame = dataset.to_dataframe()
    assert frame.attrs == dataset_attrs
    first_column = frame.columns[0]
    assert frame[first_column].attrs == variable_attrs


def test_from_dataframe_categorical(self):
cat = pd.CategoricalDtype(
categories=["foo", "bar", "baz", "qux", "quux", "corge"]
Expand Down

0 comments on commit 505d983

Please sign in to comment.