ENH: Preserve attrs in to_dataframe()

pydata · May 18, 2021 · 505d983 · 505d983
1 parent 49aa235
commit 505d983
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 1 deletion.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -33,6 +33,7 @@ New Features
   By `Jimmy Westling <https://github.com/illviljan>`_.
 - Raise more informative error when decoding time variables with invalid reference dates.
   (:issue:`5199`, :pull:`5288`). By `Giacomo Caria <https://github.com/gcaria>`_.
+- Preserve attrs in ``to_dataframe()`` (:issue:`5327`). By `Alan Snow <https://github.com/snowman2>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -2714,7 +2714,9 @@ def to_dataframe(
             ordered_dims = ds._normalize_dim_order(dim_order=dim_order)
 
         df = ds._to_dataframe(ordered_dims)
+        attrs = df[unique_name].attrs
         df.columns = [name if c == unique_name else c for c in df.columns]
+        df[name].attrs = attrs
         return df
 
     def to_series(self) -> pd.Series:

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -5182,7 +5182,12 @@ def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
             for k in columns
         ]
         index = self.coords.to_index([*ordered_dims])
-        return pd.DataFrame(dict(zip(columns, data)), index=index)
+        pdf = pd.DataFrame(dict(zip(columns, data)), index=index)
+        # add attributes to dataframe
+        pdf.attrs = self.attrs
+        for column in columns:
+            pdf[column].attrs = self[column].attrs
+        return pdf
 
     def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame:
         """Convert this dataset into a pandas.DataFrame.

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -3681,6 +3681,20 @@ def test_to_dataframe_0length(self):
         assert len(actual) == 0
         assert_array_equal(actual.index.names, list("ABC"))
 
+    def test_to_dataframe__attrs(self):
+        arr = DataArray(
+            np.zeros((5, 5)),
+            name="test",
+            dims=("y", "x"),
+            coords={"y": np.arange(1, 6), "x": np.arange(2, 7)},
+            attrs={"long_name": "Description of data array", "_FillValue": -1},
+        )
+        df = arr.to_dataframe()
+        assert df[df.columns[0]].attrs == {
+            "long_name": "Description of data array",
+            "_FillValue": -1,
+        }
+
     def test_to_pandas_name_matches_coordinate(self):
         # coordinate with same name as array
         arr = DataArray([1, 2, 3], dims="x", name="x")

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -4205,6 +4205,24 @@ def test_to_and_from_dataframe(self):
         expected = pd.DataFrame([[]], index=idx)
         assert expected.equals(actual), (expected, actual)
 
+    def test_to_dataframe__attrs(self):
+        ds = Dataset(
+            {
+                "a": (
+                    "t",
+                    [1],
+                    {"long_name": "Description of data array", "_FillValue": -1},
+                )
+            },
+            attrs={"test": "test"},
+        )
+        df = ds.to_dataframe()
+        assert df.attrs == {"test": "test"}
+        assert df[df.columns[0]].attrs == {
+            "long_name": "Description of data array",
+            "_FillValue": -1,
+        }
+
     def test_from_dataframe_categorical(self):
         cat = pd.CategoricalDtype(
             categories=["foo", "bar", "baz", "qux", "quux", "corge"]