From 0f23a51569f27e481e209ce3ed57d002663e26e1 Mon Sep 17 00:00:00 2001 From: Phil Solimine <15682144+doctor-phil@users.noreply.github.com> Date: Tue, 5 Nov 2024 13:12:10 -0800 Subject: [PATCH] Update groupby.md --- lectures/pandas/groupby.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lectures/pandas/groupby.md b/lectures/pandas/groupby.md index fb3f249c..0b81b9e9 100644 --- a/lectures/pandas/groupby.md +++ b/lectures/pandas/groupby.md @@ -213,7 +213,7 @@ def smallest_by_b(df): ``` ```{code-cell} python -gbA.apply(smallest_by_b) +gbA.apply(smallest_by_b, include_groups=False) ``` Notice that the return value from applying our series transform to `gbA` @@ -250,7 +250,7 @@ index and a `Date` column added. df2 = df.copy() df2["Date"] = pd.date_range( start=pd.Timestamp.today().strftime("%m/%d/%Y"), - freq="BQ", + freq="BQE", periods=df.shape[0] ) df2 = df2.set_index("A") @@ -260,7 +260,7 @@ df2 We can group by year. ```{code-cell} python -df2.groupby(pd.Grouper(key="Date", freq="A")).count() +df2.groupby(pd.Grouper(key="Date", freq="YE")).count() ``` We can group by the `A` level of the index. @@ -272,14 +272,14 @@ df2.groupby(pd.Grouper(level="A")).count() We can combine these to group by both. ```{code-cell} python -df2.groupby([pd.Grouper(key="Date", freq="A"), pd.Grouper(level="A")]).count() +df2.groupby([pd.Grouper(key="Date", freq="YE"), pd.Grouper(level="A")]).count() ``` And we can combine `pd.Grouper` with a string, where the string denotes a column name ```{code-cell} python -df2.groupby([pd.Grouper(key="Date", freq="A"), "B"]).count() +df2.groupby([pd.Grouper(key="Date", freq="YE"), "B"]).count() ``` ## Case Study: Airline Delays