From 68de3ac5e1383703694b84a4c1800d5bcd9b9ce4 Mon Sep 17 00:00:00 2001 From: AlessandroMiola Date: Fri, 29 Nov 2024 08:06:41 +0100 Subject: [PATCH] docs: clean lazyframe docstrings --- narwhals/dataframe.py | 335 +++++++++++++++++++++++++----------------- 1 file changed, 197 insertions(+), 138 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 245822bba..b4d01ee11 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -2971,9 +2971,10 @@ def to_native(self) -> FrameT: >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw + >>> >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) >>> df_pa = pa.table(data) Calling `to_native` on a Narwhals DataFrame returns the native object: @@ -2983,7 +2984,7 @@ def to_native(self) -> FrameT: 0 1 6.0 a 1 2 7.0 b 2 3 8.0 c - >>> nw.from_native(df_pl).to_native().collect() + >>> nw.from_native(lf_pl).to_native().collect() shape: (3, 3) ┌─────┬─────┬─────┐ │ foo ┆ bar ┆ ham │ @@ -3009,7 +3010,7 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se >>> >>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]} >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) Let's define a dataframe-agnostic function: @@ -3024,7 +3025,7 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se 0 1 1 2 2 3 - >>> agnostic_pipe(df_pl).collect() + >>> agnostic_pipe(lf_pl).collect() shape: (3, 1) ┌─────┐ │ a │ @@ -3057,7 +3058,7 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: >>> >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) Let's define a dataframe-agnostic function: @@ -3070,7 +3071,7 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: >>> agnostic_drop_nulls(df_pd) a ba 0 1.0 1.0 - >>> agnostic_drop_nulls(df_pl).collect() + >>> agnostic_drop_nulls(lf_pl).collect() shape: (1, 2) ┌─────┬─────┐ │ a ┆ ba │ @@ -3093,7 +3094,7 @@ def with_row_index(self, name: str = "index") -> Self: >>> >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) Let's define a dataframe-agnostic function: @@ -3108,7 +3109,7 @@ def with_row_index(self, name: str = "index") -> Self: 0 0 1 4 1 1 2 5 2 2 3 6 - >>> agnostic_with_row_index(df_pl).collect() + >>> agnostic_with_row_index(lf_pl).collect() shape: (3, 3) ┌───────┬─────┬─────┐ │ index ┆ a ┆ b │ @@ -3216,6 +3217,8 @@ def with_columns( >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> df = { ... "a": [1, 2, 3, 4], ... "b": [0.5, 4, 10, 13], @@ -3228,19 +3231,19 @@ def with_columns( Let's define a dataframe-agnostic function in which we pass an expression to add it as a new column: - >>> @nw.narwhalify - ... def func(df): - ... return df.with_columns((nw.col("a") * 2).alias("2a")) + >>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns((nw.col("a") * 2).alias("2a")).to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_with_columns(df_pd) a b c 2a 0 1 0.5 True 2 1 2 4.0 True 4 2 3 10.0 False 6 3 4 13.0 True 8 - >>> func(df_pl) + >>> agnostic_with_columns(df_pl) shape: (4, 4) ┌─────┬──────┬───────┬─────┐ │ a ┆ b ┆ c ┆ 2a │ @@ -3252,7 +3255,7 @@ def with_columns( │ 3 ┆ 10.0 ┆ false ┆ 6 │ │ 4 ┆ 13.0 ┆ true ┆ 8 │ └─────┴──────┴───────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_with_columns(lf_pl).collect() shape: (4, 4) ┌─────┬──────┬───────┬─────┐ │ a ┆ b ┆ c ┆ 2a │ @@ -3290,6 +3293,8 @@ def select( >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> df = { ... "foo": [1, 2, 3], ... "bar": [6, 7, 8], @@ -3302,18 +3307,18 @@ def select( Let's define a dataframe-agnostic function in which we pass the name of a column to select that column. - >>> @nw.narwhalify - ... def func(df): - ... return df.select("foo") + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select("foo").to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_select(df_pd) foo 0 1 1 2 2 3 - >>> func(df_pl) + >>> agnostic_select(df_pl) shape: (3, 1) ┌─────┐ │ foo │ @@ -3324,7 +3329,7 @@ def select( │ 2 │ │ 3 │ └─────┘ - >>> func(lf_pl).collect() + >>> agnostic_select(lf_pl).collect() shape: (3, 1) ┌─────┐ │ foo │ @@ -3338,15 +3343,16 @@ def select( Multiple columns can be selected by passing a list of column names. - >>> @nw.narwhalify - ... def func(df): - ... return df.select(["foo", "bar"]) - >>> func(df_pd) + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(["foo", "bar"]).to_native() + >>> + >>> agnostic_select(df_pd) foo bar 0 1 6 1 2 7 2 3 8 - >>> func(df_pl) + >>> agnostic_select(df_pl) shape: (3, 2) ┌─────┬─────┐ │ foo ┆ bar │ @@ -3357,7 +3363,7 @@ def select( │ 2 ┆ 7 │ │ 3 ┆ 8 │ └─────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_select(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ │ foo ┆ bar │ @@ -3372,15 +3378,16 @@ def select( Multiple columns can also be selected using positional arguments instead of a list. Expressions are also accepted. - >>> @nw.narwhalify - ... def func(df): - ... return df.select(nw.col("foo"), nw.col("bar") + 1) - >>> func(df_pd) + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("foo"), nw.col("bar") + 1).to_native() + >>> + >>> agnostic_select(df_pd) foo bar 0 1 7 1 2 8 2 3 9 - >>> func(df_pl) + >>> agnostic_select(df_pl) shape: (3, 2) ┌─────┬─────┐ │ foo ┆ bar │ @@ -3391,7 +3398,7 @@ def select( │ 2 ┆ 8 │ │ 3 ┆ 9 │ └─────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_select(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ │ foo ┆ bar │ @@ -3405,15 +3412,16 @@ def select( Use keyword arguments to easily name your expression inputs. - >>> @nw.narwhalify - ... def func(df): - ... return df.select(threshold=nw.col("foo") * 2) - >>> func(df_pd) + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(threshold=nw.col("foo") * 2).to_native() + >>> + >>> agnostic_select(df_pd) threshold 0 2 1 4 2 6 - >>> func(df_pl) + >>> agnostic_select(df_pl) shape: (3, 1) ┌───────────┐ │ threshold │ @@ -3424,7 +3432,7 @@ def select( │ 4 │ │ 6 │ └───────────┘ - >>> func(lf_pl).collect() + >>> agnostic_select(lf_pl).collect() shape: (3, 1) ┌───────────┐ │ threshold │ @@ -3450,24 +3458,26 @@ def rename(self, mapping: dict[str, str]) -> Self: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) We define a library agnostic function: - >>> @nw.narwhalify - ... def func(df): - ... return df.rename({"foo": "apple"}) + >>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.rename({"foo": "apple"}).to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_rename(df_pd) apple bar ham 0 1 6 a 1 2 7 b 2 3 8 c - >>> func(lf_pl).collect() + >>> agnostic_rename(lf_pl).collect() shape: (3, 3) ┌───────┬─────┬─────┐ │ apple ┆ bar ┆ ham │ @@ -3491,6 +3501,8 @@ def head(self, n: int = 5) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = { ... "a": [1, 2, 3, 4, 5, 6], ... "b": [7, 8, 9, 10, 11, 12], @@ -3501,18 +3513,18 @@ def head(self, n: int = 5) -> Self: Let's define a dataframe-agnostic function that gets the first 3 rows. - >>> @nw.narwhalify - ... def func(df): - ... return df.head(3) + >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.head(3).to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_head(df_pd) a b 0 1 7 1 2 8 2 3 9 - >>> func(df_pl) + >>> agnostic_head(df_pl) shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -3523,7 +3535,7 @@ def head(self, n: int = 5) -> Self: │ 2 ┆ 8 │ │ 3 ┆ 9 │ └─────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_head(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -3547,6 +3559,8 @@ def tail(self, n: int = 5) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = { ... "a": [1, 2, 3, 4, 5, 6], ... "b": [7, 8, 9, 10, 11, 12], @@ -3557,18 +3571,18 @@ def tail(self, n: int = 5) -> Self: Let's define a dataframe-agnostic function that gets the last 3 rows. - >>> @nw.narwhalify - ... def func(df): - ... return df.tail(3) + >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.tail(3).to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_tail(df_pd) a b 3 4 10 4 5 11 5 6 12 - >>> func(df_pl) + >>> agnostic_tail(df_pl) shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -3579,7 +3593,7 @@ def tail(self, n: int = 5) -> Self: │ 5 ┆ 11 │ │ 6 ┆ 12 │ └─────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_tail(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -3610,24 +3624,26 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) We define a library agnostic function: - >>> @nw.narwhalify - ... def func(df): - ... return df.drop("ham") + >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.drop("ham").to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_drop(df_pd) foo bar 0 1 6.0 1 2 7.0 2 3 8.0 - >>> func(lf_pl).collect() + >>> agnostic_drop(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ │ foo ┆ bar │ @@ -3641,16 +3657,16 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: Use positional arguments to drop multiple columns. - >>> @nw.narwhalify - ... def func(df): - ... return df.drop("foo", "ham") + >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.drop("foo", "ham").to_native() - >>> func(df_pd) + >>> agnostic_drop(df_pd) bar 0 6.0 1 7.0 2 8.0 - >>> func(lf_pl).collect() + >>> agnostic_drop(lf_pl).collect() shape: (3, 1) ┌─────┐ │ bar │ @@ -3695,6 +3711,8 @@ def unique( >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> data = { ... "foo": [1, 2, 3, 1], ... "bar": ["a", "a", "a", "a"], @@ -3705,16 +3723,16 @@ def unique( We define a library agnostic function: - >>> @nw.narwhalify - ... def func(df): - ... return df.unique(["bar", "ham"]) + >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.unique(["bar", "ham"]).to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_unique(df_pd) foo bar ham 0 1 a b - >>> func(lf_pl).collect() + >>> agnostic_unique(lf_pl).collect() shape: (1, 3) ┌─────┬─────┬─────┐ │ foo ┆ bar ┆ ham │ @@ -3744,7 +3762,7 @@ def filter( >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame + >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "foo": [1, 2, 3], @@ -3758,7 +3776,7 @@ def filter( Let's define a dataframe-agnostic function in which we filter on one condition. - >>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame: + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.filter(nw.col("foo") > 1).to_native() @@ -3791,10 +3809,10 @@ def filter( Filter on multiple conditions: - - >>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame: + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native() + >>> >>> agnostic_filter(df_pd) foo bar ham 0 1 6 a @@ -3819,13 +3837,13 @@ def filter( Provide multiple filters using `*args` syntax: - >>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame: + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... dframe = df.filter( + ... return df.filter( ... nw.col("foo") == 1, ... nw.col("ham") == "a", - ... ) - ... return dframe.to_native() + ... ).to_native() + >>> >>> agnostic_filter(df_pd) foo bar ham 0 1 6 a @@ -3850,12 +3868,12 @@ def filter( Filter on an OR condition: - >>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame: + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... dframe = df.filter( + ... return df.filter( ... (nw.col("foo") == 1) | (nw.col("ham") == "c") ... ).to_native() - ... return dframe + >>> >>> agnostic_filter(df_pd) foo bar ham 0 1 6 a @@ -3883,9 +3901,10 @@ def filter( Provide multiple filters using `**kwargs` syntax: - >>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame: + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.filter(foo=2, ham="b").to_native() + >>> >>> agnostic_filter(df_pd) foo bar ham 1 2 7 b @@ -3929,6 +3948,8 @@ def group_by( >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> df = { ... "a": ["a", "b", "a", "b", "c"], ... "b": [1, 2, 1, 3, 3], @@ -3941,18 +3962,18 @@ def group_by( Let's define a dataframe-agnostic function in which we group by one column and call `agg` to compute the grouped sum of another column. - >>> @nw.narwhalify - ... def func(df): - ... return df.group_by("a").agg(nw.col("b").sum()).sort("a") + >>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.group_by("a").agg(nw.col("b").sum()).sort("a").to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_group_by_agg(df_pd) a b 0 a 2 1 b 5 2 c 3 - >>> func(df_pl) + >>> agnostic_group_by_agg(df_pl) shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -3963,7 +3984,7 @@ def group_by( │ b ┆ 5 │ │ c ┆ 3 │ └─────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_group_by_agg(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -3977,16 +3998,19 @@ def group_by( Group by multiple columns by passing a list of column names. - >>> @nw.narwhalify - ... def func(df): - ... return df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"]) - >>> func(df_pd) + >>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"]).to_native() + ... ) + >>> + >>> agnostic_group_by_agg(df_pd) a b c 0 a 1 5 1 b 2 4 2 b 3 2 3 c 3 1 - >>> func(df_pl) + >>> agnostic_group_by_agg(df_pl) shape: (4, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -3998,7 +4022,7 @@ def group_by( │ b ┆ 3 ┆ 2 │ │ c ┆ 3 ┆ 1 │ └─────┴─────┴─────┘ - >>> func(lf_pl).collect() + >>> agnostic_group_by_agg(lf_pl).collect() shape: (4, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -4041,29 +4065,31 @@ def sort( >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = { ... "a": [1, 2, None], ... "b": [6.0, 5.0, 4.0], ... "c": ["a", "c", "b"], ... } >>> df_pd = pd.DataFrame(data) - >>> df_lf = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) Let's define a dataframe-agnostic function in which we sort by multiple columns in different orders - >>> @nw.narwhalify - ... def func(df): - ... return df.sort("c", "a", descending=[False, True]) + >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.sort("c", "a", descending=[False, True]).to_native() We can then pass either pandas or Polars to `func`: - >>> func(df_pd) + >>> agnostic_sort(df_pd) a b c 0 1.0 6.0 a 2 NaN 4.0 b 1 2.0 5.0 c - >>> func(df_lf).collect() + >>> agnostic_sort(lf_pl).collect() shape: (3, 3) ┌──────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -4111,6 +4137,8 @@ def join( >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = { ... "foo": [1, 2, 3], ... "bar": [6.0, 7.0, 8.0], @@ -4124,23 +4152,27 @@ def join( >>> df_pd = pd.DataFrame(data) >>> other_pd = pd.DataFrame(data_other) - >>> df_pl = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) >>> other_pl = pl.LazyFrame(data_other) Let's define a dataframe-agnostic function in which we join over "ham" column: - >>> @nw.narwhalify - ... def join_on_ham(df, other_any): - ... return df.join(other_any, left_on="ham", right_on="ham") + >>> def agnostic_join_on_ham( + ... df_native: IntoFrameT, + ... other_any_native: IntoFrameT, + ... ) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... other_any = nw.from_native(other_any_native) + ... return df.join(other_any, left_on="ham", right_on="ham").to_native() We can now pass either pandas or Polars to the function: - >>> join_on_ham(df_pd, other_pd) + >>> agnostic_join_on_ham(df_pd, other_pd) foo bar ham apple 0 1 6.0 a x 1 2 7.0 b y - >>> join_on_ham(df_pl, other_pl).collect() + >>> agnostic_join_on_ham(lf_pl, other_pl).collect() shape: (2, 4) ┌─────┬─────┬─────┬───────┐ │ foo ┆ bar ┆ ham ┆ apple │ @@ -4202,6 +4234,9 @@ def join_asof( >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from typing import Literal + >>> from narwhals.typing import IntoFrameT + >>> >>> data_gdp = { ... "datetime": [ ... datetime(2016, 1, 1), @@ -4227,19 +4262,28 @@ def join_asof( Let's define a dataframe-agnostic function in which we join over "datetime" column: - >>> @nw.narwhalify - ... def join_asof_datetime(df, other_any, strategy): - ... return df.join_asof(other_any, on="datetime", strategy=strategy) + >>> def agnostic_join_asof_datetime( + ... df_native: IntoFrameT, + ... other_any_native: IntoFrameT, + ... strategy: Literal["backward", "forward", "nearest"], + ... ) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... other_any = nw.from_native(other_any_native) + ... return df.join_asof( + ... other_any, on="datetime", strategy=strategy + ... ).to_native() We can now pass either pandas or Polars to the function: - >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") + >>> agnostic_join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 1 2018-08-01 82.66 4566 2 2019-01-01 83.12 4696 - >>> join_asof_datetime(population_pl, gdp_pl, strategy="backward").collect() + >>> agnostic_join_asof_datetime( + ... population_pl, gdp_pl, strategy="backward" + ... ).collect() shape: (3, 3) ┌─────────────────────┬────────────┬──────┐ │ datetime ┆ population ┆ gdp │ @@ -4257,6 +4301,8 @@ def join_asof( >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data_quotes = { ... "datetime": [ ... datetime(2016, 5, 25, 13, 30, 0, 23), @@ -4300,13 +4346,17 @@ def join_asof( Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: - >>> @nw.narwhalify - ... def join_asof_datetime_by_ticker(df, other_any): - ... return df.join_asof(other_any, on="datetime", by="ticker") + >>> def agnostic_join_asof_datetime_by_ticker( + ... df_native: IntoFrameT, + ... other_any_native: IntoFrameT, + ... ) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... other_any = nw.from_native(other_any_native) + ... return df.join_asof(other_any, on="datetime", by="ticker").to_native() We can now pass either pandas or Polars to the function: - >>> join_asof_datetime_by_ticker(trades_pd, quotes_pd) + >>> agnostic_join_asof_datetime_by_ticker(trades_pd, quotes_pd) datetime ticker price quantity bid ask 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98 @@ -4314,7 +4364,7 @@ def join_asof( 3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93 4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN - >>> join_asof_datetime_by_ticker(trades_pl, quotes_pl).collect() + >>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl).collect() shape: (5, 6) ┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐ │ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │ @@ -4346,22 +4396,24 @@ def clone(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = {"a": [1, 2], "b": [3, 4]} >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.LazyFrame(data) + >>> lf_pl = pl.LazyFrame(data) Let's define a dataframe-agnostic function in which we copy the DataFrame: - >>> @nw.narwhalify - ... def func(df): - ... return df.clone() + >>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.clone().to_native() - >>> func(df_pd) + >>> agnostic_clone(df_pd) a b 0 1 3 1 2 4 - >>> func(df_pl).collect() + >>> agnostic_clone(lf_pl).collect() shape: (2, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -4385,24 +4437,26 @@ def lazy(self) -> Self: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> >>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} >>> df_pd = pd.DataFrame(df) - >>> df_pl = pl.LazyFrame(df) + >>> lf_pl = pl.LazyFrame(df) We define a library agnostic function: - >>> @nw.narwhalify - ... def func(df): - ... return df.lazy() + >>> def agnostic_lazy(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.lazy().to_native() Note that then, pandas dataframe stay eager, and the Polars LazyFrame stays lazy: - >>> func(df_pd) + >>> agnostic_lazy(df_pd) foo bar ham 0 1 6.0 a 1 2 7.0 b 2 3 8.0 c - >>> func(df_pl) + >>> agnostic_lazy(lf_pl) """ return self @@ -4418,6 +4472,8 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) @@ -4425,16 +4481,16 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: Let's define a dataframe-agnostic function in which gather every 2 rows, starting from a offset of 1: - >>> @nw.narwhalify - ... def func(df): - ... return df.gather_every(n=2, offset=1) + >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.gather_every(n=2, offset=1).to_native() - >>> func(df_pd) + >>> agnostic_gather_every(df_pd) a b 1 2 6 3 4 8 - >>> func(lf_pl).collect() + >>> agnostic_gather_every(lf_pl).collect() shape: (2, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -4479,21 +4535,24 @@ def unpivot( Examples: >>> import narwhals as nw >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> >>> data = { ... "a": ["x", "y", "z"], ... "b": [1, 3, 5], ... "c": [2, 4, 6], ... } + >>> lf_pl = pl.LazyFrame(data) We define a library agnostic function: - >>> @nw.narwhalify - ... def func(lf): + >>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) ... return ( - ... lf.unpivot(on=["b", "c"], index="a").sort(["variable", "a"]).collect() - ... ) + ... df.unpivot(on=["b", "c"], index="a").sort(["variable", "a"]) + ... ).to_native() - >>> func(pl.LazyFrame(data)) + >>> agnostic_unpivot(lf_pl).collect() shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │