diff --git a/docs/backcompat.md b/docs/backcompat.md index 787df393d..8902d7a00 100644 --- a/docs/backcompat.md +++ b/docs/backcompat.md @@ -47,15 +47,26 @@ and deprecate the old one? The answer is...no! Narwhals offers a `stable` namespace, which allows you to write your code once and forget about it. That is to say, if you write your code like this: -```python -import narwhals.stable.v1 as nw -from narwhals.typing import FrameT +=== "from/to_native" + ```python + import narwhals.stable.v1 as nw + from narwhals.typing import IntoFrameT -@nw.narwhalify -def func(df: FrameT) -> FrameT: - return df.with_columns(nw.col("a").cum_sum()) -``` + def func(df: IntoFrameT) -> IntoFrameT: + return nw.from_native(df).with_columns(nw.col("a").cum_sum()).to_native() + ``` + +=== "@narwhalify" + ```python + import narwhals.stable.v1 as nw + from narwhals.typing import FrameT + + + @nw.narwhalify + def func(df: FrameT) -> FrameT: + return df.with_columns(nw.col("a").cum_sum()) + ``` then we, in Narwhals, promise that your code will keep working, even in newer versions of Polars after they have renamed their method. diff --git a/docs/basics/complete_example.md b/docs/basics/complete_example.md index d3b4ecfe4..061d040fb 100644 --- a/docs/basics/complete_example.md +++ b/docs/basics/complete_example.md @@ -22,31 +22,77 @@ doesn't either. We can specify that in the `@nw.narwhalify` decorator by setting `eager_only=True`, and the argument will be propagated to `nw.from_native`. 
-```python -import narwhals as nw -from typing import Any +=== "from/to_native" + ```python + from typing import Self + import narwhals as nw + from narwhals.typing import IntoDataFrameT + + + class StandardScaler: + def fit(self: Self, df: IntoDataFrameT) -> Self: + df_nw = nw.from_native(df, eager_only=True) + self._means = {col: df_nw[col].mean() for col in df_nw.columns} + self._std_devs = {col: df_nw[col].std() for col in df_nw.columns} + self._columns = df_nw.columns + return self + ``` + +=== "@narwhalify" + ```python + from typing import Self + import narwhals as nw + from narwhals.typing import DataFrameT -class StandardScaler: - @nw.narwhalify(eager_only=True) - def fit(self, df: nw.DataFrame[Any]) -> None: - self._means = {col: df[col].mean() for col in df.columns} - self._std_devs = {col: df[col].std() for col in df.columns} - self._columns = df.columns -``` + class StandardScaler: + @nw.narwhalify(eager_only=True) + def fit(self: Self, df: DataFrameT) -> Self: + self._means = {col: df[col].mean() for col in df.columns} + self._std_devs = {col: df[col].std() for col in df.columns} + self._columns = df.columns + return self + ``` ## Transform method -We're going to take in a dataframe, and return a dataframe of the same type. -Therefore, we use `@nw.narwhalify`: +We're going to take in a dataframe, and return a dataframe of the same type: + +=== "from/to_native" + ```python + from typing import Self + import narwhals as nw + from narwhals.typing import IntoFrameT + + + class StandardScaler: + ... 
+ + def transform(self: Self, df: IntoFrameT) -> IntoFrameT: + df_nw = nw.from_native(df) + return df_nw.with_columns( + (nw.col(col) - self._means[col]) / self._std_devs[col] + for col in self._columns + ).to_native() + ``` + +=== "@narwhalify" + ```python + from typing import Self + import narwhals as nw + from narwhals.typing import FrameT + -```python -@nw.narwhalify -def transform(self, df: FrameT) -> FrameT: - return df.with_columns( - (nw.col(col) - self._means[col]) / self._std_devs[col] for col in self._columns - ) -``` + class StandardScaler: + ... + + @nw.narwhalify + def transform(self: Self, df: FrameT) -> FrameT: + return df.with_columns( + (nw.col(col) - self._means[col]) / self._std_devs[col] + for col in self._columns + ) + ``` Note that all the calculations here can stay lazy if the underlying library permits it, so we don't pass in any extra keyword-arguments such as `eager_only`, we just use the @@ -55,34 +101,61 @@ default `eager_only=False`. ## Putting it all together Here is our dataframe-agnostic standard scaler: -```python exec="1" source="above" session="tute-ex1" -from typing import Any -import narwhals as nw -from narwhals.typing import FrameT - - -class StandardScaler: - @nw.narwhalify(eager_only=True) - def fit(self, df: nw.DataFrame[Any]) -> None: - self._means = {col: df[col].mean() for col in df.columns} - self._std_devs = {col: df[col].std() for col in df.columns} - self._columns = df.columns +=== "from/to_native" + ```python + from typing import Self + import narwhals as nw + from narwhals.typing import IntoDataFrameT + from narwhals.typing import IntoFrameT + + + class StandardScaler: + def fit(self: Self, df: IntoDataFrameT) -> Self: + df_nw = nw.from_native(df, eager_only=True) + self._means = {col: df_nw[col].mean() for col in df_nw.columns} + self._std_devs = {col: df_nw[col].std() for col in df_nw.columns} + self._columns = df_nw.columns + return self + + def transform(self: Self, df: IntoFrameT) -> IntoFrameT: + df_nw = 
nw.from_native(df) + return df_nw.with_columns( + (nw.col(col) - self._means[col]) / self._std_devs[col] + for col in self._columns + ).to_native() + ``` - @nw.narwhalify - def transform(self, df: FrameT) -> FrameT: - return df.with_columns( - (nw.col(col) - self._means[col]) / self._std_devs[col] - for col in self._columns - ) -``` +=== "@narwhalify" + ```python exec="1" source="above" session="standard-scaler-example" + from typing import Self + import narwhals as nw + from narwhals.typing import DataFrameT + from narwhals.typing import FrameT + + + class StandardScaler: + @nw.narwhalify(eager_only=True) + def fit(self: Self, df: DataFrameT) -> Self: + self._means = {col: df[col].mean() for col in df.columns} + self._std_devs = {col: df[col].std() for col in df.columns} + self._columns = df.columns + return self + + @nw.narwhalify + def transform(self: Self, df: FrameT) -> FrameT: + return df.with_columns( + (nw.col(col) - self._means[col]) / self._std_devs[col] + for col in self._columns + ) + ``` Next, let's try running it. Notice how, as `transform` doesn't use any eager-only features, so we can pass a Polars LazyFrame to it and have it stay lazy! === "pandas" - ```python exec="true" source="material-block" result="python" session="tute-ex1" + ```python exec="true" source="material-block" result="python" session="standard-scaler-example" import pandas as pd df_train = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 7]}) @@ -93,7 +166,7 @@ stay lazy! ``` === "Polars" - ```python exec="true" source="material-block" result="python" session="tute-ex1" + ```python exec="true" source="material-block" result="python" session="standard-scaler-example" import polars as pl df_train = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 7]}) diff --git a/docs/basics/dataframe.md b/docs/basics/dataframe.md index 41be8aebf..78b9bcce4 100644 --- a/docs/basics/dataframe.md +++ b/docs/basics/dataframe.md @@ -17,24 +17,42 @@ Let's explore this with some simple examples. 
## Example 1: descriptive statistics -Just like in Polars, we can pass expressions to -`DataFrame.select` or `LazyFrame.select`. +Just like in Polars, we can pass expressions to `DataFrame.select` or `LazyFrame.select`. Make a Python file with the following content: -```python exec="1" source="above" session="df_ex1" -import narwhals as nw -from narwhals.typing import FrameT +=== "from/to_native" + ```python exec="1" source="above" session="df_ex1" + import narwhals as nw + from narwhals.typing import IntoFrameT + + + def func(df: IntoFrameT) -> IntoFrameT: + return ( + nw.from_native(df) + .select( + a_sum=nw.col("a").sum(), + a_mean=nw.col("a").mean(), + a_std=nw.col("a").std(), + ) + .to_native() + ) + ``` +=== "@narwhalify" + ```python exec="1" source="above" session="df_ex1" + import narwhals as nw + from narwhals.typing import FrameT -@nw.narwhalify -def func(df: FrameT) -> FrameT: - return df.select( - a_sum=nw.col("a").sum(), - a_mean=nw.col("a").mean(), - a_std=nw.col("a").std(), - ) -``` + + @nw.narwhalify + def func(df: FrameT) -> FrameT: + return df.select( + a_sum=nw.col("a").sum(), + a_mean=nw.col("a").mean(), + a_std=nw.col("a").std(), + ) + ``` Let's try it out: @@ -70,42 +88,33 @@ Let's try it out: print(func(table)) ``` -Alternatively, we could have opted for the more explicit version: - -```python -import narwhals as nw -from narwhals.typing import IntoFrameT - - -def func(df_native: IntoFrameT) -> IntoFrameT: - df = nw.from_native(df_native) - df = df.select( - a_sum=nw.col("a").sum(), - a_mean=nw.col("a").mean(), - a_std=nw.col("a").std(), - ) - return nw.to_native(df) -``` - -Despite being more verbose, it has the advantage of preserving the type annotation of the native -object - see [typing](../api-reference/typing.md) for more details. - -In general, in this tutorial, we'll use the former. - ## Example 2: group-by and mean Just like in Polars, we can pass expressions to `GroupBy.agg`. 
Make a Python file with the following content: -```python exec="1" source="above" session="df_ex2" -import narwhals as nw -from narwhals.typing import FrameT +=== "from/to_native" + ```python exec="1" source="above" session="df_ex2" + import narwhals as nw + from narwhals.typing import IntoFrameT -@nw.narwhalify -def func(df: FrameT) -> FrameT: - return df.group_by("a").agg(nw.col("b").mean()).sort("a") -``` + def func(df: IntoFrameT) -> IntoFrameT: + return ( + nw.from_native(df).group_by("a").agg(nw.col("b").mean()).sort("a").to_native() + ) + ``` + +=== "@narwhalify" + ```python exec="1" source="above" session="df_ex2" + import narwhals as nw + from narwhals.typing import FrameT + + + @nw.narwhalify + def func(df: FrameT) -> FrameT: + return df.group_by("a").agg(nw.col("b").mean()).sort("a") + ``` Let's try it out: @@ -148,15 +157,30 @@ For example, we can compute a horizontal sum using `nw.sum_horizontal`. Make a Python file with the following content: -```python exec="1" source="above" session="df_ex3" -import narwhals as nw -from narwhals.typing import FrameT +=== "from/to_native" + ```python exec="1" source="above" session="df_ex3" + import narwhals as nw + from narwhals.typing import IntoFrameT -@nw.narwhalify -def func(df: FrameT) -> FrameT: - return df.with_columns(a_plus_b=nw.sum_horizontal("a", "b")) -``` + def func(df: IntoFrameT) -> IntoFrameT: + return ( + nw.from_native(df) + .with_columns(a_plus_b=nw.sum_horizontal("a", "b")) + .to_native() + ) + ``` + +=== "@narwhalify" + ```python exec="1" source="above" session="df_ex3" + import narwhals as nw + from narwhals.typing import FrameT + + + @nw.narwhalify + def func(df: FrameT) -> FrameT: + return df.with_columns(a_plus_b=nw.sum_horizontal("a", "b")) + ``` Let's try it out: @@ -203,13 +227,12 @@ on a series. 
Make a Python file with the following content: ```python exec="1" source="above" session="df_ex4" -from typing import Any - import narwhals as nw +from narwhals.typing import DataFrameT @nw.narwhalify(eager_only=True) -def func(df: nw.DataFrame[Any], s: nw.Series, col_name: str) -> int: +def func(df: DataFrameT, s: nw.Series, col_name: str) -> int: return df.filter(nw.col(col_name).is_in(s)).shape[0] ``` diff --git a/docs/basics/series.md b/docs/basics/series.md index f5d8dee16..4cfc0292c 100644 --- a/docs/basics/series.md +++ b/docs/basics/series.md @@ -11,20 +11,33 @@ to pass `eager_only=True` to `nw.from_native`. ## Example 1: filter based on a column's values -This can stay lazy, so we just use `nw.from_native` and expressions: +This can stay lazy, so we just use expressions: -```python exec="1" source="above" session="ex1" -import narwhals as nw -from narwhals.typing import FrameT +=== "from/to_native" + ```python exec="1" source="above" session="series_ex1" + import narwhals as nw + from narwhals.typing import IntoFrameT -@nw.narwhalify -def my_func(df: FrameT) -> FrameT: - return df.filter(nw.col("a") > 0) -``` + def my_func(df: IntoFrameT) -> IntoFrameT: + return nw.from_native(df).filter(nw.col("a") > 0).to_native() + ``` + +=== "@narwhalify" + ```python exec="1" source="above" session="series_ex1" + import narwhals as nw + from narwhals.typing import FrameT + + + @nw.narwhalify + def my_func(df: FrameT) -> FrameT: + return df.filter(nw.col("a") > 0) + ``` + +and call it either on an eager or lazy dataframe: === "pandas" - ```python exec="true" source="material-block" result="python" session="ex1" + ```python exec="true" source="material-block" result="python" session="series_ex1" import pandas as pd df = pd.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -32,7 +45,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "Polars (eager)" - ```python exec="true" source="material-block" result="python" session="ex1" + ```python exec="true" source="material-block" 
result="python" session="series_ex1" import polars as pl df = pl.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -40,7 +53,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "Polars (lazy)" - ```python exec="true" source="material-block" result="python" session="ex1" + ```python exec="true" source="material-block" result="python" session="series_ex1" import polars as pl df = pl.LazyFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -48,7 +61,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "PyArrow" - ```python exec="true" source="material-block" result="python" session="ex1" + ```python exec="true" source="material-block" result="python" session="series_ex1" import pyarrow as pa table = pa.table({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -60,18 +73,31 @@ def my_func(df: FrameT) -> FrameT: Let's write a dataframe-agnostic function which multiplies the values in column `'a'` by 2. This can also stay lazy, and can use expressions: -```python exec="1" source="above" session="ex2" -import narwhals as nw -from narwhals.typing import FrameT +=== "from/to_native" + ```python exec="1" source="above" session="series_ex2" + import narwhals as nw + from narwhals.typing import IntoFrameT -@nw.narwhalify -def my_func(df: FrameT) -> FrameT: - return df.with_columns(nw.col("a") * 2) -``` + def my_func(df: IntoFrameT) -> IntoFrameT: + return nw.from_native(df).with_columns(nw.col("a") * 2).to_native() + ``` + +=== "@narwhalify" + ```python exec="1" source="above" session="series_ex2" + import narwhals as nw + from narwhals.typing import FrameT + + + @nw.narwhalify + def my_func(df: FrameT) -> FrameT: + return df.with_columns(nw.col("a") * 2) + ``` + +and call it either on an eager or lazy dataframe: === "pandas" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex2" import pandas as pd df = pd.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -79,7 +105,7 @@ def my_func(df: FrameT) -> 
FrameT: ``` === "Polars (eager)" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex2" import polars as pl df = pl.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -87,7 +113,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "Polars (lazy)" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex2" import polars as pl df = pl.LazyFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -95,7 +121,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "PyArrow" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex2" import pyarrow as pa table = pa.table({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -105,7 +131,7 @@ def my_func(df: FrameT) -> FrameT: Note that column `'a'` was overwritten. If we had wanted to add a new column called `'c'` containing column `'a'`'s values multiplied by 2, we could have used `Expr.alias`: -```python exec="1" source="above" session="ex2.1" +```python exec="1" source="above" session="series_ex2.1" import narwhals as nw from narwhals.typing import FrameT @@ -116,7 +142,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "pandas" - ```python exec="true" source="material-block" result="python" session="ex2.1" + ```python exec="true" source="material-block" result="python" session="series_ex2.1" import pandas as pd df = pd.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -124,7 +150,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "Polars (eager)" - ```python exec="true" source="material-block" result="python" session="ex2.1" + ```python exec="true" source="material-block" result="python" session="series_ex2.1" import polars as pl df = pl.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -132,7 +158,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "Polars 
(lazy)" - ```python exec="true" source="material-block" result="python" session="ex2.1" + ```python exec="true" source="material-block" result="python" session="series_ex2.1" import polars as pl df = pl.LazyFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -140,7 +166,7 @@ def my_func(df: FrameT) -> FrameT: ``` === "PyArrow" - ```python exec="true" source="material-block" result="python" session="ex2.1" + ```python exec="true" source="material-block" result="python" session="series_ex2.1" import pyarrow as pa table = pa.table({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -151,21 +177,34 @@ def my_func(df: FrameT) -> FrameT: Now, we want to find the mean of column `'a'`, and we need it as a Python scalar. This means that computation cannot stay lazy - it must execute! -Therefore, we'll pass `eager_only=True` to `nw.narwhalify`, and then, instead -of using expressions, we'll extract a `Series`. +Therefore, we'll pass `eager_only=True` to `nw.from_native` (or `nw.narwhalify`), +and then, instead of using expressions, we'll extract a `Series`. 
-```python exec="1" source="above" session="ex2" -from __future__ import annotations -import narwhals as nw +=== "from/to_native" + ```python exec="1" source="above" session="series_ex3" + import narwhals as nw + from narwhals.typing import IntoDataFrameT -@nw.narwhalify(eager_only=True) -def my_func(df: nw.DataFrame) -> float | None: - return df["a"].mean() -``` + def my_func(df: IntoDataFrameT) -> float | None: + return nw.from_native(df, eager_only=True)["a"].mean() + ``` + +=== "@narwhalify" + ```python exec="1" source="above" session="series_ex3" + import narwhals as nw + from narwhals.typing import DataFrameT + + + @nw.narwhalify(eager_only=True) + def my_func(df: DataFrameT) -> float | None: + return df["a"].mean() + ``` + +Now we can call it on an eager dataframe only: === "pandas" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex3" import pandas as pd df = pd.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -173,7 +212,7 @@ def my_func(df: nw.DataFrame) -> float | None: ``` === "Polars (eager)" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex3" import polars as pl df = pl.DataFrame({"a": [-1, 1, 3], "b": [3, 5, -3]}) @@ -181,7 +220,7 @@ def my_func(df: nw.DataFrame) -> float | None: ``` === "PyArrow" - ```python exec="true" source="material-block" result="python" session="ex2" + ```python exec="true" source="material-block" result="python" session="series_ex3" import pyarrow as pa table = pa.table({"a": [-1, 1, 3], "b": [3, 5, -3]}) diff --git a/docs/extending.md b/docs/extending.md index 865a93b08..f05a733e4 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -25,18 +25,37 @@ Libraries for which we have full support can benefit from the whole For example: -```python exec="1" source="above" -import narwhals as nw -from 
narwhals.typing import FrameT - - -@nw.narwhalify -def func(df: FrameT) -> FrameT: - return df.group_by("a").agg( - b_mean=nw.col("b").mean(), - b_std=nw.col("b").std(), - ) -``` +=== "from/to_native" + ```python exec="1" source="above" + import narwhals as nw + from narwhals.typing import IntoFrameT + + + def func(df: IntoFrameT) -> IntoFrameT: + return ( + nw.from_native(df) + .group_by("a") + .agg( + b_mean=nw.col("b").mean(), + b_std=nw.col("b").std(), + ) + .to_native() + ) + ``` + +=== "@narwhalify" + ```python exec="1" source="above" + import narwhals as nw + from narwhals.typing import FrameT + + + @nw.narwhalify + def func(df: FrameT) -> FrameT: + return df.group_by("a").agg( + b_mean=nw.col("b").mean(), + b_std=nw.col("b").std(), + ) + ``` will work for any of pandas, Polars, cuDF, Modin, and PyArrow.