Skip to content

Commit

Permalink
docs: add from/to_native in documentation examples (#1461)
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi authored Nov 29, 2024
1 parent 98f980f commit ea1a64f
Show file tree
Hide file tree
Showing 5 changed files with 316 additions and 151 deletions.
25 changes: 18 additions & 7 deletions docs/backcompat.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,26 @@ and deprecate the old one? The answer is...no!
Narwhals offers a `stable` namespace, which allows you to write your code once and forget about
it. That is to say, if you write your code like this:

```python
import narwhals.stable.v1 as nw
from narwhals.typing import FrameT
=== "from/to_native"
```python
import narwhals.stable.v1 as nw
from narwhals.typing import IntoFrameT


@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.with_columns(nw.col("a").cum_sum())
```
def func(df: IntoFrameT) -> IntoFrameT:
return nw.from_native(df).with_columns(nw.col("a").cum_sum()).to_native()
```

=== "@narwhalify"
```python
import narwhals.stable.v1 as nw
from narwhals.typing import FrameT


@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.with_columns(nw.col("a").cum_sum())
```

then we, in Narwhals, promise that your code will keep working, even in newer versions of Polars
after they have renamed their method.
Expand Down
153 changes: 113 additions & 40 deletions docs/basics/complete_example.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,77 @@ doesn't either.
We can specify that in the `@nw.narwhalify` decorator by setting `eager_only=True`, and
the argument will be propagated to `nw.from_native`.

```python
import narwhals as nw
from typing import Any
=== "from/to_native"
```python
from typing import Self
import narwhals as nw
from narwhals.typing import IntoDataFrameT


class StandardScaler:
def fit(self: Self, df: IntoDataFrameT) -> Self:
df_nw = nw.from_native(df, eager_only=True)
self._means = {col: df_nw[col].mean() for col in df_nw.columns}
self._std_devs = {col: df_nw[col].std() for col in df_nw.columns}
self._columns = df_nw.columns
return self
```

=== "@narwhalify"
```python
from typing import Self
import narwhals as nw
from narwhals.typing import DataFrameT


class StandardScaler:
@nw.narwhalify(eager_only=True)
def fit(self, df: nw.DataFrame[Any]) -> None:
self._means = {col: df[col].mean() for col in df.columns}
self._std_devs = {col: df[col].std() for col in df.columns}
self._columns = df.columns
```
class StandardScaler:
@nw.narwhalify(eager_only=True)
def fit(self: Self, df: DataFrameT) -> Self:
self._means = {col: df[col].mean() for col in df.columns}
self._std_devs = {col: df[col].std() for col in df.columns}
self._columns = df.columns
return self
```

## Transform method

We're going to take in a dataframe, and return a dataframe of the same type.
Therefore, we use `@nw.narwhalify`:
We're going to take in a dataframe, and return a dataframe of the same type:

=== "from/to_native"
```python
from typing import Self
import narwhals as nw
from narwhals.typing import IntoFrameT


class StandardScaler:
...

def transform(self: Self, df: IntoFrameT) -> IntoFrameT:
df_nw = nw.from_native(df)
return df_nw.with_columns(
(nw.col(col) - self._means[col]) / self._std_devs[col]
for col in self._columns
).to_native()
```

=== "@narwhalify"
```python
from typing import Self
import narwhals as nw
from narwhals.typing import FrameT


```python
@nw.narwhalify
def transform(self, df: FrameT) -> FrameT:
return df.with_columns(
(nw.col(col) - self._means[col]) / self._std_devs[col] for col in self._columns
)
```
class StandardScaler:
...

@nw.narwhalify
def transform(self: Self, df: FrameT) -> FrameT:
return df.with_columns(
(nw.col(col) - self._means[col]) / self._std_devs[col]
for col in self._columns
)
```

Note that all the calculations here can stay lazy if the underlying library permits it,
so we don't pass in any extra keyword arguments such as `eager_only`; we just use the
Expand All @@ -55,34 +101,61 @@ default `eager_only=False`.
## Putting it all together

Here is our dataframe-agnostic standard scaler:
```python exec="1" source="above" session="tute-ex1"
from typing import Any

import narwhals as nw
from narwhals.typing import FrameT


class StandardScaler:
@nw.narwhalify(eager_only=True)
def fit(self, df: nw.DataFrame[Any]) -> None:
self._means = {col: df[col].mean() for col in df.columns}
self._std_devs = {col: df[col].std() for col in df.columns}
self._columns = df.columns
=== "from/to_native"
```python
from typing import Self
import narwhals as nw
from narwhals.typing import IntoDataFrameT
from narwhals.typing import IntoFrameT


class StandardScaler:
def fit(self: Self, df: IntoDataFrameT) -> Self:
df_nw = nw.from_native(df, eager_only=True)
self._means = {col: df_nw[col].mean() for col in df_nw.columns}
self._std_devs = {col: df_nw[col].std() for col in df_nw.columns}
self._columns = df_nw.columns
return self

def transform(self: Self, df: IntoFrameT) -> IntoFrameT:
df_nw = nw.from_native(df)
return df_nw.with_columns(
(nw.col(col) - self._means[col]) / self._std_devs[col]
for col in self._columns
).to_native()
```

@nw.narwhalify
def transform(self, df: FrameT) -> FrameT:
return df.with_columns(
(nw.col(col) - self._means[col]) / self._std_devs[col]
for col in self._columns
)
```
=== "@narwhalify"
```python exec="1" source="above" session="standard-scaler-example"
from typing import Self
import narwhals as nw
from narwhals.typing import DataFrameT
from narwhals.typing import FrameT


class StandardScaler:
@nw.narwhalify(eager_only=True)
def fit(self: Self, df: DataFrameT) -> Self:
self._means = {col: df[col].mean() for col in df.columns}
self._std_devs = {col: df[col].std() for col in df.columns}
self._columns = df.columns
return self

@nw.narwhalify
def transform(self: Self, df: FrameT) -> FrameT:
return df.with_columns(
(nw.col(col) - self._means[col]) / self._std_devs[col]
for col in self._columns
)
```

Next, let's try running it. Notice that, because `transform` doesn't use
any eager-only features, we can pass a Polars LazyFrame to it and have it
stay lazy!

=== "pandas"
```python exec="true" source="material-block" result="python" session="tute-ex1"
```python exec="true" source="material-block" result="python" session="standard-scaler-example"
import pandas as pd

df_train = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 7]})
Expand All @@ -93,7 +166,7 @@ stay lazy!
```

=== "Polars"
```python exec="true" source="material-block" result="python" session="tute-ex1"
```python exec="true" source="material-block" result="python" session="standard-scaler-example"
import polars as pl

df_train = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 7]})
Expand Down
127 changes: 75 additions & 52 deletions docs/basics/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,42 @@ Let's explore this with some simple examples.

## Example 1: descriptive statistics

Just like in Polars, we can pass expressions to
`DataFrame.select` or `LazyFrame.select`.
Just like in Polars, we can pass expressions to `DataFrame.select` or `LazyFrame.select`.

Make a Python file with the following content:

```python exec="1" source="above" session="df_ex1"
import narwhals as nw
from narwhals.typing import FrameT
=== "from/to_native"
```python exec="1" source="above" session="df_ex1"
import narwhals as nw
from narwhals.typing import IntoFrameT


def func(df: IntoFrameT) -> IntoFrameT:
return (
nw.from_native(df)
.select(
a_sum=nw.col("a").sum(),
a_mean=nw.col("a").mean(),
a_std=nw.col("a").std(),
)
.to_native()
)
```

=== "@narwhalify"
```python exec="1" source="above" session="df_ex1"
import narwhals as nw
from narwhals.typing import FrameT

@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.select(
a_sum=nw.col("a").sum(),
a_mean=nw.col("a").mean(),
a_std=nw.col("a").std(),
)
```

@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.select(
a_sum=nw.col("a").sum(),
a_mean=nw.col("a").mean(),
a_std=nw.col("a").std(),
)
```

Let's try it out:

Expand Down Expand Up @@ -70,42 +88,33 @@ Let's try it out:
print(func(table))
```

Alternatively, we could have opted for the more explicit version:

```python
import narwhals as nw
from narwhals.typing import IntoFrameT


def func(df_native: IntoFrameT) -> IntoFrameT:
df = nw.from_native(df_native)
df = df.select(
a_sum=nw.col("a").sum(),
a_mean=nw.col("a").mean(),
a_std=nw.col("a").std(),
)
return nw.to_native(df)
```

Despite being more verbose, it has the advantage of preserving the type annotation of the native
object - see [typing](../api-reference/typing.md) for more details.

In general, in this tutorial, we'll use the former.

## Example 2: group-by and mean

Just like in Polars, we can pass expressions to `GroupBy.agg`.
Make a Python file with the following content:

```python exec="1" source="above" session="df_ex2"
import narwhals as nw
from narwhals.typing import FrameT
=== "from/to_native"
```python exec="1" source="above" session="df_ex2"
import narwhals as nw
from narwhals.typing import IntoFrameT


@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.group_by("a").agg(nw.col("b").mean()).sort("a")
```
def func(df: IntoFrameT) -> IntoFrameT:
return (
nw.from_native(df).group_by("a").agg(nw.col("b").mean()).sort("a").to_native()
)
```

=== "@narwhalify"
```python exec="1" source="above" session="df_ex2"
import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.group_by("a").agg(nw.col("b").mean()).sort("a")
```

Let's try it out:

Expand Down Expand Up @@ -148,15 +157,30 @@ For example, we can compute a horizontal sum using `nw.sum_horizontal`.

Make a Python file with the following content:

```python exec="1" source="above" session="df_ex3"
import narwhals as nw
from narwhals.typing import FrameT
=== "from/to_native"
```python exec="1" source="above" session="df_ex3"
import narwhals as nw
from narwhals.typing import IntoFrameT


@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.with_columns(a_plus_b=nw.sum_horizontal("a", "b"))
```
def func(df: IntoFrameT) -> IntoFrameT:
return (
nw.from_native(df)
.with_columns(a_plus_b=nw.sum_horizontal("a", "b"))
.to_native()
)
```

=== "@narwhalify"
```python exec="1" source="above" session="df_ex3"
import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def func(df: FrameT) -> FrameT:
return df.with_columns(a_plus_b=nw.sum_horizontal("a", "b"))
```

Let's try it out:

Expand Down Expand Up @@ -203,13 +227,12 @@ on a series.
Make a Python file with the following content:

```python exec="1" source="above" session="df_ex4"
from typing import Any

import narwhals as nw
from narwhals.typing import DataFrameT


@nw.narwhalify(eager_only=True)
def func(df: nw.DataFrame[Any], s: nw.Series, col_name: str) -> int:
def func(df: DataFrameT, s: nw.Series, col_name: str) -> int:
return df.filter(nw.col(col_name).is_in(s)).shape[0]
```

Expand Down
Loading

0 comments on commit ea1a64f

Please sign in to comment.