From 8b1c054454622aeb7a6dcfa1d17d6a956d48452d Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Tue, 12 Nov 2024 13:43:36 +0000 Subject: [PATCH] perf: replace loc with getitem in Dask (#1356) * replace loc with getitem * recover loc for multiple columns --- narwhals/_dask/dataframe.py | 6 +++--- narwhals/_dask/expr.py | 2 +- narwhals/_dask/namespace.py | 10 +++++----- narwhals/_dask/utils.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index cf5c8eae1..a160e7dfc 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -150,7 +150,7 @@ def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: def schema(self) -> dict[str, DType]: return { col: native_to_narwhals_dtype( - self._native_frame.loc[:, col], self._dtypes, self._implementation + self._native_frame[col], self._dtypes, self._implementation ) for col in self._native_frame.columns } @@ -196,7 +196,7 @@ def unique( subset = subset or self.columns token = generate_temporary_column_name(n_bytes=8, columns=subset) ser = native_frame.groupby(subset).size().rename(token) - ser = ser.loc[ser == 1] + ser = ser[ser == 1] unique = ser.reset_index().drop(columns=token) result = native_frame.merge(unique, on=subset, how="inner") else: @@ -272,7 +272,7 @@ def join( right_on=left_on, ) return self._from_native_frame( - df.loc[df[indicator_token] == "left_only"].drop(columns=[indicator_token]) + df[df[indicator_token] == "left_only"].drop(columns=[indicator_token]) ) if how == "semi": diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 1f863f206..efbeadf80 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -67,7 +67,7 @@ def from_column_names( dtypes: DTypes, ) -> Self: def func(df: DaskLazyFrame) -> list[dask_expr.Series]: - return [df._native_frame.loc[:, column_name] for column_name in column_names] + return [df._native_frame[column_name] for column_name in column_names] return cls( func, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index a9a9aaa2d..2f1b401f0 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -41,7 +41,7 @@ def __init__(self, *, backend_version: tuple[int, ...], dtypes: DTypes) -> None: def all(self) -> DaskExpr: def func(df: DaskLazyFrame) -> list[dask_expr.Series]: - return [df._native_frame.loc[:, column_name] for column_name in df.columns] + return [df._native_frame[column_name] for column_name in df.columns] return DaskExpr( func, @@ -76,9 +76,9 @@ def convert_if_dtype( return DaskExpr( lambda df: [ - df._native_frame.assign(literal=value) - .loc[:, "literal"] - .pipe(convert_if_dtype, dtype) + df._native_frame.assign(literal=value)["literal"].pipe( + convert_if_dtype, dtype + ) ], depth=0, function_name="lit", @@ -126,7 +126,7 @@ def func(df: DaskLazyFrame) -> list[dask_expr.Series]: npartitions=df._native_frame.npartitions, ) ] - return [df._native_frame.loc[:, df.columns[0]].size.to_series().rename("len")] + return [df._native_frame[df.columns[0]].size.to_series().rename("len")] # coverage bug? this is definitely hit return DaskExpr( # pragma: no cover diff --git a/narwhals/_dask/utils.py b/narwhals/_dask/utils.py index cf8f9a3fc..913beb193 100644 --- a/narwhals/_dask/utils.py +++ b/narwhals/_dask/utils.py @@ -43,7 +43,7 @@ def parse_exprs_and_named_exprs( if hasattr(expr, "__narwhals_expr__"): _results = expr._call(df) elif isinstance(expr, str): - _results = [df._native_frame.loc[:, expr]] + _results = [df._native_frame[expr]] else: # pragma: no cover msg = f"Expected expression or column name, got: {expr}" raise TypeError(msg)