Skip to content

Commit

Permalink
perf: replace loc with getitem in Dask (#1356)
Browse files Browse the repository at this point in the history
* replace loc with getitem

* recover loc for multiple columns
  • Loading branch information
raisadz authored Nov 12, 2024
1 parent dc4ed28 commit 8b1c054
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
6 changes: 3 additions & 3 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def schema(self) -> dict[str, DType]:
return {
col: native_to_narwhals_dtype(
- self._native_frame.loc[:, col], self._dtypes, self._implementation
+ self._native_frame[col], self._dtypes, self._implementation
)
for col in self._native_frame.columns
}
Expand Down Expand Up @@ -196,7 +196,7 @@ def unique(
subset = subset or self.columns
token = generate_temporary_column_name(n_bytes=8, columns=subset)
ser = native_frame.groupby(subset).size().rename(token)
- ser = ser.loc[ser == 1]
+ ser = ser[ser == 1]
unique = ser.reset_index().drop(columns=token)
result = native_frame.merge(unique, on=subset, how="inner")
else:
Expand Down Expand Up @@ -272,7 +272,7 @@ def join(
right_on=left_on,
)
return self._from_native_frame(
- df.loc[df[indicator_token] == "left_only"].drop(columns=[indicator_token])
+ df[df[indicator_token] == "left_only"].drop(columns=[indicator_token])
)

if how == "semi":
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def from_column_names(
dtypes: DTypes,
) -> Self:
def func(df: DaskLazyFrame) -> list[dask_expr.Series]:
- return [df._native_frame.loc[:, column_name] for column_name in column_names]
+ return [df._native_frame[column_name] for column_name in column_names]

return cls(
func,
Expand Down
10 changes: 5 additions & 5 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, *, backend_version: tuple[int, ...], dtypes: DTypes) -> None:

def all(self) -> DaskExpr:
def func(df: DaskLazyFrame) -> list[dask_expr.Series]:
- return [df._native_frame.loc[:, column_name] for column_name in df.columns]
+ return [df._native_frame[column_name] for column_name in df.columns]

return DaskExpr(
func,
Expand Down Expand Up @@ -76,9 +76,9 @@ def convert_if_dtype(

return DaskExpr(
lambda df: [
- df._native_frame.assign(literal=value)
- .loc[:, "literal"]
- .pipe(convert_if_dtype, dtype)
+ df._native_frame.assign(literal=value)["literal"].pipe(
+ convert_if_dtype, dtype
+ )
],
depth=0,
function_name="lit",
Expand Down Expand Up @@ -126,7 +126,7 @@ def func(df: DaskLazyFrame) -> list[dask_expr.Series]:
npartitions=df._native_frame.npartitions,
)
]
- return [df._native_frame.loc[:, df.columns[0]].size.to_series().rename("len")]
+ return [df._native_frame[df.columns[0]].size.to_series().rename("len")]

# coverage bug? this is definitely hit
return DaskExpr( # pragma: no cover
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def parse_exprs_and_named_exprs(
if hasattr(expr, "__narwhals_expr__"):
_results = expr._call(df)
elif isinstance(expr, str):
- _results = [df._native_frame.loc[:, expr]]
+ _results = [df._native_frame[expr]]
else: # pragma: no cover
msg = f"Expected expression or column name, got: {expr}"
raise TypeError(msg)
Expand Down

0 comments on commit 8b1c054

Please sign in to comment.