From e5ea1e4df22282cd840f329ab45366f2c197f9b7 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sat, 7 Dec 2024 13:44:08 +0000 Subject: [PATCH] feat: Expose nw.Implementation, along with `Implementation.is_pandas`, `Implementation.is_pandas_like`, and more (#1531) --- docs/api-reference/dataframe.md | 1 + docs/api-reference/lazyframe.md | 1 + docs/api-reference/series.md | 1 + narwhals/__init__.py | 2 + narwhals/dataframe.py | 53 ++++++++++++ narwhals/series.py | 28 +++++++ narwhals/stable/v1/__init__.py | 2 + narwhals/utils.py | 138 ++++++++++++++++++++++++++++++++ tests/implementation_test.py | 25 ++++++ utils/check_api_reference.py | 1 + 10 files changed, 252 insertions(+) create mode 100644 tests/implementation_test.py diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md index 00ff2122e..883fb7897 100644 --- a/docs/api-reference/dataframe.md +++ b/docs/api-reference/dataframe.md @@ -16,6 +16,7 @@ - get_column - group_by - head + - implementation - is_duplicated - is_empty - is_unique diff --git a/docs/api-reference/lazyframe.md b/docs/api-reference/lazyframe.md index a6776e08c..515069d1c 100644 --- a/docs/api-reference/lazyframe.md +++ b/docs/api-reference/lazyframe.md @@ -14,6 +14,7 @@ - gather_every - group_by - head + - implementation - join - join_asof - lazy diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md index 7c7a5ed17..45c71f9c0 100644 --- a/docs/api-reference/series.md +++ b/docs/api-reference/series.md @@ -28,6 +28,7 @@ - filter - gather_every - head + - implementation - is_between - is_duplicated - is_empty diff --git a/narwhals/__init__.py b/narwhals/__init__.py index bf8af9801..08ea8b926 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -63,6 +63,7 @@ from narwhals.translate import narwhalify from narwhals.translate import to_native from narwhals.translate import to_py_scalar +from narwhals.utils import Implementation from narwhals.utils import generate_temporary_column_name 
from narwhals.utils import is_ordered_categorical from narwhals.utils import maybe_align_index @@ -86,6 +87,7 @@ "Field", "Float32", "Float64", + "Implementation", "Int8", "Int16", "Int32", diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 2d044710f..c057b7227 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -36,6 +36,7 @@ from narwhals.typing import IntoDataFrame from narwhals.typing import IntoExpr from narwhals.typing import IntoFrame + from narwhals.utils import Implementation FrameT = TypeVar("FrameT", bound="IntoFrame") DataFrameT = TypeVar("DataFrameT", bound="IntoDataFrame") @@ -366,6 +367,33 @@ def __init__( msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}" raise AssertionError(msg) + @property + def implementation(self) -> Implementation: + """Return implementation of native frame. + + This can be useful when you need to do some special-casing for + some libraries for features outside of Narwhals' scope - for + example, when dealing with pandas' Period Dtype. + + Returns: + Implementation. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation + <Implementation.PANDAS: 1> + >>> df.implementation.is_pandas() + True + >>> df.implementation.is_pandas_like() + True + >>> df.implementation.is_polars() + False + """ + return self._compliant_frame._implementation # type: ignore[no-any-return] + def __len__(self) -> Any: return self._compliant_frame.__len__() @@ -2938,6 +2966,31 @@ def __repr__(self) -> str: # pragma: no cover + "┘" ) + @property + def implementation(self) -> Implementation: + """Return implementation of native frame. + + This can be useful when you need to do some special-casing for + some libraries for features outside of Narwhals' scope - for + example, when dealing with pandas' Period Dtype. + + Returns: + Implementation.
+ + Examples: + >>> import narwhals as nw + >>> import polars as pl + >>> lf_native = pl.LazyFrame({"a": [1, 2, 3]}) + >>> lf = nw.from_native(lf_native) + >>> lf.implementation + <Implementation.POLARS: 6> + >>> lf.implementation.is_pandas() + False + >>> lf.implementation.is_polars() + True + """ + return self._compliant_frame._implementation # type: ignore[no-any-return] + def __getitem__(self, item: str | slice) -> NoReturn: msg = "Slicing is not supported on LazyFrame" raise TypeError(msg) diff --git a/narwhals/series.py b/narwhals/series.py index 2846aebea..4ed7f048d 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -26,6 +26,7 @@ from narwhals.dataframe import DataFrame from narwhals.dtypes import DType + from narwhals.utils import Implementation class Series(Generic[IntoSeriesT]): @@ -57,6 +58,33 @@ def __init__( msg = f"Expected Polars Series or an object which implements `__narwhals_series__`, got: {type(series)}." raise AssertionError(msg) + @property + def implementation(self) -> Implementation: + """Return implementation of native Series. + + This can be useful when you need to do some special-casing for + some libraries for features outside of Narwhals' scope - for + example, when dealing with pandas' Period Dtype. + + Returns: + Implementation.
+ + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> s_native = pd.Series([1, 2, 3]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.implementation + <Implementation.PANDAS: 1> + >>> s.implementation.is_pandas() + True + >>> s.implementation.is_pandas_like() + True + >>> s.implementation.is_polars() + False + """ + return self._compliant_series._implementation # type: ignore[no-any-return] + def __array__(self: Self, dtype: Any = None, copy: bool | None = None) -> np.ndarray: return self._compliant_series.__array__(dtype=dtype, copy=copy) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index af1a23d85..e5b84cff8 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -59,6 +59,7 @@ from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoFrameT from narwhals.typing import IntoSeriesT +from narwhals.utils import Implementation from narwhals.utils import Version from narwhals.utils import generate_temporary_column_name from narwhals.utils import is_ordered_categorical @@ -3397,6 +3398,7 @@ def from_numpy( "Field", "Float32", "Float64", + "Implementation", "Int8", "Int16", "Int32", diff --git a/narwhals/utils.py b/narwhals/utils.py index 7e0c142ce..f69eb5661 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -55,15 +55,25 @@ class Version(Enum): class Implementation(Enum): + """Implementation of native object (pandas, Polars, PyArrow, ...).""" + PANDAS = auto() + """Pandas implementation.""" MODIN = auto() + """Modin implementation.""" CUDF = auto() + """cuDF implementation.""" PYARROW = auto() + """PyArrow implementation.""" PYSPARK = auto() + """PySpark implementation.""" POLARS = auto() + """Polars implementation.""" DASK = auto() + """Dask implementation.""" UNKNOWN = auto() + """Unknown implementation.""" @classmethod def from_native_namespace( @@ -105,6 +115,134 @@ def to_native_namespace(self: Self) -> ModuleType: } return mapping[self] # type: ignore[no-any-return] + def
is_pandas(self) -> bool: + """Return whether implementation is pandas. + + Returns: + Boolean. + + Examples: + >>> import pandas as pd + >>> import narwhals as nw + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pandas() + True + """ + return self is Implementation.PANDAS + + def is_pandas_like(self) -> bool: + """Return whether implementation is pandas, Modin, or cuDF. + + Returns: + Boolean. + + Examples: + >>> import pandas as pd + >>> import narwhals as nw + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pandas_like() + True + """ + return self in {Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF} + + def is_polars(self) -> bool: + """Return whether implementation is Polars. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_polars() + True + """ + return self is Implementation.POLARS + + def is_cudf(self) -> bool: + """Return whether implementation is cuDF. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_cudf() + False + """ + return self is Implementation.CUDF # pragma: no cover + + def is_modin(self) -> bool: + """Return whether implementation is Modin. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_modin() + False + """ + return self is Implementation.MODIN # pragma: no cover + + def is_pyspark(self) -> bool: + """Return whether implementation is PySpark. + + Returns: + Boolean. 
+ + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pyspark() + False + """ + return self is Implementation.PYSPARK # pragma: no cover + + def is_pyarrow(self) -> bool: + """Return whether implementation is PyArrow. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pyarrow() + False + """ + return self is Implementation.PYARROW # pragma: no cover + + def is_dask(self) -> bool: + """Return whether implementation is Dask. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_dask() + False + """ + return self is Implementation.DASK # pragma: no cover + def import_dtypes_module(version: Version) -> DTypes: if version is Version.V1: diff --git a/tests/implementation_test.py b/tests/implementation_test.py new file mode 100644 index 000000000..4a8fa4d0a --- /dev/null +++ b/tests/implementation_test.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import pandas as pd +import polars as pl + +import narwhals.stable.v1 as nw + + +def test_implementation() -> None: + assert ( + nw.from_native(pd.DataFrame({"a": [1, 2, 3]})).implementation + is nw.Implementation.PANDAS + ) + assert ( + nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))["a"].implementation + is nw.Implementation.PANDAS + ) + assert nw.from_native(pd.DataFrame({"a": [1, 2, 3]})).implementation.is_pandas() + assert nw.from_native(pd.DataFrame({"a": [1, 2, 3]})).implementation.is_pandas_like() + assert not nw.from_native(pl.DataFrame({"a": [1, 2, 3]})).implementation.is_pandas() + assert not nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))[ + "a" + ].implementation.is_pandas() + 
assert nw.from_native(pl.DataFrame({"a": [1, 2, 3]})).implementation.is_polars() + assert nw.from_native(pl.LazyFrame({"a": [1, 2, 3]})).implementation.is_polars() diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index aa7e6a405..4d82edbc5 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -15,6 +15,7 @@ EXPR_ONLY_METHODS = {"over", "map_batches"} SERIES_ONLY_METHODS = { "dtype", + "implementation", "is_empty", "is_sorted", "item",