diff --git a/ibis_ml/steps/__init__.py b/ibis_ml/steps/__init__.py index 61f8156..73c5d72 100644 --- a/ibis_ml/steps/__init__.py +++ b/ibis_ml/steps/__init__.py @@ -6,7 +6,7 @@ from ibis_ml.steps._impute import FillNA, ImputeMean, ImputeMedian, ImputeMode from ibis_ml.steps._select_features import DropZeroVariance from ibis_ml.steps._standardize import ScaleMinMax, ScaleStandard -from ibis_ml.steps._temporal import ExpandDate, ExpandDateTime, ExpandTime +from ibis_ml.steps._temporal import ExpandDate, ExpandTime, ExpandTimestamp __all__ = ( "Cast", @@ -16,8 +16,8 @@ "Drop", "DropZeroVariance", "ExpandDate", - "ExpandDateTime", "ExpandTime", + "ExpandTimestamp", "FillNA", "HandleUnivariateOutliers", "ImputeMean", diff --git a/ibis_ml/steps/_temporal.py b/ibis_ml/steps/_temporal.py index eee700f..83e7a8c 100644 --- a/ibis_ml/steps/_temporal.py +++ b/ibis_ml/steps/_temporal.py @@ -13,8 +13,8 @@ _DOCS_PAGE_NAME = "temporal-feature-extraction" -class ExpandDateTime(Step): - """A step for expanding date and time columns into one or more features. +class ExpandDate(Step): + """A step for expanding date columns into one or more features. New features will be named ``{input_column}_{component}``. For example, if expanding a ``"year"`` component from column ``"x"``, the feature column @@ -23,9 +23,9 @@ class ExpandDateTime(Step): Parameters ---------- inputs - A selection of date and time columns to expand into new features. + A selection of date columns to expand into new features. components - A sequence of date or time components to expand. Options include + A sequence of components to expand. Options include - ``day``: the day of the month as a numeric value - ``week``: the week of the year as a numeric value @@ -33,43 +33,30 @@ class ExpandDateTime(Step): - ``year``: the year as a numeric value - ``dow``: the day of the week as a categorical value - ``doy``: the day of the year as a numeric value - - ``hour``: the hour as a numeric value - - ``minute``: the minute as a numeric value - - ``second``: the second as a numeric value - - ``millisecond``: the millisecond as a numeric value - Defaults to ``["dow", "month", "year", "hour", "minute", "second"]``. + Defaults to ``["dow", "month", "year"]``. Examples -------- >>> import ibis_ml as ml - Expand date and time columns using the default components + Expand date columns using the default components - >>> step = ml.ExpandDateTime(ml.timestamp()) + >>> step = ml.ExpandDate(ml.date()) - Expand specific columns using specific components for date and time + Expand specific columns using specific components - >>> step = ml.ExpandDateTime(["x", "y"], ["day", "year", "hour"]) + >>> step = ml.ExpandDate(["x", "y"], ["day", "year"]) """ def __init__( self, inputs: SelectionType, - components: list[ - Literal[ - "day", - "week", - "month", - "year", - "dow", - "doy", - "hour", - "minute", - "second", - "millisecond", - ] - ] = ("dow", "month", "year", "hour", "minute", "second"), + components: Sequence[Literal["day", "week", "month", "year", "dow", "doy"]] = ( + "dow", + "month", + "year", + ), ): self.inputs = selector(inputs) self.components = list(components) @@ -80,7 +67,6 @@ def _repr(self) -> Iterable[tuple[str, Any]]: def fit_table(self, table: ir.Table, metadata: Metadata) -> None: columns = self.inputs.select_columns(table, metadata) - if "month" in self.components: for col in columns: metadata.set_categories( @@ -114,12 +100,10 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None: "Sunday", ], ) - self.columns_ = columns def transform_table(self, table: ir.Table) -> ir.Table: new_cols = [] - for name in self.columns_: col = table[name] for comp in self.components: @@ -135,7 +119,65 @@ def transform_table(self, table: ir.Table) -> ir.Table: feat = col.day_of_week.index() elif comp == "doy": feat = col.day_of_year() - elif comp == "hour": + new_cols.append(feat.name(f"{name}_{comp}")) + return table.mutate(new_cols) + + +class ExpandTime(Step): + """A step for expanding time columns into one or more features. + + New features will be named ``{input_column}_{component}``. For example, if + expanding an ``"hour"`` component from column ``"x"``, the feature column + would be named ``"x_hour"``. + + Parameters + ---------- + inputs + A selection of time columns to expand into new features. + components + A sequence of components to expand. Options include ``hour``, + ``minute``, ``second``, and ``millisecond``. + + Defaults to ``["hour", "minute", "second"]``. + + Examples + -------- + >>> import ibis_ml as ml + + Expand time columns using the default components + + >>> step = ml.ExpandTime(ml.time()) + + Expand specific columns using specific components + + >>> step = ml.ExpandTime(["x", "y"], ["hour", "minute"]) + """ + + def __init__( + self, + inputs: SelectionType, + components: Sequence[Literal["hour", "minute", "second", "millisecond"]] = ( + "hour", + "minute", + "second", + ), + ): + self.inputs = selector(inputs) + self.components = list(components) + + def _repr(self) -> Iterable[tuple[str, Any]]: + yield ("", self.inputs) + yield ("components", self.components) + + def fit_table(self, table: ir.Table, metadata: Metadata) -> None: + self.columns_ = self.inputs.select_columns(table, metadata) + + def transform_table(self, table: ir.Table) -> ir.Table: + new_cols = [] + for name in self.columns_: + col = table[name] + for comp in self.components: + if comp == "hour": feat = col.hour() elif comp == "minute": feat = col.minute() @@ -144,12 +186,11 @@ def transform_table(self, table: ir.Table) -> ir.Table: elif comp == "millisecond": feat = col.millisecond() new_cols.append(feat.name(f"{name}_{comp}")) - return table.mutate(new_cols) -class ExpandDate(Step): - """A step for expanding date columns into one or more features. +class ExpandTimestamp(Step): + """A step for expanding timestamp columns into one or more features. New features will be named ``{input_column}_{component}``. For example, if expanding a ``"year"`` component from column ``"x"``, the feature column @@ -158,9 +199,9 @@ class ExpandDate(Step): Parameters ---------- inputs - A selection of date columns to expand into new features. + A selection of timestamp columns to expand into new features. components - A sequence of components to expand. Options include + A sequence of date or time components to expand. Options include - ``day``: the day of the month as a numeric value - ``week``: the week of the year as a numeric value @@ -168,30 +209,43 @@ class ExpandDate(Step): - ``year``: the year as a numeric value - ``dow``: the day of the week as a categorical value - ``doy``: the day of the year as a numeric value + - ``hour``: the hour as a numeric value + - ``minute``: the minute as a numeric value + - ``second``: the second as a numeric value + - ``millisecond``: the millisecond as a numeric value - Defaults to ``["dow", "month", "year"]``. + Defaults to ``["dow", "month", "year", "hour", "minute", "second"]``. Examples -------- >>> import ibis_ml as ml - Expand date columns using the default components + Expand timestamp columns using the default components - >>> step = ml.ExpandDate(ml.date()) + >>> step = ml.ExpandTimestamp(ml.timestamp()) Expand specific columns using specific components - >>> step = ml.ExpandDate(["x", "y"], ["day", "year"]) + >>> step = ml.ExpandTimestamp(["x", "y"], ["day", "year", "hour"]) """ def __init__( self, inputs: SelectionType, - components: Sequence[Literal["day", "week", "month", "year", "dow", "doy"]] = ( - "dow", - "month", - "year", - ), + components: list[ + Literal[ + "day", + "week", + "month", + "year", + "dow", + "doy", + "hour", + "minute", + "second", + "millisecond", + ] + ] = ("dow", "month", "year", "hour", "minute", "second"), ): self.inputs = selector(inputs) self.components = list(components) @@ -202,6 +256,7 @@ def _repr(self) -> Iterable[tuple[str, Any]]: def fit_table(self, table: ir.Table, metadata: Metadata) -> None: columns = self.inputs.select_columns(table, metadata) + if "month" in self.components: for col in columns: metadata.set_categories( @@ -235,10 +290,12 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None: "Sunday", ], ) + self.columns_ = columns def transform_table(self, table: ir.Table) -> ir.Table: new_cols = [] + for name in self.columns_: col = table[name] for comp in self.components: @@ -254,65 +311,7 @@ def transform_table(self, table: ir.Table) -> ir.Table: feat = col.day_of_week.index() elif comp == "doy": feat = col.day_of_year() - new_cols.append(feat.name(f"{name}_{comp}")) - return table.mutate(new_cols) - - -class ExpandTime(Step): - """A step for expanding time columns into one or more features. - - New features will be named ``{input_column}_{component}``. For example, if - expanding an ``"hour"`` component from column ``"x"``, the feature column - would be named ``"x_hour"``. - - Parameters - ---------- - inputs - A selection of time columns to expand into new features. - components - A sequence of components to expand. Options include ``hour``, - ``minute``, ``second``, and ``millisecond``. - - Defaults to ``["hour", "minute", "second"]``. - - Examples - -------- - >>> import ibis_ml as ml - - Expand time columns using the default components - - >>> step = ml.ExpandTime(ml.time()) - - Expand specific columns using specific components - - >>> step = ml.ExpandTime(["x", "y"], ["hour", "minute"]) - """ - - def __init__( - self, - inputs: SelectionType, - components: Sequence[Literal["hour", "minute", "second", "millisecond"]] = ( - "hour", - "minute", - "second", - ), - ): - self.inputs = selector(inputs) - self.components = list(components) - - def _repr(self) -> Iterable[tuple[str, Any]]: - yield ("", self.inputs) - yield ("components", self.components) - - def fit_table(self, table: ir.Table, metadata: Metadata) -> None: - self.columns_ = self.inputs.select_columns(table, metadata) - - def transform_table(self, table: ir.Table) -> ir.Table: - new_cols = [] - for name in self.columns_: - col = table[name] - for comp in self.components: - if comp == "hour": + elif comp == "hour": feat = col.hour() elif comp == "minute": feat = col.minute() @@ -321,4 +320,5 @@ def transform_table(self, table: ir.Table) -> ir.Table: elif comp == "millisecond": feat = col.millisecond() new_cols.append(feat.name(f"{name}_{comp}")) + return table.mutate(new_cols) diff --git a/tests/test_core.py b/tests/test_core.py index 3994f60..c26990c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -366,10 +366,10 @@ def test_errors_nicely_if_not_fitted(table, method): def test_get_params(): - rec = ml.Recipe(ml.ExpandDateTime(ml.timestamp())) + rec = ml.Recipe(ml.ExpandTimestamp(ml.timestamp())) - assert "expanddatetime__components" in rec.get_params(deep=True) - assert "expanddatetime__components" not in rec.get_params(deep=False) + assert "expandtimestamp__components" in rec.get_params(deep=True) + assert "expandtimestamp__components" not in rec.get_params(deep=False) @pytest.mark.parametrize( diff --git a/tests/test_temporal.py b/tests/test_temporal.py index 141ab74..adf1504 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -36,9 +36,9 @@ def test_expand_time(): assert res.equals(sol) -def test_expand_datetime(): +def test_expand_timestamp(): t = ibis.table({"y": "timestamp", "z": "int"}) - step = ml.ExpandDateTime( + step = ml.ExpandTimestamp( ml.timestamp(), components=[ "dow",