Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow typecasting selected value arrays in PandasValueSelector #32

Merged
merged 2 commits into from
Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions tests/test_preprocessing/test_pandas_feature_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import timeserio.ini as ini
from timeserio.data.mock import mock_fit_data
from timeserio.preprocessing import (
PandasColumnSelector, PandasValueSelector,
PandasIndexValueSelector, PandasSequenceSplitter
PandasColumnSelector, PandasValueSelector, PandasIndexValueSelector,
PandasSequenceSplitter
)


datetime_column = ini.Columns.datetime
usage_column = ini.Columns.target
id_column = ini.Columns.id


@pytest.fixture
Expand Down Expand Up @@ -66,6 +66,12 @@ def test_value_selector(df, columns, shape1):
assert subarray.shape == expected_shape


@pytest.mark.parametrize("dtype", ["uint8", "int8"])
def test_value_selector_dtype(df, dtype):
subarray = PandasValueSelector(columns="id", dtype=dtype).transform(df)
assert subarray.dtype == dtype


@pytest.mark.parametrize(
'levels, shape1', [
(None, 0),
Expand All @@ -83,6 +89,13 @@ def test_index_value_selector(indexed_df, levels, shape1):
assert subarray.shape == expected_shape


@pytest.mark.parametrize("dtype", ["uint8", "int8"])
def test_index_value_selector_dtype(indexed_df, dtype):
subarray = PandasIndexValueSelector(levels="id",
dtype=dtype).transform(indexed_df)
assert subarray.dtype == dtype


@pytest.mark.parametrize(
'transformer, required_columns', [
(PandasColumnSelector('col1'), {'col1'}),
Expand Down
20 changes: 16 additions & 4 deletions timeserio/preprocessing/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,14 @@ def _get_column_as_tensor(s: pd.Series):


class PandasValueSelector(BaseEstimator, TransformerMixin):
"""Select scalar - or vector-valued feature cols, and return np.array."""
"""Select scalar - or vector-valued feature cols, and return np.array.

def __init__(self, columns=None):
Optionally, cast the resulting array to dtype.
"""

def __init__(self, columns=None, dtype=None):
self.columns = columns
self.dtype = dtype

def fit(self, df, y=None, **fit_params):
return self
Expand All @@ -98,6 +102,8 @@ def transform(self, df):
else: # support a mix of compatible tensors and regular columns
blocks = [_get_column_as_tensor(df[col]) for col in columns]
subarray = np.hstack(blocks)
if self.dtype:
subarray = subarray.astype(self.dtype)
return subarray

@property
Expand All @@ -112,10 +118,14 @@ def transformed_columns(self, input_columns):


class PandasIndexValueSelector(BaseEstimator, TransformerMixin):
"""Select index levels as feature cols, and return np.array."""
"""Select index levels as feature cols, and return np.array.

Optionally, cast the resulting array to dtype.
"""

def __init__(self, levels=None):
def __init__(self, levels=None, dtype=None):
self.levels = levels
self.dtype = dtype

def fit(self, df, y=None, **fit_params):
return self
Expand All @@ -133,6 +143,8 @@ def transform(self, df):
for level in levels
]
subarray = np.hstack(blocks) if blocks else np.empty((len(df), 0))
if self.dtype:
subarray = subarray.astype(self.dtype)
return subarray


Expand Down