Skip to content

Commit

Permalink
Merge pull request #32 from ig248/dtypes
Browse files Browse the repository at this point in the history
Allow typecasting selected value arrays in PandasValueSelector
  • Loading branch information
Ali Teeney authored Nov 9, 2020
2 parents 06f9f7f + 0c983a7 commit 55c2105
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 7 deletions.
19 changes: 16 additions & 3 deletions tests/test_preprocessing/test_pandas_feature_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import timeserio.ini as ini
from timeserio.data.mock import mock_fit_data
from timeserio.preprocessing import (
PandasColumnSelector, PandasValueSelector,
PandasIndexValueSelector, PandasSequenceSplitter
PandasColumnSelector, PandasValueSelector, PandasIndexValueSelector,
PandasSequenceSplitter
)


datetime_column = ini.Columns.datetime
usage_column = ini.Columns.target
id_column = ini.Columns.id


@pytest.fixture
Expand Down Expand Up @@ -66,6 +66,12 @@ def test_value_selector(df, columns, shape1):
assert subarray.shape == expected_shape


@pytest.mark.parametrize("dtype", ["uint8", "int8"])
def test_value_selector_dtype(df, dtype):
    """Check that the selected value array carries the requested dtype."""
    # Build the selector first so the cast configuration is explicit.
    selector = PandasValueSelector(columns="id", dtype=dtype)
    result = selector.transform(df)
    assert result.dtype == dtype


@pytest.mark.parametrize(
'levels, shape1', [
(None, 0),
Expand All @@ -83,6 +89,13 @@ def test_index_value_selector(indexed_df, levels, shape1):
assert subarray.shape == expected_shape


@pytest.mark.parametrize("dtype", ["uint8", "int8"])
def test_index_value_selector_dtype(indexed_df, dtype):
    """Check that the selected index-level array carries the requested dtype."""
    # Build the selector first so the cast configuration is explicit.
    selector = PandasIndexValueSelector(levels="id", dtype=dtype)
    result = selector.transform(indexed_df)
    assert result.dtype == dtype


@pytest.mark.parametrize(
'transformer, required_columns', [
(PandasColumnSelector('col1'), {'col1'}),
Expand Down
20 changes: 16 additions & 4 deletions timeserio/preprocessing/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,14 @@ def _get_column_as_tensor(s: pd.Series):


class PandasValueSelector(BaseEstimator, TransformerMixin):
"""Select scalar - or vector-valued feature cols, and return np.array."""
"""Select scalar - or vector-valued feature cols, and return np.array.
def __init__(self, columns=None):
Optionally, cast the resulting array to dtype.
"""

def __init__(self, columns=None, dtype=None):
self.columns = columns
self.dtype = dtype

def fit(self, df, y=None, **fit_params):
return self
Expand All @@ -98,6 +102,8 @@ def transform(self, df):
else: # support a mix of compatible tensors and regular columns
blocks = [_get_column_as_tensor(df[col]) for col in columns]
subarray = np.hstack(blocks)
if self.dtype:
subarray = subarray.astype(self.dtype)
return subarray

@property
Expand All @@ -112,10 +118,14 @@ def transformed_columns(self, input_columns):


class PandasIndexValueSelector(BaseEstimator, TransformerMixin):
"""Select index levels as feature cols, and return np.array."""
"""Select index levels as feature cols, and return np.array.
Optionally, cast the resulting array to dtype.
"""

def __init__(self, levels=None):
def __init__(self, levels=None, dtype=None):
self.levels = levels
self.dtype = dtype

def fit(self, df, y=None, **fit_params):
return self
Expand All @@ -133,6 +143,8 @@ def transform(self, df):
for level in levels
]
subarray = np.hstack(blocks) if blocks else np.empty((len(df), 0))
if self.dtype:
subarray = subarray.astype(self.dtype)
return subarray


Expand Down

0 comments on commit 55c2105

Please sign in to comment.