Skip to content

Commit

Permalink
changed target normalizer from lambda (unpicklable functions) to eval
Browse files Browse the repository at this point in the history
  • Loading branch information
nepslor committed Jul 30, 2024
1 parent 3aa91e2 commit 9e1669f
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 8 deletions.
6 changes: 5 additions & 1 deletion pyforecaster/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,15 @@ def normalize(self, x, y, normalizing_fun=None):
# find df_n columns to normalize
nr_columns = (tr.metadata['name'].isin([target_to_norm])).index
for c in nr_columns:
df_n[c] = normalizing_fun(df_n, c)
df_n[c] = self.normalizing_wrapper(normalizing_fun, df_n, c)

df_n = df_n[[c for c in y.columns]]
return df_n

def normalizing_wrapper(self, normalizing_fun, df, t):
    """
    Apply a normalizing function to the column ``t`` of ``df`` and return the result.

    :param normalizing_fun: either a string containing a Python expression written in terms of ``df`` and
                            ``t`` (e.g. ``"(df[t] - df['mean']) / (df['std'] + 1)"``), or a callable with
                            signature ``f(df, t)``. Strings are the preferred form since, unlike lambdas,
                            they can be pickled.
    :param df: container indexed by column name, holding the target and the normalizer columns
    :param t: name of the column to be normalized
    :return: the value of the expression (or of the callable) evaluated on ``df`` and ``t``
    """
    # Keep supporting callables, so that code written against the previous lambda-based API still works.
    if callable(normalizing_fun):
        return normalizing_fun(df, t)

    # SECURITY: eval executes arbitrary Python code. The expression must come from a trusted source
    # (e.g. the code configuring the formatter), never from external/user-provided input.
    # The expression is evaluated in this scope: it can reference `df`, `t` and the module-level names.
    return eval(normalizing_fun)


def _simulate_transform(self, x=None):
"""
This won't actually modify the dataframe, it will just populate the metadata property of each transformer
Expand Down
15 changes: 8 additions & 7 deletions tests/test_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,14 +257,15 @@ def test_normalizers(self):
df = pd.DataFrame(np.random.randn(100, 5), index=pd.date_range('01-01-2020', freq='20min', periods=100, tz='Europe/Zurich'), columns=['a', 'b', 'c', 'd', 'e'])
formatter = pyf.Formatter().add_transform(['a', 'b'], lags=np.arange(1, 5), agg_freq='20min')
formatter.add_target_transform(['a'], lags=-np.arange(1, 5), agg_freq='20min')
formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a')
formatter.add_target_normalizer(['a'], 'std', agg_freq='10H', name='b')
formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_movingavg')
formatter.add_target_normalizer(['a'], 'std', agg_freq='10H', name='a_movingstd')
x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH')

formatter.add_normalizing_fun(lambda df, t: (df[t] - df['a'])/(df['b']+1))
#formatter.add_normalizing_fun(lambda df, t: (df[t] - df['a_movingavg'])/(df['a_movingstd']+1))
formatter.add_normalizing_fun("(df[t] - df['a_movingavg']) / (df['a_movingstd'] + 1)")
x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH')

y_unnorm = formatter.normalize(x, y_norm , normalizing_fun=lambda df, t: df[t]*(df['b']+1) + df['a'])
y_unnorm = formatter.normalize(x, y_norm , normalizing_fun="df[t]*(df['a_movingstd']+1) + df['a_movingavg']")

# check if back-transform works
assert (y_unnorm-y).sum().sum() < 1e-6
Expand All @@ -279,9 +280,9 @@ def test_normalizers_complex(self):

x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH')

formatter.add_normalizing_fun(lambda df, t: np.exp(df[t]+df['a']) + df['b'])
formatter.add_normalizing_fun("np.exp(df[t]+df['a']) + df['b']")
x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
y_unnorm = formatter.normalize(x, y_norm , normalizing_fun= lambda df, t: np.log(df[t]-df["b"]) -df["a"])
y_unnorm = formatter.normalize(x, y_norm , normalizing_fun= "np.log(df[t]-df['b']) -df['a']")

# check if back-transform works
assert (y_unnorm-y).sum().sum() < 1e-6
Expand All @@ -308,7 +309,7 @@ def test_normalizers_impossible(self):
formatter.add_target_normalizer(['target'], 'std', agg_freq='5H', name='std')

x, y = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True)
formatter.add_normalizing_fun(lambda df, t: (df[t] - df['mean'])/(df['std']+1))
formatter.add_normalizing_fun("(df[t] - df['mean'])/(df['std']+1)")
x, y_norm = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True)

xs = formatter.global_form_preprocess(df_mi)
Expand Down
19 changes: 19 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
from pyforecaster.forecasting_models.holtwinters import HoltWinters, HoltWintersMulti
from pyforecaster.forecasting_models.fast_adaptive_models import Fourier_es, FK, FK_multi
from pyforecaster.forecasting_models.random_fourier_features import RFFRegression
from pyforecaster.forecasting_models.randomforests import QRF
from pyforecaster.forecaster import LinearForecaster, LGBForecaster
from pyforecaster.plot_utils import plot_quantiles
Expand Down Expand Up @@ -183,5 +184,23 @@ def test_qrf(self):
y_hat = qrf.predict(x_te.iloc[[0], :])
q = qrf.predict(x_te.iloc[[0], :])

def test_rffr(self):
    """
    Smoke test for RFFRegression: the model is fitted on the first 95% of the formatted samples, then
    point and quantile predictions are produced for the remaining ones and plotted.
    No assertion is made on the predicted values.
    """
    n_lags = 144
    formatter = Formatter(logger=self.logger)
    formatter = formatter.add_transform(['all'], lags=np.arange(n_lags), relative_lags=True)
    formatter.add_target_transform(['all'], lags=-np.arange(n_lags))

    # only the first 10000 rows of the dataset are used
    x, y = formatter.transform(self.data.iloc[:10000])
    x.columns = x.columns.astype(str)
    y.columns = y.columns.astype(str)

    # chronological split: first 95% of the rows for training, the rest for testing
    split = int(len(x) * 0.95)
    x_tr = x.iloc[:split, :].copy()
    x_te = x.iloc[split:, :].copy()
    y_tr = y.iloc[:split].copy()
    y_te = y.iloc[split:].copy()

    model = RFFRegression(std_kernel=0.001, dim_kernel=30).fit(x_tr, y_tr)
    y_hat = model.predict(x_te)
    q = model.predict_quantiles(x_te)

    signals = [y_te, pd.DataFrame(y_hat, index=y_te.index)]
    plot_quantiles(signals, q, ['y_te', 'y_hat', 'y_hat_qrf'], n_rows=600, repeat=False)



if __name__ == '__main__':
unittest.main()

0 comments on commit 9e1669f

Please sign in to comment.