From a38280096fa0c82d1005ec25df75e77f3305095e Mon Sep 17 00:00:00 2001 From: nepslor Date: Tue, 10 Sep 2024 13:59:22 +0200 Subject: [PATCH] fixed DiscreteDistr forecaster --- pyforecaster/forecasting_models/benchmarks.py | 7 +++-- tests/test_models.py | 30 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/pyforecaster/forecasting_models/benchmarks.py b/pyforecaster/forecasting_models/benchmarks.py index 706ede9..6f3b272 100644 --- a/pyforecaster/forecasting_models/benchmarks.py +++ b/pyforecaster/forecasting_models/benchmarks.py @@ -66,17 +66,18 @@ def _predict_quantiles(self, x: pd.DataFrame, **kwargs): class DiscreteDistr(ScenarioGenerator): - def __init__(self, period='1d', n_sa=1, q_vect=None, val_ratio=None, nodes_at_step=None, + def __init__(self, period='1d', q_vect=None, val_ratio=None, nodes_at_step=None, conditional_to_hour=False, **scengen_kwgs): super().__init__(q_vect, nodes_at_step=nodes_at_step, val_ratio=val_ratio, conditional_to_hour=conditional_to_hour, **scengen_kwgs) - self.n_sa = n_sa + self.n_sa = None self.period = period self.target_names = None self.y_distributions = None self.support = None def fit(self, x:pd.DataFrame, y:pd.DataFrame): + self.n_sa = y.shape[1] # infer sampling time sampling_time = pd.infer_freq(x.index) @@ -108,7 +109,7 @@ def fit(self, x:pd.DataFrame, y:pd.DataFrame): return self def predict(self, x, **kwargs): - return np.mean(self.predict_probabilities(x) * self.support.reshape(1, -1), axis=1) + return (self.predict_probabilities(x) * np.tile(self.support.reshape(1, -1), self.n_sa)).groupby(level=0, axis=1).sum() def predict_probabilities(self, x, **kwargs): # infer sampling time diff --git a/tests/test_models.py b/tests/test_models.py index d51824b..b04f591 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -294,36 +294,34 @@ def test_persistence(self): y_plot = pd.concat({'y_{:02d}'.format(i): data_te['all'].shift(-i) for i in range(24)}, axis=1) plot_quantiles([y_plot, 
y_hat], q_hat, ['y_te', 'y_hat'], n_rows=300) - """ + def test_discrete_distr(self): self.data = self.data.resample('1h').mean() self.data = self.data.round(-1) formatter = Formatter(logger=self.logger, augment=False) formatter.add_target_transform(['all'], lags=-np.arange(1, 24)) x, y = formatter.transform(self.data) - n_tr = 6000 - n_te = 2000 + n_tr = 500 + n_te = 500 x_tr, y_tr, x_te, y_te = x.iloc[:n_tr], y.iloc[:n_tr], x.iloc[n_tr:n_tr+n_te], y.iloc[n_tr:n_tr+n_te] m = DiscreteDistr(target_name='all', q_vect=np.arange(31)/30, nodes_at_step=None, val_ratio=0.8, n_sa=24, period='1d').fit(x_tr, y_tr) y_hat = m.predict(x_te) - y_hat = m.predict_probabilities(x_te) - from pyforecaster.forecaster import ScenarioGenerator - - plt.matshow(ScenarioGenerator().quantiles_to_numpy(y_hat)[0].T) + q_hat = m.predict_probabilities(x_te) - n_taus = len(y_hat.columns.get_level_values(1).unique()) - q_hat = y_hat.values - q_hat = np.reshape(q_hat, (q_hat.shape[0], -1, n_taus)) - plt.matshow(q_hat[0].T) + from pyforecaster.forecaster import ScenarioGenerator + x_bins = np.arange(23) + y_bins = m.support - n_taus = len(y_hat.columns.get_level_values(1).unique()) - q_hat = y_hat.values - q_hat = np.reshape(q_hat, (q_hat.shape[0], n_taus, -1)) - q_hat = np.swapaxes(q_hat, 1, 2) - """ + extent = [x_bins.min(), x_bins.max(), y_bins.min(), y_bins.max()] + for i in range(10): + plt.figure() + plt.imshow(ScenarioGenerator().quantiles_to_numpy(q_hat)[i].T, aspect='auto', extent=extent, origin='lower', cmap='plasma') + plt.plot(y_te.iloc[i, :].values) + plt.plot(y_hat.iloc[i, :].values) + plt.pause(0.1) if __name__ == '__main__': unittest.main()