From a38280096fa0c82d1005ec25df75e77f3305095e Mon Sep 17 00:00:00 2001
From: nepslor <nepslor@gmail.com>
Date: Tue, 10 Sep 2024 13:59:22 +0200
Subject: [PATCH] fixed DiscreteDistr forecaster

---
 pyforecaster/forecasting_models/benchmarks.py |  7 +++--
 tests/test_models.py                          | 30 +++++++++----------
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/pyforecaster/forecasting_models/benchmarks.py b/pyforecaster/forecasting_models/benchmarks.py
index 706ede9..6f3b272 100644
--- a/pyforecaster/forecasting_models/benchmarks.py
+++ b/pyforecaster/forecasting_models/benchmarks.py
@@ -66,17 +66,18 @@ def _predict_quantiles(self, x: pd.DataFrame, **kwargs):
 
 
 class DiscreteDistr(ScenarioGenerator):
-    def __init__(self, period='1d', n_sa=1, q_vect=None, val_ratio=None, nodes_at_step=None,
+    def __init__(self, period='1d', q_vect=None, val_ratio=None, nodes_at_step=None,
                  conditional_to_hour=False, **scengen_kwgs):
         super().__init__(q_vect, nodes_at_step=nodes_at_step, val_ratio=val_ratio,
                          conditional_to_hour=conditional_to_hour, **scengen_kwgs)
-        self.n_sa = n_sa
+        self.n_sa = None
         self.period = period
         self.target_names = None
         self.y_distributions = None
         self.support = None
 
     def fit(self, x:pd.DataFrame, y:pd.DataFrame):
+        self.n_sa = y.shape[1]
         # infer sampling time
         sampling_time = pd.infer_freq(x.index)
 
@@ -108,7 +109,7 @@ def fit(self, x:pd.DataFrame, y:pd.DataFrame):
         return self
 
     def predict(self, x, **kwargs):
-        return np.mean(self.predict_probabilities(x) * self.support.reshape(1, -1), axis=1)
+        return (self.predict_probabilities(x) * np.tile(self.support.reshape(1, -1), self.n_sa)).groupby(level=0, axis=1).sum()
 
     def predict_probabilities(self, x, **kwargs):
         # infer sampling time
diff --git a/tests/test_models.py b/tests/test_models.py
index d51824b..b04f591 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -294,36 +294,34 @@ def test_persistence(self):
         y_plot = pd.concat({'y_{:02d}'.format(i): data_te['all'].shift(-i) for i in range(24)}, axis=1)
         plot_quantiles([y_plot, y_hat], q_hat, ['y_te', 'y_hat'], n_rows=300)
 
-    """
+
     def test_discrete_distr(self):
         self.data = self.data.resample('1h').mean()
         self.data = self.data.round(-1)
         formatter = Formatter(logger=self.logger, augment=False)
         formatter.add_target_transform(['all'], lags=-np.arange(1, 24))
         x, y = formatter.transform(self.data)
-        n_tr = 6000
-        n_te = 2000
+        n_tr = 500
+        n_te = 500
         x_tr, y_tr, x_te, y_te = x.iloc[:n_tr], y.iloc[:n_tr], x.iloc[n_tr:n_tr+n_te], y.iloc[n_tr:n_tr+n_te]
 
         m = DiscreteDistr(target_name='all', q_vect=np.arange(31)/30, nodes_at_step=None, val_ratio=0.8, n_sa=24,
                                     period='1d').fit(x_tr, y_tr)
         y_hat = m.predict(x_te)
-        y_hat = m.predict_probabilities(x_te)
-        from pyforecaster.forecaster import ScenarioGenerator
-
-        plt.matshow(ScenarioGenerator().quantiles_to_numpy(y_hat)[0].T)
+        q_hat = m.predict_probabilities(x_te)
 
-        n_taus = len(y_hat.columns.get_level_values(1).unique())
-        q_hat = y_hat.values
-        q_hat = np.reshape(q_hat, (q_hat.shape[0], -1, n_taus))
-        plt.matshow(q_hat[0].T)
+        from pyforecaster.forecaster import ScenarioGenerator
 
+        x_bins = np.arange(23)
+        y_bins = m.support
 
-        n_taus = len(y_hat.columns.get_level_values(1).unique())
-        q_hat = y_hat.values
-        q_hat = np.reshape(q_hat, (q_hat.shape[0], n_taus, -1))
-        q_hat = np.swapaxes(q_hat, 1, 2)
-    """
+        extent = [x_bins.min(), x_bins.max(), y_bins.min(), y_bins.max()]
+        for i in range(10):
+            plt.figure()
+            plt.imshow(ScenarioGenerator().quantiles_to_numpy(q_hat)[i].T, aspect='auto', extent=extent, origin='lower', cmap='plasma')
+            plt.plot(y_te.iloc[i, :].values)
+            plt.plot(y_hat.iloc[i, :].values)
+            plt.pause(0.1)
 
 if __name__ == '__main__':
     unittest.main()