added multihead to FFNN, changed output of fast adaptive models and HoltWinters to pd.DataFrame
nepslor committed Sep 23, 2024
1 parent 2504cc5 commit f7291f1
Showing 5 changed files with 54 additions and 22 deletions.
6 changes: 3 additions & 3 deletions pyforecaster/forecasting_models/fast_adaptive_models.py
@@ -192,7 +192,7 @@ def store_basis(self):
     def predict(self, x_pd, **kwargs):
         x = x_pd.values
         y = x_pd[self.target_name].values
-        return self.run(x, y, start_from=0, fit=False)
+        return pd.DataFrame(self.run(x, y, start_from=0, fit=False), index=x_pd.index, columns=[f'{self.target_name}_t+{i}' for i in range(1, self.n_sa+1)])


     def run(self, x, y, return_coeffs=False, start_from=0, fit=True):
@@ -360,7 +360,7 @@ def store_basis(self):
     def predict(self, x_pd, **kwargs):
         x = x_pd.values
         y = x_pd[self.target_name].values
-        return self.run(x, y, start_from=0, fit=False)
+        return pd.DataFrame(self.run(x, y, start_from=0, fit=False), index=x_pd.index, columns=[f'{self.target_name}_t+{i}' for i in range(1, self.n_sa+1)])


     def run(self, x, y, return_coeffs=False, start_from=0, fit=True):
@@ -508,7 +508,7 @@ def fit(self, x_pd, y_pd=None, **kwargs):
         return self

     def predict(self, x_pd, **kwargs):
-        return self.run(x_pd, fit=False, return_coeffs=True)[0]
+        return pd.DataFrame(self.run(x_pd, fit=False, return_coeffs=True)[0], index=x_pd.index, columns=[f'{self.target_name}_t+{i}' for i in range(1, self.n_sa+1)])


     def run(self, x_pd, return_coeffs=True, fit=True):
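The three predict methods above now share one output contract: the raw (n_samples, n_sa) array from run is wrapped in a DataFrame aligned with the input index, one column per step ahead. A minimal sketch of that wrapping; wrap_forecast is a hypothetical helper name, not in the repo:

import numpy as np
import pandas as pd

def wrap_forecast(raw, x_pd, target_name, n_sa):
    # Wrap an (n_samples, n_sa) array of step-ahead forecasts in a DataFrame
    # aligned with the input index, one column per forecast horizon.
    cols = [f'{target_name}_t+{i}' for i in range(1, n_sa + 1)]
    return pd.DataFrame(raw, index=x_pd.index, columns=cols)

x_pd = pd.DataFrame({'target': np.arange(5.0)},
                    index=pd.date_range('2024-01-01', periods=5, freq='h'))
raw = np.random.rand(5, 3)  # stand-in for self.run(x, y, start_from=0, fit=False)
print(wrap_forecast(raw, x_pd, 'target', 3).columns.tolist())
# ['target_t+1', 'target_t+2', 'target_t+3']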
8 changes: 4 additions & 4 deletions pyforecaster/forecasting_models/holtwinters.py
@@ -121,7 +121,7 @@ def score_autoregressive(model, x, tr_ratio=0.7, target_name=None, n_sa=1):
     model.fit(x_tr, x_tr)
     y_hat = model.predict(x_te)

-    return np.mean((y_hat[:len(target), :] - target) ** 2)
+    return np.mean((y_hat.values[:len(target), :] - target) ** 2)

 class HoltWinters(ScenarioGenerator):
     def __init__(self, periods, target_name, targets_names=None, q_vect=None, val_ratio=None, nodes_at_step=None, optimization_budget=800, n_sa=1, constraints=None,
@@ -230,7 +230,7 @@ def predict(self, x_pd, **kwargs):

         self.y_hat_te = y_hat

-        return y_hat
+        return pd.DataFrame(y_hat, index=x_pd.index, columns=self.target_cols)

     def _predict_quantiles(self, x, **kwargs):
         preds = self.predict(x)
@@ -441,9 +441,9 @@ def predict(self, x, **kwargs):
         for i,m in enumerate(self.models):
             y_hat_m = m.predict(x)
             selection = np.arange(k, y_hat_m.shape[1])
-            y_hat[:, selection] = y_hat_m[:, selection]
+            y_hat[:, selection] = y_hat_m.iloc[:, selection]
             k = y_hat_m.shape[1]
-        return y_hat
+        return pd.DataFrame(y_hat, index=x.index, columns=['{}_{}'.format(self.target_name, t) for t in np.arange(self.n_sa)])

     def reinit(self, x):
         for i,m in enumerate(self.models):
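Both fixes here follow from the new return type: score_autoregressive drops to .values before the numpy arithmetic, and the multi-model combination indexes positionally with .iloc, because numpy-style [:, selection] slicing is not valid on a DataFrame. A toy illustration of the distinction (frame and names are illustrative only):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(12.).reshape(3, 4),
                  columns=[f'target_{t}' for t in range(4)])
sel = np.arange(1, 3)

buf = np.zeros((3, 4))
buf[:, sel] = df.iloc[:, sel]   # positional indexing works on a DataFrame
# buf[:, sel] = df[:, sel]      # raises: plain [] indexing is label-based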
34 changes: 33 additions & 1 deletion pyforecaster/forecasting_models/neural_models/base_nn.py
@@ -88,6 +88,7 @@ class FeedForwardModule(nn.Module):
     n_layers: Union[int, np.array, list]
     n_out: int=None
     n_neurons: int=None
+    split_heads: bool = False
     @nn.compact
     def __call__(self, x):
         if isinstance(self.n_layers, int):
@@ -97,9 +98,31 @@ def __call__(self, x):
         else:
             layers = self.n_layers
         for i, n in enumerate(layers):
-            x = nn.Dense(features=n, name='dense_{}'.format(i))(x)
+
+            if i < len(layers)-1:
+                x = nn.Dense(features=n, name='dense_{}'.format(i))(x)
+                x = nn.relu(x)
+            else:
+                if self.split_heads:
+                    n_out = self.n_out if self.n_out is not None else layers[-1]
+                    # split into n_out heads to predict the output independently
+                    subnets = [nn.relu(nn.Dense(features=layers[np.maximum(-2, -len(layers))], name='subnet_in_{}'.format(k))(x)) for k in range(n_out)]
+                    out = [nn.Dense(features=1, name='subnet_out_{}'.format(k))(subnets[k]) for k in range(n_out)]
+                    x = jnp.hstack(out)
+                    """
+                    # Combine the outputs in a single dense layer
+                    n_last = layers[np.maximum(-2, -len(layers))]
+                    x = nn.Dense(features=self.n_out * n_last, name='combined_dense')(x)
+                    x = nn.relu(x)
+                    # Reshape into separate heads
+                    x = x.reshape((-1, self.n_out, n_last))
+                    x = nn.relu(x)
+                    # Final layer to predict the output for each head
+                    x = nn.Dense(features=1, name='final_out')(x).squeeze(-1)
+                    """
+                else:
+                    x = nn.Dense(features=n, name='dense_{}'.format(i))(x)
+
         return x

 class NN(ScenarioGenerator):
@@ -384,3 +407,12 @@ def __init__(self, n_out=None, q_vect=None, n_epochs=10, val_ratio=None, nodes_a
                  scengen_dict={}, batch_size=None, **model_kwargs):
         super().__init__(n_out=n_out, q_vect=q_vect, n_epochs=n_epochs, val_ratio=val_ratio, nodes_at_step=nodes_at_step, learning_rate=learning_rate,
                          nn_module=FeedForwardModule, scengen_dict=scengen_dict, batch_size=batch_size, **model_kwargs)
+
+    def set_arch(self):
+        self.optimizer = optax.adamw(learning_rate=self.learning_rate)
+        self.model = FeedForwardModule(n_layers=self.n_layers, n_neurons=self.n_hidden_x,
+                                       n_out=self.n_out, split_heads=True)
+        self.predict_batch = vmap(jitting_wrapper(predict_batch, self.model), in_axes=(None, 0))
+        self.loss_fn = jitting_wrapper(probabilistic_loss_fn, self.predict_batch) if self.probabilistic else (
+            jitting_wrapper(loss_fn, self.predict_batch))
+        self.train_step = jitting_wrapper(partial(train_step, loss_fn=self.loss_fn), self.optimizer)
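For reference, a standalone sketch of the split-heads pattern the new FeedForwardModule branch implements: a shared trunk followed by one small independent subnet per output step, concatenated with jnp.hstack. Class and layer names here are illustrative, not the repo's exact configuration:

import jax
import jax.numpy as jnp
import flax.linen as nn

class SplitHeadMLP(nn.Module):
    n_hidden: int
    n_out: int

    @nn.compact
    def __call__(self, x):
        x = nn.relu(nn.Dense(self.n_hidden)(x))  # shared trunk
        # one independent subnet per forecast step, each ending in a scalar head
        heads = [nn.Dense(1, name=f'head_out_{k}')(
                     nn.relu(nn.Dense(self.n_hidden, name=f'head_in_{k}')(x)))
                 for k in range(self.n_out)]
        return jnp.hstack(heads)  # (batch, n_out)

model = SplitHeadMLP(n_hidden=16, n_out=3)
params = model.init(jax.random.PRNGKey(0), jnp.ones((4, 8)))
print(model.apply(params, jnp.ones((4, 8))).shape)  # (4, 3)

Compared with a single shared output layer, each head gets its own weights and nonlinearity at the cost of n_out small subnets; the triple-quoted block left in the diff sketches a cheaper batched alternative that combines the heads into one reshaped dense layer.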
6 changes: 3 additions & 3 deletions tests/test_models.py
@@ -48,7 +48,7 @@ def test_hw(self):
         #hw.reinit(x_tr['target'])
         y_hat = hw.predict(pd.concat([x_te,y_te], axis=1))

-        ts_animation([y_hat], names=['y_hat', 'target'], target=y_te.values, frames=100, repeat=False)
+        ts_animation([y_hat.values], names=['y_hat', 'target'], target=y_te.values, frames=100, repeat=False)

     def test_fast_linreg(self):

@@ -120,7 +120,7 @@ def test_hw_difficult(self):
                                   target_name='target', models_periods=np.array([1,2,3,5, 10, 24]), constraints=[0, np.inf]).fit(y_tr,y_tr)
         y_hat_multi = hw_multi.predict(y_te)

-        ts_animation([y_hat, y_hat_multi], names=['y_hat', 'y_hat_multi', 'target'], target=y_te.values, frames=100, repeat=False)
+        ts_animation([y_hat.values, y_hat_multi.values], names=['y_hat', 'y_hat_multi', 'target'], target=y_te.values, frames=100, repeat=False)


     def test_hw_multi(self):
@@ -155,7 +155,7 @@ def test_hw_multi(self):
         y_hat_fks_multi = fks_multi.predict(df_te)
         y_hat_fks_multi_q = fks_multi.predict_quantiles(df_te)

-        ys = [y_hat, y_hat_multi, y_hat_fes, y_hat_fks, y_hat_fks_multi]
+        ys = [y_hat.values, y_hat_multi.values, y_hat_fes.values, y_hat_fks.values, y_hat_fks_multi.values]
         ts_animation(ys, target = df_te['all'].values, names = ['hw', 'hw_multi', 'fes', 'fks', 'fks_multi', 'target'], frames = 120, interval = 1, step = 1, repeat = False)

     def test_linear_val_split(self):
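The test updates are the mechanical consequence of the new return type: ts_animation consumes arrays, so the DataFrames are passed via .values. A hypothetical regression check for the new contract (not in the repo, reusing the fixtures of test_hw above) could look like:

y_hat = hw.predict(pd.concat([x_te, y_te], axis=1))
assert isinstance(y_hat, pd.DataFrame)   # new output contract
assert y_hat.index.equals(x_te.index)    # aligned with the input index
ts_animation([y_hat.values], names=['y_hat', 'target'],
             target=y_te.values, frames=100, repeat=False)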
22 changes: 11 additions & 11 deletions tests/test_nns.py
@@ -497,24 +497,24 @@ def test_invertible_causal_nn(self):
             ax.plot(y_invert.iloc[i, 144:].values, linestyle='--')
             plt.pause(1e-6)
         """
-        m = FFNN(n_layers=1, learning_rate=1e-3, batch_size=100, load_path=None, n_out=120, rel_tol=-1, stopping_rounds=20).fit(e_tr.iloc[:, :184], e_tr.iloc[:, -120:])
-        y_hat = m.predict(e_te.iloc[:, :184])
+        m = FFNN(n_hidden_x=50, n_layers=1, learning_rate=1e-3, batch_size=100, load_path=None, n_out=143, rel_tol=-1, stopping_rounds=20, n_epochs=1).fit(e_tr.iloc[:, :145], e_tr.iloc[:, -143:])
+        y_hat = m.predict(e_te.iloc[:, :145])

-        m = CausalInvertibleNN(learning_rate=1e-2, batch_size=200, load_path=None, n_in=184,
-                               n_layers=3, normalize_target=False, n_epochs=1, stopping_rounds=20, rel_tol=-1,
-                               end_to_end='full', n_hidden_y=300, n_prediction_layers=3, n_out=120, names_exogenous=['all_lag_000']).fit(e_tr.iloc[:, :184], e_tr.iloc[:, -120:])
+        m = CausalInvertibleNN(learning_rate=1e-2, batch_size=300, load_path=None, n_in=145,
+                               n_layers=2, normalize_target=False, n_epochs=5, stopping_rounds=30, rel_tol=-1,
+                               end_to_end='full', n_hidden_y=300, n_prediction_layers=3, n_out=143, names_exogenous=['all_lag_000']).fit(e_tr.iloc[:, :145], e_tr.iloc[:, -143:])

-        z_hat_ete = m.predict(e_te.iloc[:, :184])
+        z_hat_ete = m.predict(e_te.iloc[:, :145])

-        np.mean((z_hat_ete.values - e_te.iloc[:, -120:].values)**2)
-        np.mean((y_hat.values - e_te.iloc[:, -120:].values)**2)
-        np.mean((y_hat_lin.values - e_te.iloc[:, -120:].values)**2)
+        np.mean((z_hat_ete.values - e_te.iloc[:, -143:].values)**2)
+        np.mean((y_hat.values - e_te.iloc[:, -143:].values)**2)
+        np.mean((y_hat_lin.values - e_te.iloc[:, -143:].values)**2)

         fig, ax = plt.subplots(1, 1, figsize=(4, 3))
         for i in range(100):
-            if i%10 == 0:
+            if i%5 == 0:
                 plt.cla()
-            ax.plot(e_te.iloc[i, -120:].values)
+            ax.plot(e_te.iloc[i, -143:].values)
             ax.plot(y_hat_lin.iloc[i, :].values, linewidth=1)
             ax.plot(z_hat_ete.iloc[i, :].values, linestyle='--')
             plt.pause(1e-6)
