corrected reconciliation bugs

supsi-dacd-isaac · Jul 31, 2024 · 08575d9 · 08575d9
1 parent e83729f
commit 08575d9
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 15 deletions.
diff --git a/pyforecaster/reconciliation/reconciliation.py b/pyforecaster/reconciliation/reconciliation.py
@@ -127,7 +127,7 @@ def get_quantiles(self, x: pd.DataFrame, level='bottom'):
 
     def get_target_matrix(self, x, y):
         bottom_series = [c for c in x['name'].unique() if c not in self.hierarchy.keys()]
-        y_tr = y.T.reset_index(drop=True).T
+        y_tr = y
         y_tr = pd.concat([pd.concat({k: y_tr.loc[x['name'].isin([k])] for k in self.hierarchy.keys()}, axis=1),
                           pd.concat({k: y_tr.loc[x['name'].isin([k])] for k in bottom_series}, axis=1)],
                          axis=1)
@@ -183,19 +183,20 @@ def fit(self, x:pd.DataFrame, y:pd.DataFrame):
         # fit reconciliation for all the steps ahead, keep errors, pre- and post-reconciliation
         self.steps = y_tr.columns.get_level_values(1).unique()
         errs_hat, errs_tilde = {}, {}
-        for sa in tqdm(self.steps):
+        target_names = y_hat.columns.get_level_values(1).unique()
+        for t_name in tqdm(target_names):
             # ---------------------------- get base predictions, ground truth for sa ---------------------------------------
-            y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == sa].droplevel(1, 1)
-            y_sa = y_tr.loc[:, y_tr.columns.get_level_values(1) == sa].droplevel(1, 1)
+            y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == t_name].droplevel(1, 1)
+            y_sa = y_tr.loc[:, y_hat.columns.get_level_values(1) == t_name].droplevel(1, 1)
 
             # ---------------------------- fit, predict -------------------------------------------------------------------
             self.fit_reconciliation(y_sa, y_hat_sa, self.hierarchy)
             y_tilde_sa = self.reconcile(y_hat_sa)
 
             # ---------------------------- retrieve error samples from the training set ------------------------------------
 
-            errs_hat[sa] = y_sa - y_hat_sa
-            errs_tilde[sa] = y_sa - y_tilde_sa
+            errs_hat[t_name] = y_sa - y_hat_sa
+            errs_tilde[t_name] = y_sa - y_tilde_sa
 
         self.errs_hat = pd.concat(errs_hat, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)
         self.errs_tilde = pd.concat(errs_tilde, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)
@@ -209,10 +210,11 @@ def predict(self, x, method='reconciled'):
             return y_hat
 
         y_tilde = {}
-        for sa in tqdm(self.steps):
+        target_names = y_hat.columns.get_level_values(1).unique()
+        for t_name in tqdm(target_names):
             # get reconciled forecasts at this step ahead
-            y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == sa].droplevel(1, 1)
-            y_tilde[sa] = self.reconcile(y_hat_sa)
+            y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == t_name].droplevel(1, 1)
+            y_tilde[t_name] = self.reconcile(y_hat_sa)
 
         y_tilde = pd.concat(y_tilde, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)
 
@@ -241,9 +243,10 @@ def predict_scenarios(self, x, method='reconciled'):
         """
 
         y_hat = self.get_predictions(x, level='all')
+        target_names = y_hat.columns.get_level_values(1).unique()
         if method == 'reconciled':
-            y_tilde = pd.concat({sa: self.reconcile(y_hat.loc[:, y_hat.columns.get_level_values(1) == sa].
-                                                    droplevel(1, 1)) for sa in self.steps}, axis=1)
+            y_tilde = pd.concat({t_name: self.reconcile(y_hat.loc[:, y_hat.columns.get_level_values(1) == t_name].
+                                                    droplevel(1, 1)) for t_name in target_names}, axis=1)
             y_tilde = y_tilde.swaplevel(0, 1, axis=1).sort_index(axis=1)
 
         scens = []
@@ -370,7 +373,8 @@ def get_scenarios(self, errs_tr, y_hat_te, n_scenarios, method='unconditional'):
     def compute_kpis(self, hat, x, y, metric, **metric_kwargs):
         y_mat = self.get_target_matrix(x, y)
         kpi = {}
-        for s in self.steps:
+        target_names = hat.columns.get_level_values(1).unique()
+        for s in target_names:
             hat_s = convert_multiindex_pandas_to_tensor(hat.loc[:, (slice(None), [s], slice(None))].droplevel(1, 1))
             y_mat_s = y_mat.loc[:, (slice(None), [s])].droplevel(1, 1)
             kpi[s] = metric(hat_s, y_mat_s, **metric_kwargs).T

diff --git a/tests/test_formatter.py b/tests/test_formatter.py
@@ -275,12 +275,12 @@ def test_normalizers_complex(self):
         df = pd.DataFrame(np.random.randn(100, 5), index=pd.date_range('01-01-2020', freq='20min', periods=100, tz='Europe/Zurich'), columns=['a', 'b', 'c', 'd', 'e'])
         formatter = pyf.Formatter().add_transform(['a', 'b'], lags=np.arange(1, 5), agg_freq='20min')
         formatter.add_target_transform(['a'], lags=-np.arange(1, 5), agg_freq='20min')
-        formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a')
-        formatter.add_target_normalizer(['a'], 'std', agg_freq='5H', name='b')
+        formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_n')
+        formatter.add_target_normalizer(['a'], 'std', agg_freq='5H', name='b_n')
 
         x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
 
-        formatter.add_normalizing_fun(expr="np.exp(df[t]+df['a']) + df['b']", inv_expr="np.log(df[t]-df['b']) -df['a']")
+        formatter.add_normalizing_fun(expr="np.exp(df[t]+df['a_n']) + df['b_n']", inv_expr="np.log(df[t]-df['b_n']) -df['a_n']")
         x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
         y_unnorm = formatter.denormalize(x, y_norm)