Skip to content

Commit

Permalink
corrected reconciliation bugs
Browse files (browse the repository at this point in the history)
  • Loading branch information
nepslor committed Jul 31, 2024
1 parent e83729f commit 08575d9
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 15 deletions.
28 changes: 16 additions & 12 deletions pyforecaster/reconciliation/reconciliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def get_quantiles(self, x: pd.DataFrame, level='bottom'):

def get_target_matrix(self, x, y):
bottom_series = [c for c in x['name'].unique() if c not in self.hierarchy.keys()]
y_tr = y.T.reset_index(drop=True).T
y_tr = y
y_tr = pd.concat([pd.concat({k: y_tr.loc[x['name'].isin([k])] for k in self.hierarchy.keys()}, axis=1),
pd.concat({k: y_tr.loc[x['name'].isin([k])] for k in bottom_series}, axis=1)],
axis=1)
Expand Down Expand Up @@ -183,19 +183,20 @@ def fit(self, x:pd.DataFrame, y:pd.DataFrame):
# fit reconciliation for all the steps ahead, keep errors, pre- and post-reconciliation
self.steps = y_tr.columns.get_level_values(1).unique()
errs_hat, errs_tilde = {}, {}
for sa in tqdm(self.steps):
target_names = y_hat.columns.get_level_values(1).unique()
for t_name in tqdm(target_names):
# ---------------------------- get base predictions, ground truth for sa ---------------------------------------
y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == sa].droplevel(1, 1)
y_sa = y_tr.loc[:, y_tr.columns.get_level_values(1) == sa].droplevel(1, 1)
y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == t_name].droplevel(1, 1)
y_sa = y_tr.loc[:, y_hat.columns.get_level_values(1) == t_name].droplevel(1, 1)

# ---------------------------- fit, predict -------------------------------------------------------------------
self.fit_reconciliation(y_sa, y_hat_sa, self.hierarchy)
y_tilde_sa = self.reconcile(y_hat_sa)

# ---------------------------- retrieve error samples from the training set ------------------------------------

errs_hat[sa] = y_sa - y_hat_sa
errs_tilde[sa] = y_sa - y_tilde_sa
errs_hat[t_name] = y_sa - y_hat_sa
errs_tilde[t_name] = y_sa - y_tilde_sa

self.errs_hat = pd.concat(errs_hat, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)
self.errs_tilde = pd.concat(errs_tilde, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)
Expand All @@ -209,10 +210,11 @@ def predict(self, x, method='reconciled'):
return y_hat

y_tilde = {}
for sa in tqdm(self.steps):
target_names = y_hat.columns.get_level_values(1).unique()
for t_name in tqdm(target_names):
# get reconciled forecasts at this step ahead
y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == sa].droplevel(1, 1)
y_tilde[sa] = self.reconcile(y_hat_sa)
y_hat_sa = y_hat.loc[:, y_hat.columns.get_level_values(1) == t_name].droplevel(1, 1)
y_tilde[t_name] = self.reconcile(y_hat_sa)

y_tilde = pd.concat(y_tilde, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)

Expand Down Expand Up @@ -241,9 +243,10 @@ def predict_scenarios(self, x, method='reconciled'):
"""

y_hat = self.get_predictions(x, level='all')
target_names = y_hat.columns.get_level_values(1).unique()
if method == 'reconciled':
y_tilde = pd.concat({sa: self.reconcile(y_hat.loc[:, y_hat.columns.get_level_values(1) == sa].
droplevel(1, 1)) for sa in self.steps}, axis=1)
y_tilde = pd.concat({t_name: self.reconcile(y_hat.loc[:, y_hat.columns.get_level_values(1) == t_name].
droplevel(1, 1)) for t_name in target_names}, axis=1)
y_tilde = y_tilde.swaplevel(0, 1, axis=1).sort_index(axis=1)

scens = []
Expand Down Expand Up @@ -370,7 +373,8 @@ def get_scenarios(self, errs_tr, y_hat_te, n_scenarios, method='unconditional'):
def compute_kpis(self, hat, x, y, metric, **metric_kwargs):
y_mat = self.get_target_matrix(x, y)
kpi = {}
for s in self.steps:
target_names = hat.columns.get_level_values(1).unique()
for s in target_names:
hat_s = convert_multiindex_pandas_to_tensor(hat.loc[:, (slice(None), [s], slice(None))].droplevel(1, 1))
y_mat_s = y_mat.loc[:, (slice(None), [s])].droplevel(1, 1)
kpi[s] = metric(hat_s, y_mat_s, **metric_kwargs).T
Expand Down
6 changes: 3 additions & 3 deletions tests/test_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,12 +275,12 @@ def test_normalizers_complex(self):
df = pd.DataFrame(np.random.randn(100, 5), index=pd.date_range('01-01-2020', freq='20min', periods=100, tz='Europe/Zurich'), columns=['a', 'b', 'c', 'd', 'e'])
formatter = pyf.Formatter().add_transform(['a', 'b'], lags=np.arange(1, 5), agg_freq='20min')
formatter.add_target_transform(['a'], lags=-np.arange(1, 5), agg_freq='20min')
formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a')
formatter.add_target_normalizer(['a'], 'std', agg_freq='5H', name='b')
formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_n')
formatter.add_target_normalizer(['a'], 'std', agg_freq='5H', name='b_n')

x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH')

formatter.add_normalizing_fun(expr="np.exp(df[t]+df['a']) + df['b']", inv_expr="np.log(df[t]-df['b']) -df['a']")
formatter.add_normalizing_fun(expr="np.exp(df[t]+df['a_n']) + df['b_n']", inv_expr="np.log(df[t]-df['b_n']) -df['a_n']")
x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
y_unnorm = formatter.denormalize(x, y_norm)

Expand Down

0 comments on commit 08575d9

Please sign in to comment.