From dc480a20ce2df58e30b31cc1699c5808f5232201 Mon Sep 17 00:00:00 2001 From: Oliver Hamelijnck Date: Wed, 4 Dec 2024 10:19:58 +0000 Subject: [PATCH 1/2] :wip: attempt to show how to hold the liklihood noise --- .../gpjax_models/models/stgp_mrdgp.py | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/containers/cleanair/gpjax_models/gpjax_models/models/stgp_mrdgp.py b/containers/cleanair/gpjax_models/gpjax_models/models/stgp_mrdgp.py index fb0cd4475..e42cbb304 100644 --- a/containers/cleanair/gpjax_models/gpjax_models/models/stgp_mrdgp.py +++ b/containers/cleanair/gpjax_models/gpjax_models/models/stgp_mrdgp.py @@ -105,34 +105,72 @@ def train_model(model, num_epochs): sat_natgrad = NatGradTrainer(sat_model) laqn_natgrad = NatGradTrainer(laqn_model) + for q in range(len(laqn_model.approximate_posterior.approx_posteriors)): laqn_model.approximate_posterior.approx_posteriors[q]._S_chol.fix() laqn_model.approximate_posterior.approx_posteriors[q]._m.fix() for q in range(len(sat_model.approximate_posterior.approx_posteriors)): sat_model.approximate_posterior.approx_posteriors[q]._S_chol.fix() sat_model.approximate_posterior.approx_posteriors[q]._m.fix() + sat_grad = GradDescentTrainer(sat_model, objax.optimizer.Adam) laqn_grad = GradDescentTrainer(laqn_model, objax.optimizer.Adam) joint_grad = GradDescentTrainer(combined_model, objax.optimizer.Adam) + sat_model.likelihood.fix() + laqn_model.likelihood.fix() + sat_lik_fixed_grad = GradDescentTrainer(sat_model, objax.optimizer.Adam) + laqn_lik_fixed_grad = GradDescentTrainer(laqn_model, objax.optimizer.Adam) + joint_lik_fixed_grad = GradDescentTrainer(combined_model, objax.optimizer.Adam) + + sat_model.likelihood.release() + laqn_model.likelihood.release() + loss_curve = [] + + # pretrain/initial the approximate posterior with natural gradients sat_natgrad.train(1.0, 1) laqn_natgrad.train(1.0, 1) # Pretrain SAT model # pretrain sat + + pretrain_lik_fixed_epochs = int(self.pretrain_epochs*0.4) + pretrain_lik_released_epochs = self.pretrain_epochs - pretrain_lik_fixed_epochs + + num_lik_fixed_epochs = int(self.num_epochs*0.4) + num_lik_released_epochs = self.num_epochs - num_lik_fixed_epochs + + # pretrain satellite + # for 40% of epochs use likelihood fixed print("Pretraining sat") - for i in trange(self.pretrain_epochs): + for i in trange(pretrain_lik_fixed_epochs): + lc_i, _ = sat_lik_fixed_grad.train(0.01, 1) + sat_natgrad.train(0.1, 1) + loss_curve.append(lc_i) + + # for 60% of epochs use likelihood released + for i in trange(pretrain_lik_released_epochs): lc_i, _ = sat_grad.train(0.01, 1) sat_natgrad.train(0.1, 1) loss_curve.append(lc_i) + # pretrain laqn print("Pretraining laqn") - for i in trange(self.pretrain_epochs): + + for i in trange(pretrain_lik_fixed_epochs): + lc_i, _ = laqn_lik_fixed_grad.train(0.01, 1) + laqn_natgrad.train(0.1, 1) + loss_curve.append(lc_i) + + # for 60% of epochs use likelihood released + for i in trange(pretrain_lik_released_epochs): lc_i, _ = laqn_grad.train(0.01, 1) laqn_natgrad.train(0.1, 1) loss_curve.append(lc_i) + + # TODO: not sure if this should have likleihood fixed or not print("Joint training") for i in trange(self.num_epochs): lc_i, _ = joint_grad.train(0.01, 1) From 38a5a24acb9b7c9e8170404bff4e904f408a77b0 Mon Sep 17 00:00:00 2001 From: Oliver Hamelijnck Date: Wed, 4 Dec 2024 10:20:42 +0000 Subject: [PATCH 2/2] update --- .../gpjax_models/models/metrics.py | 133 ++++++++++++++++++ .../gpjax_models/gpjax_models/models/vis.py | 4 + 2 files changed, 137 insertions(+) create mode 100644 containers/cleanair/gpjax_models/gpjax_models/models/metrics.py diff --git a/containers/cleanair/gpjax_models/gpjax_models/models/metrics.py b/containers/cleanair/gpjax_models/gpjax_models/models/metrics.py new file mode 100644 index 000000000..e40b3281d --- /dev/null +++ b/containers/cleanair/gpjax_models/gpjax_models/models/metrics.py @@ -0,0 +1,133 @@ +import pickle, csv +import numpy as np +import pandas as pd +import geopandas as gpd +from pathlib import Path +import matplotlib.pyplot as plt +import os +from stdata.vis.spacetime import SpaceTimeVisualise +import geopandas as gpd +from stdata.utils import to_gdf + +from pysal.explore import esda # Exploratory Spatial analytics +from pysal.lib import weights # Spatial weights + +directory_path = Path('/Users/oliverhamelijnck/Downloads/dataset/') + +# Create the directory if it doesn't exist +if not os.path.exists(directory_path): + os.makedirs(directory_path) + + +def load_data(root): + with open( + str( + directory_path / "training_dataset.pkl" + ), + "rb", + ) as file: + training_data = pd.read_pickle(file) + with open( + str( + directory_path/ "test_dataset.pkl" + ), + "rb", + ) as file: + testing_data = pd.read_pickle(file) + # Load raw data using pickle + with open( + directory_path / "raw_data_3.pkl", + "rb", + ) as file: + raw_data = pd.read_pickle(file) + + with open( + directory_path / "test_true_dataset.pkl", + "rb", + ) as file: + true_y = pd.read_pickle(file) + date_ranges = [ + ("2023-12-01 00:00:00+00:00", "2023-12-01 23:00:00+00:00"), + ("2023-12-02 00:00:00+00:00", "2023-12-02 23:00:00+00:00"), + ("2023-12-03 00:00:00+00:00", "2023-12-03 23:00:00+00:00"), + ("2023-12-04 00:00:00+00:00", "2023-12-04 23:00:00+00:00"), + ("2023-12-05 00:00:00+00:00", "2023-12-05 23:00:00+00:00"), + ("2023-12-06 00:00:00+00:00", "2023-12-06 23:00:00+00:00"), + ("2023-12-07 00:00:00+00:00", "2023-12-07 23:00:00+00:00"), + ] + + # Convert the 'measurement_start_utc' column to datetime if it's not already + true_y["laqn"]["measurement_start_utc"] = pd.to_datetime( + true_y["laqn"]["measurement_start_utc"] + ) + + # Filter the laqn DataFrame for each specified date range + filtered_data = [] + for start_date, end_date in date_ranges: + day_gt = true_y["laqn"][ + (true_y["laqn"]["measurement_start_utc"] >= start_date) + & (true_y["laqn"]["measurement_start_utc"] <= end_date) + ] + filtered_data.append(day_gt) + + # Assign each day's data to a separate variable for convenience + day_1_gt, day_2_gt, day_3_gt, day_4_gt, day_5_gt, day_6_gt, day_7_gt = filtered_data + + return training_data, testing_data, raw_data, day_3_gt + + +def load_results(root): + with open( + str(root / "predictions_mrdgp_3.pkl"), "rb" + ) as file: + results = pd.read_pickle(file) + return results + + +def fix_df_columns_dropna(df): + df = df.rename(columns={"point_id": "id", "datetime": "measurement_start_utc"}) + return df.dropna(subset=["NO2"]) + + +def fix_df_columns(df): + return df.rename(columns={"point_id": "id", "datetime": "measurement_start_utc"}) + + +if __name__ == "__main__": + data_root = directory_path + + training_data, testing_data, raw_data, day_3_gt = load_data(data_root) + day_3_gt = day_3_gt.reset_index(drop=True) + train_laqn_df = fix_df_columns(raw_data["train"]["laqn"]["df"]) + test_laqn_df = fix_df_columns(raw_data["test"]["laqn"]["df"]) + true_val = fix_df_columns(day_3_gt) + + # Load results + results = load_results(data_root) + train_laqn_df["pred"] = results["predictions"]["train_laqn"]["mu"][0].T + train_laqn_df["var"] = np.squeeze(results["predictions"]["train_laqn"]["var"][0]) + train_laqn_df["observed"] = train_laqn_df["NO2"] + + train_laqn_df["NO2"] = train_laqn_df["NO2"] + + test_laqn_df["pred"] = results["predictions"]["test_laqn"]["mu"][0].T + test_laqn_df["var"] = np.squeeze(results["predictions"]["test_laqn"]["var"][0]) + test_laqn_df["observed"] = day_3_gt["NO2"] + + test_laqn_df['residual'] = test_laqn_df['observed'] - test_laqn_df['pred'] + + test_df = to_gdf(test_laqn_df[test_laqn_df['epoch']==test_laqn_df['epoch'].iloc[0]].copy()) + test_df = test_df[test_df['residual'].notna()].copy() + + # Generate W from the GeoDataFrame + w = weights.distance.KNN.from_dataframe(test_df, k=8) + # Row-standardization + w.transform = "R" + + test_df["w_residual"] = weights.lag_spatial(w, test_df['residual']) + + test_df["residual_std"] = test_df["residual"] - test_df["residual"].mean() + test_df["w_residual_std"] = weights.lag_spatial(w, test_df['residual_std']) + + lisa = esda.moran.Moran_Local(test_df['residual'], w) + breakpoint() diff --git a/containers/cleanair/gpjax_models/gpjax_models/models/vis.py b/containers/cleanair/gpjax_models/gpjax_models/models/vis.py index dd6688a6a..07adfc696 100644 --- a/containers/cleanair/gpjax_models/gpjax_models/models/vis.py +++ b/containers/cleanair/gpjax_models/gpjax_models/models/vis.py @@ -143,7 +143,11 @@ def fix_df_columns(df): sat_df['var'] = results["predictions"]['sat']['var'][0] sat_df['observed'] = sat_df['NO2'] + laqn_df['residual'] = laqn_df['observed'] - laqn_df['pred'] + laqn_df['pred'] = laqn_df['residual'] + vis_obj = SpaceTimeVisualise( laqn_df, hexgrid_df, sat_df=sat_df, geopandas_flag=True, test_start=train_end) + #vis_obj = SpaceTimeVisualise( laqn_df, hexgrid_df, geopandas_flag=True, test_start=train_end) # Show the visualization vis_obj.show()