-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
111 lines (87 loc) · 3.53 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import pandas as pd
import xarray as xr
from eofs.xarray import Eof
# Path prefix for the dataset files. The alternative value is kept below for
# reference.
# NOTE(review): the loaders visible in this file build their file names
# without this prefix - confirm whether it is still needed.
# data_path = './data/train_val/'
data_path = ""
# Scaling bounds for CO2. Only max_co2 is used by the normalisation helpers
# in this file; min_co2 is defined but not referenced by them.
# NOTE(review): units of these bounds are not stated here - confirm.
min_co2 = 0.
max_co2 = 9500
def normalize_co2(data):
    """Scale CO2 values by dividing them by the module-level ``max_co2``."""
    scaled = data / max_co2
    return scaled
def un_normalize_co2(data):
    """Undo ``normalize_co2`` by multiplying by the module-level ``max_co2``."""
    restored = data * max_co2
    return restored
# Scaling bounds for CH4. Only max_ch4 is used by the normalisation helpers
# in this file; min_ch4 is defined but not referenced by them.
# NOTE(review): units of these bounds are not stated here - confirm.
min_ch4 = 0.
max_ch4 = 0.8
def normalize_ch4(data):
    """Scale CH4 values by dividing them by the module-level ``max_ch4``."""
    scaled = data / max_ch4
    return scaled
def un_normalize_ch4(data):
    """Undo ``normalize_ch4`` by multiplying by the module-level ``max_ch4``."""
    restored = data * max_ch4
    return restored
def create_predictor_data(data_sets, n_eofs=5):
    """
    Build the training predictor table and fit the aerosol EOF solvers.

    Each ``inputs_{name}.nc`` file is opened and the datasets are concatenated
    along ``time``. The ``BC`` and ``SO2`` fields are reduced to their leading
    principal components, while ``CO2`` and ``CH4`` are normalised with
    ``normalize_co2`` / ``normalize_ch4``.

    Args:
        data_sets (str | list(str)): name, or names, of the datasets
        n_eofs (int): number of eofs to create for aerosol variables

    Returns:
        tuple: ``(inputs, (so2_solver, bc_solver))`` where ``inputs`` is a
        pandas DataFrame with columns ``CO2``, ``CH4``, ``BC_0..`` and
        ``SO2_0..``, and the solvers are the fitted ``Eof`` objects. Note the
        solver order (SO2 first, then BC); ``get_test_data`` relies on it.
    """
    # Accept a single dataset name as well as a list of names
    if isinstance(data_sets, str):
        data_sets = [data_sets]

    X = xr.concat([xr.open_dataset(f"inputs_{file}.nc") for file in data_sets], dim='time')
    # Replace the concatenated time coordinate with a running index so that
    # every row has a unique label (presumably the source files overlap in time).
    X = X.assign_coords(time=np.arange(len(X.time)))

    # Fit EOFs for BC and keep the leading principal components
    bc_solver = Eof(X['BC'])
    bc_pcs = bc_solver.pcs(npcs=n_eofs, pcscaling=1)

    # Fit EOFs for SO2 and keep the leading principal components
    so2_solver = Eof(X['SO2'])
    so2_pcs = so2_solver.pcs(npcs=n_eofs, pcscaling=1)

    # Convert to pandas: one column per mode
    bc_df = bc_pcs.to_dataframe().unstack('mode')
    bc_df.columns = [f"BC_{i}" for i in range(n_eofs)]

    so2_df = so2_pcs.to_dataframe().unstack('mode')
    so2_df.columns = [f"SO2_{i}" for i in range(n_eofs)]

    # Bring the emissions data back together again and normalise
    inputs = pd.DataFrame({
        "CO2": normalize_co2(X["CO2"].data),
        "CH4": normalize_ch4(X["CH4"].data)
    }, index=X["CO2"].coords['time'].data)

    # Combine with aerosol EOFs
    inputs = pd.concat([inputs, bc_df, so2_df], axis=1)
    return inputs, (so2_solver, bc_solver)
def get_test_data(file, eof_solvers, n_eofs=5):
    """
    Build the predictor table for a single dataset using already-fitted EOFs.

    ``inputs_{file}.nc`` is opened, its ``SO2`` and ``BC`` fields are projected
    onto the supplied solvers, and ``CO2`` / ``CH4`` are normalised with
    ``normalize_co2`` / ``normalize_ch4``.

    Args:
        file (str): name of the dataset
        eof_solvers (Eof_so2, Eof_bc): Fitted Eof objects to use for projection,
            indexed as ``[0]`` for SO2 and ``[1]`` for BC (the order returned
            by ``create_predictor_data``)
        n_eofs (int): number of eofs to project onto for each aerosol variable

    Returns:
        pandas.DataFrame: columns ``CO2``, ``CH4``, ``BC_0..`` and ``SO2_0..``,
        indexed by the dataset's ``time`` coordinate.
    """
    X = xr.open_dataset(f"inputs_{file}.nc")

    # Project onto the requested number of modes. This must equal the number
    # of column labels generated below, so it follows ``n_eofs`` rather than a
    # fixed value.
    so2_pcs = eof_solvers[0].projectField(X["SO2"], neofs=n_eofs, eofscaling=1)
    so2_df = so2_pcs.to_dataframe().unstack('mode')
    so2_df.columns = [f"SO2_{i}" for i in range(n_eofs)]

    bc_pcs = eof_solvers[1].projectField(X["BC"], neofs=n_eofs, eofscaling=1)
    bc_df = bc_pcs.to_dataframe().unstack('mode')
    bc_df.columns = [f"BC_{i}" for i in range(n_eofs)]

    # Bring the emissions data back together again and normalise
    inputs = pd.DataFrame({
        "CO2": normalize_co2(X["CO2"].data),
        "CH4": normalize_ch4(X["CH4"].data)
    }, index=X["CO2"].coords['time'].data)

    # Combine with aerosol EOFs
    inputs = pd.concat([inputs, bc_df, so2_df], axis=1)
    return inputs
def create_predictdand_data(data_sets):
    """
    Load the target fields for one or more datasets.

    Each ``outputs_{name}.nc`` file is opened, the datasets are concatenated
    along ``time`` and averaged over the ``member`` dimension.

    Args:
        data_sets (str | list(str)): name, or names, of the datasets

    Returns:
        The combined dataset, with ``pr`` and ``pr90`` multiplied by 86400.
    """
    names = [data_sets] if isinstance(data_sets, str) else data_sets
    opened = [xr.open_dataset(f"outputs_{name}.nc") for name in names]
    Y = xr.concat(opened, dim='time').mean("member")

    # Convert the precip values to mm/day (86400 seconds per day)
    for precip_var in ("pr", "pr90"):
        Y[precip_var] *= 86400
    return Y
def get_rmse(truth, pred):
    """
    Return the latitude-weighted root-mean-square error of ``pred``.

    The squared difference is averaged over ``lat`` and ``lon`` with weights
    equal to the cosine of ``truth.lat`` (taken to be in degrees), and the
    square root of that mean is returned as a raw array via ``.data``.

    Args:
        truth: reference field; expected to be an xarray object with ``lat``
            and ``lon`` dimensions
        pred: predicted field, compatible with ``truth``
    """
    lat_weights = np.cos(np.deg2rad(truth.lat))
    squared_error = (truth - pred) ** 2
    mean_squared_error = squared_error.weighted(lat_weights).mean(['lat', 'lon'])
    return np.sqrt(mean_squared_error).data