# Illustrate how to use DAPPER
# to obtain benchmarks over a range of values of an experimental control variable,
# and plot the compiled results as functions of that variable (the xlabel),
# with each curve being one method configuration.
#
# Specifically, we will reproduce figure 6.6 from [1], accessible through
# http://books.google.no/books?id=FtDZDQAAQBAJ&q=figure+6.6
# The figure reveals the relative importance of localization and inflation in the EnKF.
#
# [1]: "Data Assimilation: Methods, Algorithms, and Applications",
#      by M. Asch, M. Bocquet, M. Nodet.
#
# Also demonstrates:
#  - Parallelization (distribution of experiment settings (xticks)).
#  - Result data saving, loading, and plotting.
from common import *
# Seed management. Note, below, that the seed varies only
# between repetitions, not between xticks or configurations.
sd0 = seed_init(8) # Base random seed.
from mods.Lorenz95.boc15loc import HMM
import mods.Lorenz95.core as core
HMM.t.T = 4**4.0 # Length of experiment (4**4 = 256 time units).
# Specify the control variable (i.e. the plotting xlabel) of interest.
CtrlVar = sys.argv[1] # For example, run the script with: `python example_3.py N`
# CtrlVar = 'N'
# Define range of the experiment control variable.
if CtrlVar == 'N': # Ensemble size.
  xticks = ccat(arange(2,20),[20, 22, 25, 30, 40, 50, 70, 100])
elif CtrlVar == 'F': # Model forcing.
  xticks = arange(3,20)
else:
  raise ValueError("xticks not defined for control variable " + CtrlVar)
# Duplicate each experiment (the random seed will be varied between duplicates).
xticks = array(xticks).repeat(32)
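# (distribute(), below, also returns rep_inds: the repeat index of each xticks
#  entry, which is what varies the random seed between the duplicates.)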
# If this script is run
#  - with the 2nd command-line argument being PARALLELIZE,
#    then this function will split the 'xticks' array into 'nCore' batches,
#    and distribute each to a WORKER, which runs the rest of the script in parallel
#    (a conceptual sketch of the splitting is given after the call below).
#    Important: see the comment in distribute() about enforcing single-core use by numpy.
#  - without a 2nd argument: the script runs as normal (no parallelization).
xticks, save_path, rep_inds = distribute(__file__,sys.argv,xticks,CtrlVar)
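# For intuition only: the batch splitting mentioned above amounts to roughly
# the following (a hypothetical sketch, not distribute()'s actual implementation):
#   batches = np.array_split(xticks, nCore) # one batch of xticks per WORKER;
#   # each WORKER then runs the assimilation loop on its own batch only.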
##############################
# DA Configurations
##############################
N = '?' if CtrlVar=='N' else 20
cfgs  = List_of_Configs()
cfgs += Climatology()                               # Baseline method.
cfgs += OptInterp()                                 # Baseline method.
for upd_a in ['PertObs','Sqrt']:                    # Update (_a) forms: stochastic, deterministic.
  cfgs += EnKF(upd_a,N)                             # Pure EnKF.
  cfgs += EnKF(upd_a,N,infl=1.01)                   # + fixed post-inflation, good around N=50.
  cfgs += EnKF(upd_a,N,infl=1.05)                   # + idem, good around N=17.
  cfgs += EnKF(upd_a,N,infl=1.10)                   # + idem, good around N=16.
cfgs += EnKF_N(N)                                   # + adaptive (≈optimal) inflation.
cfgs += EnKF_N(N,xN=2)                              # + idem, with 2x confidence in the inflation hyper-prior.
cfgs += LETKF(N,loc_rad=2)                          # + localization with radius=2.
cfgs += LETKF(N,loc_rad='?')                        # + localization with ≈optimal radius(N).
cfgs += LETKF(N,loc_rad='?',infl='-N',xN=2)         # + idem, with adaptive (≈optimal) inflation.
cfgs += LETKF(N,loc_rad='$',infl='-N',xN=2)         # + idem, but with 1.5x the radius (see adjust_cfg below).
cfgs += iLEnKS('-N',N,loc_rad='?',xN=2,nIter=4,Lag='?') # + iterations, localization, and adaptive inflation.
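# Note: the '?' and '$' values above are placeholders. They get resolved, for each
# xtick, by adjust_cfg() below: N='?' becomes the current xtick value, while
# loc_rad and Lag are set by the tuning functions L95_rad() and L95_lag().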
##############################
# Setters for the control variable
##############################
def adjust_osse(variable,X):
  if   variable == 'F': core.Force = X
  elif variable == 'N': pass
  else: raise ValueError("OSSE changes not defined for variable " + variable)
def adjust_cfg(C,variable,X):
  if variable == 'F':
    # Here X is the forcing; the ensemble size is the config's own C.N.
    if getattr(C,'loc_rad',None)=='?': C = C.update_settings(loc_rad=    L95_rad(C.N,X))
    if getattr(C,'loc_rad',None)=='$': C = C.update_settings(loc_rad=    L95_rad(C.N,X))
  elif variable == 'N':
    if getattr(C,'N'      ,None)=='?': C = C.update_settings(      N=X)
    if getattr(C,'loc_rad',None)=='?': C = C.update_settings(loc_rad=    L95_rad(X,core.Force))
    if getattr(C,'loc_rad',None)=='$': C = C.update_settings(loc_rad=1.5*L95_rad(X,core.Force))
    if getattr(C,'Lag'    ,None)=='?': C = C.update_settings(    Lag=    L95_lag(X,core.Force))
  else: raise ValueError("Config changes not defined for variable " + variable)
  return C
# Most DA methods are approximate => leeway exists => tunable parameters exist.
# Here, we define some tuning functions of the xticks.
def L95_rad(N,F):
  # Approximately fitted (for infl=1.0) variogram (Gaussian).
  r = 0.08 + 10*(1-exp(-(N/40)**2))**0.8
  r *= sqrt(8/F) # Not tuned at all!
  return r
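# For example: L95_rad(20,8) = 0.08 + 10*(1 - exp(-(20/40)**2))**0.8 ≈ 3.1 .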
def L95_lag(N,F):
  # Approximately tuned for the iLEnKS and sak08 xticks.
  if N<=7      : return 1
  if 8<=N<=15  : return N-6
  if 16<=N     : return 10
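# (Assumes integer N: for non-integer 7<N<8 or 15<N<16, none of the cases
#  above match, and the function would return None.)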
##############################
# Assimilate
##############################
avrgs = np.empty((len(xticks),1,len(cfgs)),dict) # dtype=dict makes this an object array.
# The middle axis (nRepeats) has length 1 because repetition is instead
# achieved through the replication of xticks.
# stats = np.empty_like(avrgs)
# The lines involving the 'stats' array are commented out so that the stat objects
# (which contain full time series, and might therefore require significant memory)
# get discarded after each loop iteration, rather than accumulated in the array.
for iX,(X,iR) in enumerate(zip(xticks,rep_inds)):
  with coloring(): print('\n'+"xticks[",iX,'/',len(xticks)-1,"] ",CtrlVar,': ',X,sep="")
  adjust_osse(CtrlVar,X)
  seed(sd0+iR)
  xx,yy = simulate(HMM)
  for iC,C in enumerate(cfgs):
    C = adjust_cfg(C,CtrlVar,X)
    seed(sd0+iR)
    stat = C.assimilate(HMM,xx,yy)
    # stats[iX,0,iC] = stat
    avrgs[iX,0,iC] = stat.average_in_time()
  print_averages(cfgs,avrgs[iX,0])
# Results saved in the format below are supported by DAPPER's ResultsTable, whose main
# purpose is to collect result data from parallelized (or otherwise independent) experiments.
np.savez(save_path,
    avrgs  = avrgs,            # 3D array of dicts, whose fields are the averages.
    xlabel = CtrlVar,          # The control variable tag (string).
    xticks = xticks,           # The control variable values (array).
    labels = cfgs.gen_names()) # List of strings.
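# For reference, the saved data can also be inspected directly with numpy
# (a hypothetical sketch; the intended interface is ResultsTable, used below).
# Assuming save_path lacks the '.npz' suffix (np.savez appends it):
#   data = np.load(save_path+'.npz', allow_pickle=True) # allow_pickle: avrgs holds dicts.
#   print(data['xlabel'], data['labels'])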
##############################
# Results load & presentation
##############################
if 'WORKER' in sys.argv: sys.exit(0) # Quit if the script is running as a WORKER.
# The rest of this script only uses saved data ...
R = ResultsTable(save_path)
# ... => could be run as a separate script,
# on downloaded data (e.g. from parallelization):
# R = ResultsTable('data/example_3/MyRemoteHost/N_runX')
# Print the averages of a given field.
# The "subcolumns" show the number of repetitions, the number of crashes,
# and the 1-sigma confidence interval.
with coloring(): print("Averages over experiment repetition:")
R.print_mean_field('rmse_a',1,1,cols=slice(0,2))
# Separate out the baseline methods from the rest
BaseLineMethods = R.split(lambda x: x in ['Climatology', 'OptInterp', 'Var3D','ExtKF'])
# Plot
fig, ax = plt.subplots()
R.plot_1d('rmse_a')
# The commented-out lines below create checkmarks for toggling the curves on/off:
# if 'checkmarks' not in locals(): checkmarks = []
# checkmarks += [toggle_lines()]
BaseLineMethods.plot_1d('rmse_a',color='k')
# Adjust plot
if R.xlabel=='N':
  ax.loglog()
  ax.grid(True,'minor')
  # On log scales, matplotlib labels only full decades by default,
  # so set explicit ticks with plain-number labels:
  xt = [2,3,4,6,8,10,15,20,25,30,40,50,70,100]
  yt = [0.1, 0.2, 0.5, 1, 2, 5]
  ax.set_xticks(xt); ax.set_xticklabels(xt)
  ax.set_yticks(yt); ax.set_yticklabels(yt)
# Discussion of the results shown in the figure:
# - Is localization more important than inflation? Both are necessary; neither is sufficient.
# - Some rotation could be beneficial for the Sqrt EnKFs with N>30 (to avoid worsening rmse).
# - The EnKFs with infl=1 (and no localization) diverge even for large ensemble sizes,
#   but with just infl=1.01 one already obtains a much more reasonable benchmark curve.
#   However, the difference from **tuned** inflation (represented here by the EnKF-N)
#   is still very clear.
# - TODO: replace the tuning functions by argmin (i.e. R.minz_tuning), and merge with example_4.
#   Why: good tuning functions require too much manual labor, and bad ones yield noisy curves.
#   Also use 2d plotting tools.
# - TODO: give more thought to localization + adaptive inflation.
#   This should also let {inflation+localization} gain more of an advantage over {localization only}.
##