sub_train.py
"""
@author: Chang, responsible for DOI:https://doi.org/10.1063/5.0138287
"""
import os
import tensorflow as tf # TF1.14
import numpy as np
import scipy.io as io
import time
import argparse
import h5py
from utilities_ndm import data_process, HFM
# Random seed for reproducible experiments.
np.random.seed(3407)
tf.set_random_seed(3407)
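# File-based signalling with the coordinating main process (inferred from how these
# files are used below): the coordinator writes 'signal_Cni_{index}_T{T_i}.txt' to
# tell this sub-process to stop or continue, and this sub-process reports its
# latest loss through 'signal_Loss_{index}_T{T_i}.txt'.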
def check_signal_Cni(index,T_i):
try:
with open(f'signal_Cni_{index}_T{T_i}.txt', 'r') as f:
return f.read()
except FileNotFoundError:
return 'wait'
def send_signal_Loss(Loss, index, T_i):
with open(f'signal_Loss_{index}_T{T_i}.txt', 'w') as f:
f.write(Loss)
if __name__ == "__main__":
time0 = time.time()
# =====================================================================
    # ||                 0. Hyper-parameter definition                  ||
# =====================================================================
    # Create a parser to receive input parameters
parser = argparse.ArgumentParser(description='Receive some input parameters.')
parser.add_argument('--debug', type=int, choices=[0, 1], help='Debug or not')
parser.add_argument('--index', type=int, default=0, help='sub_process')
parser.add_argument('--tp1', type=int, default=0, help='Time block start')
parser.add_argument('--tp2', type=int, default=120, help='Time block end')
    # Parse the arguments
args = parser.parse_args()
tp1, tp2 = args.tp1, args.tp2
if args.debug==1:
log_path = "./tp{}-{}_dt0.1s_X-x29p_t{}-{}_test".format(args.tp1+10, args.tp2-10, args.tp1, args.tp2)
        # Note: eqns_Nx * eqns_Ny * eqns_Nt should be an integer multiple of
        # batch_size, so that each epoch splits into whole batches.
batch_size = 6000
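        # MLP architecture: 3 inputs (t, x, y) -> 3 outputs (presumably u, v, p);
        # a tiny 2-neuron, 2-layer net keeps the debug run fast.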
layers = [3] + 2 * [2] + [3]
        # Learning rate schedule with warm restarts.
        lr = 0.001  # Initial (peak) learning rate.
        mm = 1.0  # Decay ratio applied to the peak lr after each period.
        ep = 10  # Number of epochs in the first period.
        tm = 2  # Growth factor of the period length.
        dn = 6  # Total number of periods.
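        # Example: with ep=10, tm=2, dn=6 the period lengths are 10, 20, 40, 80, 160,
        # 320 epochs, and with mm=1.0 the peak lr stays at 0.001 for every period
        # (see the schedule construction in Part 2).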
        # Nt_true is the number of snapshots used for training, sampled every n*dt:
        # Nt_true = Nt / n, where Nt is the total number of snapshots.
Nx, Ny, Nt_true = 400, 200, 1100
eqns_Nx, eqns_Ny, eqns_Nt = 10, 5, tp2 - tp1
else:
log_path = "./tp{}-{}_dt0.1s_X-x29p_t{}-{}".format(args.tp1+10, args.tp2-10, args.tp1, args.tp2)
batch_size = 60000
layers = [3] + 10 * [50] + [3]
lr = 0.001
mm = 1.0
ep = 1000
tm = 2
dn = 6
Nx, Ny, Nt_true = 400, 200, 1100
eqns_Nx, eqns_Ny, eqns_Nt = 100, 50, tp2 - tp1
index = args.index
print("************************ SubNN ", index, " start, debug=", args.debug,
", tp", args.tp1,"-tp", args.tp2, " ************************")
    if not os.path.exists(log_path):
os.makedirs(log_path)
loss_name = log_path + "/loss-epoch.dat"
    loss_header = 'iter loss loss_data loss_eqns loss_e1 loss_e2 loss_e3 lr\n'
with open(loss_name, 'a') as f:
f.write(loss_header)
log_name = log_path + "/PINN_log.txt"
    pred_path = log_path + "/Pred_ep{}.h5"  # .h5 or .mat, corresponds to Part 3 (post-process)
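    # One checkpoint directory per period (Trained_HFM_ep{epochs}); each period in
    # Part 2 (NN training) warm-starts from the checkpoint saved by the previous one.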
NN_path = log_path + '/Trained_HFM_ep{}/tp_NN.ckpt'
Rey = 100
# mat_path = '../cylinder_Re100_lam_X1-9_400x200_5500_ndm.mat' # para
mat_path = 'D:/AI/cylinder_post/cylinder_Re100_t1100_Diamond_29p.mat' # local
# =====================================================================
    # ||                       1. Preprocess                            ||
# =====================================================================
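    # data_process loads the .mat snapshots and exposes the sampling helpers used
    # below (tp, data_ext / data_uniform, eqns, data_pred).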
DATA = data_process(mat_path, Nx, Ny, Nt_true)
# =====================Time block=====================
t_tp, u_tp, v_tp, p_tp = DATA.tp(tp1, tp2)
    # ===================== Sensor placement type 1: uniform rectangular grid defined by point counts along x and y =====================
    # data_Nx, data_Ny, data_Nt = 6, 5, tp2 - tp1 # Only used for this uniform placement type
# x_data, y_data, t_data, u_data, v_data = DATA.data_uniform(t_tp, u_tp, v_tp, p_tp,
# data_Nx, data_Ny, data_Nt)
    # ===================== Sensor placement type 1: end =====================
    # ===================== Sensor placement type 2: extract arbitrarily placed sensors by point ID =====================
    # Set id_ex here; the alternative layouts are kept (commented out) at the end of this file.
# Type diamond29
id_ex = np.array([[1, 1], [1, 100], [1, 200],
[50, 25], [50, 75], [50, 125], [50, 175],
[100, 50], [100, 150],
[150, 25], [150, 75], [150, 125], [150, 175],
[200, 1], [200, 100], [200, 200],
[250, 25], [250, 75], [250, 125], [250, 175],
[300, 50], [300, 150],
[350, 25], [350, 75], [350, 125], [350, 175],
[400, 1], [400, 100], [400, 200]
])-1
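    # 29 sensor locations on the 400 x 200 grid arranged in a diamond pattern;
    # the trailing -1 converts the 1-based grid indices above to 0-based indexing.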
    x_data, y_data, t_data, u_data, v_data = DATA.data_ext(t_tp, u_tp, v_tp, p_tp, id_ex)
    # ================ Sensor placement type 2: end ==================
print("x_data shape is", x_data.shape)
print("t_data shape is", t_data.shape)
print("u_data shape is", u_data.shape)
    # =========================== Equation points ===============================
x_eqns, y_eqns, t_eqns = DATA.eqns(t_tp, eqns_Nx, eqns_Ny, eqns_Nt)
print("x_eqns shape is", x_eqns.shape)
print("t_eqns shape is", t_eqns.shape)
    # ===================== Points reserved for reconstruction =======================
# x_pred, y_pred, t_pred = DATA.x_all, DATA.y_all, DATA.t_all[DATA.tp1:DATA.tp2]
    x_pred_100, y_pred_100, t_pred_100, N_100, T_100 = DATA.data_pred(t1=10, t2=110, t_jump=1)  # Tensor
# x_pred_500, y_pred_500, t_pred_500, N_500, T_500 = DATA.data_pred(t1=50, t2=550, t_jump=1)
print("x_pred_100 shape is", x_pred_100.shape)
# print("t_pred_500 shape is", t_pred_500.shape)
# DATA.plot_data()
# DATA.plot_eqns()
del DATA
time1 = time.time()
print("Data prepare done, costs {:.3e}s".format(time1 - time0))
with open(log_name, "a") as f:
f.write('Data prepare done, costs {:.3e}s'.format(time1 - time0) + '\n')
# =====================================================================
    # ||                       2. NN training                           ||
# =====================================================================
# time0 = time.time()
# Training
print("==============================Load HFM=============================")
np_lr = np.zeros(dn) + lr
np_ep = np.zeros(dn) + ep
for i in range(1, dn):
lr, ep = mm * lr, tm * ep
np_lr[i], np_ep[i] = lr, ep
print("========Max lr======== is ", np_lr)
print("========Each epochs======== is ", np_ep)
print("==============================Training start=============================")
for i in range(dn):
lr = np_lr[i].astype('float32')
epochs = np_ep[i].astype('int32')
model = HFM(t_data, x_data, y_data, u_data, v_data,
t_eqns, x_eqns, y_eqns,
layers, batch_size,
Rey,
lr, epochs, tm, mm)
if i>=1:
model.saver.restore(model.sess, NN_path.format(int(np_ep[i-1])))
model.train(epochs)
# print(tf.contrib.framework.get_variables_to_restore())
        model.saver.save(model.sess, NN_path.format(epochs))  # The checkpoint directory is created automatically.
time1 = time.time()
print("=====================SubNN",index," training T",i," done!=============================")
print("Training cost {:.3e}s".format(time1 - time0) + '\n')
with open(log_name, "a") as f:
f.write('Training cost {:.3e}s'.format(time1 - time0) + '\n')
# =====================================================================
        # ||                      3. Post-process                           ||
# =====================================================================
time0 = time.time()
        # ================================ Prediction =================================
        U_pred_dict = model.predict(x_pred_100, y_pred_100, t_pred_100, N_100, T_100)
# io.savemat(pred_path.format(epochs), U_pred_dict) # save as .mat
        # Create a new .h5 file and store every predicted field in it
with h5py.File(pred_path.format(epochs), 'w') as hf:
for key, value in U_pred_dict.items():
hf.create_dataset(key, data=value)
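        # Reading the predictions back later (a minimal sketch):
        # with h5py.File(pred_path.format(epochs), 'r') as hf:
        #     for key in hf.keys():
        #         print(key, hf[key].shape)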
        # ============================ Write loss history ==============================
a_ep = np.array(model.a_ep)
a_loss = np.array(model.a_loss)
a_loss_data = np.array(model.a_loss_data)
a_loss_eqns = np.array(model.a_loss_eqns)
a_loss_e1 = np.array(model.a_loss_e1)
a_loss_e2 = np.array(model.a_loss_e2)
a_loss_e3 = np.array(model.a_loss_e3)
# a_inertial_x = np.array(model.a_inertial_x)
# a_inertial_y = np.array(model.a_inertial_y)
# a_p_gradient_x = np.array(model.a_p_gradient_x)
# a_p_gradient_y = np.array(model.a_p_gradient_y)
# a_viscous_x = np.array(model.a_viscous_x)
# a_viscous_y = np.array(model.a_viscous_y)
a_lr = np.array(model.a_lr)
l_e = np.concatenate((a_ep.reshape(-1,1), a_loss.reshape(-1,1), a_loss_data.reshape(-1,1),
a_loss_eqns.reshape(-1,1),
a_loss_e1.reshape(-1,1),
a_loss_e2.reshape(-1,1),
a_loss_e3.reshape(-1,1),
# a_inertial_x.reshape(-1,1),
# a_inertial_y.reshape(-1,1),
# a_p_gradient_x.reshape(-1,1),
# a_p_gradient_y.reshape(-1,1),
# a_viscous_x.reshape(-1,1),
# a_viscous_y.reshape(-1,1),
a_lr.reshape(-1,1),
),axis=1)
with open(loss_name,'ab') as f:
np.savetxt(f,l_e,delimiter=" ")
time1 = time.time()
with open(log_name,"a") as f:
            f.write('Output cost {:.3e}s'.format(time1 - time0) + '\n')
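        # End-of-period handshake: publish the final loss of this period, then poll
        # every 30 s until the coordinator answers 'T{i}_stop' or 'T{i}_continue'.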
send_signal_Loss(str(a_loss[-1]), index, i)
        nextPeriod = 0
        stopTraining = 0
        while nextPeriod == 0:
            # The coordinator writes 'signal_Cni_{index}_T{T_i}.txt'
            signal = check_signal_Cni(index, i)
            print("=======SubRefresh: SubNN", index, " received signal ", signal)
            if signal == 'T{}_stop'.format(i):
                print(f"Process {index} stopping as per the signal.")
                stopTraining = 1
                break
            elif signal == 'T{}_continue'.format(i):
                print(f"Process {index} continuing as per the signal.")
                nextPeriod = 1
            else:
                print(f"Process {index} received unknown signal: {signal}")
                time.sleep(30)
        tf.reset_default_graph()
        del model
        if stopTraining == 1:
            break  # 'stop' ends the whole training run, not just the waiting loop.
# =====================================================================
    # ||              4. Load a trained model and predict               ||
# =====================================================================
    # Load the model (all commented out; kept for reference)
    # ============================================ Inspect variable names in a checkpoint ================================================
# from tensorflow.python import pywrap_tensorflow
# model_dir = "Saved_model"
# checkpoint_path = os.path.join(model_dir, "model.ckpt")
# checkpoint_path = './A_TF1test_TrainedModel/training/HFM_trained.ckpt'
# reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path)
# var_to_shape_map = reader.get_variable_to_shape_map()
# for key in var_to_shape_map:
# print("tensor_name: ", key, end=' ')
# print(reader.get_tensor(key))
    # ============================================ Inspect variable names in a checkpoint: end ================================================
# model_load_done = HFM( t_data, x_data, y_data, u_data, v_data,
# t_eqns, x_eqns, y_eqns,
# layers, batch_size,
# Rey,
# lr,ep,tm,mm)
# print(NN_path)
# model_load_done.saver.restore(model_load_done.sess, NN_path)
# U_pred_dict_done = model_load_done.predict(x_pred_500, y_pred_500, t_pred_500, N_500, T_500)
# print(pred_path)
# io.savemat(pred_path + '_dt0.02.mat', U_pred_dict_done)
# del model_load_done
    # Backup of sensor-coordinate layouts
    # Large X with a nested small x (the "diamond29" layout used above)
# id_ex = np.array([[1, 1], [1, 100], [1, 200],
# [50, 25], [50, 75], [50, 125], [50, 175],
# [100, 50], [100, 150],
# [150, 25], [150, 75], [150, 125], [150, 175],
# [200, 0], [200, 100], [200, 200],
# [250, 25], [250, 75], [250, 125], [250, 175],
# [300, 50], [300, 150],
# [350, 25], [350, 75], [350, 125], [350, 175],
# [400, 1], [400, 100], [400, 200]
# ])-1
    # A single large X
# id_ex = np.array([[0, 0], [0, 196],
# [28, 14], [28, 182],
# [56, 28], [56, 168],
# [84, 42], [84, 154],
# [112, 56], [112, 140],
# [140, 70], [140, 126],
# [168, 84], [168, 112],
# [196, 98],
# [224, 112], [224, 84],
# [252, 126], [252, 70],
# [280, 140], [280, 56],
# [308, 154], [308, 42],
# [336, 168], [336, 28],
# [364, 182], [364, 14],
# [399, 196], [399, 0],
# ])
    # O-shaped layout
# id_ex = np.array([[49, 24],
# [34, 9], [64, 9],
# [9, 19], [89, 19],
# [9, 29], [89, 29],
# [34, 39], [64, 39]])