-
Notifications
You must be signed in to change notification settings - Fork 4
/
train.log
629 lines (629 loc) · 138 KB
/
train.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
Namespace(adam_betas='(0.9, 0.98)', adam_eps=1e-09, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer', attention_dropout=0.0, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', data=['data-bin/wmt17_zh_en'], ddp_backend='c10d', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=0, distributed_backend='nccl', distributed_init_method='tcp://localhost:18727', distributed_port=-1, distributed_rank=0, distributed_world_size=6, dropout=0.3, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, fix_batches_to_gpus=False, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=20, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format='json', log_interval=1000, lr=[0.001], lr_scheduler='inverse_sqrt', lr_shrink=0.1, max_epoch=0, max_sentences=None, max_sentences_valid=None, max_source_positions=1024, max_target_positions=1024, max_tokens=4000, max_update=400000, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, momentum=0.99, no_epoch_checkpoints=False, no_progress_bar=False, no_save=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, relu_dropout=0.0, reset_lr_scheduler=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints2', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=False, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, source_lang='zh', target_lang='en', task='translation', train_subset='train', update_freq=[1], upsample_primary=1, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=10000, weight_decay=0.0001)
| [zh] dictionary: 50080 types
| [en] dictionary: 33080 types
| data-bin/wmt17_zh_en train 17810380 examples
| data-bin/wmt17_zh_en valid 1996 examples
| distributed init (rank 0): tcp://localhost:18727
Namespace(adam_betas='(0.9, 0.98)', adam_eps=1e-09, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer', attention_dropout=0.0, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', data=['data-bin/wmt17_zh_en'], ddp_backend='c10d', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=1, distributed_backend='nccl', distributed_init_method='tcp://localhost:18727', distributed_port=-1, distributed_rank=1, distributed_world_size=6, dropout=0.3, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, fix_batches_to_gpus=False, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=20, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format='json', log_interval=1000, lr=[0.001], lr_scheduler='inverse_sqrt', lr_shrink=0.1, max_epoch=0, max_sentences=None, max_sentences_valid=None, max_source_positions=1024, max_target_positions=1024, max_tokens=4000, max_update=400000, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, momentum=0.99, no_epoch_checkpoints=False, no_progress_bar=False, no_save=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, relu_dropout=0.0, reset_lr_scheduler=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints2', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=False, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, source_lang='zh', target_lang='en', task='translation', train_subset='train', update_freq=[1], upsample_primary=1, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=10000, weight_decay=0.0001)
| [zh] dictionary: 50080 types
| [en] dictionary: 33080 types
| data-bin/wmt17_zh_en train 17810380 examples
| data-bin/wmt17_zh_en valid 1996 examples
| distributed init (rank 1): tcp://localhost:18727
Namespace(adam_betas='(0.9, 0.98)', adam_eps=1e-09, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer', attention_dropout=0.0, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', data=['data-bin/wmt17_zh_en'], ddp_backend='c10d', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=3, distributed_backend='nccl', distributed_init_method='tcp://localhost:18727', distributed_port=-1, distributed_rank=3, distributed_world_size=6, dropout=0.3, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, fix_batches_to_gpus=False, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=20, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format='json', log_interval=1000, lr=[0.001], lr_scheduler='inverse_sqrt', lr_shrink=0.1, max_epoch=0, max_sentences=None, max_sentences_valid=None, max_source_positions=1024, max_target_positions=1024, max_tokens=4000, max_update=400000, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, momentum=0.99, no_epoch_checkpoints=False, no_progress_bar=False, no_save=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, relu_dropout=0.0, reset_lr_scheduler=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints2', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=False, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, source_lang='zh', target_lang='en', task='translation', train_subset='train', update_freq=[1], upsample_primary=1, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=10000, weight_decay=0.0001)
| [zh] dictionary: 50080 types
| [en] dictionary: 33080 types
| data-bin/wmt17_zh_en train 17810380 examples
| data-bin/wmt17_zh_en valid 1996 examples
| distributed init (rank 3): tcp://localhost:18727
Namespace(adam_betas='(0.9, 0.98)', adam_eps=1e-09, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer', attention_dropout=0.0, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', data=['data-bin/wmt17_zh_en'], ddp_backend='c10d', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=2, distributed_backend='nccl', distributed_init_method='tcp://localhost:18727', distributed_port=-1, distributed_rank=2, distributed_world_size=6, dropout=0.3, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, fix_batches_to_gpus=False, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=20, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format='json', log_interval=1000, lr=[0.001], lr_scheduler='inverse_sqrt', lr_shrink=0.1, max_epoch=0, max_sentences=None, max_sentences_valid=None, max_source_positions=1024, max_target_positions=1024, max_tokens=4000, max_update=400000, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, momentum=0.99, no_epoch_checkpoints=False, no_progress_bar=False, no_save=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, relu_dropout=0.0, reset_lr_scheduler=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints2', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=False, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, source_lang='zh', target_lang='en', task='translation', train_subset='train', update_freq=[1], upsample_primary=1, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=10000, weight_decay=0.0001)
| [zh] dictionary: 50080 types
| [en] dictionary: 33080 types
| data-bin/wmt17_zh_en train 17810380 examples
| data-bin/wmt17_zh_en valid 1996 examples
| distributed init (rank 2): tcp://localhost:18727
Namespace(adam_betas='(0.9, 0.98)', adam_eps=1e-09, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer', attention_dropout=0.0, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', data=['data-bin/wmt17_zh_en'], ddp_backend='c10d', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=5, distributed_backend='nccl', distributed_init_method='tcp://localhost:18727', distributed_port=-1, distributed_rank=5, distributed_world_size=6, dropout=0.3, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, fix_batches_to_gpus=False, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=20, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format='json', log_interval=1000, lr=[0.001], lr_scheduler='inverse_sqrt', lr_shrink=0.1, max_epoch=0, max_sentences=None, max_sentences_valid=None, max_source_positions=1024, max_target_positions=1024, max_tokens=4000, max_update=400000, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, momentum=0.99, no_epoch_checkpoints=False, no_progress_bar=False, no_save=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, relu_dropout=0.0, reset_lr_scheduler=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints2', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=False, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, source_lang='zh', target_lang='en', task='translation', train_subset='train', update_freq=[1], upsample_primary=1, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=10000, weight_decay=0.0001)
| [zh] dictionary: 50080 types
| [en] dictionary: 33080 types
| data-bin/wmt17_zh_en train 17810380 examples
| data-bin/wmt17_zh_en valid 1996 examples
| distributed init (rank 5): tcp://localhost:18727
Namespace(adam_betas='(0.9, 0.98)', adam_eps=1e-09, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer', attention_dropout=0.0, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', data=['data-bin/wmt17_zh_en'], ddp_backend='c10d', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=4, distributed_backend='nccl', distributed_init_method='tcp://localhost:18727', distributed_port=-1, distributed_rank=4, distributed_world_size=6, dropout=0.3, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, fix_batches_to_gpus=False, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=20, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format='json', log_interval=1000, lr=[0.001], lr_scheduler='inverse_sqrt', lr_shrink=0.1, max_epoch=0, max_sentences=None, max_sentences_valid=None, max_source_positions=1024, max_target_positions=1024, max_tokens=4000, max_update=400000, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, momentum=0.99, no_epoch_checkpoints=False, no_progress_bar=False, no_save=False, no_token_positional_embeddings=False, num_workers=0, optimizer='adam', optimizer_overrides='{}', raw_text=False, relu_dropout=0.0, reset_lr_scheduler=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints2', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=False, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, source_lang='zh', target_lang='en', task='translation', train_subset='train', update_freq=[1], upsample_primary=1, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=10000, weight_decay=0.0001)
| [zh] dictionary: 50080 types
| [en] dictionary: 33080 types
| data-bin/wmt17_zh_en train 17810380 examples
| data-bin/wmt17_zh_en valid 1996 examples
| distributed init (rank 4): tcp://localhost:18727
| initialized host localhost.localdomain as rank 0
TransformerModel(
(encoder): TransformerEncoder(
(embed_tokens): Embedding(50080, 512, padding_idx=1)
(embed_positions): SinusoidalPositionalEmbedding()
(layers): ModuleList(
(0): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(layer_norms): ModuleList(
(0): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(1): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
(1): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(layer_norms): ModuleList(
(0): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(1): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
(2): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(layer_norms): ModuleList(
(0): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(1): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
(3): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(layer_norms): ModuleList(
(0): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(1): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
(4): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(layer_norms): ModuleList(
(0): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(1): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
(5): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(layer_norms): ModuleList(
(0): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(1): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
)
)
(decoder): TransformerDecoder(
(embed_tokens): Embedding(33080, 512, padding_idx=1)
(embed_positions): SinusoidalPositionalEmbedding()
(layers): ModuleList(
(0): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(final_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
(1): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(final_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
(2): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(final_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
(3): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(final_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
(4): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(final_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
(5): TransformerDecoderLayer(
(self_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(self_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(encoder_attn): MultiheadAttention(
(out_proj): Linear(in_features=512, out_features=512, bias=True)
)
(encoder_attn_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(final_layer_norm): LayerNorm(torch.Size([512]), eps=1e-05, elementwise_affine=True)
)
)
)
)
| model transformer, criterion LabelSmoothedCrossEntropyCriterion
| num. model params: 103653376 (num. trained: 103653376)
| training on 6 GPUs
| max tokens per GPU = 4000 and max sentences per GPU = None
| no existing checkpoint found checkpoints2/checkpoint_last.pt
{"epoch": 1, "update": 0.04648568240981778, "loss": "10.765", "nll_loss": "10.103", "ppl": "1100.13", "wps": 14901, "ups": "0.6", "wpb": 22246, "bsz": 825, "num_updates": 1001, "lr": 0.00010018999, "gnorm": "1.333", "clip": "0%", "oom": 0.0, "wall": 1683, "train_wall": 1468}
{"epoch": 1, "update": 0.09297136481963555, "loss": "9.637", "nll_loss": "8.792", "ppl": "443.26", "wps": 14914, "ups": "0.6", "wpb": 22223, "bsz": 831, "num_updates": 2001, "lr": 0.00020017999, "gnorm": "1.161", "clip": "0%", "oom": 0.0, "wall": 3170, "train_wall": 2928}
{"epoch": 1, "update": 0.13945704722945332, "loss": "8.884", "nll_loss": "7.918", "ppl": "241.92", "wps": 14911, "ups": "0.6", "wpb": 22231, "bsz": 832, "num_updates": 3001, "lr": 0.00030016999, "gnorm": "1.064", "clip": "0%", "oom": 0.0, "wall": 4662, "train_wall": 4393}
{"epoch": 1, "update": 0.1859427296392711, "loss": "8.257", "nll_loss": "7.193", "ppl": "146.37", "wps": 14915, "ups": "0.7", "wpb": 22224, "bsz": 828, "num_updates": 4001, "lr": 0.00040015999, "gnorm": "0.992", "clip": "0%", "oom": 0.0, "wall": 6150, "train_wall": 5854}
{"epoch": 1, "update": 0.23242841204908887, "loss": "7.725", "nll_loss": "6.580", "ppl": "95.65", "wps": 14916, "ups": "0.7", "wpb": 22224, "bsz": 830, "num_updates": 5001, "lr": 0.0005001499899999999, "gnorm": "0.929", "clip": "0%", "oom": 0.0, "wall": 7640, "train_wall": 7316}
{"epoch": 1, "update": 0.27891409445890664, "loss": "7.307", "nll_loss": "6.098", "ppl": "68.51", "wps": 14917, "ups": "0.7", "wpb": 22223, "bsz": 832, "num_updates": 6001, "lr": 0.0006001399899999999, "gnorm": "0.874", "clip": "0%", "oom": 0.0, "wall": 9128, "train_wall": 8777}
{"epoch": 1, "update": 0.32539977686872446, "loss": "6.976", "nll_loss": "5.720", "ppl": "52.70", "wps": 14923, "ups": "0.7", "wpb": 22220, "bsz": 830, "num_updates": 7001, "lr": 0.0007001299899999999, "gnorm": "0.830", "clip": "0%", "oom": 0.0, "wall": 10613, "train_wall": 10234}
{"epoch": 1, "update": 0.3718854592785422, "loss": "6.710", "nll_loss": "5.416", "ppl": "42.70", "wps": 14926, "ups": "0.7", "wpb": 22218, "bsz": 830, "num_updates": 8001, "lr": 0.0008001199899999999, "gnorm": "0.795", "clip": "0%", "oom": 0.0, "wall": 12098, "train_wall": 11692}
{"epoch": 1, "update": 0.41837114168836, "loss": "6.496", "nll_loss": "5.172", "ppl": "36.05", "wps": 14930, "ups": "0.7", "wpb": 22218, "bsz": 828, "num_updates": 9001, "lr": 0.0009001099899999999, "gnorm": "0.768", "clip": "0%", "oom": 0.0, "wall": 13582, "train_wall": 13149}
{"epoch": 1, "update": 0.46485682409817775, "loss": "6.317", "nll_loss": "4.969", "ppl": "31.32", "wps": 14931, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 10001, "lr": 0.0009999500037496875, "gnorm": "0.746", "clip": "0%", "oom": 0.0, "wall": 15068, "train_wall": 14608}
{"epoch": 1, "update": 0.5113425065079955, "loss": "6.166", "nll_loss": "4.798", "ppl": "27.81", "wps": 14936, "ups": "0.7", "wpb": 22218, "bsz": 829, "num_updates": 11001, "lr": 0.0009534192529917125, "gnorm": "0.729", "clip": "0%", "oom": 0.0, "wall": 16553, "train_wall": 16066}
{"epoch": 1, "update": 0.5578281889178133, "loss": "6.036", "nll_loss": "4.650", "ppl": "25.10", "wps": 14936, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 12001, "lr": 0.0009128328952636641, "gnorm": "0.711", "clip": "0%", "oom": 0.0, "wall": 18038, "train_wall": 17525}
{"epoch": 1, "update": 0.604313871327631, "loss": "5.921", "nll_loss": "4.520", "ppl": "22.94", "wps": 14936, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 13001, "lr": 0.0008770242882522966, "gnorm": "0.696", "clip": "0%", "oom": 0.0, "wall": 19523, "train_wall": 18983}
{"epoch": 1, "update": 0.6507995537374489, "loss": "5.820", "nll_loss": "4.405", "ppl": "21.19", "wps": 14936, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 14001, "lr": 0.0008451240722648986, "gnorm": "0.681", "clip": "0%", "oom": 0.0, "wall": 21007, "train_wall": 20441}
{"epoch": 1, "update": 0.6972852361472667, "loss": "5.727", "nll_loss": "4.301", "ppl": "19.71", "wps": 14940, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 15001, "lr": 0.0008164693657357804, "gnorm": "0.668", "clip": "0%", "oom": 0.0, "wall": 22492, "train_wall": 21898}
{"epoch": 1, "update": 0.7437709185570844, "loss": "5.646", "nll_loss": "4.209", "ppl": "18.49", "wps": 14941, "ups": "0.7", "wpb": 22214, "bsz": 827, "num_updates": 16001, "lr": 0.0007905447109058752, "gnorm": "0.656", "clip": "0%", "oom": 0.0, "wall": 23978, "train_wall": 23358}
{"epoch": 1, "update": 0.7902566009669022, "loss": "5.571", "nll_loss": "4.125", "ppl": "17.45", "wps": 14944, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 17001, "lr": 0.0007669424320487289, "gnorm": "0.645", "clip": "0%", "oom": 0.0, "wall": 25461, "train_wall": 24814}
{"epoch": 1, "update": 0.83674228337672, "loss": "5.504", "nll_loss": "4.049", "ppl": "16.55", "wps": 14943, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 18001, "lr": 0.0007453352890294455, "gnorm": "0.634", "clip": "0%", "oom": 0.0, "wall": 26947, "train_wall": 26273}
{"epoch": 1, "update": 0.8832279657865377, "loss": "5.442", "nll_loss": "3.979", "ppl": "15.77", "wps": 14945, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 19001, "lr": 0.0007254571593833237, "gnorm": "0.625", "clip": "0%", "oom": 0.0, "wall": 28431, "train_wall": 27730}
{"epoch": 1, "update": 0.9297136481963555, "loss": "5.386", "nll_loss": "3.916", "ppl": "15.09", "wps": 14945, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 20001, "lr": 0.0007070891041799029, "gnorm": "0.616", "clip": "0%", "oom": 0.0, "wall": 29914, "train_wall": 29187}
{"epoch": 1, "update": 0.9761993306061733, "loss": "5.333", "nll_loss": "3.857", "ppl": "14.49", "wps": 14946, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 21001, "lr": 0.0006900491297967561, "gnorm": "0.608", "clip": "0%", "oom": 0.0, "wall": 31398, "train_wall": 30643}
{"epoch": 1, "loss": "5.308", "nll_loss": "3.829", "ppl": "14.21", "wps": 14946, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 21512, "lr": 0.0006818040951022353, "gnorm": "0.604", "clip": "0%", "oom": 0.0, "wall": 32159, "train_wall": 31391}
{"epoch": 1, "valid_loss": 4.981665227291914, "valid_nll_loss": 3.3787692669378373, "valid_ppl": "10.40", "num_updates": 21512}
{"epoch": 2, "update": 1.0464856824098179, "loss": "4.244", "nll_loss": "2.632", "ppl": "6.20", "wps": 14930, "ups": "0.6", "wpb": 22181, "bsz": 823, "num_updates": 22513, "lr": 0.0006664741574907017, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 33699, "train_wall": 32850}
{"epoch": 2, "update": 1.0929713648196355, "loss": "4.248", "nll_loss": "2.636", "ppl": "6.22", "wps": 14950, "ups": "0.7", "wpb": 22209, "bsz": 829, "num_updates": 23513, "lr": 0.0006521477167095471, "gnorm": "0.440", "clip": "0%", "oom": 0.0, "wall": 35184, "train_wall": 34307}
{"epoch": 2, "update": 1.1394570472294534, "loss": "4.246", "nll_loss": "2.634", "ppl": "6.21", "wps": 14959, "ups": "0.7", "wpb": 22203, "bsz": 825, "num_updates": 24513, "lr": 0.0006387071345591755, "gnorm": "0.440", "clip": "0%", "oom": 0.0, "wall": 36666, "train_wall": 35762}
{"epoch": 2, "update": 1.185942729639271, "loss": "4.237", "nll_loss": "2.625", "ppl": "6.17", "wps": 14963, "ups": "0.7", "wpb": 22213, "bsz": 825, "num_updates": 25513, "lr": 0.0006260647262929375, "gnorm": "0.438", "clip": "0%", "oom": 0.0, "wall": 38152, "train_wall": 37219}
{"epoch": 2, "update": 1.232428412049089, "loss": "4.232", "nll_loss": "2.619", "ppl": "6.14", "wps": 14969, "ups": "0.7", "wpb": 22231, "bsz": 829, "num_updates": 26513, "lr": 0.0006141444960880203, "gnorm": "0.440", "clip": "0%", "oom": 0.0, "wall": 39639, "train_wall": 38678}
{"epoch": 2, "update": 1.2789140944589066, "loss": "4.226", "nll_loss": "2.613", "ppl": "6.12", "wps": 14972, "ups": "0.7", "wpb": 22235, "bsz": 828, "num_updates": 27513, "lr": 0.0006028802070343075, "gnorm": "0.437", "clip": "0%", "oom": 0.0, "wall": 41124, "train_wall": 40135}
{"epoch": 2, "update": 1.3253997768687245, "loss": "4.221", "nll_loss": "2.607", "ppl": "6.09", "wps": 14970, "ups": "0.7", "wpb": 22236, "bsz": 829, "num_updates": 28513, "lr": 0.0005922138268464933, "gnorm": "0.436", "clip": "0%", "oom": 0.0, "wall": 42611, "train_wall": 41594}
{"epoch": 2, "update": 1.371885459278542, "loss": "4.216", "nll_loss": "2.602", "ppl": "6.07", "wps": 14971, "ups": "0.7", "wpb": 22236, "bsz": 830, "num_updates": 29513, "lr": 0.0005820942658080266, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 44095, "train_wall": 43050}
{"epoch": 2, "update": 1.41837114168836, "loss": "4.212", "nll_loss": "2.597", "ppl": "6.05", "wps": 14963, "ups": "0.7", "wpb": 22230, "bsz": 829, "num_updates": 30513, "lr": 0.0005724763441561227, "gnorm": "0.433", "clip": "0%", "oom": 0.0, "wall": 45584, "train_wall": 44513}
{"epoch": 2, "update": 1.4648568240981779, "loss": "4.206", "nll_loss": "2.591", "ppl": "6.03", "wps": 14962, "ups": "0.7", "wpb": 22229, "bsz": 829, "num_updates": 31513, "lr": 0.0005633199411868324, "gnorm": "0.431", "clip": "0%", "oom": 0.0, "wall": 47070, "train_wall": 45972}
{"epoch": 2, "update": 1.5113425065079955, "loss": "4.201", "nll_loss": "2.586", "ppl": "6.01", "wps": 14963, "ups": "0.7", "wpb": 22226, "bsz": 828, "num_updates": 32513, "lr": 0.0005545892894569057, "gnorm": "0.430", "clip": "0%", "oom": 0.0, "wall": 48553, "train_wall": 47427}
{"epoch": 2, "update": 1.5578281889178132, "loss": "4.197", "nll_loss": "2.582", "ppl": "5.99", "wps": 14960, "ups": "0.7", "wpb": 22219, "bsz": 828, "num_updates": 33513, "lr": 0.0005462523857198285, "gnorm": "0.429", "clip": "0%", "oom": 0.0, "wall": 50036, "train_wall": 48884}
{"epoch": 2, "update": 1.604313871327631, "loss": "4.192", "nll_loss": "2.576", "ppl": "5.96", "wps": 14960, "ups": "0.7", "wpb": 22217, "bsz": 828, "num_updates": 34513, "lr": 0.0005382804964439719, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 51520, "train_wall": 50341}
{"epoch": 2, "update": 1.650799553737449, "loss": "4.188", "nll_loss": "2.572", "ppl": "5.95", "wps": 14960, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 35513, "lr": 0.0005306477404737652, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 53002, "train_wall": 51796}
{"epoch": 2, "update": 1.6972852361472666, "loss": "4.184", "nll_loss": "2.567", "ppl": "5.92", "wps": 14960, "ups": "0.7", "wpb": 22218, "bsz": 827, "num_updates": 36513, "lr": 0.0005233307350026844, "gnorm": "0.427", "clip": "0%", "oom": 0.0, "wall": 54491, "train_wall": 53259}
{"epoch": 2, "update": 1.7437709185570844, "loss": "4.179", "nll_loss": "2.562", "ppl": "5.91", "wps": 14961, "ups": "0.7", "wpb": 22220, "bsz": 828, "num_updates": 37513, "lr": 0.000516308293811482, "gnorm": "0.426", "clip": "0%", "oom": 0.0, "wall": 55976, "train_wall": 54716}
{"epoch": 2, "update": 1.7902566009669023, "loss": "4.175", "nll_loss": "2.557", "ppl": "5.89", "wps": 14956, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 38513, "lr": 0.0005095611688909214, "gnorm": "0.426", "clip": "0%", "oom": 0.0, "wall": 57465, "train_wall": 56178}
{"epoch": 2, "update": 1.83674228337672, "loss": "4.171", "nll_loss": "2.553", "ppl": "5.87", "wps": 14956, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 39513, "lr": 0.0005030718282651066, "gnorm": "0.426", "clip": "0%", "oom": 0.0, "wall": 58949, "train_wall": 57634}
{"epoch": 2, "update": 1.8832279657865376, "loss": "4.168", "nll_loss": "2.549", "ppl": "5.85", "wps": 14954, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 40513, "lr": 0.0004968242641700102, "gnorm": "0.425", "clip": "0%", "oom": 0.0, "wall": 60434, "train_wall": 59092}
{"epoch": 2, "update": 1.9297136481963555, "loss": "4.165", "nll_loss": "2.546", "ppl": "5.84", "wps": 14956, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 41513, "lr": 0.0004908038268045036, "gnorm": "0.425", "clip": "0%", "oom": 0.0, "wall": 61918, "train_wall": 60549}
{"epoch": 2, "update": 1.9761993306061734, "loss": "4.162", "nll_loss": "2.543", "ppl": "5.83", "wps": 14956, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 42513, "lr": 0.0004849970797201256, "gnorm": "0.425", "clip": "0%", "oom": 0.0, "wall": 63401, "train_wall": 62005}
{"epoch": 2, "loss": "4.160", "nll_loss": "2.541", "ppl": "5.82", "wps": 14957, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 43024, "lr": 0.00048210829908754827, "gnorm": "0.424", "clip": "0%", "oom": 0.0, "wall": 64160, "train_wall": 62751}
{"epoch": 2, "valid_loss": 4.687946606567542, "valid_nll_loss": 3.063029011099045, "valid_ppl": "8.36", "num_updates": 43024, "best": 4.687946606567542}
{"epoch": 3, "update": 2.0464856824098177, "loss": "4.071", "nll_loss": "2.444", "ppl": "5.44", "wps": 14942, "ups": "0.6", "wpb": 22172, "bsz": 827, "num_updates": 44025, "lr": 0.00047659591728239155, "gnorm": "0.415", "clip": "0%", "oom": 0.0, "wall": 65742, "train_wall": 64209}
{"epoch": 3, "update": 2.0929713648196357, "loss": "4.066", "nll_loss": "2.437", "ppl": "5.42", "wps": 14940, "ups": "0.7", "wpb": 22204, "bsz": 837, "num_updates": 45025, "lr": 0.00047127362962627313, "gnorm": "0.413", "clip": "0%", "oom": 0.0, "wall": 67231, "train_wall": 65670}
{"epoch": 3, "update": 2.1394570472294534, "loss": "4.065", "nll_loss": "2.437", "ppl": "5.41", "wps": 14953, "ups": "0.7", "wpb": 22213, "bsz": 832, "num_updates": 46025, "lr": 0.0004661257567175434, "gnorm": "0.414", "clip": "0%", "oom": 0.0, "wall": 68715, "train_wall": 67126}
{"epoch": 3, "update": 2.185942729639271, "loss": "4.063", "nll_loss": "2.434", "ppl": "5.40", "wps": 14948, "ups": "0.7", "wpb": 22218, "bsz": 830, "num_updates": 47025, "lr": 0.0004611429759107165, "gnorm": "0.413", "clip": "0%", "oom": 0.0, "wall": 70203, "train_wall": 68587}
{"epoch": 3, "update": 2.2324284120490887, "loss": "4.063", "nll_loss": "2.435", "ppl": "5.41", "wps": 14949, "ups": "0.7", "wpb": 22209, "bsz": 832, "num_updates": 48025, "lr": 0.00045631664759627545, "gnorm": "0.414", "clip": "0%", "oom": 0.0, "wall": 71686, "train_wall": 70043}
{"epoch": 3, "update": 2.278914094458907, "loss": "4.063", "nll_loss": "2.434", "ppl": "5.41", "wps": 14951, "ups": "0.7", "wpb": 22209, "bsz": 830, "num_updates": 49025, "lr": 0.0004516387521771449, "gnorm": "0.415", "clip": "0%", "oom": 0.0, "wall": 73171, "train_wall": 71500}
{"epoch": 3, "update": 2.3253997768687245, "loss": "4.061", "nll_loss": "2.433", "ppl": "5.40", "wps": 14951, "ups": "0.7", "wpb": 22209, "bsz": 829, "num_updates": 50025, "lr": 0.0004471018340098959, "gnorm": "0.415", "clip": "0%", "oom": 0.0, "wall": 74656, "train_wall": 72958}
{"epoch": 3, "update": 2.371885459278542, "loss": "4.062", "nll_loss": "2.434", "ppl": "5.40", "wps": 14952, "ups": "0.7", "wpb": 22211, "bsz": 830, "num_updates": 51025, "lr": 0.0004426989514189718, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 76142, "train_wall": 74417}
{"epoch": 3, "update": 2.41837114168836, "loss": "4.060", "nll_loss": "2.431", "ppl": "5.39", "wps": 14949, "ups": "0.7", "wpb": 22211, "bsz": 829, "num_updates": 52025, "lr": 0.0004384236320214145, "gnorm": "0.415", "clip": "0%", "oom": 0.0, "wall": 77630, "train_wall": 75877}
{"epoch": 3, "update": 2.464856824098178, "loss": "4.058", "nll_loss": "2.430", "ppl": "5.39", "wps": 14951, "ups": "0.7", "wpb": 22213, "bsz": 829, "num_updates": 53025, "lr": 0.0004342698327079474, "gnorm": "0.415", "clip": "0%", "oom": 0.0, "wall": 79115, "train_wall": 77335}
{"epoch": 3, "update": 2.5113425065079955, "loss": "4.057", "nll_loss": "2.428", "ppl": "5.38", "wps": 14954, "ups": "0.7", "wpb": 22217, "bsz": 828, "num_updates": 54025, "lr": 0.00043023190371751824, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 80601, "train_wall": 78794}
{"epoch": 3, "update": 2.557828188917813, "loss": "4.056", "nll_loss": "2.427", "ppl": "5.38", "wps": 14956, "ups": "0.7", "wpb": 22218, "bsz": 829, "num_updates": 55025, "lr": 0.00042630455631948756, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 82084, "train_wall": 80250}
{"epoch": 3, "update": 2.604313871327631, "loss": "4.054", "nll_loss": "2.425", "ppl": "5.37", "wps": 14955, "ups": "0.7", "wpb": 22219, "bsz": 829, "num_updates": 56025, "lr": 0.00042248283368298534, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 83573, "train_wall": 81711}
{"epoch": 3, "update": 2.650799553737449, "loss": "4.053", "nll_loss": "2.425", "ppl": "5.37", "wps": 14958, "ups": "0.7", "wpb": 22222, "bsz": 829, "num_updates": 57025, "lr": 0.00041876208456850697, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 85057, "train_wall": 83168}
{"epoch": 3, "update": 2.6972852361472666, "loss": "4.051", "nll_loss": "2.422", "ppl": "5.36", "wps": 14954, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 58025, "lr": 0.0004151379395242018, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 86542, "train_wall": 84626}
{"epoch": 3, "update": 2.743770918557084, "loss": "4.048", "nll_loss": "2.419", "ppl": "5.35", "wps": 14953, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 59025, "lr": 0.0004116062893098295, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 88029, "train_wall": 86086}
{"epoch": 3, "update": 2.7902566009669023, "loss": "4.047", "nll_loss": "2.418", "ppl": "5.34", "wps": 14953, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 60025, "lr": 0.0004081632653061225, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 89516, "train_wall": 87546}
{"epoch": 3, "update": 2.83674228337672, "loss": "4.046", "nll_loss": "2.416", "ppl": "5.34", "wps": 14950, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 61025, "lr": 0.0004048052216971938, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 91005, "train_wall": 89009}
{"epoch": 3, "update": 2.8832279657865376, "loss": "4.044", "nll_loss": "2.414", "ppl": "5.33", "wps": 14950, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 62025, "lr": 0.0004015287192394123, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 92490, "train_wall": 90467}
{"epoch": 3, "update": 2.9297136481963557, "loss": "4.043", "nll_loss": "2.413", "ppl": "5.33", "wps": 14950, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 63025, "lr": 0.0003983305104524615, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 93978, "train_wall": 91927}
{"epoch": 3, "update": 2.9761993306061734, "loss": "4.040", "nll_loss": "2.410", "ppl": "5.32", "wps": 14950, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 64025, "lr": 0.0003952075260876219, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 95460, "train_wall": 93383}
{"epoch": 3, "loss": "4.039", "nll_loss": "2.409", "ppl": "5.31", "wps": 14950, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 64536, "lr": 0.0003936397778418647, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 96220, "train_wall": 94129}
{"epoch": 3, "valid_loss": 4.574561594030522, "valid_nll_loss": 2.9353992588257456, "valid_ppl": "7.65", "num_updates": 64536, "best": 4.574561594030522}
{"epoch": 4, "update": 3.0464856824098177, "loss": "3.995", "nll_loss": "2.360", "ppl": "5.13", "wps": 14958, "ups": "0.6", "wpb": 22224, "bsz": 822, "num_updates": 65537, "lr": 0.0003906220198018669, "gnorm": "0.416", "clip": "0%", "oom": 0.0, "wall": 97788, "train_wall": 95587}
{"epoch": 4, "update": 3.0929713648196357, "loss": "3.990", "nll_loss": "2.355", "ppl": "5.11", "wps": 14945, "ups": "0.7", "wpb": 22198, "bsz": 828, "num_updates": 66537, "lr": 0.00038767553257795166, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 99272, "train_wall": 97043}
{"epoch": 4, "update": 3.1394570472294534, "loss": "3.990", "nll_loss": "2.355", "ppl": "5.12", "wps": 14955, "ups": "0.7", "wpb": 22215, "bsz": 826, "num_updates": 67537, "lr": 0.00038479473164824144, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 100758, "train_wall": 98501}
{"epoch": 4, "update": 3.185942729639271, "loss": "3.990", "nll_loss": "2.355", "ppl": "5.12", "wps": 14957, "ups": "0.7", "wpb": 22205, "bsz": 827, "num_updates": 68537, "lr": 0.0003819772121484441, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 102240, "train_wall": 99956}
{"epoch": 4, "update": 3.2324284120490887, "loss": "3.991", "nll_loss": "2.356", "ppl": "5.12", "wps": 14955, "ups": "0.7", "wpb": 22205, "bsz": 826, "num_updates": 69537, "lr": 0.00037922069069971757, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 103725, "train_wall": 101414}
{"epoch": 4, "update": 3.278914094458907, "loss": "3.993", "nll_loss": "2.358", "ppl": "5.13", "wps": 14955, "ups": "0.7", "wpb": 22201, "bsz": 826, "num_updates": 70537, "lr": 0.00037652299763040766, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 105208, "train_wall": 102870}
{"epoch": 4, "update": 3.3253997768687245, "loss": "3.990", "nll_loss": "2.355", "ppl": "5.12", "wps": 14959, "ups": "0.7", "wpb": 22208, "bsz": 827, "num_updates": 71537, "lr": 0.0003738820697979342, "gnorm": "0.417", "clip": "0%", "oom": 0.0, "wall": 106694, "train_wall": 104328}
{"epoch": 4, "update": 3.371885459278542, "loss": "3.989", "nll_loss": "2.354", "ppl": "5.11", "wps": 14957, "ups": "0.7", "wpb": 22206, "bsz": 828, "num_updates": 72537, "lr": 0.0003712959439568582, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 108179, "train_wall": 105786}
{"epoch": 4, "update": 3.41837114168836, "loss": "3.989", "nll_loss": "2.355", "ppl": "5.11", "wps": 14954, "ups": "0.7", "wpb": 22203, "bsz": 827, "num_updates": 73537, "lr": 0.00036876275062468444, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 109665, "train_wall": 107244}
{"epoch": 4, "update": 3.464856824098178, "loss": "3.988", "nll_loss": "2.353", "ppl": "5.11", "wps": 14956, "ups": "0.7", "wpb": 22208, "bsz": 827, "num_updates": 74537, "lr": 0.0003662807084018454, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 111151, "train_wall": 108703}
{"epoch": 4, "update": 3.5113425065079955, "loss": "3.986", "nll_loss": "2.351", "ppl": "5.10", "wps": 14956, "ups": "0.7", "wpb": 22209, "bsz": 827, "num_updates": 75537, "lr": 0.0003638481187066534, "gnorm": "0.418", "clip": "0%", "oom": 0.0, "wall": 112636, "train_wall": 110161}
{"epoch": 4, "update": 3.557828188917813, "loss": "3.987", "nll_loss": "2.352", "ppl": "5.10", "wps": 14956, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 76537, "lr": 0.0003614633608898598, "gnorm": "0.419", "clip": "0%", "oom": 0.0, "wall": 114124, "train_wall": 111621}
{"epoch": 4, "update": 3.604313871327631, "loss": "3.986", "nll_loss": "2.351", "ppl": "5.10", "wps": 14960, "ups": "0.7", "wpb": 22217, "bsz": 828, "num_updates": 77537, "lr": 0.00035912488769689554, "gnorm": "0.419", "clip": "0%", "oom": 0.0, "wall": 115607, "train_wall": 113078}
{"epoch": 4, "update": 3.650799553737449, "loss": "3.986", "nll_loss": "2.351", "ppl": "5.10", "wps": 14960, "ups": "0.7", "wpb": 22215, "bsz": 827, "num_updates": 78537, "lr": 0.0003568312210489231, "gnorm": "0.419", "clip": "0%", "oom": 0.0, "wall": 117092, "train_wall": 114535}
{"epoch": 4, "update": 3.6972852361472666, "loss": "3.986", "nll_loss": "2.351", "ppl": "5.10", "wps": 14958, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 79537, "lr": 0.00035458094811656254, "gnorm": "0.421", "clip": "0%", "oom": 0.0, "wall": 118577, "train_wall": 115993}
{"epoch": 4, "update": 3.743770918557084, "loss": "3.985", "nll_loss": "2.350", "ppl": "5.10", "wps": 14958, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 80537, "lr": 0.0003523727176625959, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 120060, "train_wall": 117449}
{"epoch": 4, "update": 3.7902566009669023, "loss": "3.985", "nll_loss": "2.350", "ppl": "5.10", "wps": 14956, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 81537, "lr": 0.00035020523663214, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 121547, "train_wall": 118908}
{"epoch": 4, "update": 3.83674228337672, "loss": "3.984", "nll_loss": "2.350", "ppl": "5.10", "wps": 14956, "ups": "0.7", "wpb": 22210, "bsz": 829, "num_updates": 82537, "lr": 0.00034807726697073894, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 123033, "train_wall": 120368}
{"epoch": 4, "update": 3.8832279657865376, "loss": "3.984", "nll_loss": "2.349", "ppl": "5.09", "wps": 14958, "ups": "0.7", "wpb": 22213, "bsz": 829, "num_updates": 83537, "lr": 0.00034598762265258804, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 124518, "train_wall": 121825}
{"epoch": 4, "update": 3.9297136481963557, "loss": "3.983", "nll_loss": "2.348", "ppl": "5.09", "wps": 14960, "ups": "0.7", "wpb": 22215, "bsz": 829, "num_updates": 84537, "lr": 0.00034393516690268294, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 126001, "train_wall": 123282}
{"epoch": 4, "update": 3.9761993306061734, "loss": "3.982", "nll_loss": "2.347", "ppl": "5.09", "wps": 14960, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 85537, "lr": 0.00034191880959811454, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 127484, "train_wall": 124737}
{"epoch": 4, "loss": "3.982", "nll_loss": "2.347", "ppl": "5.09", "wps": 14959, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 86048, "lr": 0.00034090204755111764, "gnorm": "0.422", "clip": "0%", "oom": 0.0, "wall": 128244, "train_wall": 125483}
{"epoch": 4, "valid_loss": 4.502153538618683, "valid_nll_loss": 2.8590988995289672, "valid_ppl": "7.26", "num_updates": 86048, "best": 4.502153538618683}
{"epoch": 5, "update": 4.046485682409818, "loss": "3.956", "nll_loss": "2.318", "ppl": "4.99", "wps": 14950, "ups": "0.6", "wpb": 22179, "bsz": 824, "num_updates": 87049, "lr": 0.000338936317662371, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 129808, "train_wall": 126941}
{"epoch": 5, "update": 4.092971364819635, "loss": "3.959", "nll_loss": "2.322", "ppl": "5.00", "wps": 14954, "ups": "0.7", "wpb": 22210, "bsz": 824, "num_updates": 88049, "lr": 0.00033700611872178733, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 131295, "train_wall": 128400}
{"epoch": 5, "update": 4.139457047229453, "loss": "3.957", "nll_loss": "2.320", "ppl": "4.99", "wps": 14936, "ups": "0.7", "wpb": 22199, "bsz": 828, "num_updates": 89049, "lr": 0.00033510852529462765, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 132784, "train_wall": 129861}
{"epoch": 5, "update": 4.1859427296392715, "loss": "3.955", "nll_loss": "2.317", "ppl": "4.98", "wps": 14933, "ups": "0.7", "wpb": 22186, "bsz": 826, "num_updates": 90049, "lr": 0.0003332426296282589, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 134268, "train_wall": 131318}
{"epoch": 5, "update": 4.232428412049089, "loss": "3.954", "nll_loss": "2.316", "ppl": "4.98", "wps": 14946, "ups": "0.7", "wpb": 22207, "bsz": 827, "num_updates": 91049, "lr": 0.0003314075589617099, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 135754, "train_wall": 132777}
{"epoch": 5, "update": 4.278914094458907, "loss": "3.951", "nll_loss": "2.313", "ppl": "4.97", "wps": 14941, "ups": "0.7", "wpb": 22206, "bsz": 827, "num_updates": 92049, "lr": 0.0003296024738103273, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 137243, "train_wall": 134238}
{"epoch": 5, "update": 4.325399776868725, "loss": "3.950", "nll_loss": "2.312", "ppl": "4.97", "wps": 14934, "ups": "0.7", "wpb": 22203, "bsz": 827, "num_updates": 93049, "lr": 0.0003278265663520995, "gnorm": "0.428", "clip": "0%", "oom": 0.0, "wall": 138732, "train_wall": 135701}
{"epoch": 5, "update": 4.371885459278542, "loss": "3.949", "nll_loss": "2.311", "ppl": "4.96", "wps": 14939, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 94049, "lr": 0.0003260790589086034, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 140219, "train_wall": 137161}
{"epoch": 5, "update": 4.41837114168836, "loss": "3.949", "nll_loss": "2.310", "ppl": "4.96", "wps": 14939, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 95049, "lr": 0.0003243592025140854, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 141705, "train_wall": 138620}
{"epoch": 5, "update": 4.464856824098177, "loss": "3.948", "nll_loss": "2.310", "ppl": "4.96", "wps": 14941, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 96049, "lr": 0.0003226662755666963, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 143192, "train_wall": 140081}
{"epoch": 5, "update": 4.5113425065079955, "loss": "3.948", "nll_loss": "2.310", "ppl": "4.96", "wps": 14940, "ups": "0.7", "wpb": 22210, "bsz": 827, "num_updates": 97049, "lr": 0.0003209995825563643, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 144677, "train_wall": 141539}
{"epoch": 5, "update": 4.557828188917814, "loss": "3.947", "nll_loss": "2.309", "ppl": "4.95", "wps": 14936, "ups": "0.7", "wpb": 22206, "bsz": 827, "num_updates": 98049, "lr": 0.00031935845286421136, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 146166, "train_wall": 143001}
{"epoch": 5, "update": 4.604313871327631, "loss": "3.947", "nll_loss": "2.309", "ppl": "4.96", "wps": 14938, "ups": "0.7", "wpb": 22203, "bsz": 826, "num_updates": 99049, "lr": 0.00031774223962880604, "gnorm": "0.432", "clip": "0%", "oom": 0.0, "wall": 147648, "train_wall": 144456}
{"epoch": 5, "update": 4.650799553737449, "loss": "3.947", "nll_loss": "2.309", "ppl": "4.96", "wps": 14938, "ups": "0.7", "wpb": 22204, "bsz": 827, "num_updates": 100049, "lr": 0.0003161503186749001, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 149133, "train_wall": 145914}
{"epoch": 5, "update": 4.697285236147267, "loss": "3.949", "nll_loss": "2.311", "ppl": "4.96", "wps": 14940, "ups": "0.7", "wpb": 22206, "bsz": 827, "num_updates": 101049, "lr": 0.0003145820875006191, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 150620, "train_wall": 147371}
{"epoch": 5, "update": 4.743770918557084, "loss": "3.947", "nll_loss": "2.309", "ppl": "4.96", "wps": 14939, "ups": "0.7", "wpb": 22206, "bsz": 827, "num_updates": 102049, "lr": 0.00031303696431937456, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 152108, "train_wall": 148833}
{"epoch": 5, "update": 4.790256600966902, "loss": "3.947", "nll_loss": "2.309", "ppl": "4.96", "wps": 14941, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 103049, "lr": 0.0003115143871530373, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 153591, "train_wall": 150288}
{"epoch": 5, "update": 4.83674228337672, "loss": "3.946", "nll_loss": "2.308", "ppl": "4.95", "wps": 14946, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 104049, "lr": 0.0003100138129731624, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 155075, "train_wall": 151744}
{"epoch": 5, "update": 4.883227965786538, "loss": "3.946", "nll_loss": "2.308", "ppl": "4.95", "wps": 14947, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 105049, "lr": 0.0003085347168872849, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 156561, "train_wall": 153203}
{"epoch": 5, "update": 4.929713648196356, "loss": "3.945", "nll_loss": "2.307", "ppl": "4.95", "wps": 14950, "ups": "0.7", "wpb": 22216, "bsz": 827, "num_updates": 106049, "lr": 0.00030707659136751717, "gnorm": "0.434", "clip": "0%", "oom": 0.0, "wall": 158045, "train_wall": 154660}
{"epoch": 5, "update": 4.976199330606173, "loss": "3.946", "nll_loss": "2.308", "ppl": "4.95", "wps": 14949, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 107049, "lr": 0.00030563894551887365, "gnorm": "0.435", "clip": "0%", "oom": 0.0, "wall": 159532, "train_wall": 156121}
{"epoch": 5, "loss": "3.946", "nll_loss": "2.308", "ppl": "4.95", "wps": 14948, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 107560, "lr": 0.0003049120607972659, "gnorm": "0.435", "clip": "0%", "oom": 0.0, "wall": 160290, "train_wall": 156865}
{"epoch": 5, "valid_loss": 4.484407378344848, "valid_nll_loss": 2.845150428240601, "valid_ppl": "7.19", "num_updates": 107560, "best": 4.484407378344848}
{"epoch": 6, "update": 5.046485682409818, "loss": "3.911", "nll_loss": "2.269", "ppl": "4.82", "wps": 14914, "ups": "0.6", "wpb": 22143, "bsz": 826, "num_updates": 108561, "lr": 0.00030350306581755984, "gnorm": "0.439", "clip": "0%", "oom": 0.0, "wall": 161870, "train_wall": 158323}
{"epoch": 6, "update": 5.092971364819635, "loss": "3.913", "nll_loss": "2.270", "ppl": "4.82", "wps": 14941, "ups": "0.7", "wpb": 22211, "bsz": 829, "num_updates": 109561, "lr": 0.00030211480362537764, "gnorm": "0.438", "clip": "0%", "oom": 0.0, "wall": 163359, "train_wall": 159784}
{"epoch": 6, "update": 5.139457047229453, "loss": "3.913", "nll_loss": "2.271", "ppl": "4.82", "wps": 14947, "ups": "0.7", "wpb": 22212, "bsz": 826, "num_updates": 110561, "lr": 0.0003007454190722064, "gnorm": "0.438", "clip": "0%", "oom": 0.0, "wall": 164844, "train_wall": 161242}
{"epoch": 6, "update": 5.1859427296392715, "loss": "3.916", "nll_loss": "2.275", "ppl": "4.84", "wps": 14943, "ups": "0.7", "wpb": 22212, "bsz": 829, "num_updates": 111561, "lr": 0.00029939448816886837, "gnorm": "0.439", "clip": "0%", "oom": 0.0, "wall": 166331, "train_wall": 162703}
{"epoch": 6, "update": 5.232428412049089, "loss": "3.921", "nll_loss": "2.280", "ppl": "4.86", "wps": 14945, "ups": "0.7", "wpb": 22214, "bsz": 829, "num_updates": 112561, "lr": 0.0002980616001393577, "gnorm": "0.441", "clip": "0%", "oom": 0.0, "wall": 167817, "train_wall": 164162}
{"epoch": 6, "update": 5.278914094458907, "loss": "3.919", "nll_loss": "2.278", "ppl": "4.85", "wps": 14947, "ups": "0.7", "wpb": 22217, "bsz": 828, "num_updates": 113561, "lr": 0.0002967463568960866, "gnorm": "0.441", "clip": "0%", "oom": 0.0, "wall": 169303, "train_wall": 165622}
{"epoch": 6, "update": 5.325399776868725, "loss": "3.920", "nll_loss": "2.279", "ppl": "4.85", "wps": 14951, "ups": "0.7", "wpb": 22215, "bsz": 826, "num_updates": 114561, "lr": 0.0002954483725403805, "gnorm": "0.441", "clip": "0%", "oom": 0.0, "wall": 170786, "train_wall": 167078}
{"epoch": 6, "update": 5.371885459278542, "loss": "3.923", "nll_loss": "2.282", "ppl": "4.86", "wps": 14950, "ups": "0.7", "wpb": 22210, "bsz": 827, "num_updates": 115561, "lr": 0.00029416727288679873, "gnorm": "0.441", "clip": "0%", "oom": 0.0, "wall": 172270, "train_wall": 168534}
{"epoch": 6, "update": 5.41837114168836, "loss": "3.924", "nll_loss": "2.284", "ppl": "4.87", "wps": 14954, "ups": "0.7", "wpb": 22211, "bsz": 827, "num_updates": 116561, "lr": 0.0002929026950099488, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 173753, "train_wall": 169991}
{"epoch": 6, "update": 5.464856824098177, "loss": "3.923", "nll_loss": "2.283", "ppl": "4.87", "wps": 14954, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 117561, "lr": 0.0002916542868125485, "gnorm": "0.441", "clip": "0%", "oom": 0.0, "wall": 175241, "train_wall": 171451}
{"epoch": 6, "update": 5.5113425065079955, "loss": "3.923", "nll_loss": "2.282", "ppl": "4.86", "wps": 14957, "ups": "0.7", "wpb": 22220, "bsz": 828, "num_updates": 118561, "lr": 0.00029042170661356813, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 176727, "train_wall": 172910}
{"epoch": 6, "update": 5.557828188917814, "loss": "3.923", "nll_loss": "2.283", "ppl": "4.87", "wps": 14957, "ups": "0.7", "wpb": 22221, "bsz": 828, "num_updates": 119561, "lr": 0.00028920462275535883, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 178213, "train_wall": 174369}
{"epoch": 6, "update": 5.604313871327631, "loss": "3.922", "nll_loss": "2.282", "ppl": "4.86", "wps": 14954, "ups": "0.7", "wpb": 22217, "bsz": 829, "num_updates": 120561, "lr": 0.00028800271322874124, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 179699, "train_wall": 175827}
{"epoch": 6, "update": 5.650799553737449, "loss": "3.922", "nll_loss": "2.281", "ppl": "4.86", "wps": 14952, "ups": "0.7", "wpb": 22218, "bsz": 829, "num_updates": 121561, "lr": 0.00028681566531509273, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 181188, "train_wall": 177290}
{"epoch": 6, "update": 5.697285236147267, "loss": "3.923", "nll_loss": "2.283", "ppl": "4.87", "wps": 14952, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 122561, "lr": 0.0002856431752445285, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 182673, "train_wall": 178748}
{"epoch": 6, "update": 5.743770918557084, "loss": "3.923", "nll_loss": "2.283", "ppl": "4.87", "wps": 14952, "ups": "0.7", "wpb": 22215, "bsz": 829, "num_updates": 123561, "lr": 0.00028448494786932973, "gnorm": "0.442", "clip": "0%", "oom": 0.0, "wall": 184157, "train_wall": 180205}
{"epoch": 6, "update": 5.790256600966902, "loss": "3.922", "nll_loss": "2.282", "ppl": "4.86", "wps": 14953, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 124561, "lr": 0.00028334069635182037, "gnorm": "0.443", "clip": "0%", "oom": 0.0, "wall": 185641, "train_wall": 181662}
{"epoch": 6, "update": 5.83674228337672, "loss": "3.922", "nll_loss": "2.282", "ppl": "4.86", "wps": 14954, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 125561, "lr": 0.0002822101418659437, "gnorm": "0.443", "clip": "0%", "oom": 0.0, "wall": 187126, "train_wall": 183121}
{"epoch": 6, "update": 5.883227965786538, "loss": "3.922", "nll_loss": "2.281", "ppl": "4.86", "wps": 14952, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 126561, "lr": 0.00028109301331183335, "gnorm": "0.443", "clip": "0%", "oom": 0.0, "wall": 188612, "train_wall": 184579}
{"epoch": 6, "update": 5.929713648196356, "loss": "3.921", "nll_loss": "2.281", "ppl": "4.86", "wps": 14952, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 127561, "lr": 0.0002799890470427154, "gnorm": "0.443", "clip": "0%", "oom": 0.0, "wall": 190100, "train_wall": 186040}
{"epoch": 6, "update": 5.976199330606173, "loss": "3.921", "nll_loss": "2.281", "ppl": "4.86", "wps": 14951, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 128561, "lr": 0.00027889798660351705, "gnorm": "0.443", "clip": "0%", "oom": 0.0, "wall": 191585, "train_wall": 187499}
{"epoch": 6, "loss": "3.921", "nll_loss": "2.280", "ppl": "4.86", "wps": 14951, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 129072, "lr": 0.0002783453562567486, "gnorm": "0.443", "clip": "0%", "oom": 0.0, "wall": 192343, "train_wall": 188244}
{"epoch": 6, "valid_loss": 4.451280913636323, "valid_nll_loss": 2.811643267765514, "valid_ppl": "7.02", "num_updates": 129072, "best": 4.451280913636323}
{"epoch": 7, "update": 6.046485682409818, "loss": "3.881", "nll_loss": "2.235", "ppl": "4.71", "wps": 14947, "ups": "0.6", "wpb": 22245, "bsz": 826, "num_updates": 130073, "lr": 0.00027727225951940305, "gnorm": "0.446", "clip": "0%", "oom": 0.0, "wall": 193927, "train_wall": 189706}
{"epoch": 7, "update": 6.092971364819635, "loss": "3.895", "nll_loss": "2.252", "ppl": "4.76", "wps": 14954, "ups": "0.7", "wpb": 22239, "bsz": 829, "num_updates": 131073, "lr": 0.0002762125327358115, "gnorm": "0.447", "clip": "0%", "oom": 0.0, "wall": 195413, "train_wall": 191165}
{"epoch": 7, "update": 6.139457047229453, "loss": "3.897", "nll_loss": "2.254", "ppl": "4.77", "wps": 14950, "ups": "0.7", "wpb": 22226, "bsz": 829, "num_updates": 132073, "lr": 0.00027516486454411056, "gnorm": "0.448", "clip": "0%", "oom": 0.0, "wall": 196899, "train_wall": 192624}
{"epoch": 7, "update": 6.1859427296392715, "loss": "3.900", "nll_loss": "2.257", "ppl": "4.78", "wps": 14943, "ups": "0.7", "wpb": 22225, "bsz": 828, "num_updates": 133073, "lr": 0.00027412902797509085, "gnorm": "0.448", "clip": "0%", "oom": 0.0, "wall": 198388, "train_wall": 194086}
{"epoch": 7, "update": 6.232428412049089, "loss": "3.898", "nll_loss": "2.255", "ppl": "4.77", "wps": 14946, "ups": "0.7", "wpb": 22231, "bsz": 826, "num_updates": 134073, "lr": 0.0002731048019957472, "gnorm": "0.447", "clip": "0%", "oom": 0.0, "wall": 199876, "train_wall": 195546}
{"epoch": 7, "update": 6.278914094458907, "loss": "3.898", "nll_loss": "2.255", "ppl": "4.77", "wps": 14946, "ups": "0.7", "wpb": 22228, "bsz": 825, "num_updates": 135073, "lr": 0.00027209197131114276, "gnorm": "0.460", "clip": "0%", "oom": 0.0, "wall": 201362, "train_wall": 197005}
{"epoch": 7, "update": 6.325399776868725, "loss": "3.899", "nll_loss": "2.256", "ppl": "4.78", "wps": 14949, "ups": "0.7", "wpb": 22232, "bsz": 828, "num_updates": 136073, "lr": 0.00027109032617429697, "gnorm": "0.460", "clip": "0%", "oom": 0.0, "wall": 202850, "train_wall": 198465}
{"epoch": 7, "update": 6.371885459278542, "loss": "3.899", "nll_loss": "2.256", "ppl": "4.78", "wps": 14952, "ups": "0.7", "wpb": 22232, "bsz": 828, "num_updates": 137073, "lr": 0.00027009966220371503, "gnorm": "0.459", "clip": "0%", "oom": 0.0, "wall": 204334, "train_wall": 199923}
{"epoch": 7, "update": 6.41837114168836, "loss": "3.902", "nll_loss": "2.259", "ppl": "4.79", "wps": 14948, "ups": "0.7", "wpb": 22219, "bsz": 828, "num_updates": 138073, "lr": 0.0002691197802081998, "gnorm": "0.459", "clip": "0%", "oom": 0.0, "wall": 205817, "train_wall": 201378}
{"epoch": 7, "update": 6.464856824098177, "loss": "3.900", "nll_loss": "2.258", "ppl": "4.78", "wps": 14954, "ups": "0.7", "wpb": 22228, "bsz": 828, "num_updates": 139073, "lr": 0.0002681504860186043, "gnorm": "0.458", "clip": "0%", "oom": 0.0, "wall": 207303, "train_wall": 202837}
{"epoch": 7, "update": 6.5113425065079955, "loss": "3.901", "nll_loss": "2.259", "ppl": "4.79", "wps": 14957, "ups": "0.7", "wpb": 22228, "bsz": 829, "num_updates": 140073, "lr": 0.0002671915903262021, "gnorm": "0.458", "clip": "0%", "oom": 0.0, "wall": 208786, "train_wall": 204293}
{"epoch": 7, "update": 6.557828188917814, "loss": "3.902", "nll_loss": "2.260", "ppl": "4.79", "wps": 14958, "ups": "0.7", "wpb": 22226, "bsz": 829, "num_updates": 141073, "lr": 0.00026624290852737126, "gnorm": "0.457", "clip": "0%", "oom": 0.0, "wall": 210270, "train_wall": 205750}
{"epoch": 7, "update": 6.604313871327631, "loss": "3.902", "nll_loss": "2.260", "ppl": "4.79", "wps": 14955, "ups": "0.7", "wpb": 22220, "bsz": 828, "num_updates": 142073, "lr": 0.00026530426057430053, "gnorm": "0.457", "clip": "0%", "oom": 0.0, "wall": 211755, "train_wall": 207207}
{"epoch": 7, "update": 6.650799553737449, "loss": "3.902", "nll_loss": "2.259", "ppl": "4.79", "wps": 14953, "ups": "0.7", "wpb": 22220, "bsz": 829, "num_updates": 143073, "lr": 0.0002643754708314458, "gnorm": "0.457", "clip": "0%", "oom": 0.0, "wall": 213243, "train_wall": 208669}
{"epoch": 7, "update": 6.697285236147267, "loss": "3.901", "nll_loss": "2.259", "ppl": "4.79", "wps": 14952, "ups": "0.7", "wpb": 22221, "bsz": 829, "num_updates": 144073, "lr": 0.00026345636793747516, "gnorm": "0.456", "clip": "0%", "oom": 0.0, "wall": 214731, "train_wall": 210131}
{"epoch": 7, "update": 6.743770918557084, "loss": "3.901", "nll_loss": "2.259", "ppl": "4.79", "wps": 14949, "ups": "0.7", "wpb": 22219, "bsz": 828, "num_updates": 145073, "lr": 0.0002625467846724576, "gnorm": "0.456", "clip": "0%", "oom": 0.0, "wall": 216220, "train_wall": 211593}
{"epoch": 7, "update": 6.790256600966902, "loss": "3.902", "nll_loss": "2.260", "ppl": "4.79", "wps": 14947, "ups": "0.7", "wpb": 22218, "bsz": 828, "num_updates": 146073, "lr": 0.0002616465578300608, "gnorm": "0.457", "clip": "0%", "oom": 0.0, "wall": 217709, "train_wall": 213055}
{"epoch": 7, "update": 6.83674228337672, "loss": "3.902", "nll_loss": "2.260", "ppl": "4.79", "wps": 14947, "ups": "0.7", "wpb": 22217, "bsz": 828, "num_updates": 147073, "lr": 0.00026075552809453603, "gnorm": "0.457", "clip": "0%", "oom": 0.0, "wall": 219195, "train_wall": 214514}
{"epoch": 7, "update": 6.883227965786538, "loss": "3.902", "nll_loss": "2.260", "ppl": "4.79", "wps": 14946, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 148073, "lr": 0.0002598735399222809, "gnorm": "0.456", "clip": "0%", "oom": 0.0, "wall": 220682, "train_wall": 215975}
{"epoch": 7, "update": 6.929713648196356, "loss": "3.903", "nll_loss": "2.261", "ppl": "4.79", "wps": 14942, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 149073, "lr": 0.0002590004414277785, "gnorm": "0.456", "clip": "0%", "oom": 0.0, "wall": 222169, "train_wall": 217435}
{"epoch": 7, "update": 6.976199330606173, "loss": "3.902", "nll_loss": "2.259", "ppl": "4.79", "wps": 14941, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 150073, "lr": 0.00025813608427372435, "gnorm": "0.456", "clip": "0%", "oom": 0.0, "wall": 223658, "train_wall": 218898}
{"epoch": 7, "loss": "3.902", "nll_loss": "2.259", "ppl": "4.79", "wps": 14941, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 150584, "lr": 0.0002576977255008494, "gnorm": "0.456", "clip": "0%", "oom": 0.0, "wall": 224418, "train_wall": 219644}
{"epoch": 7, "valid_loss": 4.413993167413645, "valid_nll_loss": 2.768014708562664, "valid_ppl": "6.81", "num_updates": 150584, "best": 4.413993167413645}
{"epoch": 8, "update": 7.046485682409818, "loss": "3.883", "nll_loss": "2.238", "ppl": "4.72", "wps": 14885, "ups": "0.6", "wpb": 22167, "bsz": 830, "num_updates": 151585, "lr": 0.0002568454555173652, "gnorm": "0.461", "clip": "0%", "oom": 0.0, "wall": 226005, "train_wall": 221107}
{"epoch": 8, "update": 7.092971364819635, "loss": "3.882", "nll_loss": "2.237", "ppl": "4.71", "wps": 14916, "ups": "0.7", "wpb": 22192, "bsz": 831, "num_updates": 152585, "lr": 0.0002560024248664526, "gnorm": "0.459", "clip": "0%", "oom": 0.0, "wall": 227491, "train_wall": 222565}
{"epoch": 8, "update": 7.139457047229453, "loss": "3.884", "nll_loss": "2.239", "ppl": "4.72", "wps": 14924, "ups": "0.7", "wpb": 22207, "bsz": 831, "num_updates": 153585, "lr": 0.00025516764120159333, "gnorm": "0.458", "clip": "0%", "oom": 0.0, "wall": 228980, "train_wall": 224026}
{"epoch": 8, "update": 7.1859427296392715, "loss": "3.885", "nll_loss": "2.241", "ppl": "4.73", "wps": 14925, "ups": "0.7", "wpb": 22209, "bsz": 830, "num_updates": 154585, "lr": 0.0002543409709329187, "gnorm": "0.460", "clip": "0%", "oom": 0.0, "wall": 230468, "train_wall": 225487}
{"epoch": 8, "update": 7.232428412049089, "loss": "3.884", "nll_loss": "2.239", "ppl": "4.72", "wps": 14922, "ups": "0.7", "wpb": 22204, "bsz": 831, "num_updates": 155585, "lr": 0.00025352228348062336, "gnorm": "0.462", "clip": "0%", "oom": 0.0, "wall": 231956, "train_wall": 226948}
{"epoch": 8, "update": 7.278914094458907, "loss": "3.885", "nll_loss": "2.241", "ppl": "4.73", "wps": 14929, "ups": "0.7", "wpb": 22203, "bsz": 828, "num_updates": 156585, "lr": 0.0002527114511883221, "gnorm": "0.462", "clip": "0%", "oom": 0.0, "wall": 233440, "train_wall": 228405}
{"epoch": 8, "update": 7.325399776868725, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14929, "ups": "0.7", "wpb": 22205, "bsz": 828, "num_updates": 157585, "lr": 0.0002519083492394349, "gnorm": "0.461", "clip": "0%", "oom": 0.0, "wall": 234928, "train_wall": 229865}
{"epoch": 8, "update": 7.371885459278542, "loss": "3.888", "nll_loss": "2.244", "ppl": "4.74", "wps": 14933, "ups": "0.7", "wpb": 22208, "bsz": 827, "num_updates": 158585, "lr": 0.000251112855576477, "gnorm": "0.461", "clip": "0%", "oom": 0.0, "wall": 236414, "train_wall": 231323}
{"epoch": 8, "update": 7.41837114168836, "loss": "3.887", "nll_loss": "2.243", "ppl": "4.73", "wps": 14932, "ups": "0.7", "wpb": 22206, "bsz": 827, "num_updates": 159585, "lr": 0.0002503248508231347, "gnorm": "0.461", "clip": "0%", "oom": 0.0, "wall": 237900, "train_wall": 232783}
{"epoch": 8, "update": 7.464856824098177, "loss": "3.887", "nll_loss": "2.243", "ppl": "4.73", "wps": 14930, "ups": "0.7", "wpb": 22205, "bsz": 827, "num_updates": 160585, "lr": 0.0002495442182090142, "gnorm": "0.462", "clip": "0%", "oom": 0.0, "wall": 239389, "train_wall": 234245}
{"epoch": 8, "update": 7.5113425065079955, "loss": "3.887", "nll_loss": "2.243", "ppl": "4.73", "wps": 14932, "ups": "0.7", "wpb": 22208, "bsz": 828, "num_updates": 161585, "lr": 0.000248770843496956, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 240876, "train_wall": 235704}
{"epoch": 8, "update": 7.557828188917814, "loss": "3.887", "nll_loss": "2.243", "ppl": "4.73", "wps": 14934, "ups": "0.7", "wpb": 22209, "bsz": 828, "num_updates": 162585, "lr": 0.0002480046149128118, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 242362, "train_wall": 237163}
{"epoch": 8, "update": 7.604313871327631, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14935, "ups": "0.7", "wpb": 22211, "bsz": 827, "num_updates": 163585, "lr": 0.0002472454230775865, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 243850, "train_wall": 238624}
{"epoch": 8, "update": 7.650799553737449, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14935, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 164585, "lr": 0.00024649316094185026, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 245337, "train_wall": 240085}
{"epoch": 8, "update": 7.697285236147267, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14936, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 165585, "lr": 0.0002457477237223337, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 246822, "train_wall": 241543}
{"epoch": 8, "update": 7.743770918557084, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14937, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 166585, "lr": 0.00024500900884061875, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 248309, "train_wall": 243003}
{"epoch": 8, "update": 7.790256600966902, "loss": "3.887", "nll_loss": "2.243", "ppl": "4.73", "wps": 14940, "ups": "0.7", "wpb": 22215, "bsz": 827, "num_updates": 167585, "lr": 0.0002442769158638451, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 249794, "train_wall": 244461}
{"epoch": 8, "update": 7.83674228337672, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14939, "ups": "0.7", "wpb": 22214, "bsz": 827, "num_updates": 168585, "lr": 0.00024355134644735342, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 251282, "train_wall": 245922}
{"epoch": 8, "update": 7.883227965786538, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14941, "ups": "0.7", "wpb": 22215, "bsz": 827, "num_updates": 169585, "lr": 0.00024283220427919264, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 252766, "train_wall": 247380}
{"epoch": 8, "update": 7.929713648196356, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14940, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 170585, "lr": 0.00024211939502641772, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 254254, "train_wall": 248841}
{"epoch": 8, "update": 7.976199330606173, "loss": "3.886", "nll_loss": "2.242", "ppl": "4.73", "wps": 14941, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 171585, "lr": 0.0002414128262831121, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 255740, "train_wall": 250300}
{"epoch": 8, "loss": "3.886", "nll_loss": "2.243", "ppl": "4.73", "wps": 14940, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 172096, "lr": 0.00024105414954377413, "gnorm": "0.463", "clip": "0%", "oom": 0.0, "wall": 256499, "train_wall": 251046}
{"epoch": 8, "valid_loss": 4.422199736495138, "valid_nll_loss": 2.7700943348109104, "valid_ppl": "6.82", "num_updates": 172096, "best": 4.413993167413645}
{"epoch": 9, "update": 8.046485682409818, "loss": "3.858", "nll_loss": "2.211", "ppl": "4.63", "wps": 14929, "ups": "0.6", "wpb": 22207, "bsz": 817, "num_updates": 173097, "lr": 0.00024035614477879103, "gnorm": "0.462", "clip": "0%", "oom": 0.0, "wall": 258053, "train_wall": 252507}
{"epoch": 9, "update": 8.092971364819636, "loss": "3.861", "nll_loss": "2.214", "ppl": "4.64", "wps": 14937, "ups": "0.7", "wpb": 22219, "bsz": 816, "num_updates": 174097, "lr": 0.00023966485691490053, "gnorm": "0.476", "clip": "0%", "oom": 0.0, "wall": 259541, "train_wall": 253967}
{"epoch": 9, "update": 8.139457047229453, "loss": "3.867", "nll_loss": "2.221", "ppl": "4.66", "wps": 14924, "ups": "0.7", "wpb": 22202, "bsz": 819, "num_updates": 175097, "lr": 0.00023897949958081186, "gnorm": "0.473", "clip": "0%", "oom": 0.0, "wall": 261029, "train_wall": 255428}
{"epoch": 9, "update": 8.18594272963927, "loss": "3.872", "nll_loss": "2.226", "ppl": "4.68", "wps": 14927, "ups": "0.7", "wpb": 22200, "bsz": 821, "num_updates": 176097, "lr": 0.0002382999884623489, "gnorm": "0.473", "clip": "0%", "oom": 0.0, "wall": 262515, "train_wall": 256886}
{"epoch": 9, "update": 8.232428412049089, "loss": "3.871", "nll_loss": "2.226", "ppl": "4.68", "wps": 14930, "ups": "0.7", "wpb": 22193, "bsz": 822, "num_updates": 177097, "lr": 0.00023762624091401747, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 263998, "train_wall": 258342}
{"epoch": 9, "update": 8.278914094458907, "loss": "3.871", "nll_loss": "2.226", "ppl": "4.68", "wps": 14937, "ups": "0.7", "wpb": 22199, "bsz": 824, "num_updates": 178097, "lr": 0.00023695817591678307, "gnorm": "0.471", "clip": "0%", "oom": 0.0, "wall": 265483, "train_wall": 259800}
{"epoch": 9, "update": 8.325399776868725, "loss": "3.872", "nll_loss": "2.226", "ppl": "4.68", "wps": 14940, "ups": "0.7", "wpb": 22203, "bsz": 826, "num_updates": 179097, "lr": 0.00023629571403714682, "gnorm": "0.471", "clip": "0%", "oom": 0.0, "wall": 266969, "train_wall": 261257}
{"epoch": 9, "update": 8.371885459278543, "loss": "3.875", "nll_loss": "2.229", "ppl": "4.69", "wps": 14938, "ups": "0.7", "wpb": 22200, "bsz": 827, "num_updates": 180097, "lr": 0.00023563877738747312, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 268454, "train_wall": 262715}
{"epoch": 9, "update": 8.41837114168836, "loss": "3.875", "nll_loss": "2.230", "ppl": "4.69", "wps": 14946, "ups": "0.7", "wpb": 22203, "bsz": 828, "num_updates": 181097, "lr": 0.00023498728958752386, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 269936, "train_wall": 264170}
{"epoch": 9, "update": 8.464856824098177, "loss": "3.875", "nll_loss": "2.230", "ppl": "4.69", "wps": 14944, "ups": "0.7", "wpb": 22203, "bsz": 829, "num_updates": 182097, "lr": 0.00023434117572715586, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 271423, "train_wall": 265630}
{"epoch": 9, "update": 8.511342506507996, "loss": "3.873", "nll_loss": "2.228", "ppl": "4.68", "wps": 14945, "ups": "0.7", "wpb": 22207, "bsz": 829, "num_updates": 183097, "lr": 0.0002337003623301406, "gnorm": "0.471", "clip": "0%", "oom": 0.0, "wall": 272911, "train_wall": 267090}
{"epoch": 9, "update": 8.557828188917814, "loss": "3.873", "nll_loss": "2.228", "ppl": "4.69", "wps": 14943, "ups": "0.7", "wpb": 22205, "bsz": 829, "num_updates": 184097, "lr": 0.00023306477731906613, "gnorm": "0.471", "clip": "0%", "oom": 0.0, "wall": 274397, "train_wall": 268550}
{"epoch": 9, "update": 8.604313871327632, "loss": "3.874", "nll_loss": "2.228", "ppl": "4.69", "wps": 14945, "ups": "0.7", "wpb": 22207, "bsz": 829, "num_updates": 185097, "lr": 0.00023243434998128354, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 275883, "train_wall": 270009}
{"epoch": 9, "update": 8.65079955373745, "loss": "3.874", "nll_loss": "2.228", "ppl": "4.69", "wps": 14943, "ups": "0.7", "wpb": 22206, "bsz": 828, "num_updates": 186097, "lr": 0.00023180901093586092, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 277371, "train_wall": 271470}
{"epoch": 9, "update": 8.697285236147266, "loss": "3.873", "nll_loss": "2.228", "ppl": "4.69", "wps": 14944, "ups": "0.7", "wpb": 22205, "bsz": 827, "num_updates": 187097, "lr": 0.00023118869210151042, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 278855, "train_wall": 272927}
{"epoch": 9, "update": 8.743770918557084, "loss": "3.875", "nll_loss": "2.230", "ppl": "4.69", "wps": 14945, "ups": "0.7", "wpb": 22204, "bsz": 827, "num_updates": 188097, "lr": 0.00023057332666545413, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 280338, "train_wall": 274384}
{"epoch": 9, "update": 8.790256600966902, "loss": "3.875", "nll_loss": "2.230", "ppl": "4.69", "wps": 14943, "ups": "0.7", "wpb": 22203, "bsz": 828, "num_updates": 189097, "lr": 0.00022996284905319668, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 281825, "train_wall": 275844}
{"epoch": 9, "update": 8.83674228337672, "loss": "3.875", "nll_loss": "2.230", "ppl": "4.69", "wps": 14944, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 190097, "lr": 0.00022935719489917394, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 283313, "train_wall": 277304}
{"epoch": 9, "update": 8.883227965786539, "loss": "3.875", "nll_loss": "2.230", "ppl": "4.69", "wps": 14942, "ups": "0.7", "wpb": 22205, "bsz": 828, "num_updates": 191097, "lr": 0.00022875630101824702, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 284801, "train_wall": 278763}
{"epoch": 9, "update": 8.929713648196355, "loss": "3.875", "nll_loss": "2.231", "ppl": "4.69", "wps": 14945, "ups": "0.7", "wpb": 22209, "bsz": 828, "num_updates": 192097, "lr": 0.00022816010537801408, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 286286, "train_wall": 280222}
{"epoch": 9, "update": 8.976199330606173, "loss": "3.874", "nll_loss": "2.229", "ppl": "4.69", "wps": 14948, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 193097, "lr": 0.00022756854707191144, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 287771, "train_wall": 281679}
{"epoch": 9, "loss": "3.874", "nll_loss": "2.229", "ppl": "4.69", "wps": 14950, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 193608, "lr": 0.0002272680317007451, "gnorm": "0.472", "clip": "0%", "oom": 0.0, "wall": 288528, "train_wall": 282422}
{"epoch": 9, "valid_loss": 4.3962564298553355, "valid_nll_loss": 2.737483803614158, "valid_ppl": "6.67", "num_updates": 193608, "best": 4.3962564298553355}
{"epoch": 10, "update": 9.046485682409818, "loss": "3.859", "nll_loss": "2.212", "ppl": "4.63", "wps": 14978, "ups": "0.6", "wpb": 22223, "bsz": 829, "num_updates": 194609, "lr": 0.00022668278488945293, "gnorm": "0.476", "clip": "0%", "oom": 0.0, "wall": 290108, "train_wall": 283880}
{"epoch": 10, "update": 9.092971364819636, "loss": "3.861", "nll_loss": "2.215", "ppl": "4.64", "wps": 14965, "ups": "0.7", "wpb": 22219, "bsz": 832, "num_updates": 195609, "lr": 0.00022610261413564425, "gnorm": "0.475", "clip": "0%", "oom": 0.0, "wall": 291594, "train_wall": 285338}
{"epoch": 10, "update": 9.139457047229453, "loss": "3.866", "nll_loss": "2.220", "ppl": "4.66", "wps": 14960, "ups": "0.7", "wpb": 22212, "bsz": 831, "num_updates": 196609, "lr": 0.00022552687535842372, "gnorm": "0.476", "clip": "0%", "oom": 0.0, "wall": 293078, "train_wall": 286795}
{"epoch": 10, "update": 9.18594272963927, "loss": "3.866", "nll_loss": "2.220", "ppl": "4.66", "wps": 14971, "ups": "0.7", "wpb": 22218, "bsz": 829, "num_updates": 197609, "lr": 0.00022495551241652982, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 294561, "train_wall": 288250}
{"epoch": 10, "update": 9.232428412049089, "loss": "3.865", "nll_loss": "2.219", "ppl": "4.66", "wps": 14970, "ups": "0.7", "wpb": 22222, "bsz": 827, "num_updates": 198609, "lr": 0.00022438847015930592, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 296047, "train_wall": 289709}
{"epoch": 10, "update": 9.278914094458907, "loss": "3.866", "nll_loss": "2.219", "ppl": "4.66", "wps": 14966, "ups": "0.7", "wpb": 22222, "bsz": 828, "num_updates": 199609, "lr": 0.00022382569440433948, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 297533, "train_wall": 291167}
{"epoch": 10, "update": 9.325399776868725, "loss": "3.867", "nll_loss": "2.221", "ppl": "4.66", "wps": 14970, "ups": "0.7", "wpb": 22227, "bsz": 828, "num_updates": 200609, "lr": 0.00022326713191571585, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 299017, "train_wall": 292625}
{"epoch": 10, "update": 9.371885459278543, "loss": "3.866", "nll_loss": "2.219", "ppl": "4.66", "wps": 14965, "ups": "0.7", "wpb": 22223, "bsz": 828, "num_updates": 201609, "lr": 0.00022271273038286543, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 300504, "train_wall": 294084}
{"epoch": 10, "update": 9.41837114168836, "loss": "3.866", "nll_loss": "2.220", "ppl": "4.66", "wps": 14964, "ups": "0.7", "wpb": 22222, "bsz": 828, "num_updates": 202609, "lr": 0.00022216243839998613, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 301989, "train_wall": 295543}
{"epoch": 10, "update": 9.464856824098177, "loss": "3.865", "nll_loss": "2.219", "ppl": "4.66", "wps": 14963, "ups": "0.7", "wpb": 22221, "bsz": 828, "num_updates": 203609, "lr": 0.00022161620544602215, "gnorm": "0.477", "clip": "0%", "oom": 0.0, "wall": 303475, "train_wall": 297001}
{"epoch": 10, "update": 9.511342506507996, "loss": "3.866", "nll_loss": "2.220", "ppl": "4.66", "wps": 14965, "ups": "0.7", "wpb": 22221, "bsz": 829, "num_updates": 204609, "lr": 0.00022107398186518173, "gnorm": "0.478", "clip": "0%", "oom": 0.0, "wall": 304958, "train_wall": 298457}
{"epoch": 10, "update": 9.557828188917814, "loss": "3.865", "nll_loss": "2.219", "ppl": "4.66", "wps": 14966, "ups": "0.7", "wpb": 22221, "bsz": 828, "num_updates": 205609, "lr": 0.0002205357188479767, "gnorm": "0.480", "clip": "0%", "oom": 0.0, "wall": 306442, "train_wall": 299915}
{"epoch": 10, "update": 9.604313871327632, "loss": "3.864", "nll_loss": "2.218", "ppl": "4.65", "wps": 14966, "ups": "0.7", "wpb": 22223, "bsz": 828, "num_updates": 206609, "lr": 0.00022000136841276733, "gnorm": "0.480", "clip": "0%", "oom": 0.0, "wall": 307928, "train_wall": 301373}
{"epoch": 10, "update": 9.65079955373745, "loss": "3.864", "nll_loss": "2.218", "ppl": "4.65", "wps": 14966, "ups": "0.7", "wpb": 22221, "bsz": 828, "num_updates": 207609, "lr": 0.00021947088338779652, "gnorm": "0.480", "clip": "0%", "oom": 0.0, "wall": 309411, "train_wall": 302829}
{"epoch": 10, "update": 9.697285236147266, "loss": "3.865", "nll_loss": "2.219", "ppl": "4.65", "wps": 14962, "ups": "0.7", "wpb": 22215, "bsz": 829, "num_updates": 208609, "lr": 0.00021894421739369863, "gnorm": "0.480", "clip": "0%", "oom": 0.0, "wall": 310897, "train_wall": 304288}
{"epoch": 10, "update": 9.743770918557084, "loss": "3.865", "nll_loss": "2.219", "ppl": "4.66", "wps": 14961, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 209609, "lr": 0.0002184213248264674, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 312383, "train_wall": 305747}
{"epoch": 10, "update": 9.790256600966902, "loss": "3.865", "nll_loss": "2.219", "ppl": "4.66", "wps": 14961, "ups": "0.7", "wpb": 22218, "bsz": 829, "num_updates": 210609, "lr": 0.00021790216084086953, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 313870, "train_wall": 307207}
{"epoch": 10, "update": 9.83674228337672, "loss": "3.864", "nll_loss": "2.218", "ppl": "4.65", "wps": 14962, "ups": "0.7", "wpb": 22217, "bsz": 828, "num_updates": 211609, "lr": 0.00021738668133428952, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 315352, "train_wall": 308662}
{"epoch": 10, "update": 9.883227965786539, "loss": "3.864", "nll_loss": "2.218", "ppl": "4.65", "wps": 14962, "ups": "0.7", "wpb": 22216, "bsz": 828, "num_updates": 212609, "lr": 0.00021687484293099338, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 316836, "train_wall": 310120}
{"epoch": 10, "update": 9.929713648196355, "loss": "3.863", "nll_loss": "2.217", "ppl": "4.65", "wps": 14961, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 213609, "lr": 0.0002163666029667975, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 318321, "train_wall": 311578}
{"epoch": 10, "update": 9.976199330606173, "loss": "3.863", "nll_loss": "2.217", "ppl": "4.65", "wps": 14959, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 214609, "lr": 0.00021586191947413127, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 319807, "train_wall": 313036}
{"epoch": 10, "loss": "3.863", "nll_loss": "2.217", "ppl": "4.65", "wps": 14959, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 215120, "lr": 0.00021560538585531158, "gnorm": "0.481", "clip": "0%", "oom": 0.0, "wall": 320566, "train_wall": 313781}
{"epoch": 10, "valid_loss": 4.371640037027143, "valid_nll_loss": 2.7203969359795144, "valid_ppl": "6.59", "num_updates": 215120, "best": 4.371640037027143}
{"epoch": 11, "update": 10.046485682409818, "loss": "3.839", "nll_loss": "2.190", "ppl": "4.56", "wps": 14940, "ups": "0.6", "wpb": 22228, "bsz": 838, "num_updates": 216121, "lr": 0.00021510550043402096, "gnorm": "0.483", "clip": "0%", "oom": 0.0, "wall": 322149, "train_wall": 315243}
{"epoch": 11, "update": 10.092971364819636, "loss": "3.845", "nll_loss": "2.197", "ppl": "4.58", "wps": 14944, "ups": "0.7", "wpb": 22222, "bsz": 831, "num_updates": 217121, "lr": 0.0002146095701653167, "gnorm": "0.485", "clip": "0%", "oom": 0.0, "wall": 323635, "train_wall": 316701}
{"epoch": 11, "update": 10.139457047229453, "loss": "3.848", "nll_loss": "2.199", "ppl": "4.59", "wps": 14939, "ups": "0.7", "wpb": 22212, "bsz": 825, "num_updates": 218121, "lr": 0.00021411705428880392, "gnorm": "0.486", "clip": "0%", "oom": 0.0, "wall": 325122, "train_wall": 318161}
{"epoch": 11, "update": 10.18594272963927, "loss": "3.851", "nll_loss": "2.203", "ppl": "4.60", "wps": 14937, "ups": "0.7", "wpb": 22214, "bsz": 825, "num_updates": 219121, "lr": 0.00021362791380431093, "gnorm": "0.487", "clip": "0%", "oom": 0.0, "wall": 326610, "train_wall": 319621}
{"epoch": 11, "update": 10.232428412049089, "loss": "3.849", "nll_loss": "2.201", "ppl": "4.60", "wps": 14943, "ups": "0.7", "wpb": 22222, "bsz": 828, "num_updates": 220121, "lr": 0.0002131421103324895, "gnorm": "0.487", "clip": "0%", "oom": 0.0, "wall": 328096, "train_wall": 321080}
{"epoch": 11, "update": 10.278914094458907, "loss": "3.849", "nll_loss": "2.201", "ppl": "4.60", "wps": 14944, "ups": "0.7", "wpb": 22220, "bsz": 827, "num_updates": 221121, "lr": 0.00021265960610216658, "gnorm": "0.486", "clip": "0%", "oom": 0.0, "wall": 329583, "train_wall": 322540}
{"epoch": 11, "update": 10.325399776868725, "loss": "3.851", "nll_loss": "2.203", "ppl": "4.60", "wps": 14946, "ups": "0.7", "wpb": 22218, "bsz": 825, "num_updates": 222121, "lr": 0.00021218036393800917, "gnorm": "0.486", "clip": "0%", "oom": 0.0, "wall": 331067, "train_wall": 323998}
{"epoch": 11, "update": 10.371885459278543, "loss": "3.851", "nll_loss": "2.204", "ppl": "4.61", "wps": 14949, "ups": "0.7", "wpb": 22222, "bsz": 826, "num_updates": 223121, "lr": 0.00021170434724849347, "gnorm": "0.487", "clip": "0%", "oom": 0.0, "wall": 332553, "train_wall": 325457}
{"epoch": 11, "update": 10.41837114168836, "loss": "3.850", "nll_loss": "2.203", "ppl": "4.60", "wps": 14945, "ups": "0.7", "wpb": 22218, "bsz": 827, "num_updates": 224121, "lr": 0.00021123152001416995, "gnorm": "0.490", "clip": "0%", "oom": 0.0, "wall": 334041, "train_wall": 326917}
{"epoch": 11, "update": 10.464856824098177, "loss": "3.850", "nll_loss": "2.202", "ppl": "4.60", "wps": 14946, "ups": "0.7", "wpb": 22220, "bsz": 826, "num_updates": 225121, "lr": 0.00021076184677621497, "gnorm": "0.489", "clip": "0%", "oom": 0.0, "wall": 335528, "train_wall": 328377}
{"epoch": 11, "update": 10.511342506507996, "loss": "3.852", "nll_loss": "2.205", "ppl": "4.61", "wps": 14945, "ups": "0.7", "wpb": 22217, "bsz": 827, "num_updates": 226121, "lr": 0.00021029529262526185, "gnorm": "0.489", "clip": "0%", "oom": 0.0, "wall": 337014, "train_wall": 329836}
{"epoch": 11, "update": 10.557828188917814, "loss": "3.853", "nll_loss": "2.206", "ppl": "4.61", "wps": 14948, "ups": "0.7", "wpb": 22218, "bsz": 826, "num_updates": 227121, "lr": 0.00020983182319050193, "gnorm": "0.489", "clip": "0%", "oom": 0.0, "wall": 338497, "train_wall": 331293}
{"epoch": 11, "update": 10.604313871327632, "loss": "3.854", "nll_loss": "2.206", "ppl": "4.61", "wps": 14952, "ups": "0.7", "wpb": 22219, "bsz": 827, "num_updates": 228121, "lr": 0.00020937140462904923, "gnorm": "0.490", "clip": "0%", "oom": 0.0, "wall": 339980, "train_wall": 332749}
{"epoch": 11, "update": 10.65079955373745, "loss": "3.855", "nll_loss": "2.208", "ppl": "4.62", "wps": 14952, "ups": "0.7", "wpb": 22215, "bsz": 826, "num_updates": 229121, "lr": 0.00020891400361556066, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 341462, "train_wall": 334204}
{"epoch": 11, "update": 10.697285236147266, "loss": "3.855", "nll_loss": "2.208", "ppl": "4.62", "wps": 14954, "ups": "0.7", "wpb": 22217, "bsz": 826, "num_updates": 230121, "lr": 0.00020845958733210418, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 342946, "train_wall": 335660}
{"epoch": 11, "update": 10.743770918557084, "loss": "3.855", "nll_loss": "2.208", "ppl": "4.62", "wps": 14956, "ups": "0.7", "wpb": 22217, "bsz": 827, "num_updates": 231121, "lr": 0.00020800812345826881, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 344430, "train_wall": 337115}
{"epoch": 11, "update": 10.790256600966902, "loss": "3.855", "nll_loss": "2.208", "ppl": "4.62", "wps": 14956, "ups": "0.7", "wpb": 22216, "bsz": 827, "num_updates": 232121, "lr": 0.0002075595801615089, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 345914, "train_wall": 338572}
{"epoch": 11, "update": 10.83674228337672, "loss": "3.855", "nll_loss": "2.208", "ppl": "4.62", "wps": 14953, "ups": "0.7", "wpb": 22212, "bsz": 827, "num_updates": 233121, "lr": 0.0002071139260877167, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 347400, "train_wall": 340030}
{"epoch": 11, "update": 10.883227965786539, "loss": "3.854", "nll_loss": "2.207", "ppl": "4.62", "wps": 14953, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 234121, "lr": 0.00020667113035201647, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 348885, "train_wall": 341487}
{"epoch": 11, "update": 10.929713648196355, "loss": "3.854", "nll_loss": "2.207", "ppl": "4.62", "wps": 14952, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 235121, "lr": 0.00020623116252977414, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 350371, "train_wall": 342947}
{"epoch": 11, "update": 10.976199330606173, "loss": "3.854", "nll_loss": "2.207", "ppl": "4.62", "wps": 14953, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 236121, "lr": 0.00020579399264781677, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 351855, "train_wall": 344404}
{"epoch": 11, "loss": "3.854", "nll_loss": "2.207", "ppl": "4.62", "wps": 14952, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 236632, "lr": 0.00020557166945290038, "gnorm": "0.491", "clip": "0%", "oom": 0.0, "wall": 352617, "train_wall": 345152}
{"epoch": 11, "valid_loss": 4.352914094931821, "valid_nll_loss": 2.6956829484641354, "valid_ppl": "6.48", "num_updates": 236632, "best": 4.352914094931821}
{"epoch": 12, "update": 11.046485682409818, "loss": "3.840", "nll_loss": "2.191", "ppl": "4.56", "wps": 14963, "ups": "0.6", "wpb": 22214, "bsz": 825, "num_updates": 237633, "lr": 0.00020513823974906928, "gnorm": "0.494", "clip": "0%", "oom": 0.0, "wall": 354196, "train_wall": 346609}
{"epoch": 12, "update": 11.092971364819636, "loss": "3.840", "nll_loss": "2.191", "ppl": "4.57", "wps": 14931, "ups": "0.7", "wpb": 22184, "bsz": 832, "num_updates": 238633, "lr": 0.00020470796899652898, "gnorm": "0.494", "clip": "0%", "oom": 0.0, "wall": 355683, "train_wall": 348068}
{"epoch": 12, "update": 11.139457047229453, "loss": "3.845", "nll_loss": "2.197", "ppl": "4.59", "wps": 14939, "ups": "0.7", "wpb": 22198, "bsz": 831, "num_updates": 239633, "lr": 0.00020428039437239253, "gnorm": "0.495", "clip": "0%", "oom": 0.0, "wall": 357169, "train_wall": 349527}
{"epoch": 12, "update": 11.18594272963927, "loss": "3.844", "nll_loss": "2.196", "ppl": "4.58", "wps": 14944, "ups": "0.7", "wpb": 22218, "bsz": 830, "num_updates": 240633, "lr": 0.0002038554878366651, "gnorm": "0.494", "clip": "0%", "oom": 0.0, "wall": 358659, "train_wall": 350990}
{"epoch": 12, "update": 11.232428412049089, "loss": "3.847", "nll_loss": "2.199", "ppl": "4.59", "wps": 14935, "ups": "0.7", "wpb": 22214, "bsz": 832, "num_updates": 241633, "lr": 0.00020343322175592665, "gnorm": "0.494", "clip": "0%", "oom": 0.0, "wall": 360148, "train_wall": 352453}
{"epoch": 12, "update": 11.278914094458907, "loss": "3.845", "nll_loss": "2.197", "ppl": "4.58", "wps": 14939, "ups": "0.7", "wpb": 22211, "bsz": 832, "num_updates": 242633, "lr": 0.00020301356889578393, "gnorm": "0.494", "clip": "0%", "oom": 0.0, "wall": 361632, "train_wall": 353909}
{"epoch": 12, "update": 11.325399776868725, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14941, "ups": "0.7", "wpb": 22209, "bsz": 831, "num_updates": 243633, "lr": 0.00020259650241349263, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 363117, "train_wall": 355367}
{"epoch": 12, "update": 11.371885459278543, "loss": "3.847", "nll_loss": "2.199", "ppl": "4.59", "wps": 14940, "ups": "0.7", "wpb": 22209, "bsz": 830, "num_updates": 244633, "lr": 0.00020218199585074538, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 364604, "train_wall": 356827}
{"epoch": 12, "update": 11.41837114168836, "loss": "3.845", "nll_loss": "2.196", "ppl": "4.58", "wps": 14941, "ups": "0.7", "wpb": 22206, "bsz": 829, "num_updates": 245633, "lr": 0.0002017700231266221, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 366087, "train_wall": 358284}
{"epoch": 12, "update": 11.464856824098177, "loss": "3.845", "nll_loss": "2.197", "ppl": "4.58", "wps": 14944, "ups": "0.7", "wpb": 22208, "bsz": 828, "num_updates": 246633, "lr": 0.00020136055853069688, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 367572, "train_wall": 359742}
{"epoch": 12, "update": 11.511342506507996, "loss": "3.844", "nll_loss": "2.196", "ppl": "4.58", "wps": 14946, "ups": "0.7", "wpb": 22209, "bsz": 829, "num_updates": 247633, "lr": 0.0002009535767162987, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 369057, "train_wall": 361200}
{"epoch": 12, "update": 11.557828188917814, "loss": "3.845", "nll_loss": "2.197", "ppl": "4.59", "wps": 14949, "ups": "0.7", "wpb": 22210, "bsz": 829, "num_updates": 248633, "lr": 0.00020054905269392123, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 370541, "train_wall": 362657}
{"epoch": 12, "update": 11.604313871327632, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14948, "ups": "0.7", "wpb": 22208, "bsz": 828, "num_updates": 249633, "lr": 0.0002001469618247778, "gnorm": "0.496", "clip": "0%", "oom": 0.0, "wall": 372026, "train_wall": 364115}
{"epoch": 12, "update": 11.65079955373745, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14948, "ups": "0.7", "wpb": 22208, "bsz": 828, "num_updates": 250633, "lr": 0.00019974727981449808, "gnorm": "0.497", "clip": "0%", "oom": 0.0, "wall": 373511, "train_wall": 365574}
{"epoch": 12, "update": 11.697285236147266, "loss": "3.847", "nll_loss": "2.199", "ppl": "4.59", "wps": 14951, "ups": "0.7", "wpb": 22211, "bsz": 829, "num_updates": 251633, "lr": 0.00019934998270696287, "gnorm": "0.497", "clip": "0%", "oom": 0.0, "wall": 374996, "train_wall": 367032}
{"epoch": 12, "update": 11.743770918557084, "loss": "3.847", "nll_loss": "2.199", "ppl": "4.59", "wps": 14949, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 252633, "lr": 0.0001989550468782729, "gnorm": "0.497", "clip": "0%", "oom": 0.0, "wall": 376482, "train_wall": 368492}
{"epoch": 12, "update": 11.790256600966902, "loss": "3.847", "nll_loss": "2.199", "ppl": "4.59", "wps": 14949, "ups": "0.7", "wpb": 22209, "bsz": 828, "num_updates": 253633, "lr": 0.00019856244903084864, "gnorm": "0.498", "clip": "0%", "oom": 0.0, "wall": 377967, "train_wall": 369950}
{"epoch": 12, "update": 11.83674228337672, "loss": "3.846", "nll_loss": "2.199", "ppl": "4.59", "wps": 14951, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 254633, "lr": 0.00019817216618765775, "gnorm": "0.497", "clip": "0%", "oom": 0.0, "wall": 379451, "train_wall": 371408}
{"epoch": 12, "update": 11.883227965786539, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14951, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 255633, "lr": 0.00019778417568656702, "gnorm": "0.498", "clip": "0%", "oom": 0.0, "wall": 380938, "train_wall": 372868}
{"epoch": 12, "update": 11.929713648196355, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14951, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 256633, "lr": 0.00019739845517481496, "gnorm": "0.498", "clip": "0%", "oom": 0.0, "wall": 382426, "train_wall": 374329}
{"epoch": 12, "update": 11.976199330606173, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14949, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 257633, "lr": 0.0001970149826036032, "gnorm": "0.498", "clip": "0%", "oom": 0.0, "wall": 383917, "train_wall": 375792}
{"epoch": 12, "loss": "3.846", "nll_loss": "2.198", "ppl": "4.59", "wps": 14948, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 258144, "lr": 0.00019681988892093235, "gnorm": "0.498", "clip": "0%", "oom": 0.0, "wall": 384677, "train_wall": 376539}
{"epoch": 12, "valid_loss": 4.349834386062343, "valid_nll_loss": 2.692851873931617, "valid_ppl": "6.47", "num_updates": 258144, "best": 4.349834386062343}
{"epoch": 13, "update": 12.046485682409818, "loss": "3.834", "nll_loss": "2.184", "ppl": "4.55", "wps": 14905, "ups": "0.6", "wpb": 22179, "bsz": 827, "num_updates": 259145, "lr": 0.0001964393928060681, "gnorm": "0.501", "clip": "0%", "oom": 0.0, "wall": 386247, "train_wall": 378001}
{"epoch": 13, "update": 12.092971364819636, "loss": "3.835", "nll_loss": "2.186", "ppl": "4.55", "wps": 14900, "ups": "0.7", "wpb": 22155, "bsz": 823, "num_updates": 260145, "lr": 0.00019606147177113578, "gnorm": "0.502", "clip": "0%", "oom": 0.0, "wall": 387733, "train_wall": 379459}
{"epoch": 13, "update": 12.139457047229453, "loss": "3.838", "nll_loss": "2.189", "ppl": "4.56", "wps": 14913, "ups": "0.7", "wpb": 22176, "bsz": 823, "num_updates": 261145, "lr": 0.00019568572357363788, "gnorm": "0.502", "clip": "0%", "oom": 0.0, "wall": 389220, "train_wall": 380919}
{"epoch": 13, "update": 12.18594272963927, "loss": "3.838", "nll_loss": "2.189", "ppl": "4.56", "wps": 14925, "ups": "0.7", "wpb": 22191, "bsz": 827, "num_updates": 262145, "lr": 0.00019531212747203597, "gnorm": "0.503", "clip": "0%", "oom": 0.0, "wall": 390706, "train_wall": 382378}
{"epoch": 13, "update": 12.232428412049089, "loss": "3.838", "nll_loss": "2.189", "ppl": "4.56", "wps": 14927, "ups": "0.7", "wpb": 22193, "bsz": 826, "num_updates": 263145, "lr": 0.00019494066300093082, "gnorm": "0.503", "clip": "0%", "oom": 0.0, "wall": 392193, "train_wall": 383837}
{"epoch": 13, "update": 12.278914094458907, "loss": "3.837", "nll_loss": "2.188", "ppl": "4.56", "wps": 14932, "ups": "0.7", "wpb": 22196, "bsz": 827, "num_updates": 264145, "lr": 0.00019457130996635336, "gnorm": "0.503", "clip": "0%", "oom": 0.0, "wall": 393677, "train_wall": 385294}
{"epoch": 13, "update": 12.325399776868725, "loss": "3.835", "nll_loss": "2.186", "ppl": "4.55", "wps": 14939, "ups": "0.7", "wpb": 22212, "bsz": 827, "num_updates": 265145, "lr": 0.00019420404844115382, "gnorm": "0.503", "clip": "0%", "oom": 0.0, "wall": 395167, "train_wall": 386756}
{"epoch": 13, "update": 12.371885459278543, "loss": "3.837", "nll_loss": "2.188", "ppl": "4.56", "wps": 14939, "ups": "0.7", "wpb": 22212, "bsz": 827, "num_updates": 266145, "lr": 0.000193838858760486, "gnorm": "0.503", "clip": "0%", "oom": 0.0, "wall": 396653, "train_wall": 388216}
{"epoch": 13, "update": 12.41837114168836, "loss": "3.837", "nll_loss": "2.188", "ppl": "4.56", "wps": 14935, "ups": "0.7", "wpb": 22202, "bsz": 827, "num_updates": 267145, "lr": 0.0001934757215173847, "gnorm": "0.503", "clip": "0%", "oom": 0.0, "wall": 398138, "train_wall": 389673}
{"epoch": 13, "update": 12.464856824098177, "loss": "3.838", "nll_loss": "2.189", "ppl": "4.56", "wps": 14938, "ups": "0.7", "wpb": 22202, "bsz": 827, "num_updates": 268145, "lr": 0.00019311461755843384, "gnorm": "0.504", "clip": "0%", "oom": 0.0, "wall": 399621, "train_wall": 391130}
{"epoch": 13, "update": 12.511342506507996, "loss": "3.837", "nll_loss": "2.188", "ppl": "4.56", "wps": 14941, "ups": "0.7", "wpb": 22208, "bsz": 828, "num_updates": 269145, "lr": 0.00019275552797952335, "gnorm": "0.504", "clip": "0%", "oom": 0.0, "wall": 401109, "train_wall": 392591}
{"epoch": 13, "update": 12.557828188917814, "loss": "3.836", "nll_loss": "2.187", "ppl": "4.55", "wps": 14940, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 270145, "lr": 0.00019239843412169246, "gnorm": "0.504", "clip": "0%", "oom": 0.0, "wall": 402596, "train_wall": 394050}
{"epoch": 13, "update": 12.604313871327632, "loss": "3.837", "nll_loss": "2.188", "ppl": "4.56", "wps": 14940, "ups": "0.7", "wpb": 22208, "bsz": 828, "num_updates": 271145, "lr": 0.0001920433175670572, "gnorm": "0.504", "clip": "0%", "oom": 0.0, "wall": 404083, "train_wall": 395510}
{"epoch": 13, "update": 12.65079955373745, "loss": "3.838", "nll_loss": "2.189", "ppl": "4.56", "wps": 14943, "ups": "0.7", "wpb": 22209, "bsz": 829, "num_updates": 272145, "lr": 0.00019169016013482066, "gnorm": "0.504", "clip": "0%", "oom": 0.0, "wall": 405567, "train_wall": 396966}
{"epoch": 13, "update": 12.697285236147266, "loss": "3.839", "nll_loss": "2.190", "ppl": "4.56", "wps": 14943, "ups": "0.7", "wpb": 22210, "bsz": 829, "num_updates": 273145, "lr": 0.0001913389438773633, "gnorm": "0.504", "clip": "0%", "oom": 0.0, "wall": 407054, "train_wall": 398426}
{"epoch": 13, "update": 12.743770918557084, "loss": "3.839", "nll_loss": "2.190", "ppl": "4.56", "wps": 14942, "ups": "0.7", "wpb": 22210, "bsz": 829, "num_updates": 274145, "lr": 0.000190989651076412, "gnorm": "0.505", "clip": "0%", "oom": 0.0, "wall": 408541, "train_wall": 399887}
{"epoch": 13, "update": 12.790256600966902, "loss": "3.839", "nll_loss": "2.190", "ppl": "4.56", "wps": 14944, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 275145, "lr": 0.00019064226423928552, "gnorm": "0.505", "clip": "0%", "oom": 0.0, "wall": 410025, "train_wall": 401343}
{"epoch": 13, "update": 12.83674228337672, "loss": "3.839", "nll_loss": "2.190", "ppl": "4.56", "wps": 14945, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 276145, "lr": 0.00019029676609521522, "gnorm": "0.505", "clip": "0%", "oom": 0.0, "wall": 411511, "train_wall": 402801}
{"epoch": 13, "update": 12.883227965786539, "loss": "3.839", "nll_loss": "2.191", "ppl": "4.57", "wps": 14946, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 277145, "lr": 0.0001899531395917384, "gnorm": "0.505", "clip": "0%", "oom": 0.0, "wall": 412994, "train_wall": 404258}
{"epoch": 13, "update": 12.929713648196355, "loss": "3.839", "nll_loss": "2.191", "ppl": "4.57", "wps": 14948, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 278145, "lr": 0.0001896113678911632, "gnorm": "0.505", "clip": "0%", "oom": 0.0, "wall": 414480, "train_wall": 405718}
{"epoch": 13, "update": 12.976199330606173, "loss": "3.839", "nll_loss": "2.191", "ppl": "4.57", "wps": 14947, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 279145, "lr": 0.00018927143436710345, "gnorm": "0.506", "clip": "0%", "oom": 0.0, "wall": 415966, "train_wall": 407176}
{"epoch": 13, "loss": "3.839", "nll_loss": "2.191", "ppl": "4.56", "wps": 14948, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 279656, "lr": 0.00018909843267018731, "gnorm": "0.506", "clip": "0%", "oom": 0.0, "wall": 416725, "train_wall": 407921}
{"epoch": 13, "valid_loss": 4.331361380671269, "valid_nll_loss": 2.6685902511233803, "valid_ppl": "6.36", "num_updates": 279656, "best": 4.331361380671269}
{"epoch": 14, "update": 13.046485682409818, "loss": "3.830", "nll_loss": "2.180", "ppl": "4.53", "wps": 14989, "ups": "0.6", "wpb": 22249, "bsz": 823, "num_updates": 280657, "lr": 0.00018876090926534732, "gnorm": "0.509", "clip": "0%", "oom": 0.0, "wall": 418290, "train_wall": 409380}
{"epoch": 14, "update": 13.092971364819636, "loss": "3.829", "nll_loss": "2.179", "ppl": "4.53", "wps": 14952, "ups": "0.7", "wpb": 22231, "bsz": 829, "num_updates": 281657, "lr": 0.0001884255212714877, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 419779, "train_wall": 410841}
{"epoch": 14, "update": 13.139457047229453, "loss": "3.828", "nll_loss": "2.178", "ppl": "4.53", "wps": 14945, "ups": "0.7", "wpb": 22223, "bsz": 831, "num_updates": 282657, "lr": 0.00018809191468761304, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 421266, "train_wall": 412301}
{"epoch": 14, "update": 13.18594272963927, "loss": "3.827", "nll_loss": "2.177", "ppl": "4.52", "wps": 14942, "ups": "0.7", "wpb": 22202, "bsz": 829, "num_updates": 283657, "lr": 0.000187760073799457, "gnorm": "0.514", "clip": "0%", "oom": 0.0, "wall": 422749, "train_wall": 413756}
{"epoch": 14, "update": 13.232428412049089, "loss": "3.829", "nll_loss": "2.179", "ppl": "4.53", "wps": 14943, "ups": "0.7", "wpb": 22193, "bsz": 829, "num_updates": 284657, "lr": 0.00018742998308613824, "gnorm": "0.514", "clip": "0%", "oom": 0.0, "wall": 424231, "train_wall": 415211}
{"epoch": 14, "update": 13.278914094458907, "loss": "3.828", "nll_loss": "2.177", "ppl": "4.52", "wps": 14944, "ups": "0.7", "wpb": 22199, "bsz": 830, "num_updates": 285657, "lr": 0.00018710162721711137, "gnorm": "0.514", "clip": "0%", "oom": 0.0, "wall": 425718, "train_wall": 416670}
{"epoch": 14, "update": 13.325399776868725, "loss": "3.830", "nll_loss": "2.180", "ppl": "4.53", "wps": 14945, "ups": "0.7", "wpb": 22199, "bsz": 829, "num_updates": 286657, "lr": 0.0001867749910491765, "gnorm": "0.514", "clip": "0%", "oom": 0.0, "wall": 427203, "train_wall": 418127}
{"epoch": 14, "update": 13.371885459278543, "loss": "3.828", "nll_loss": "2.178", "ppl": "4.53", "wps": 14949, "ups": "0.7", "wpb": 22203, "bsz": 828, "num_updates": 287657, "lr": 0.00018645005962354566, "gnorm": "0.514", "clip": "0%", "oom": 0.0, "wall": 428687, "train_wall": 419584}
{"epoch": 14, "update": 13.41837114168836, "loss": "3.829", "nll_loss": "2.179", "ppl": "4.53", "wps": 14947, "ups": "0.7", "wpb": 22205, "bsz": 828, "num_updates": 288657, "lr": 0.00018612681816296565, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 430175, "train_wall": 421045}
{"epoch": 14, "update": 13.464856824098177, "loss": "3.831", "nll_loss": "2.181", "ppl": "4.54", "wps": 14945, "ups": "0.7", "wpb": 22201, "bsz": 827, "num_updates": 289657, "lr": 0.0001858052520688953, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 431661, "train_wall": 422504}
{"epoch": 14, "update": 13.511342506507996, "loss": "3.831", "nll_loss": "2.181", "ppl": "4.54", "wps": 14946, "ups": "0.7", "wpb": 22205, "bsz": 828, "num_updates": 290657, "lr": 0.00018548534691873616, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 433148, "train_wall": 423964}
{"epoch": 14, "update": 13.557828188917814, "loss": "3.830", "nll_loss": "2.180", "ppl": "4.53", "wps": 14947, "ups": "0.7", "wpb": 22208, "bsz": 829, "num_updates": 291657, "lr": 0.00018516708846311555, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 434634, "train_wall": 425424}
{"epoch": 14, "update": 13.604313871327632, "loss": "3.831", "nll_loss": "2.181", "ppl": "4.54", "wps": 14947, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 292657, "lr": 0.0001848504626232207, "gnorm": "0.515", "clip": "0%", "oom": 0.0, "wall": 436123, "train_wall": 426886}
{"epoch": 14, "update": 13.65079955373745, "loss": "3.832", "nll_loss": "2.183", "ppl": "4.54", "wps": 14942, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 293657, "lr": 0.00018453545548818295, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 437612, "train_wall": 428348}
{"epoch": 14, "update": 13.697285236147266, "loss": "3.832", "nll_loss": "2.182", "ppl": "4.54", "wps": 14939, "ups": "0.7", "wpb": 22208, "bsz": 827, "num_updates": 294657, "lr": 0.0001842220533125105, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 439103, "train_wall": 429812}
{"epoch": 14, "update": 13.743770918557084, "loss": "3.832", "nll_loss": "2.182", "ppl": "4.54", "wps": 14934, "ups": "0.7", "wpb": 22204, "bsz": 828, "num_updates": 295657, "lr": 0.00018391024251356947, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 440594, "train_wall": 431276}
{"epoch": 14, "update": 13.790256600966902, "loss": "3.833", "nll_loss": "2.183", "ppl": "4.54", "wps": 14933, "ups": "0.7", "wpb": 22204, "bsz": 828, "num_updates": 296657, "lr": 0.00018360000966911118, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 442084, "train_wall": 432739}
{"epoch": 14, "update": 13.83674228337672, "loss": "3.833", "nll_loss": "2.183", "ppl": "4.54", "wps": 14931, "ups": "0.7", "wpb": 22205, "bsz": 828, "num_updates": 297657, "lr": 0.00018329134151484546, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 443574, "train_wall": 434201}
{"epoch": 14, "update": 13.883227965786539, "loss": "3.833", "nll_loss": "2.184", "ppl": "4.54", "wps": 14933, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 298657, "lr": 0.00018298422494205868, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 445061, "train_wall": 435661}
{"epoch": 14, "update": 13.929713648196355, "loss": "3.833", "nll_loss": "2.184", "ppl": "4.54", "wps": 14936, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 299657, "lr": 0.00018267864699527542, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 446546, "train_wall": 437120}
{"epoch": 14, "update": 13.976199330606173, "loss": "3.833", "nll_loss": "2.184", "ppl": "4.54", "wps": 14939, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 300657, "lr": 0.00018237459486996295, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 448029, "train_wall": 438576}
{"epoch": 14, "loss": "3.833", "nll_loss": "2.184", "ppl": "4.54", "wps": 14940, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 301168, "lr": 0.0001822198091980001, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 448788, "train_wall": 439322}
{"epoch": 14, "valid_loss": 4.341601152926771, "valid_nll_loss": 2.687361107808607, "valid_ppl": "6.44", "num_updates": 301168, "best": 4.331361380671269}
{"epoch": 15, "update": 14.046485682409818, "loss": "3.836", "nll_loss": "2.187", "ppl": "4.55", "wps": 14977, "ups": "0.6", "wpb": 22229, "bsz": 836, "num_updates": 302169, "lr": 0.0001819177376068439, "gnorm": "0.517", "clip": "0%", "oom": 0.0, "wall": 450341, "train_wall": 440780}
{"epoch": 15, "update": 14.092971364819636, "loss": "3.828", "nll_loss": "2.178", "ppl": "4.53", "wps": 14984, "ups": "0.7", "wpb": 22221, "bsz": 831, "num_updates": 303169, "lr": 0.00018161746284425372, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 451823, "train_wall": 442234}
{"epoch": 15, "update": 14.139457047229453, "loss": "3.820", "nll_loss": "2.169", "ppl": "4.50", "wps": 14982, "ups": "0.7", "wpb": 22239, "bsz": 833, "num_updates": 304169, "lr": 0.000181318670097112, "gnorm": "0.515", "clip": "0%", "oom": 0.0, "wall": 453310, "train_wall": 443694}
{"epoch": 15, "update": 14.18594272963927, "loss": "3.820", "nll_loss": "2.169", "ppl": "4.50", "wps": 14987, "ups": "0.7", "wpb": 22254, "bsz": 832, "num_updates": 305169, "lr": 0.00018102134721450513, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 454796, "train_wall": 445152}
{"epoch": 15, "update": 14.232428412049089, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14982, "ups": "0.7", "wpb": 22250, "bsz": 831, "num_updates": 306169, "lr": 0.00018072548218453746, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 456282, "train_wall": 446610}
{"epoch": 15, "update": 14.278914094458907, "loss": "3.819", "nll_loss": "2.168", "ppl": "4.49", "wps": 14977, "ups": "0.7", "wpb": 22245, "bsz": 831, "num_updates": 307169, "lr": 0.00018043106313229325, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 457769, "train_wall": 448069}
{"epoch": 15, "update": 14.325399776868725, "loss": "3.820", "nll_loss": "2.169", "ppl": "4.50", "wps": 14975, "ups": "0.7", "wpb": 22238, "bsz": 829, "num_updates": 308169, "lr": 0.0001801380783178348, "gnorm": "0.516", "clip": "0%", "oom": 0.0, "wall": 459252, "train_wall": 449525}
{"epoch": 15, "update": 14.371885459278543, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14970, "ups": "0.7", "wpb": 22228, "bsz": 828, "num_updates": 309169, "lr": 0.0001798465161342361, "gnorm": "0.518", "clip": "0%", "oom": 0.0, "wall": 460736, "train_wall": 450981}
{"epoch": 15, "update": 14.41837114168836, "loss": "3.822", "nll_loss": "2.171", "ppl": "4.50", "wps": 14971, "ups": "0.7", "wpb": 22227, "bsz": 828, "num_updates": 310169, "lr": 0.00017955636510565162, "gnorm": "0.518", "clip": "0%", "oom": 0.0, "wall": 462219, "train_wall": 452438}
{"epoch": 15, "update": 14.464856824098177, "loss": "3.824", "nll_loss": "2.174", "ppl": "4.51", "wps": 14968, "ups": "0.7", "wpb": 22220, "bsz": 828, "num_updates": 311169, "lr": 0.0001792676138854191, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 463702, "train_wall": 453894}
{"epoch": 15, "update": 14.511342506507996, "loss": "3.825", "nll_loss": "2.175", "ppl": "4.51", "wps": 14963, "ups": "0.7", "wpb": 22211, "bsz": 827, "num_updates": 312169, "lr": 0.00017898025125419589, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 465185, "train_wall": 455350}
{"epoch": 15, "update": 14.557828188917814, "loss": "3.826", "nll_loss": "2.176", "ppl": "4.52", "wps": 14962, "ups": "0.7", "wpb": 22210, "bsz": 827, "num_updates": 313169, "lr": 0.00017869426611812793, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 466671, "train_wall": 456809}
{"epoch": 15, "update": 14.604313871327632, "loss": "3.826", "nll_loss": "2.176", "ppl": "4.52", "wps": 14960, "ups": "0.7", "wpb": 22208, "bsz": 827, "num_updates": 314169, "lr": 0.00017840964750705094, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 468156, "train_wall": 458267}
{"epoch": 15, "update": 14.65079955373745, "loss": "3.826", "nll_loss": "2.176", "ppl": "4.52", "wps": 14963, "ups": "0.7", "wpb": 22209, "bsz": 827, "num_updates": 315169, "lr": 0.00017812638457272295, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 469638, "train_wall": 459722}
{"epoch": 15, "update": 14.697285236147266, "loss": "3.827", "nll_loss": "2.177", "ppl": "4.52", "wps": 14963, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 316169, "lr": 0.00017784446658708752, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 471123, "train_wall": 461178}
{"epoch": 15, "update": 14.743770918557084, "loss": "3.827", "nll_loss": "2.176", "ppl": "4.52", "wps": 14962, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 317169, "lr": 0.00017756388294056736, "gnorm": "0.519", "clip": "0%", "oom": 0.0, "wall": 472610, "train_wall": 462638}
{"epoch": 15, "update": 14.790256600966902, "loss": "3.826", "nll_loss": "2.176", "ppl": "4.52", "wps": 14961, "ups": "0.7", "wpb": 22209, "bsz": 828, "num_updates": 318169, "lr": 0.0001772846231403872, "gnorm": "0.520", "clip": "0%", "oom": 0.0, "wall": 474093, "train_wall": 464094}
{"epoch": 15, "update": 14.83674228337672, "loss": "3.827", "nll_loss": "2.177", "ppl": "4.52", "wps": 14961, "ups": "0.7", "wpb": 22211, "bsz": 828, "num_updates": 319169, "lr": 0.00017700667680892572, "gnorm": "0.520", "clip": "0%", "oom": 0.0, "wall": 475580, "train_wall": 465553}
{"epoch": 15, "update": 14.883227965786539, "loss": "3.828", "nll_loss": "2.178", "ppl": "4.52", "wps": 14958, "ups": "0.7", "wpb": 22210, "bsz": 828, "num_updates": 320169, "lr": 0.0001767300336820958, "gnorm": "0.520", "clip": "0%", "oom": 0.0, "wall": 477069, "train_wall": 467015}
{"epoch": 15, "update": 14.929713648196355, "loss": "3.827", "nll_loss": "2.177", "ppl": "4.52", "wps": 14959, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 321169, "lr": 0.00017645468360775245, "gnorm": "0.520", "clip": "0%", "oom": 0.0, "wall": 478556, "train_wall": 468476}
{"epoch": 15, "update": 14.976199330606173, "loss": "3.827", "nll_loss": "2.178", "ppl": "4.52", "wps": 14959, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 322169, "lr": 0.0001761806165441281, "gnorm": "0.520", "clip": "0%", "oom": 0.0, "wall": 480042, "train_wall": 469935}
{"epoch": 15, "loss": "3.827", "nll_loss": "2.177", "ppl": "4.52", "wps": 14958, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 322680, "lr": 0.000176041060380465, "gnorm": "0.520", "clip": "0%", "oom": 0.0, "wall": 480802, "train_wall": 470681}
{"epoch": 15, "valid_loss": 4.330418344253392, "valid_nll_loss": 2.6672931989277155, "valid_ppl": "6.35", "num_updates": 322680, "best": 4.330418344253392}
{"epoch": 16, "update": 15.046485682409818, "loss": "3.817", "nll_loss": "2.166", "ppl": "4.49", "wps": 14974, "ups": "0.6", "wpb": 22229, "bsz": 834, "num_updates": 323681, "lr": 0.0001757686416215528, "gnorm": "0.525", "clip": "0%", "oom": 0.0, "wall": 482382, "train_wall": 472139}
{"epoch": 16, "update": 15.092971364819636, "loss": "3.819", "nll_loss": "2.168", "ppl": "4.49", "wps": 14964, "ups": "0.7", "wpb": 22222, "bsz": 829, "num_updates": 324681, "lr": 0.0001754977539041762, "gnorm": "0.524", "clip": "0%", "oom": 0.0, "wall": 483867, "train_wall": 473597}
{"epoch": 16, "update": 15.139457047229453, "loss": "3.817", "nll_loss": "2.165", "ppl": "4.49", "wps": 14950, "ups": "0.7", "wpb": 22225, "bsz": 829, "num_updates": 325681, "lr": 0.00017522811478320452, "gnorm": "0.523", "clip": "0%", "oom": 0.0, "wall": 485357, "train_wall": 475060}
{"epoch": 16, "update": 15.18594272963927, "loss": "3.818", "nll_loss": "2.166", "ppl": "4.49", "wps": 14953, "ups": "0.7", "wpb": 22227, "bsz": 827, "num_updates": 326681, "lr": 0.00017495971469614125, "gnorm": "0.524", "clip": "0%", "oom": 0.0, "wall": 486843, "train_wall": 476518}
{"epoch": 16, "update": 15.232428412049089, "loss": "3.823", "nll_loss": "2.172", "ppl": "4.51", "wps": 14941, "ups": "0.7", "wpb": 22224, "bsz": 830, "num_updates": 327681, "lr": 0.00017469254418270622, "gnorm": "0.525", "clip": "0%", "oom": 0.0, "wall": 488335, "train_wall": 477982}
{"epoch": 16, "update": 15.278914094458907, "loss": "3.824", "nll_loss": "2.174", "ppl": "4.51", "wps": 14946, "ups": "0.7", "wpb": 22220, "bsz": 827, "num_updates": 328681, "lr": 0.000174426593883435, "gnorm": "0.525", "clip": "0%", "oom": 0.0, "wall": 489818, "train_wall": 479438}
{"epoch": 16, "update": 15.325399776868725, "loss": "3.823", "nll_loss": "2.173", "ppl": "4.51", "wps": 14947, "ups": "0.7", "wpb": 22225, "bsz": 827, "num_updates": 329681, "lr": 0.00017416185453830195, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 491306, "train_wall": 480899}
{"epoch": 16, "update": 15.371885459278543, "loss": "3.822", "nll_loss": "2.171", "ppl": "4.50", "wps": 14951, "ups": "0.7", "wpb": 22226, "bsz": 826, "num_updates": 330681, "lr": 0.00017389831698536585, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 492790, "train_wall": 482357}
{"epoch": 16, "update": 15.41837114168836, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14950, "ups": "0.7", "wpb": 22224, "bsz": 827, "num_updates": 331681, "lr": 0.00017363597215943813, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 494276, "train_wall": 483816}
{"epoch": 16, "update": 15.464856824098177, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14948, "ups": "0.7", "wpb": 22223, "bsz": 827, "num_updates": 332681, "lr": 0.00017337481109077333, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 495764, "train_wall": 485277}
{"epoch": 16, "update": 15.511342506507996, "loss": "3.820", "nll_loss": "2.169", "ppl": "4.50", "wps": 14943, "ups": "0.7", "wpb": 22214, "bsz": 827, "num_updates": 333681, "lr": 0.00017311482490378076, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 497250, "train_wall": 486737}
{"epoch": 16, "update": 15.557828188917814, "loss": "3.820", "nll_loss": "2.169", "ppl": "4.50", "wps": 14942, "ups": "0.7", "wpb": 22215, "bsz": 827, "num_updates": 334681, "lr": 0.00017285600481575786, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 498738, "train_wall": 488197}
{"epoch": 16, "update": 15.604313871327632, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14940, "ups": "0.7", "wpb": 22213, "bsz": 827, "num_updates": 335681, "lr": 0.00017259834213564378, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 500225, "train_wall": 489657}
{"epoch": 16, "update": 15.65079955373745, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14941, "ups": "0.7", "wpb": 22214, "bsz": 827, "num_updates": 336681, "lr": 0.0001723418282627938, "gnorm": "0.526", "clip": "0%", "oom": 0.0, "wall": 501713, "train_wall": 491117}
{"epoch": 16, "update": 15.697285236147266, "loss": "3.821", "nll_loss": "2.171", "ppl": "4.50", "wps": 14938, "ups": "0.7", "wpb": 22210, "bsz": 827, "num_updates": 337681, "lr": 0.0001720864546857734, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 503199, "train_wall": 492577}
{"epoch": 16, "update": 15.743770918557084, "loss": "3.822", "nll_loss": "2.171", "ppl": "4.50", "wps": 14938, "ups": "0.7", "wpb": 22211, "bsz": 827, "num_updates": 338681, "lr": 0.00017183221298117219, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 504688, "train_wall": 494040}
{"epoch": 16, "update": 15.790256600966902, "loss": "3.821", "nll_loss": "2.171", "ppl": "4.50", "wps": 14936, "ups": "0.7", "wpb": 22210, "bsz": 827, "num_updates": 339681, "lr": 0.00017157909481243676, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 506177, "train_wall": 495502}
{"epoch": 16, "update": 15.83674228337672, "loss": "3.821", "nll_loss": "2.170", "ppl": "4.50", "wps": 14938, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 340681, "lr": 0.0001713270919287227, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 507663, "train_wall": 496962}
{"epoch": 16, "update": 15.883227965786539, "loss": "3.822", "nll_loss": "2.171", "ppl": "4.50", "wps": 14939, "ups": "0.7", "wpb": 22213, "bsz": 827, "num_updates": 341681, "lr": 0.00017107619616376496, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 509150, "train_wall": 498421}
{"epoch": 16, "update": 15.929713648196355, "loss": "3.822", "nll_loss": "2.171", "ppl": "4.50", "wps": 14938, "ups": "0.7", "wpb": 22212, "bsz": 827, "num_updates": 342681, "lr": 0.0001708263994347663, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 510636, "train_wall": 499880}
{"epoch": 16, "update": 15.976199330606173, "loss": "3.822", "nll_loss": "2.172", "ppl": "4.51", "wps": 14938, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 343681, "lr": 0.00017057769374130354, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 512123, "train_wall": 501341}
{"epoch": 16, "loss": "3.822", "nll_loss": "2.171", "ppl": "4.50", "wps": 14937, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 344192, "lr": 0.00017045102377555882, "gnorm": "0.527", "clip": "0%", "oom": 0.0, "wall": 512886, "train_wall": 502089}
{"epoch": 16, "valid_loss": 4.325823872780149, "valid_nll_loss": 2.666926839073895, "valid_ppl": "6.35", "num_updates": 344192, "best": 4.325823872780149}
{"epoch": 17, "update": 16.046485682409816, "loss": "3.814", "nll_loss": "2.163", "ppl": "4.48", "wps": 14969, "ups": "0.6", "wpb": 22239, "bsz": 837, "num_updates": 345193, "lr": 0.00017020370510504462, "gnorm": "0.531", "clip": "0%", "oom": 0.0, "wall": 514469, "train_wall": 503548}
{"epoch": 17, "update": 16.092971364819636, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14956, "ups": "0.7", "wpb": 22225, "bsz": 836, "num_updates": 346193, "lr": 0.00016995770533910522, "gnorm": "0.546", "clip": "0%", "oom": 0.0, "wall": 515956, "train_wall": 505007}
{"epoch": 17, "update": 16.139457047229453, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14942, "ups": "0.7", "wpb": 22207, "bsz": 832, "num_updates": 347193, "lr": 0.00016971276914836095, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 517442, "train_wall": 506466}
{"epoch": 17, "update": 16.185942729639272, "loss": "3.817", "nll_loss": "2.165", "ppl": "4.49", "wps": 14923, "ups": "0.7", "wpb": 22193, "bsz": 833, "num_updates": 348193, "lr": 0.0001694688888909245, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 518932, "train_wall": 507928}
{"epoch": 17, "update": 16.23242841204909, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14925, "ups": "0.7", "wpb": 22197, "bsz": 832, "num_updates": 349193, "lr": 0.000169226057001559, "gnorm": "0.538", "clip": "0%", "oom": 0.0, "wall": 520420, "train_wall": 509389}
{"epoch": 17, "update": 16.278914094458905, "loss": "3.814", "nll_loss": "2.163", "ppl": "4.48", "wps": 14925, "ups": "0.7", "wpb": 22196, "bsz": 830, "num_updates": 350193, "lr": 0.00016898426599069245, "gnorm": "0.536", "clip": "0%", "oom": 0.0, "wall": 521907, "train_wall": 510849}
{"epoch": 17, "update": 16.325399776868725, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14925, "ups": "0.7", "wpb": 22198, "bsz": 829, "num_updates": 351193, "lr": 0.00016874350844344737, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 523395, "train_wall": 512311}
{"epoch": 17, "update": 16.37188545927854, "loss": "3.815", "nll_loss": "2.164", "ppl": "4.48", "wps": 14923, "ups": "0.7", "wpb": 22203, "bsz": 830, "num_updates": 352193, "lr": 0.00016850377701868583, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 524886, "train_wall": 513775}
{"epoch": 17, "update": 16.41837114168836, "loss": "3.816", "nll_loss": "2.165", "ppl": "4.48", "wps": 14928, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 353193, "lr": 0.00016826506444806927, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 526372, "train_wall": 515234}
{"epoch": 17, "update": 16.464856824098177, "loss": "3.817", "nll_loss": "2.166", "ppl": "4.49", "wps": 14927, "ups": "0.7", "wpb": 22207, "bsz": 828, "num_updates": 354193, "lr": 0.00016802736353513286, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 527860, "train_wall": 516695}
{"epoch": 17, "update": 16.511342506507994, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14928, "ups": "0.7", "wpb": 22208, "bsz": 829, "num_updates": 355193, "lr": 0.00016779066715437438, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 529348, "train_wall": 518155}
{"epoch": 17, "update": 16.557828188917814, "loss": "3.819", "nll_loss": "2.168", "ppl": "4.49", "wps": 14934, "ups": "0.7", "wpb": 22211, "bsz": 829, "num_updates": 356193, "lr": 0.00016755496825035705, "gnorm": "0.534", "clip": "0%", "oom": 0.0, "wall": 530832, "train_wall": 519613}
{"epoch": 17, "update": 16.60431387132763, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14936, "ups": "0.7", "wpb": 22214, "bsz": 829, "num_updates": 357193, "lr": 0.0001673202598368261, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 532318, "train_wall": 521072}
{"epoch": 17, "update": 16.65079955373745, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14934, "ups": "0.7", "wpb": 22210, "bsz": 829, "num_updates": 358193, "lr": 0.0001670865349958392, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 533804, "train_wall": 522531}
{"epoch": 17, "update": 16.697285236147266, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14938, "ups": "0.7", "wpb": 22214, "bsz": 829, "num_updates": 359193, "lr": 0.0001668537868769099, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 535289, "train_wall": 523989}
{"epoch": 17, "update": 16.743770918557086, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14940, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 360193, "lr": 0.0001666220086961644, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 536776, "train_wall": 525448}
{"epoch": 17, "update": 16.790256600966902, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14944, "ups": "0.7", "wpb": 22219, "bsz": 829, "num_updates": 361193, "lr": 0.00016639119373551092, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 538260, "train_wall": 526906}
{"epoch": 17, "update": 16.83674228337672, "loss": "3.818", "nll_loss": "2.167", "ppl": "4.49", "wps": 14943, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 362193, "lr": 0.00016616133534182198, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 539745, "train_wall": 528364}
{"epoch": 17, "update": 16.88322796578654, "loss": "3.817", "nll_loss": "2.166", "ppl": "4.49", "wps": 14944, "ups": "0.7", "wpb": 22216, "bsz": 829, "num_updates": 363193, "lr": 0.00016593242692612887, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 541230, "train_wall": 529822}
{"epoch": 17, "update": 16.929713648196355, "loss": "3.817", "nll_loss": "2.166", "ppl": "4.49", "wps": 14945, "ups": "0.7", "wpb": 22215, "bsz": 828, "num_updates": 364193, "lr": 0.00016570446196282842, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 542713, "train_wall": 531279}
{"epoch": 17, "update": 16.976199330606175, "loss": "3.817", "nll_loss": "2.166", "ppl": "4.49", "wps": 14945, "ups": "0.7", "wpb": 22214, "bsz": 828, "num_updates": 365193, "lr": 0.00016547743398890149, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 544198, "train_wall": 532736}
{"epoch": 17, "loss": "3.817", "nll_loss": "2.166", "ppl": "4.49", "wps": 14944, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 365704, "lr": 0.00016536178235795201, "gnorm": "0.535", "clip": "0%", "oom": 0.0, "wall": 544957, "train_wall": 533482}
{"epoch": 17, "valid_loss": 4.322712581347811, "valid_nll_loss": 2.6624853406599214, "valid_ppl": "6.33", "num_updates": 365704, "best": 4.322712581347811}
{"epoch": 18, "update": 17.046485682409816, "loss": "3.812", "nll_loss": "2.160", "ppl": "4.47", "wps": 14914, "ups": "0.6", "wpb": 22208, "bsz": 828, "num_updates": 366705, "lr": 0.00016513593288897132, "gnorm": "0.537", "clip": "0%", "oom": 0.0, "wall": 546528, "train_wall": 534945}
{"epoch": 18, "update": 17.092971364819636, "loss": "3.806", "nll_loss": "2.153", "ppl": "4.45", "wps": 14947, "ups": "0.7", "wpb": 22204, "bsz": 827, "num_updates": 367705, "lr": 0.00016491123053170338, "gnorm": "0.537", "clip": "0%", "oom": 0.0, "wall": 548010, "train_wall": 536397}
{"epoch": 18, "update": 17.139457047229453, "loss": "3.808", "nll_loss": "2.156", "ppl": "4.46", "wps": 14950, "ups": "0.7", "wpb": 22201, "bsz": 824, "num_updates": 368705, "lr": 0.00016468744295044624, "gnorm": "0.537", "clip": "0%", "oom": 0.0, "wall": 549494, "train_wall": 537855}
{"epoch": 18, "update": 17.185942729639272, "loss": "3.812", "nll_loss": "2.160", "ppl": "4.47", "wps": 14945, "ups": "0.7", "wpb": 22198, "bsz": 827, "num_updates": 369705, "lr": 0.00016446456395515643, "gnorm": "0.538", "clip": "0%", "oom": 0.0, "wall": 550980, "train_wall": 539314}
{"epoch": 18, "update": 17.23242841204909, "loss": "3.812", "nll_loss": "2.160", "ppl": "4.47", "wps": 14944, "ups": "0.7", "wpb": 22198, "bsz": 828, "num_updates": 370705, "lr": 0.00016424258741427327, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 552466, "train_wall": 540773}
{"epoch": 18, "update": 17.278914094458905, "loss": "3.814", "nll_loss": "2.162", "ppl": "4.47", "wps": 14940, "ups": "0.7", "wpb": 22201, "bsz": 829, "num_updates": 371705, "lr": 0.00016402150725401015, "gnorm": "0.538", "clip": "0%", "oom": 0.0, "wall": 553955, "train_wall": 542235}
{"epoch": 18, "update": 17.325399776868725, "loss": "3.814", "nll_loss": "2.163", "ppl": "4.48", "wps": 14939, "ups": "0.7", "wpb": 22199, "bsz": 830, "num_updates": 372705, "lr": 0.00016380131745765648, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 555440, "train_wall": 543693}
{"epoch": 18, "update": 17.37188545927854, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14944, "ups": "0.7", "wpb": 22204, "bsz": 829, "num_updates": 373705, "lr": 0.00016358201206489016, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 556925, "train_wall": 545151}
{"epoch": 18, "update": 17.41837114168836, "loss": "3.815", "nll_loss": "2.164", "ppl": "4.48", "wps": 14940, "ups": "0.7", "wpb": 22203, "bsz": 831, "num_updates": 374705, "lr": 0.00016336358517109976, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 558414, "train_wall": 546612}
{"epoch": 18, "update": 17.464856824098177, "loss": "3.815", "nll_loss": "2.164", "ppl": "4.48", "wps": 14942, "ups": "0.7", "wpb": 22206, "bsz": 830, "num_updates": 375705, "lr": 0.00016314603092671683, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 559900, "train_wall": 548070}
{"epoch": 18, "update": 17.511342506507994, "loss": "3.815", "nll_loss": "2.164", "ppl": "4.48", "wps": 14942, "ups": "0.7", "wpb": 22205, "bsz": 829, "num_updates": 376705, "lr": 0.00016292934353655795, "gnorm": "0.539", "clip": "0%", "oom": 0.0, "wall": 561385, "train_wall": 549530}
{"epoch": 18, "update": 17.557828188917814, "loss": "3.816", "nll_loss": "2.165", "ppl": "4.48", "wps": 14946, "ups": "0.7", "wpb": 22206, "bsz": 830, "num_updates": 377705, "lr": 0.00016271351725917634, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 562867, "train_wall": 550984}
{"epoch": 18, "update": 17.60431387132763, "loss": "3.816", "nll_loss": "2.165", "ppl": "4.48", "wps": 14944, "ups": "0.7", "wpb": 22203, "bsz": 830, "num_updates": 378705, "lr": 0.00016249854640622302, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 564353, "train_wall": 552442}
{"epoch": 18, "update": 17.65079955373745, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14946, "ups": "0.7", "wpb": 22208, "bsz": 830, "num_updates": 379705, "lr": 0.00016228442534181684, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 565841, "train_wall": 553904}
{"epoch": 18, "update": 17.697285236147266, "loss": "3.815", "nll_loss": "2.164", "ppl": "4.48", "wps": 14945, "ups": "0.7", "wpb": 22209, "bsz": 830, "num_updates": 380705, "lr": 0.00016207114848192425, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 567330, "train_wall": 555365}
{"epoch": 18, "update": 17.743770918557086, "loss": "3.816", "nll_loss": "2.164", "ppl": "4.48", "wps": 14942, "ups": "0.7", "wpb": 22209, "bsz": 830, "num_updates": 381705, "lr": 0.00016185871029374734, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 568820, "train_wall": 556829}
{"epoch": 18, "update": 17.790256600966902, "loss": "3.815", "nll_loss": "2.164", "ppl": "4.48", "wps": 14941, "ups": "0.7", "wpb": 22210, "bsz": 829, "num_updates": 382705, "lr": 0.0001616471052951212, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 570309, "train_wall": 558291}
{"epoch": 18, "update": 17.83674228337672, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14942, "ups": "0.7", "wpb": 22212, "bsz": 828, "num_updates": 383705, "lr": 0.00016143632805391969, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 571795, "train_wall": 559751}
{"epoch": 18, "update": 17.88322796578654, "loss": "3.816", "nll_loss": "2.164", "ppl": "4.48", "wps": 14943, "ups": "0.7", "wpb": 22213, "bsz": 829, "num_updates": 384705, "lr": 0.00016122637318746952, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 573283, "train_wall": 561211}
{"epoch": 18, "update": 17.929713648196355, "loss": "3.815", "nll_loss": "2.163", "ppl": "4.48", "wps": 14943, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 385705, "lr": 0.00016101723536197335, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 574768, "train_wall": 562668}
{"epoch": 18, "update": 17.976199330606175, "loss": "3.814", "nll_loss": "2.162", "ppl": "4.48", "wps": 14943, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 386705, "lr": 0.00016080890929194027, "gnorm": "0.540", "clip": "0%", "oom": 0.0, "wall": 576255, "train_wall": 564129}
{"epoch": 18, "loss": "3.813", "nll_loss": "2.162", "ppl": "4.47", "wps": 14943, "ups": "0.7", "wpb": 22213, "bsz": 828, "num_updates": 387216, "lr": 0.0001607027663625161, "gnorm": "0.541", "clip": "0%", "oom": 0.0, "wall": 577015, "train_wall": 564875}
{"epoch": 18, "valid_loss": 4.304374436580863, "valid_nll_loss": 2.6411894352341836, "valid_ppl": "6.24", "num_updates": 387216, "best": 4.304374436580863}
{"epoch": 19, "update": 18.046485682409816, "loss": "3.800", "nll_loss": "2.146", "ppl": "4.43", "wps": 14939, "ups": "0.6", "wpb": 22205, "bsz": 824, "num_updates": 388217, "lr": 0.00016049545022511324, "gnorm": "0.545", "clip": "0%", "oom": 0.0, "wall": 578582, "train_wall": 566336}
{"epoch": 19, "update": 18.092971364819636, "loss": "3.805", "nll_loss": "2.152", "ppl": "4.45", "wps": 14935, "ups": "0.7", "wpb": 22199, "bsz": 828, "num_updates": 389217, "lr": 0.0001602891402854932, "gnorm": "0.545", "clip": "0%", "oom": 0.0, "wall": 580068, "train_wall": 567795}
{"epoch": 19, "update": 18.139457047229453, "loss": "3.803", "nll_loss": "2.150", "ppl": "4.44", "wps": 14940, "ups": "0.7", "wpb": 22207, "bsz": 827, "num_updates": 390217, "lr": 0.00016008362391333353, "gnorm": "0.543", "clip": "0%", "oom": 0.0, "wall": 581555, "train_wall": 569255}
{"epoch": 19, "update": 18.185942729639272, "loss": "3.803", "nll_loss": "2.150", "ppl": "4.44", "wps": 14943, "ups": "0.7", "wpb": 22195, "bsz": 826, "num_updates": 391217, "lr": 0.0001598788960342403, "gnorm": "0.543", "clip": "0%", "oom": 0.0, "wall": 583037, "train_wall": 570710}
{"epoch": 19, "update": 18.23242841204909, "loss": "3.803", "nll_loss": "2.150", "ppl": "4.44", "wps": 14945, "ups": "0.7", "wpb": 22200, "bsz": 828, "num_updates": 392217, "lr": 0.0001596749516191307, "gnorm": "0.544", "clip": "0%", "oom": 0.0, "wall": 584523, "train_wall": 572170}
{"epoch": 19, "update": 18.278914094458905, "loss": "3.803", "nll_loss": "2.150", "ppl": "4.44", "wps": 14955, "ups": "0.7", "wpb": 22209, "bsz": 826, "num_updates": 393217, "lr": 0.00015947178568371384, "gnorm": "0.544", "clip": "0%", "oom": 0.0, "wall": 586006, "train_wall": 573626}
{"epoch": 19, "update": 18.325399776868725, "loss": "3.804", "nll_loss": "2.151", "ppl": "4.44", "wps": 14960, "ups": "0.7", "wpb": 22214, "bsz": 827, "num_updates": 394217, "lr": 0.0001592693932879793, "gnorm": "0.544", "clip": "0%", "oom": 0.0, "wall": 587490, "train_wall": 575082}
{"epoch": 19, "update": 18.37188545927854, "loss": "3.806", "nll_loss": "2.153", "ppl": "4.45", "wps": 14957, "ups": "0.7", "wpb": 22211, "bsz": 827, "num_updates": 395217, "lr": 0.00015906776953569275, "gnorm": "0.544", "clip": "0%", "oom": 0.0, "wall": 588975, "train_wall": 576541}
{"epoch": 19, "update": 18.41837114168836, "loss": "3.807", "nll_loss": "2.155", "ppl": "4.45", "wps": 14962, "ups": "0.7", "wpb": 22219, "bsz": 828, "num_updates": 396217, "lr": 0.00015886690957389818, "gnorm": "0.545", "clip": "0%", "oom": 0.0, "wall": 590460, "train_wall": 577998}
{"epoch": 19, "update": 18.464856824098177, "loss": "3.808", "nll_loss": "2.156", "ppl": "4.46", "wps": 14964, "ups": "0.7", "wpb": 22221, "bsz": 828, "num_updates": 397217, "lr": 0.0001586668085924275, "gnorm": "0.545", "clip": "0%", "oom": 0.0, "wall": 591945, "train_wall": 579456}
{"epoch": 19, "update": 18.511342506507994, "loss": "3.807", "nll_loss": "2.155", "ppl": "4.45", "wps": 14964, "ups": "0.7", "wpb": 22220, "bsz": 828, "num_updates": 398217, "lr": 0.00015846746182341658, "gnorm": "0.545", "clip": "0%", "oom": 0.0, "wall": 593429, "train_wall": 580913}
{"epoch": 19, "update": 18.557828188917814, "loss": "3.807", "nll_loss": "2.155", "ppl": "4.45", "wps": 14966, "ups": "0.7", "wpb": 22221, "bsz": 827, "num_updates": 399217, "lr": 0.0001582688645408283, "gnorm": "0.545", "clip": "0%", "oom": 0.0, "wall": 594913, "train_wall": 582371}
{"epoch": 19, "loss": "3.807", "nll_loss": "2.155", "ppl": "4.45", "wps": 14969, "ups": "0.7", "wpb": 22224, "bsz": 826, "num_updates": 400000, "lr": 0.00015811388300841897, "gnorm": "0.546", "clip": "0%", "oom": 0.0, "wall": 596074, "train_wall": 583512}
{"epoch": 19, "valid_loss": 4.305666644629651, "valid_nll_loss": 2.64319203317321, "valid_ppl": "6.25", "num_updates": 400000, "best": 4.304374436580863}
| done training in 595950.5 seconds