Amiran13 commited on
Commit
aad3659
1 Parent(s): 2fb0884

Delete checkpoint-4800

Browse files
checkpoint-4800/config.json DELETED
@@ -1,117 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
- "activation_dropout": 0.0,
4
- "adapter_attn_dim": null,
5
- "adapter_kernel_size": 3,
6
- "adapter_stride": 2,
7
- "add_adapter": false,
8
- "apply_spec_augment": true,
9
- "architectures": [
10
- "Wav2Vec2ForCTC"
11
- ],
12
- "attention_dropout": 0.1,
13
- "bos_token_id": 1,
14
- "classifier_proj_size": 256,
15
- "codevector_dim": 768,
16
- "contrastive_logits_temperature": 0.1,
17
- "conv_bias": true,
18
- "conv_dim": [
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512,
25
- 512
26
- ],
27
- "conv_kernel": [
28
- 10,
29
- 3,
30
- 3,
31
- 3,
32
- 3,
33
- 2,
34
- 2
35
- ],
36
- "conv_stride": [
37
- 5,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2,
43
- 2
44
- ],
45
- "ctc_loss_reduction": "mean",
46
- "ctc_zero_infinity": false,
47
- "diversity_loss_weight": 0.1,
48
- "do_stable_layer_norm": true,
49
- "eos_token_id": 2,
50
- "feat_extract_activation": "gelu",
51
- "feat_extract_dropout": 0.0,
52
- "feat_extract_norm": "layer",
53
- "feat_proj_dropout": 0.0,
54
- "feat_quantizer_dropout": 0.0,
55
- "final_dropout": 0.0,
56
- "gradient_checkpointing": false,
57
- "hidden_act": "gelu",
58
- "hidden_dropout": 0.1,
59
- "hidden_size": 1024,
60
- "initializer_range": 0.02,
61
- "intermediate_size": 4096,
62
- "layer_norm_eps": 1e-05,
63
- "layerdrop": 0.1,
64
- "mask_channel_length": 10,
65
- "mask_channel_min_space": 1,
66
- "mask_channel_other": 0.0,
67
- "mask_channel_prob": 0.0,
68
- "mask_channel_selection": "static",
69
- "mask_feature_length": 10,
70
- "mask_feature_min_masks": 0,
71
- "mask_feature_prob": 0.0,
72
- "mask_time_length": 10,
73
- "mask_time_min_masks": 2,
74
- "mask_time_min_space": 1,
75
- "mask_time_other": 0.0,
76
- "mask_time_prob": 0.05,
77
- "mask_time_selection": "static",
78
- "model_type": "wav2vec2",
79
- "num_adapter_layers": 3,
80
- "num_attention_heads": 16,
81
- "num_codevector_groups": 2,
82
- "num_codevectors_per_group": 320,
83
- "num_conv_pos_embedding_groups": 16,
84
- "num_conv_pos_embeddings": 128,
85
- "num_feat_extract_layers": 7,
86
- "num_hidden_layers": 24,
87
- "num_negatives": 100,
88
- "output_hidden_size": 1024,
89
- "pad_token_id": 38,
90
- "proj_codevector_dim": 768,
91
- "tdnn_dilation": [
92
- 1,
93
- 2,
94
- 3,
95
- 1,
96
- 1
97
- ],
98
- "tdnn_dim": [
99
- 512,
100
- 512,
101
- 512,
102
- 512,
103
- 1500
104
- ],
105
- "tdnn_kernel": [
106
- 5,
107
- 3,
108
- 3,
109
- 1,
110
- 1
111
- ],
112
- "torch_dtype": "float32",
113
- "transformers_version": "4.35.2",
114
- "use_weighted_layer_sum": false,
115
- "vocab_size": 41,
116
- "xvector_output_dim": 512
117
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-4800/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:97a0acea7eceff19738cd25ae9446136a990c99d8200948b4e10851388bba6a4
3
- size 1261975580
 
 
 
 
checkpoint-4800/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:331b689c0ab3a582ab10f4f859a1c54b22404c6e08d16a8247d74f5a74270d61
3
- size 2490495926
 
 
 
 
checkpoint-4800/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-4800/rng_state.pth DELETED
Binary file (14.2 kB)
 
checkpoint-4800/scheduler.pt DELETED
Binary file (1.06 kB)
 
checkpoint-4800/trainer_state.json DELETED
@@ -1,3331 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 7.816110659072416,
5
- "eval_steps": 100,
6
- "global_step": 4800,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.02,
13
- "learning_rate": 1.4999999999999999e-05,
14
- "loss": 11.8163,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.03,
19
- "learning_rate": 2.8499999999999998e-05,
20
- "loss": 11.8929,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.05,
25
- "learning_rate": 4.3499999999999993e-05,
26
- "loss": 12.0064,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.07,
31
- "learning_rate": 5.6999999999999996e-05,
32
- "loss": 10.5132,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.08,
37
- "learning_rate": 7.199999999999999e-05,
38
- "loss": 7.9699,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.1,
43
- "learning_rate": 8.699999999999999e-05,
44
- "loss": 4.7703,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.11,
49
- "learning_rate": 0.000102,
50
- "loss": 3.7982,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.13,
55
- "learning_rate": 0.000117,
56
- "loss": 3.436,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.15,
61
- "learning_rate": 0.00013199999999999998,
62
- "loss": 3.2573,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.16,
67
- "learning_rate": 0.000147,
68
- "loss": 3.1412,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.16,
73
- "eval_loss": 3.0855324268341064,
74
- "eval_runtime": 1038.5545,
75
- "eval_samples_per_second": 11.178,
76
- "eval_steps_per_second": 0.699,
77
- "eval_wer": 1.0,
78
- "step": 100
79
- },
80
- {
81
- "epoch": 0.18,
82
- "learning_rate": 0.000162,
83
- "loss": 3.0577,
84
- "step": 110
85
- },
86
- {
87
- "epoch": 0.2,
88
- "learning_rate": 0.00017699999999999997,
89
- "loss": 3.0443,
90
- "step": 120
91
- },
92
- {
93
- "epoch": 0.21,
94
- "learning_rate": 0.00019199999999999998,
95
- "loss": 3.0597,
96
- "step": 130
97
- },
98
- {
99
- "epoch": 0.23,
100
- "learning_rate": 0.00020699999999999996,
101
- "loss": 3.061,
102
- "step": 140
103
- },
104
- {
105
- "epoch": 0.24,
106
- "learning_rate": 0.00022199999999999998,
107
- "loss": 3.0717,
108
- "step": 150
109
- },
110
- {
111
- "epoch": 0.26,
112
- "learning_rate": 0.000237,
113
- "loss": 3.0299,
114
- "step": 160
115
- },
116
- {
117
- "epoch": 0.28,
118
- "learning_rate": 0.00025199999999999995,
119
- "loss": 3.0295,
120
- "step": 170
121
- },
122
- {
123
- "epoch": 0.29,
124
- "learning_rate": 0.000267,
125
- "loss": 3.0423,
126
- "step": 180
127
- },
128
- {
129
- "epoch": 0.31,
130
- "learning_rate": 0.00028199999999999997,
131
- "loss": 3.0437,
132
- "step": 190
133
- },
134
- {
135
- "epoch": 0.33,
136
- "learning_rate": 0.00029699999999999996,
137
- "loss": 3.0569,
138
- "step": 200
139
- },
140
- {
141
- "epoch": 0.33,
142
- "eval_loss": 3.036912679672241,
143
- "eval_runtime": 1030.0275,
144
- "eval_samples_per_second": 11.271,
145
- "eval_steps_per_second": 0.705,
146
- "eval_wer": 1.0,
147
- "step": 200
148
- },
149
- {
150
- "epoch": 0.34,
151
- "learning_rate": 0.00029973362930077685,
152
- "loss": 3.0243,
153
- "step": 210
154
- },
155
- {
156
- "epoch": 0.36,
157
- "learning_rate": 0.000299400665926748,
158
- "loss": 3.0236,
159
- "step": 220
160
- },
161
- {
162
- "epoch": 0.37,
163
- "learning_rate": 0.0002990677025527192,
164
- "loss": 3.0316,
165
- "step": 230
166
- },
167
- {
168
- "epoch": 0.39,
169
- "learning_rate": 0.0002987347391786903,
170
- "loss": 3.0363,
171
- "step": 240
172
- },
173
- {
174
- "epoch": 0.41,
175
- "learning_rate": 0.00029840177580466146,
176
- "loss": 3.0582,
177
- "step": 250
178
- },
179
- {
180
- "epoch": 0.42,
181
- "learning_rate": 0.0002980688124306326,
182
- "loss": 3.0144,
183
- "step": 260
184
- },
185
- {
186
- "epoch": 0.44,
187
- "learning_rate": 0.00029773584905660376,
188
- "loss": 3.0126,
189
- "step": 270
190
- },
191
- {
192
- "epoch": 0.46,
193
- "learning_rate": 0.0002974028856825749,
194
- "loss": 3.0035,
195
- "step": 280
196
- },
197
- {
198
- "epoch": 0.47,
199
- "learning_rate": 0.000297069922308546,
200
- "loss": 2.9913,
201
- "step": 290
202
- },
203
- {
204
- "epoch": 0.49,
205
- "learning_rate": 0.00029673695893451716,
206
- "loss": 2.9625,
207
- "step": 300
208
- },
209
- {
210
- "epoch": 0.49,
211
- "eval_loss": 2.977771520614624,
212
- "eval_runtime": 1042.3792,
213
- "eval_samples_per_second": 11.137,
214
- "eval_steps_per_second": 0.696,
215
- "eval_wer": 1.0,
216
- "step": 300
217
- },
218
- {
219
- "epoch": 0.5,
220
- "learning_rate": 0.0002964039955604883,
221
- "loss": 2.9686,
222
- "step": 310
223
- },
224
- {
225
- "epoch": 0.52,
226
- "learning_rate": 0.00029607103218645946,
227
- "loss": 2.8773,
228
- "step": 320
229
- },
230
- {
231
- "epoch": 0.54,
232
- "learning_rate": 0.0002957380688124306,
233
- "loss": 2.726,
234
- "step": 330
235
- },
236
- {
237
- "epoch": 0.55,
238
- "learning_rate": 0.00029540510543840177,
239
- "loss": 2.3671,
240
- "step": 340
241
- },
242
- {
243
- "epoch": 0.57,
244
- "learning_rate": 0.0002950721420643729,
245
- "loss": 1.9479,
246
- "step": 350
247
- },
248
- {
249
- "epoch": 0.59,
250
- "learning_rate": 0.000294739178690344,
251
- "loss": 1.5549,
252
- "step": 360
253
- },
254
- {
255
- "epoch": 0.6,
256
- "learning_rate": 0.00029440621531631517,
257
- "loss": 1.1765,
258
- "step": 370
259
- },
260
- {
261
- "epoch": 0.62,
262
- "learning_rate": 0.0002940732519422863,
263
- "loss": 0.9155,
264
- "step": 380
265
- },
266
- {
267
- "epoch": 0.63,
268
- "learning_rate": 0.00029374028856825747,
269
- "loss": 0.7823,
270
- "step": 390
271
- },
272
- {
273
- "epoch": 0.65,
274
- "learning_rate": 0.0002934073251942286,
275
- "loss": 0.7715,
276
- "step": 400
277
- },
278
- {
279
- "epoch": 0.65,
280
- "eval_loss": 0.5112669467926025,
281
- "eval_runtime": 1048.8544,
282
- "eval_samples_per_second": 11.068,
283
- "eval_steps_per_second": 0.692,
284
- "eval_wer": 0.718480591159969,
285
- "step": 400
286
- },
287
- {
288
- "epoch": 0.67,
289
- "learning_rate": 0.00029307436182019977,
290
- "loss": 0.6635,
291
- "step": 410
292
- },
293
- {
294
- "epoch": 0.68,
295
- "learning_rate": 0.0002927413984461709,
296
- "loss": 0.6032,
297
- "step": 420
298
- },
299
- {
300
- "epoch": 0.7,
301
- "learning_rate": 0.000292408435072142,
302
- "loss": 0.5401,
303
- "step": 430
304
- },
305
- {
306
- "epoch": 0.72,
307
- "learning_rate": 0.00029207547169811317,
308
- "loss": 0.5276,
309
- "step": 440
310
- },
311
- {
312
- "epoch": 0.73,
313
- "learning_rate": 0.0002917425083240843,
314
- "loss": 0.5713,
315
- "step": 450
316
- },
317
- {
318
- "epoch": 0.75,
319
- "learning_rate": 0.0002914095449500555,
320
- "loss": 0.46,
321
- "step": 460
322
- },
323
- {
324
- "epoch": 0.76,
325
- "learning_rate": 0.0002910765815760266,
326
- "loss": 0.4508,
327
- "step": 470
328
- },
329
- {
330
- "epoch": 0.78,
331
- "learning_rate": 0.0002907436182019977,
332
- "loss": 0.447,
333
- "step": 480
334
- },
335
- {
336
- "epoch": 0.8,
337
- "learning_rate": 0.00029041065482796893,
338
- "loss": 0.4454,
339
- "step": 490
340
- },
341
- {
342
- "epoch": 0.81,
343
- "learning_rate": 0.0002900776914539401,
344
- "loss": 0.4725,
345
- "step": 500
346
- },
347
- {
348
- "epoch": 0.81,
349
- "eval_loss": 0.3072386682033539,
350
- "eval_runtime": 1051.5656,
351
- "eval_samples_per_second": 11.04,
352
- "eval_steps_per_second": 0.69,
353
- "eval_wer": 0.5137640833827487,
354
- "step": 500
355
- },
356
- {
357
- "epoch": 0.83,
358
- "learning_rate": 0.0002897447280799112,
359
- "loss": 0.3822,
360
- "step": 510
361
- },
362
- {
363
- "epoch": 0.85,
364
- "learning_rate": 0.00028941176470588233,
365
- "loss": 0.3807,
366
- "step": 520
367
- },
368
- {
369
- "epoch": 0.86,
370
- "learning_rate": 0.0002890788013318535,
371
- "loss": 0.3827,
372
- "step": 530
373
- },
374
- {
375
- "epoch": 0.88,
376
- "learning_rate": 0.00028874583795782463,
377
- "loss": 0.3945,
378
- "step": 540
379
- },
380
- {
381
- "epoch": 0.9,
382
- "learning_rate": 0.00028841287458379573,
383
- "loss": 0.4519,
384
- "step": 550
385
- },
386
- {
387
- "epoch": 0.91,
388
- "learning_rate": 0.0002880799112097669,
389
- "loss": 0.3534,
390
- "step": 560
391
- },
392
- {
393
- "epoch": 0.93,
394
- "learning_rate": 0.00028774694783573803,
395
- "loss": 0.3731,
396
- "step": 570
397
- },
398
- {
399
- "epoch": 0.94,
400
- "learning_rate": 0.0002874139844617092,
401
- "loss": 0.3433,
402
- "step": 580
403
- },
404
- {
405
- "epoch": 0.96,
406
- "learning_rate": 0.00028708102108768033,
407
- "loss": 0.3547,
408
- "step": 590
409
- },
410
- {
411
- "epoch": 0.98,
412
- "learning_rate": 0.0002867480577136515,
413
- "loss": 0.4103,
414
- "step": 600
415
- },
416
- {
417
- "epoch": 0.98,
418
- "eval_loss": 0.24468904733657837,
419
- "eval_runtime": 1043.2136,
420
- "eval_samples_per_second": 11.128,
421
- "eval_steps_per_second": 0.696,
422
- "eval_wer": 0.4336997673676048,
423
- "step": 600
424
- },
425
- {
426
- "epoch": 0.99,
427
- "learning_rate": 0.00028641509433962264,
428
- "loss": 0.3484,
429
- "step": 610
430
- },
431
- {
432
- "epoch": 1.01,
433
- "learning_rate": 0.00028608213096559373,
434
- "loss": 0.3775,
435
- "step": 620
436
- },
437
- {
438
- "epoch": 1.03,
439
- "learning_rate": 0.0002857491675915649,
440
- "loss": 0.3207,
441
- "step": 630
442
- },
443
- {
444
- "epoch": 1.04,
445
- "learning_rate": 0.00028541620421753604,
446
- "loss": 0.2971,
447
- "step": 640
448
- },
449
- {
450
- "epoch": 1.06,
451
- "learning_rate": 0.0002850832408435072,
452
- "loss": 0.3129,
453
- "step": 650
454
- },
455
- {
456
- "epoch": 1.07,
457
- "learning_rate": 0.00028475027746947834,
458
- "loss": 0.3143,
459
- "step": 660
460
- },
461
- {
462
- "epoch": 1.09,
463
- "learning_rate": 0.0002844173140954495,
464
- "loss": 0.3179,
465
- "step": 670
466
- },
467
- {
468
- "epoch": 1.11,
469
- "learning_rate": 0.00028408435072142064,
470
- "loss": 0.299,
471
- "step": 680
472
- },
473
- {
474
- "epoch": 1.12,
475
- "learning_rate": 0.0002837513873473918,
476
- "loss": 0.2734,
477
- "step": 690
478
- },
479
- {
480
- "epoch": 1.14,
481
- "learning_rate": 0.0002834184239733629,
482
- "loss": 0.2775,
483
- "step": 700
484
- },
485
- {
486
- "epoch": 1.14,
487
- "eval_loss": 0.2055242359638214,
488
- "eval_runtime": 1053.2634,
489
- "eval_samples_per_second": 11.022,
490
- "eval_steps_per_second": 0.689,
491
- "eval_wer": 0.3768644802262464,
492
- "step": 700
493
- },
494
- {
495
- "epoch": 1.16,
496
- "learning_rate": 0.00028308546059933404,
497
- "loss": 0.2878,
498
- "step": 710
499
- },
500
- {
501
- "epoch": 1.17,
502
- "learning_rate": 0.0002827524972253052,
503
- "loss": 0.307,
504
- "step": 720
505
- },
506
- {
507
- "epoch": 1.19,
508
- "learning_rate": 0.00028241953385127634,
509
- "loss": 0.2848,
510
- "step": 730
511
- },
512
- {
513
- "epoch": 1.2,
514
- "learning_rate": 0.00028208657047724744,
515
- "loss": 0.2744,
516
- "step": 740
517
- },
518
- {
519
- "epoch": 1.22,
520
- "learning_rate": 0.00028175360710321865,
521
- "loss": 0.2839,
522
- "step": 750
523
- },
524
- {
525
- "epoch": 1.24,
526
- "learning_rate": 0.0002814206437291898,
527
- "loss": 0.2888,
528
- "step": 760
529
- },
530
- {
531
- "epoch": 1.25,
532
- "learning_rate": 0.0002810876803551609,
533
- "loss": 0.288,
534
- "step": 770
535
- },
536
- {
537
- "epoch": 1.27,
538
- "learning_rate": 0.00028075471698113205,
539
- "loss": 0.2559,
540
- "step": 780
541
- },
542
- {
543
- "epoch": 1.29,
544
- "learning_rate": 0.0002804217536071032,
545
- "loss": 0.2498,
546
- "step": 790
547
- },
548
- {
549
- "epoch": 1.3,
550
- "learning_rate": 0.00028008879023307435,
551
- "loss": 0.2554,
552
- "step": 800
553
- },
554
- {
555
- "epoch": 1.3,
556
- "eval_loss": 0.19498416781425476,
557
- "eval_runtime": 1045.8615,
558
- "eval_samples_per_second": 11.1,
559
- "eval_steps_per_second": 0.694,
560
- "eval_wer": 0.360272316744971,
561
- "step": 800
562
- },
563
- {
564
- "epoch": 1.32,
565
- "learning_rate": 0.00027975582685904545,
566
- "loss": 0.3059,
567
- "step": 810
568
- },
569
- {
570
- "epoch": 1.33,
571
- "learning_rate": 0.0002794228634850166,
572
- "loss": 0.281,
573
- "step": 820
574
- },
575
- {
576
- "epoch": 1.35,
577
- "learning_rate": 0.00027908990011098775,
578
- "loss": 0.2642,
579
- "step": 830
580
- },
581
- {
582
- "epoch": 1.37,
583
- "learning_rate": 0.0002787569367369589,
584
- "loss": 0.242,
585
- "step": 840
586
- },
587
- {
588
- "epoch": 1.38,
589
- "learning_rate": 0.00027842397336293005,
590
- "loss": 0.2541,
591
- "step": 850
592
- },
593
- {
594
- "epoch": 1.4,
595
- "learning_rate": 0.0002780910099889012,
596
- "loss": 0.2614,
597
- "step": 860
598
- },
599
- {
600
- "epoch": 1.42,
601
- "learning_rate": 0.00027775804661487236,
602
- "loss": 0.3001,
603
- "step": 870
604
- },
605
- {
606
- "epoch": 1.43,
607
- "learning_rate": 0.0002774250832408435,
608
- "loss": 0.2365,
609
- "step": 880
610
- },
611
- {
612
- "epoch": 1.45,
613
- "learning_rate": 0.0002770921198668146,
614
- "loss": 0.2373,
615
- "step": 890
616
- },
617
- {
618
- "epoch": 1.46,
619
- "learning_rate": 0.00027675915649278575,
620
- "loss": 0.263,
621
- "step": 900
622
- },
623
- {
624
- "epoch": 1.46,
625
- "eval_loss": 0.1812964379787445,
626
- "eval_runtime": 1053.0871,
627
- "eval_samples_per_second": 11.024,
628
- "eval_steps_per_second": 0.689,
629
- "eval_wer": 0.3371687269078137,
630
- "step": 900
631
- },
632
- {
633
- "epoch": 1.48,
634
- "learning_rate": 0.0002764261931187569,
635
- "loss": 0.289,
636
- "step": 910
637
- },
638
- {
639
- "epoch": 1.5,
640
- "learning_rate": 0.00027609322974472806,
641
- "loss": 0.2578,
642
- "step": 920
643
- },
644
- {
645
- "epoch": 1.51,
646
- "learning_rate": 0.0002757602663706992,
647
- "loss": 0.2565,
648
- "step": 930
649
- },
650
- {
651
- "epoch": 1.53,
652
- "learning_rate": 0.00027542730299667036,
653
- "loss": 0.2543,
654
- "step": 940
655
- },
656
- {
657
- "epoch": 1.55,
658
- "learning_rate": 0.0002750943396226415,
659
- "loss": 0.2404,
660
- "step": 950
661
- },
662
- {
663
- "epoch": 1.56,
664
- "learning_rate": 0.0002747613762486126,
665
- "loss": 0.2291,
666
- "step": 960
667
- },
668
- {
669
- "epoch": 1.58,
670
- "learning_rate": 0.00027442841287458376,
671
- "loss": 0.2636,
672
- "step": 970
673
- },
674
- {
675
- "epoch": 1.59,
676
- "learning_rate": 0.0002740954495005549,
677
- "loss": 0.2222,
678
- "step": 980
679
- },
680
- {
681
- "epoch": 1.61,
682
- "learning_rate": 0.00027376248612652606,
683
- "loss": 0.2322,
684
- "step": 990
685
- },
686
- {
687
- "epoch": 1.63,
688
- "learning_rate": 0.0002734295227524972,
689
- "loss": 0.2294,
690
- "step": 1000
691
- },
692
- {
693
- "epoch": 1.63,
694
- "eval_loss": 0.1664419174194336,
695
- "eval_runtime": 1047.59,
696
- "eval_samples_per_second": 11.082,
697
- "eval_steps_per_second": 0.693,
698
- "eval_wer": 0.3131756602654746,
699
- "step": 1000
700
- },
701
- {
702
- "epoch": 1.64,
703
- "learning_rate": 0.00027309655937846837,
704
- "loss": 0.249,
705
- "step": 1010
706
- },
707
- {
708
- "epoch": 1.66,
709
- "learning_rate": 0.0002727635960044395,
710
- "loss": 0.2682,
711
- "step": 1020
712
- },
713
- {
714
- "epoch": 1.68,
715
- "learning_rate": 0.0002724306326304106,
716
- "loss": 0.2242,
717
- "step": 1030
718
- },
719
- {
720
- "epoch": 1.69,
721
- "learning_rate": 0.00027209766925638177,
722
- "loss": 0.2333,
723
- "step": 1040
724
- },
725
- {
726
- "epoch": 1.71,
727
- "learning_rate": 0.0002717647058823529,
728
- "loss": 0.2188,
729
- "step": 1050
730
- },
731
- {
732
- "epoch": 1.72,
733
- "learning_rate": 0.00027143174250832407,
734
- "loss": 0.2592,
735
- "step": 1060
736
- },
737
- {
738
- "epoch": 1.74,
739
- "learning_rate": 0.0002710987791342952,
740
- "loss": 0.268,
741
- "step": 1070
742
- },
743
- {
744
- "epoch": 1.76,
745
- "learning_rate": 0.0002707658157602663,
746
- "loss": 0.2091,
747
- "step": 1080
748
- },
749
- {
750
- "epoch": 1.77,
751
- "learning_rate": 0.00027043285238623747,
752
- "loss": 0.2069,
753
- "step": 1090
754
- },
755
- {
756
- "epoch": 1.79,
757
- "learning_rate": 0.0002700998890122087,
758
- "loss": 0.2296,
759
- "step": 1100
760
- },
761
- {
762
- "epoch": 1.79,
763
- "eval_loss": 0.15650752186775208,
764
- "eval_runtime": 1054.82,
765
- "eval_samples_per_second": 11.006,
766
- "eval_steps_per_second": 0.688,
767
- "eval_wer": 0.2962413903206678,
768
- "step": 1100
769
- },
770
- {
771
- "epoch": 1.81,
772
- "learning_rate": 0.00026976692563817977,
773
- "loss": 0.2402,
774
- "step": 1110
775
- },
776
- {
777
- "epoch": 1.82,
778
- "learning_rate": 0.0002694339622641509,
779
- "loss": 0.2258,
780
- "step": 1120
781
- },
782
- {
783
- "epoch": 1.84,
784
- "learning_rate": 0.0002691009988901221,
785
- "loss": 0.2215,
786
- "step": 1130
787
- },
788
- {
789
- "epoch": 1.86,
790
- "learning_rate": 0.0002687680355160932,
791
- "loss": 0.2059,
792
- "step": 1140
793
- },
794
- {
795
- "epoch": 1.87,
796
- "learning_rate": 0.0002684350721420643,
797
- "loss": 0.2022,
798
- "step": 1150
799
- },
800
- {
801
- "epoch": 1.89,
802
- "learning_rate": 0.0002681021087680355,
803
- "loss": 0.2261,
804
- "step": 1160
805
- },
806
- {
807
- "epoch": 1.9,
808
- "learning_rate": 0.0002677691453940066,
809
- "loss": 0.2132,
810
- "step": 1170
811
- },
812
- {
813
- "epoch": 1.92,
814
- "learning_rate": 0.0002674361820199778,
815
- "loss": 0.2245,
816
- "step": 1180
817
- },
818
- {
819
- "epoch": 1.94,
820
- "learning_rate": 0.00026710321864594893,
821
- "loss": 0.2133,
822
- "step": 1190
823
- },
824
- {
825
- "epoch": 1.95,
826
- "learning_rate": 0.0002667702552719201,
827
- "loss": 0.2183,
828
- "step": 1200
829
- },
830
- {
831
- "epoch": 1.95,
832
- "eval_loss": 0.14735129475593567,
833
- "eval_runtime": 1059.1579,
834
- "eval_samples_per_second": 10.961,
835
- "eval_steps_per_second": 0.685,
836
- "eval_wer": 0.29856771427268164,
837
- "step": 1200
838
- },
839
- {
840
- "epoch": 1.97,
841
- "learning_rate": 0.00026643729189789123,
842
- "loss": 0.2204,
843
- "step": 1210
844
- },
845
- {
846
- "epoch": 1.99,
847
- "learning_rate": 0.00026610432852386233,
848
- "loss": 0.2482,
849
- "step": 1220
850
- },
851
- {
852
- "epoch": 2.0,
853
- "learning_rate": 0.0002657713651498335,
854
- "loss": 0.2214,
855
- "step": 1230
856
- },
857
- {
858
- "epoch": 2.02,
859
- "learning_rate": 0.00026543840177580463,
860
- "loss": 0.1779,
861
- "step": 1240
862
- },
863
- {
864
- "epoch": 2.03,
865
- "learning_rate": 0.0002651054384017758,
866
- "loss": 0.1688,
867
- "step": 1250
868
- },
869
- {
870
- "epoch": 2.05,
871
- "learning_rate": 0.00026477247502774693,
872
- "loss": 0.1915,
873
- "step": 1260
874
- },
875
- {
876
- "epoch": 2.07,
877
- "learning_rate": 0.0002644395116537181,
878
- "loss": 0.1942,
879
- "step": 1270
880
- },
881
- {
882
- "epoch": 2.08,
883
- "learning_rate": 0.00026410654827968924,
884
- "loss": 0.2266,
885
- "step": 1280
886
- },
887
- {
888
- "epoch": 2.1,
889
- "learning_rate": 0.0002637735849056604,
890
- "loss": 0.1931,
891
- "step": 1290
892
- },
893
- {
894
- "epoch": 2.12,
895
- "learning_rate": 0.0002634406215316315,
896
- "loss": 0.1822,
897
- "step": 1300
898
- },
899
- {
900
- "epoch": 2.12,
901
- "eval_loss": 0.1546158790588379,
902
- "eval_runtime": 1062.5837,
903
- "eval_samples_per_second": 10.925,
904
- "eval_steps_per_second": 0.683,
905
- "eval_wer": 0.2810974775350089,
906
- "step": 1300
907
- },
908
- {
909
- "epoch": 2.13,
910
- "learning_rate": 0.00026310765815760264,
911
- "loss": 0.1735,
912
- "step": 1310
913
- },
914
- {
915
- "epoch": 2.15,
916
- "learning_rate": 0.0002627746947835738,
917
- "loss": 0.2042,
918
- "step": 1320
919
- },
920
- {
921
- "epoch": 2.16,
922
- "learning_rate": 0.00026244173140954494,
923
- "loss": 0.2347,
924
- "step": 1330
925
- },
926
- {
927
- "epoch": 2.18,
928
- "learning_rate": 0.00026210876803551604,
929
- "loss": 0.1816,
930
- "step": 1340
931
- },
932
- {
933
- "epoch": 2.2,
934
- "learning_rate": 0.0002617758046614872,
935
- "loss": 0.1838,
936
- "step": 1350
937
- },
938
- {
939
- "epoch": 2.21,
940
- "learning_rate": 0.0002614428412874584,
941
- "loss": 0.1856,
942
- "step": 1360
943
- },
944
- {
945
- "epoch": 2.23,
946
- "learning_rate": 0.0002611098779134295,
947
- "loss": 0.1866,
948
- "step": 1370
949
- },
950
- {
951
- "epoch": 2.25,
952
- "learning_rate": 0.00026077691453940064,
953
- "loss": 0.2016,
954
- "step": 1380
955
- },
956
- {
957
- "epoch": 2.26,
958
- "learning_rate": 0.0002604439511653718,
959
- "loss": 0.1813,
960
- "step": 1390
961
- },
962
- {
963
- "epoch": 2.28,
964
- "learning_rate": 0.00026011098779134294,
965
- "loss": 0.1798,
966
- "step": 1400
967
- },
968
- {
969
- "epoch": 2.28,
970
- "eval_loss": 0.14420565962791443,
971
- "eval_runtime": 1057.3911,
972
- "eval_samples_per_second": 10.979,
973
- "eval_steps_per_second": 0.687,
974
- "eval_wer": 0.28108607398622454,
975
- "step": 1400
976
- },
977
- {
978
- "epoch": 2.29,
979
- "learning_rate": 0.00025977802441731404,
980
- "loss": 0.184,
981
- "step": 1410
982
- },
983
- {
984
- "epoch": 2.31,
985
- "learning_rate": 0.0002594450610432852,
986
- "loss": 0.19,
987
- "step": 1420
988
- },
989
- {
990
- "epoch": 2.33,
991
- "learning_rate": 0.00025911209766925634,
992
- "loss": 0.1982,
993
- "step": 1430
994
- },
995
- {
996
- "epoch": 2.34,
997
- "learning_rate": 0.0002587791342952275,
998
- "loss": 0.1627,
999
- "step": 1440
1000
- },
1001
- {
1002
- "epoch": 2.36,
1003
- "learning_rate": 0.00025844617092119865,
1004
- "loss": 0.1914,
1005
- "step": 1450
1006
- },
1007
- {
1008
- "epoch": 2.38,
1009
- "learning_rate": 0.0002581132075471698,
1010
- "loss": 0.171,
1011
- "step": 1460
1012
- },
1013
- {
1014
- "epoch": 2.39,
1015
- "learning_rate": 0.00025778024417314095,
1016
- "loss": 0.1653,
1017
- "step": 1470
1018
- },
1019
- {
1020
- "epoch": 2.41,
1021
- "learning_rate": 0.0002574472807991121,
1022
- "loss": 0.2263,
1023
- "step": 1480
1024
- },
1025
- {
1026
- "epoch": 2.42,
1027
- "learning_rate": 0.0002571143174250832,
1028
- "loss": 0.1801,
1029
- "step": 1490
1030
- },
1031
- {
1032
- "epoch": 2.44,
1033
- "learning_rate": 0.00025678135405105435,
1034
- "loss": 0.179,
1035
- "step": 1500
1036
- },
1037
- {
1038
- "epoch": 2.44,
1039
- "eval_loss": 0.14106744527816772,
1040
- "eval_runtime": 1059.0138,
1041
- "eval_samples_per_second": 10.962,
1042
- "eval_steps_per_second": 0.686,
1043
- "eval_wer": 0.26864480226246407,
1044
- "step": 1500
1045
- },
1046
- {
1047
- "epoch": 2.46,
1048
- "learning_rate": 0.0002564483906770255,
1049
- "loss": 0.1718,
1050
- "step": 1510
1051
- },
1052
- {
1053
- "epoch": 2.47,
1054
- "learning_rate": 0.00025611542730299665,
1055
- "loss": 0.1659,
1056
- "step": 1520
1057
- },
1058
- {
1059
- "epoch": 2.49,
1060
- "learning_rate": 0.0002557824639289678,
1061
- "loss": 0.2146,
1062
- "step": 1530
1063
- },
1064
- {
1065
- "epoch": 2.51,
1066
- "learning_rate": 0.00025544950055493896,
1067
- "loss": 0.1789,
1068
- "step": 1540
1069
- },
1070
- {
1071
- "epoch": 2.52,
1072
- "learning_rate": 0.0002551165371809101,
1073
- "loss": 0.18,
1074
- "step": 1550
1075
- },
1076
- {
1077
- "epoch": 2.54,
1078
- "learning_rate": 0.0002547835738068812,
1079
- "loss": 0.176,
1080
- "step": 1560
1081
- },
1082
- {
1083
- "epoch": 2.55,
1084
- "learning_rate": 0.00025445061043285236,
1085
- "loss": 0.1789,
1086
- "step": 1570
1087
- },
1088
- {
1089
- "epoch": 2.57,
1090
- "learning_rate": 0.0002541176470588235,
1091
- "loss": 0.2318,
1092
- "step": 1580
1093
- },
1094
- {
1095
- "epoch": 2.59,
1096
- "learning_rate": 0.00025378468368479466,
1097
- "loss": 0.161,
1098
- "step": 1590
1099
- },
1100
- {
1101
- "epoch": 2.6,
1102
- "learning_rate": 0.00025345172031076576,
1103
- "loss": 0.1593,
1104
- "step": 1600
1105
- },
1106
- {
1107
- "epoch": 2.6,
1108
- "eval_loss": 0.14078468084335327,
1109
- "eval_runtime": 1052.8874,
1110
- "eval_samples_per_second": 11.026,
1111
- "eval_steps_per_second": 0.69,
1112
- "eval_wer": 0.2739132418008484,
1113
- "step": 1600
1114
- },
1115
- {
1116
- "epoch": 2.62,
1117
- "learning_rate": 0.0002531187569367369,
1118
- "loss": 5.5922,
1119
- "step": 1610
1120
- },
1121
- {
1122
- "epoch": 2.64,
1123
- "learning_rate": 0.0002527857935627081,
1124
- "loss": 3.2059,
1125
- "step": 1620
1126
- },
1127
- {
1128
- "epoch": 2.65,
1129
- "learning_rate": 0.0002524528301886792,
1130
- "loss": 3.0426,
1131
- "step": 1630
1132
- },
1133
- {
1134
- "epoch": 2.67,
1135
- "learning_rate": 0.00025211986681465036,
1136
- "loss": 2.9869,
1137
- "step": 1640
1138
- },
1139
- {
1140
- "epoch": 2.69,
1141
- "learning_rate": 0.0002517869034406215,
1142
- "loss": 2.936,
1143
- "step": 1650
1144
- },
1145
- {
1146
- "epoch": 2.7,
1147
- "learning_rate": 0.00025145394006659266,
1148
- "loss": 2.5037,
1149
- "step": 1660
1150
- },
1151
- {
1152
- "epoch": 2.72,
1153
- "learning_rate": 0.0002511209766925638,
1154
- "loss": 1.297,
1155
- "step": 1670
1156
- },
1157
- {
1158
- "epoch": 2.74,
1159
- "learning_rate": 0.0002507880133185349,
1160
- "loss": 0.56,
1161
- "step": 1680
1162
- },
1163
- {
1164
- "epoch": 2.75,
1165
- "learning_rate": 0.00025045504994450606,
1166
- "loss": 0.3262,
1167
- "step": 1690
1168
- },
1169
- {
1170
- "epoch": 2.77,
1171
- "learning_rate": 0.0002501220865704772,
1172
- "loss": 0.2652,
1173
- "step": 1700
1174
- },
1175
- {
1176
- "epoch": 2.77,
1177
- "eval_loss": 0.20742562413215637,
1178
- "eval_runtime": 1056.4173,
1179
- "eval_samples_per_second": 10.989,
1180
- "eval_steps_per_second": 0.687,
1181
- "eval_wer": 0.4499384208365643,
1182
- "step": 1700
1183
- },
1184
- {
1185
- "epoch": 2.78,
1186
- "learning_rate": 0.00024978912319644837,
1187
- "loss": 0.2585,
1188
- "step": 1710
1189
- },
1190
- {
1191
- "epoch": 2.8,
1192
- "learning_rate": 0.0002494561598224195,
1193
- "loss": 0.2722,
1194
- "step": 1720
1195
- },
1196
- {
1197
- "epoch": 2.82,
1198
- "learning_rate": 0.00024912319644839067,
1199
- "loss": 0.2571,
1200
- "step": 1730
1201
- },
1202
- {
1203
- "epoch": 2.83,
1204
- "learning_rate": 0.0002487902330743618,
1205
- "loss": 0.2256,
1206
- "step": 1740
1207
- },
1208
- {
1209
- "epoch": 2.85,
1210
- "learning_rate": 0.0002484572697003329,
1211
- "loss": 0.2176,
1212
- "step": 1750
1213
- },
1214
- {
1215
- "epoch": 2.87,
1216
- "learning_rate": 0.00024812430632630407,
1217
- "loss": 0.1954,
1218
- "step": 1760
1219
- },
1220
- {
1221
- "epoch": 2.88,
1222
- "learning_rate": 0.0002477913429522752,
1223
- "loss": 0.2099,
1224
- "step": 1770
1225
- },
1226
- {
1227
- "epoch": 2.9,
1228
- "learning_rate": 0.00024745837957824637,
1229
- "loss": 0.2316,
1230
- "step": 1780
1231
- },
1232
- {
1233
- "epoch": 2.91,
1234
- "learning_rate": 0.0002471254162042175,
1235
- "loss": 0.1814,
1236
- "step": 1790
1237
- },
1238
- {
1239
- "epoch": 2.93,
1240
- "learning_rate": 0.0002467924528301887,
1241
- "loss": 0.1834,
1242
- "step": 1800
1243
- },
1244
- {
1245
- "epoch": 2.93,
1246
- "eval_loss": 0.15701305866241455,
1247
- "eval_runtime": 1047.7925,
1248
- "eval_samples_per_second": 11.079,
1249
- "eval_steps_per_second": 0.693,
1250
- "eval_wer": 0.39416366373215345,
1251
- "step": 1800
1252
- },
1253
- {
1254
- "epoch": 2.95,
1255
- "learning_rate": 0.0002464594894561598,
1256
- "loss": 0.2047,
1257
- "step": 1810
1258
- },
1259
- {
1260
- "epoch": 2.96,
1261
- "learning_rate": 0.0002461265260821309,
1262
- "loss": 0.2051,
1263
- "step": 1820
1264
- },
1265
- {
1266
- "epoch": 2.98,
1267
- "learning_rate": 0.0002457935627081021,
1268
- "loss": 0.2246,
1269
- "step": 1830
1270
- },
1271
- {
1272
- "epoch": 3.0,
1273
- "learning_rate": 0.0002454605993340732,
1274
- "loss": 0.1797,
1275
- "step": 1840
1276
- },
1277
- {
1278
- "epoch": 3.01,
1279
- "learning_rate": 0.0002451276359600444,
1280
- "loss": 0.2112,
1281
- "step": 1850
1282
- },
1283
- {
1284
- "epoch": 3.03,
1285
- "learning_rate": 0.00024479467258601553,
1286
- "loss": 0.1914,
1287
- "step": 1860
1288
- },
1289
- {
1290
- "epoch": 3.04,
1291
- "learning_rate": 0.0002444617092119866,
1292
- "loss": 0.1761,
1293
- "step": 1870
1294
- },
1295
- {
1296
- "epoch": 3.06,
1297
- "learning_rate": 0.0002441287458379578,
1298
- "loss": 0.1917,
1299
- "step": 1880
1300
- },
1301
- {
1302
- "epoch": 3.08,
1303
- "learning_rate": 0.00024379578246392896,
1304
- "loss": 0.2106,
1305
- "step": 1890
1306
- },
1307
- {
1308
- "epoch": 3.09,
1309
- "learning_rate": 0.0002434628190899001,
1310
- "loss": 0.2015,
1311
- "step": 1900
1312
- },
1313
- {
1314
- "epoch": 3.09,
1315
- "eval_loss": 0.15158045291900635,
1316
- "eval_runtime": 1057.6306,
1317
- "eval_samples_per_second": 10.976,
1318
- "eval_steps_per_second": 0.686,
1319
- "eval_wer": 0.38587328376590796,
1320
- "step": 1900
1321
- },
1322
- {
1323
- "epoch": 3.11,
1324
- "learning_rate": 0.00024312985571587123,
1325
- "loss": 0.1756,
1326
- "step": 1910
1327
- },
1328
- {
1329
- "epoch": 3.13,
1330
- "learning_rate": 0.00024279689234184238,
1331
- "loss": 0.1733,
1332
- "step": 1920
1333
- },
1334
- {
1335
- "epoch": 3.14,
1336
- "learning_rate": 0.0002424639289678135,
1337
- "loss": 0.1805,
1338
- "step": 1930
1339
- },
1340
- {
1341
- "epoch": 3.16,
1342
- "learning_rate": 0.00024213096559378466,
1343
- "loss": 0.181,
1344
- "step": 1940
1345
- },
1346
- {
1347
- "epoch": 3.17,
1348
- "learning_rate": 0.00024179800221975578,
1349
- "loss": 0.1701,
1350
- "step": 1950
1351
- },
1352
- {
1353
- "epoch": 3.19,
1354
- "learning_rate": 0.00024146503884572693,
1355
- "loss": 0.1894,
1356
- "step": 1960
1357
- },
1358
- {
1359
- "epoch": 3.21,
1360
- "learning_rate": 0.0002411320754716981,
1361
- "loss": 0.1749,
1362
- "step": 1970
1363
- },
1364
- {
1365
- "epoch": 3.22,
1366
- "learning_rate": 0.00024079911209766924,
1367
- "loss": 0.1559,
1368
- "step": 1980
1369
- },
1370
- {
1371
- "epoch": 3.24,
1372
- "learning_rate": 0.0002404661487236404,
1373
- "loss": 0.2001,
1374
- "step": 1990
1375
- },
1376
- {
1377
- "epoch": 3.26,
1378
- "learning_rate": 0.0002401331853496115,
1379
- "loss": 0.1696,
1380
- "step": 2000
1381
- },
1382
- {
1383
- "epoch": 3.26,
1384
- "eval_loss": 0.14519302546977997,
1385
- "eval_runtime": 1046.4919,
1386
- "eval_samples_per_second": 11.093,
1387
- "eval_steps_per_second": 0.694,
1388
- "eval_wer": 0.3826004652647904,
1389
- "step": 2000
1390
- },
1391
- {
1392
- "epoch": 3.27,
1393
- "learning_rate": 0.00023980022197558266,
1394
- "loss": 0.1714,
1395
- "step": 2010
1396
- },
1397
- {
1398
- "epoch": 3.29,
1399
- "learning_rate": 0.0002394672586015538,
1400
- "loss": 0.1527,
1401
- "step": 2020
1402
- },
1403
- {
1404
- "epoch": 3.31,
1405
- "learning_rate": 0.00023913429522752494,
1406
- "loss": 0.1589,
1407
- "step": 2030
1408
- },
1409
- {
1410
- "epoch": 3.32,
1411
- "learning_rate": 0.0002388013318534961,
1412
- "loss": 0.1786,
1413
- "step": 2040
1414
- },
1415
- {
1416
- "epoch": 3.34,
1417
- "learning_rate": 0.00023846836847946724,
1418
- "loss": 0.1958,
1419
- "step": 2050
1420
- },
1421
- {
1422
- "epoch": 3.35,
1423
- "learning_rate": 0.0002381354051054384,
1424
- "loss": 0.1671,
1425
- "step": 2060
1426
- },
1427
- {
1428
- "epoch": 3.37,
1429
- "learning_rate": 0.00023780244173140955,
1430
- "loss": 0.177,
1431
- "step": 2070
1432
- },
1433
- {
1434
- "epoch": 3.39,
1435
- "learning_rate": 0.00023746947835738067,
1436
- "loss": 0.1618,
1437
- "step": 2080
1438
- },
1439
- {
1440
- "epoch": 3.4,
1441
- "learning_rate": 0.00023713651498335182,
1442
- "loss": 0.1859,
1443
- "step": 2090
1444
- },
1445
- {
1446
- "epoch": 3.42,
1447
- "learning_rate": 0.00023680355160932295,
1448
- "loss": 0.1782,
1449
- "step": 2100
1450
- },
1451
- {
1452
- "epoch": 3.42,
1453
- "eval_loss": 0.14127187430858612,
1454
- "eval_runtime": 1047.734,
1455
- "eval_samples_per_second": 11.08,
1456
- "eval_steps_per_second": 0.693,
1457
- "eval_wer": 0.3763285134333805,
1458
- "step": 2100
1459
- },
1460
- {
1461
- "epoch": 3.44,
1462
- "learning_rate": 0.0002364705882352941,
1463
- "loss": 0.1563,
1464
- "step": 2110
1465
- },
1466
- {
1467
- "epoch": 3.45,
1468
- "learning_rate": 0.00023613762486126522,
1469
- "loss": 0.159,
1470
- "step": 2120
1471
- },
1472
- {
1473
- "epoch": 3.47,
1474
- "learning_rate": 0.00023580466148723637,
1475
- "loss": 0.1781,
1476
- "step": 2130
1477
- },
1478
- {
1479
- "epoch": 3.48,
1480
- "learning_rate": 0.00023547169811320755,
1481
- "loss": 0.1841,
1482
- "step": 2140
1483
- },
1484
- {
1485
- "epoch": 3.5,
1486
- "learning_rate": 0.00023513873473917868,
1487
- "loss": 0.1755,
1488
- "step": 2150
1489
- },
1490
- {
1491
- "epoch": 3.52,
1492
- "learning_rate": 0.00023480577136514983,
1493
- "loss": 0.153,
1494
- "step": 2160
1495
- },
1496
- {
1497
- "epoch": 3.53,
1498
- "learning_rate": 0.00023447280799112095,
1499
- "loss": 0.151,
1500
- "step": 2170
1501
- },
1502
- {
1503
- "epoch": 3.55,
1504
- "learning_rate": 0.0002341398446170921,
1505
- "loss": 0.1691,
1506
- "step": 2180
1507
- },
1508
- {
1509
- "epoch": 3.57,
1510
- "learning_rate": 0.00023380688124306323,
1511
- "loss": 0.1685,
1512
- "step": 2190
1513
- },
1514
- {
1515
- "epoch": 3.58,
1516
- "learning_rate": 0.00023347391786903438,
1517
- "loss": 0.1636,
1518
- "step": 2200
1519
- },
1520
- {
1521
- "epoch": 3.58,
1522
- "eval_loss": 0.13500617444515228,
1523
- "eval_runtime": 1049.8895,
1524
- "eval_samples_per_second": 11.057,
1525
- "eval_steps_per_second": 0.692,
1526
- "eval_wer": 0.3761004424576928,
1527
- "step": 2200
1528
- },
1529
- {
1530
- "epoch": 3.6,
1531
- "learning_rate": 0.00023314095449500553,
1532
- "loss": 0.1803,
1533
- "step": 2210
1534
- },
1535
- {
1536
- "epoch": 3.61,
1537
- "learning_rate": 0.00023280799112097665,
1538
- "loss": 0.15,
1539
- "step": 2220
1540
- },
1541
- {
1542
- "epoch": 3.63,
1543
- "learning_rate": 0.00023247502774694783,
1544
- "loss": 0.1721,
1545
- "step": 2230
1546
- },
1547
- {
1548
- "epoch": 3.65,
1549
- "learning_rate": 0.00023214206437291896,
1550
- "loss": 0.1691,
1551
- "step": 2240
1552
- },
1553
- {
1554
- "epoch": 3.66,
1555
- "learning_rate": 0.0002318091009988901,
1556
- "loss": 0.1548,
1557
- "step": 2250
1558
- },
1559
- {
1560
- "epoch": 3.68,
1561
- "learning_rate": 0.00023147613762486126,
1562
- "loss": 0.1616,
1563
- "step": 2260
1564
- },
1565
- {
1566
- "epoch": 3.7,
1567
- "learning_rate": 0.00023114317425083238,
1568
- "loss": 0.159,
1569
- "step": 2270
1570
- },
1571
- {
1572
- "epoch": 3.71,
1573
- "learning_rate": 0.00023081021087680353,
1574
- "loss": 0.1483,
1575
- "step": 2280
1576
- },
1577
- {
1578
- "epoch": 3.73,
1579
- "learning_rate": 0.00023047724750277466,
1580
- "loss": 0.1693,
1581
- "step": 2290
1582
- },
1583
- {
1584
- "epoch": 3.74,
1585
- "learning_rate": 0.0002301442841287458,
1586
- "loss": 0.173,
1587
- "step": 2300
1588
- },
1589
- {
1590
- "epoch": 3.74,
1591
- "eval_loss": 0.13234160840511322,
1592
- "eval_runtime": 1044.2826,
1593
- "eval_samples_per_second": 11.117,
1594
- "eval_steps_per_second": 0.695,
1595
- "eval_wer": 0.3621653058431784,
1596
- "step": 2300
1597
- },
1598
- {
1599
- "epoch": 3.76,
1600
- "learning_rate": 0.000229811320754717,
1601
- "loss": 0.1488,
1602
- "step": 2310
1603
- },
1604
- {
1605
- "epoch": 3.78,
1606
- "learning_rate": 0.0002294783573806881,
1607
- "loss": 0.1491,
1608
- "step": 2320
1609
- },
1610
- {
1611
- "epoch": 3.79,
1612
- "learning_rate": 0.00022914539400665926,
1613
- "loss": 0.1632,
1614
- "step": 2330
1615
- },
1616
- {
1617
- "epoch": 3.81,
1618
- "learning_rate": 0.0002288124306326304,
1619
- "loss": 0.1733,
1620
- "step": 2340
1621
- },
1622
- {
1623
- "epoch": 3.83,
1624
- "learning_rate": 0.00022847946725860154,
1625
- "loss": 0.1648,
1626
- "step": 2350
1627
- },
1628
- {
1629
- "epoch": 3.84,
1630
- "learning_rate": 0.00022814650388457266,
1631
- "loss": 0.1513,
1632
- "step": 2360
1633
- },
1634
- {
1635
- "epoch": 3.86,
1636
- "learning_rate": 0.00022781354051054382,
1637
- "loss": 0.1602,
1638
- "step": 2370
1639
- },
1640
- {
1641
- "epoch": 3.87,
1642
- "learning_rate": 0.00022748057713651494,
1643
- "loss": 0.1528,
1644
- "step": 2380
1645
- },
1646
- {
1647
- "epoch": 3.89,
1648
- "learning_rate": 0.0002271476137624861,
1649
- "loss": 0.1688,
1650
- "step": 2390
1651
- },
1652
- {
1653
- "epoch": 3.91,
1654
- "learning_rate": 0.00022681465038845727,
1655
- "loss": 0.1704,
1656
- "step": 2400
1657
- },
1658
- {
1659
- "epoch": 3.91,
1660
- "eval_loss": 0.12890243530273438,
1661
- "eval_runtime": 1049.8266,
1662
- "eval_samples_per_second": 11.058,
1663
- "eval_steps_per_second": 0.692,
1664
- "eval_wer": 0.364423208502486,
1665
- "step": 2400
1666
- },
1667
- {
1668
- "epoch": 3.92,
1669
- "learning_rate": 0.0002264816870144284,
1670
- "loss": 0.1427,
1671
- "step": 2410
1672
- },
1673
- {
1674
- "epoch": 3.94,
1675
- "learning_rate": 0.00022614872364039955,
1676
- "loss": 0.1585,
1677
- "step": 2420
1678
- },
1679
- {
1680
- "epoch": 3.96,
1681
- "learning_rate": 0.00022581576026637067,
1682
- "loss": 0.1504,
1683
- "step": 2430
1684
- },
1685
- {
1686
- "epoch": 3.97,
1687
- "learning_rate": 0.00022548279689234182,
1688
- "loss": 0.1819,
1689
- "step": 2440
1690
- },
1691
- {
1692
- "epoch": 3.99,
1693
- "learning_rate": 0.00022514983351831297,
1694
- "loss": 0.1679,
1695
- "step": 2450
1696
- },
1697
- {
1698
- "epoch": 4.0,
1699
- "learning_rate": 0.0002248168701442841,
1700
- "loss": 0.1619,
1701
- "step": 2460
1702
- },
1703
- {
1704
- "epoch": 4.02,
1705
- "learning_rate": 0.00022448390677025525,
1706
- "loss": 0.1288,
1707
- "step": 2470
1708
- },
1709
- {
1710
- "epoch": 4.04,
1711
- "learning_rate": 0.00022415094339622637,
1712
- "loss": 0.1288,
1713
- "step": 2480
1714
- },
1715
- {
1716
- "epoch": 4.05,
1717
- "learning_rate": 0.00022381798002219755,
1718
- "loss": 0.1465,
1719
- "step": 2490
1720
- },
1721
- {
1722
- "epoch": 4.07,
1723
- "learning_rate": 0.0002234850166481687,
1724
- "loss": 0.1418,
1725
- "step": 2500
1726
- },
1727
- {
1728
- "epoch": 4.07,
1729
- "eval_loss": 0.12660406529903412,
1730
- "eval_runtime": 1055.0446,
1731
- "eval_samples_per_second": 11.003,
1732
- "eval_steps_per_second": 0.688,
1733
- "eval_wer": 0.34805911599689826,
1734
- "step": 2500
1735
- },
1736
- {
1737
- "epoch": 4.09,
1738
- "learning_rate": 0.00022315205327413983,
1739
- "loss": 0.1618,
1740
- "step": 2510
1741
- },
1742
- {
1743
- "epoch": 4.1,
1744
- "learning_rate": 0.00022281908990011098,
1745
- "loss": 0.1337,
1746
- "step": 2520
1747
- },
1748
- {
1749
- "epoch": 4.12,
1750
- "learning_rate": 0.0002224861265260821,
1751
- "loss": 0.1545,
1752
- "step": 2530
1753
- },
1754
- {
1755
- "epoch": 4.14,
1756
- "learning_rate": 0.00022215316315205325,
1757
- "loss": 0.1343,
1758
- "step": 2540
1759
- },
1760
- {
1761
- "epoch": 4.15,
1762
- "learning_rate": 0.00022182019977802438,
1763
- "loss": 0.1309,
1764
- "step": 2550
1765
- },
1766
- {
1767
- "epoch": 4.17,
1768
- "learning_rate": 0.00022148723640399553,
1769
- "loss": 0.1449,
1770
- "step": 2560
1771
- },
1772
- {
1773
- "epoch": 4.18,
1774
- "learning_rate": 0.0002211542730299667,
1775
- "loss": 0.1214,
1776
- "step": 2570
1777
- },
1778
- {
1779
- "epoch": 4.2,
1780
- "learning_rate": 0.00022082130965593783,
1781
- "loss": 0.1397,
1782
- "step": 2580
1783
- },
1784
- {
1785
- "epoch": 4.22,
1786
- "learning_rate": 0.00022048834628190898,
1787
- "loss": 0.124,
1788
- "step": 2590
1789
- },
1790
- {
1791
- "epoch": 4.23,
1792
- "learning_rate": 0.0002201553829078801,
1793
- "loss": 0.1403,
1794
- "step": 2600
1795
- },
1796
- {
1797
- "epoch": 4.23,
1798
- "eval_loss": 0.1273965835571289,
1799
- "eval_runtime": 1056.7903,
1800
- "eval_samples_per_second": 10.985,
1801
- "eval_steps_per_second": 0.687,
1802
- "eval_wer": 0.34823016922866395,
1803
- "step": 2600
1804
- },
1805
- {
1806
- "epoch": 4.25,
1807
- "learning_rate": 0.00021982241953385126,
1808
- "loss": 0.1616,
1809
- "step": 2610
1810
- },
1811
- {
1812
- "epoch": 4.27,
1813
- "learning_rate": 0.00021948945615982238,
1814
- "loss": 0.1368,
1815
- "step": 2620
1816
- },
1817
- {
1818
- "epoch": 4.28,
1819
- "learning_rate": 0.00021915649278579354,
1820
- "loss": 0.1288,
1821
- "step": 2630
1822
- },
1823
- {
1824
- "epoch": 4.3,
1825
- "learning_rate": 0.0002188235294117647,
1826
- "loss": 0.1299,
1827
- "step": 2640
1828
- },
1829
- {
1830
- "epoch": 4.31,
1831
- "learning_rate": 0.0002184905660377358,
1832
- "loss": 0.1349,
1833
- "step": 2650
1834
- },
1835
- {
1836
- "epoch": 4.33,
1837
- "learning_rate": 0.000218157602663707,
1838
- "loss": 0.166,
1839
- "step": 2660
1840
- },
1841
- {
1842
- "epoch": 4.35,
1843
- "learning_rate": 0.00021782463928967811,
1844
- "loss": 0.1347,
1845
- "step": 2670
1846
- },
1847
- {
1848
- "epoch": 4.36,
1849
- "learning_rate": 0.00021749167591564927,
1850
- "loss": 0.1315,
1851
- "step": 2680
1852
- },
1853
- {
1854
- "epoch": 4.38,
1855
- "learning_rate": 0.00021715871254162042,
1856
- "loss": 0.1311,
1857
- "step": 2690
1858
- },
1859
- {
1860
- "epoch": 4.4,
1861
- "learning_rate": 0.00021682574916759154,
1862
- "loss": 0.1353,
1863
- "step": 2700
1864
- },
1865
- {
1866
- "epoch": 4.4,
1867
- "eval_loss": 0.1287122368812561,
1868
- "eval_runtime": 1067.8021,
1869
- "eval_samples_per_second": 10.872,
1870
- "eval_steps_per_second": 0.68,
1871
- "eval_wer": 0.3488915750581581,
1872
- "step": 2700
1873
- },
1874
- {
1875
- "epoch": 4.41,
1876
- "learning_rate": 0.0002164927857935627,
1877
- "loss": 0.1741,
1878
- "step": 2710
1879
- },
1880
- {
1881
- "epoch": 4.43,
1882
- "learning_rate": 0.00021615982241953382,
1883
- "loss": 0.1331,
1884
- "step": 2720
1885
- },
1886
- {
1887
- "epoch": 4.44,
1888
- "learning_rate": 0.00021582685904550497,
1889
- "loss": 0.1422,
1890
- "step": 2730
1891
- },
1892
- {
1893
- "epoch": 4.46,
1894
- "learning_rate": 0.0002154938956714761,
1895
- "loss": 0.1313,
1896
- "step": 2740
1897
- },
1898
- {
1899
- "epoch": 4.48,
1900
- "learning_rate": 0.00021516093229744727,
1901
- "loss": 0.1508,
1902
- "step": 2750
1903
- },
1904
- {
1905
- "epoch": 4.49,
1906
- "learning_rate": 0.00021482796892341842,
1907
- "loss": 0.1604,
1908
- "step": 2760
1909
- },
1910
- {
1911
- "epoch": 4.51,
1912
- "learning_rate": 0.00021449500554938955,
1913
- "loss": 0.1301,
1914
- "step": 2770
1915
- },
1916
- {
1917
- "epoch": 4.53,
1918
- "learning_rate": 0.0002141620421753607,
1919
- "loss": 0.1432,
1920
- "step": 2780
1921
- },
1922
- {
1923
- "epoch": 4.54,
1924
- "learning_rate": 0.00021382907880133182,
1925
- "loss": 0.1274,
1926
- "step": 2790
1927
- },
1928
- {
1929
- "epoch": 4.56,
1930
- "learning_rate": 0.00021349611542730297,
1931
- "loss": 0.1432,
1932
- "step": 2800
1933
- },
1934
- {
1935
- "epoch": 4.56,
1936
- "eval_loss": 0.12929297983646393,
1937
- "eval_runtime": 1060.4858,
1938
- "eval_samples_per_second": 10.947,
1939
- "eval_steps_per_second": 0.685,
1940
- "eval_wer": 0.35316790585230123,
1941
- "step": 2800
1942
- },
1943
- {
1944
- "epoch": 4.57,
1945
- "learning_rate": 0.0002131631520532741,
1946
- "loss": 0.1652,
1947
- "step": 2810
1948
- },
1949
- {
1950
- "epoch": 4.59,
1951
- "learning_rate": 0.00021283018867924525,
1952
- "loss": 0.1304,
1953
- "step": 2820
1954
- },
1955
- {
1956
- "epoch": 4.61,
1957
- "learning_rate": 0.00021249722530521643,
1958
- "loss": 0.1461,
1959
- "step": 2830
1960
- },
1961
- {
1962
- "epoch": 4.62,
1963
- "learning_rate": 0.00021216426193118755,
1964
- "loss": 0.1342,
1965
- "step": 2840
1966
- },
1967
- {
1968
- "epoch": 4.64,
1969
- "learning_rate": 0.0002118312985571587,
1970
- "loss": 0.1506,
1971
- "step": 2850
1972
- },
1973
- {
1974
- "epoch": 4.66,
1975
- "learning_rate": 0.00021149833518312985,
1976
- "loss": 0.1487,
1977
- "step": 2860
1978
- },
1979
- {
1980
- "epoch": 4.67,
1981
- "learning_rate": 0.00021116537180910098,
1982
- "loss": 0.1291,
1983
- "step": 2870
1984
- },
1985
- {
1986
- "epoch": 4.69,
1987
- "learning_rate": 0.00021083240843507213,
1988
- "loss": 0.1326,
1989
- "step": 2880
1990
- },
1991
- {
1992
- "epoch": 4.7,
1993
- "learning_rate": 0.00021049944506104325,
1994
- "loss": 0.1352,
1995
- "step": 2890
1996
- },
1997
- {
1998
- "epoch": 4.72,
1999
- "learning_rate": 0.0002101664816870144,
2000
- "loss": 0.1283,
2001
- "step": 2900
2002
- },
2003
- {
2004
- "epoch": 4.72,
2005
- "eval_loss": 0.12256743013858795,
2006
- "eval_runtime": 1069.9155,
2007
- "eval_samples_per_second": 10.85,
2008
- "eval_steps_per_second": 0.679,
2009
- "eval_wer": 0.34155909318980066,
2010
- "step": 2900
2011
- },
2012
- {
2013
- "epoch": 4.74,
2014
- "learning_rate": 0.00020983351831298553,
2015
- "loss": 0.1592,
2016
- "step": 2910
2017
- },
2018
- {
2019
- "epoch": 4.75,
2020
- "learning_rate": 0.0002095005549389567,
2021
- "loss": 0.137,
2022
- "step": 2920
2023
- },
2024
- {
2025
- "epoch": 4.77,
2026
- "learning_rate": 0.00020916759156492786,
2027
- "loss": 0.1447,
2028
- "step": 2930
2029
- },
2030
- {
2031
- "epoch": 4.79,
2032
- "learning_rate": 0.00020883462819089898,
2033
- "loss": 0.13,
2034
- "step": 2940
2035
- },
2036
- {
2037
- "epoch": 4.8,
2038
- "learning_rate": 0.00020850166481687014,
2039
- "loss": 0.1336,
2040
- "step": 2950
2041
- },
2042
- {
2043
- "epoch": 4.82,
2044
- "learning_rate": 0.00020816870144284126,
2045
- "loss": 0.1451,
2046
- "step": 2960
2047
- },
2048
- {
2049
- "epoch": 4.83,
2050
- "learning_rate": 0.0002078357380688124,
2051
- "loss": 0.1191,
2052
- "step": 2970
2053
- },
2054
- {
2055
- "epoch": 4.85,
2056
- "learning_rate": 0.00020750277469478354,
2057
- "loss": 0.1266,
2058
- "step": 2980
2059
- },
2060
- {
2061
- "epoch": 4.87,
2062
- "learning_rate": 0.0002071698113207547,
2063
- "loss": 0.1349,
2064
- "step": 2990
2065
- },
2066
- {
2067
- "epoch": 4.88,
2068
- "learning_rate": 0.00020683684794672584,
2069
- "loss": 0.1367,
2070
- "step": 3000
2071
- },
2072
- {
2073
- "epoch": 4.88,
2074
- "eval_loss": 0.12064128369092941,
2075
- "eval_runtime": 1063.0232,
2076
- "eval_samples_per_second": 10.921,
2077
- "eval_steps_per_second": 0.683,
2078
- "eval_wer": 0.3425740090316106,
2079
- "step": 3000
2080
- },
2081
- {
2082
- "epoch": 4.9,
2083
- "learning_rate": 0.000206503884572697,
2084
- "loss": 0.1426,
2085
- "step": 3010
2086
- },
2087
- {
2088
- "epoch": 4.92,
2089
- "learning_rate": 0.00020617092119866814,
2090
- "loss": 0.1221,
2091
- "step": 3020
2092
- },
2093
- {
2094
- "epoch": 4.93,
2095
- "learning_rate": 0.00020583795782463927,
2096
- "loss": 0.1268,
2097
- "step": 3030
2098
- },
2099
- {
2100
- "epoch": 4.95,
2101
- "learning_rate": 0.00020550499445061042,
2102
- "loss": 0.1421,
2103
- "step": 3040
2104
- },
2105
- {
2106
- "epoch": 4.97,
2107
- "learning_rate": 0.00020517203107658157,
2108
- "loss": 0.142,
2109
- "step": 3050
2110
- },
2111
- {
2112
- "epoch": 4.98,
2113
- "learning_rate": 0.0002048390677025527,
2114
- "loss": 0.1659,
2115
- "step": 3060
2116
- },
2117
- {
2118
- "epoch": 5.0,
2119
- "learning_rate": 0.00020450610432852384,
2120
- "loss": 0.1353,
2121
- "step": 3070
2122
- },
2123
- {
2124
- "epoch": 5.01,
2125
- "learning_rate": 0.00020417314095449497,
2126
- "loss": 0.1325,
2127
- "step": 3080
2128
- },
2129
- {
2130
- "epoch": 5.03,
2131
- "learning_rate": 0.00020384017758046615,
2132
- "loss": 0.1191,
2133
- "step": 3090
2134
- },
2135
- {
2136
- "epoch": 5.05,
2137
- "learning_rate": 0.0002035072142064373,
2138
- "loss": 0.1162,
2139
- "step": 3100
2140
- },
2141
- {
2142
- "epoch": 5.05,
2143
- "eval_loss": 0.12216832488775253,
2144
- "eval_runtime": 1074.6528,
2145
- "eval_samples_per_second": 10.803,
2146
- "eval_steps_per_second": 0.676,
2147
- "eval_wer": 0.33939241892076816,
2148
- "step": 3100
2149
- },
2150
- {
2151
- "epoch": 5.06,
2152
- "learning_rate": 0.00020317425083240842,
2153
- "loss": 0.1213,
2154
- "step": 3110
2155
- },
2156
- {
2157
- "epoch": 5.08,
2158
- "learning_rate": 0.00020284128745837957,
2159
- "loss": 0.1631,
2160
- "step": 3120
2161
- },
2162
- {
2163
- "epoch": 5.1,
2164
- "learning_rate": 0.0002025083240843507,
2165
- "loss": 0.1285,
2166
- "step": 3130
2167
- },
2168
- {
2169
- "epoch": 5.11,
2170
- "learning_rate": 0.00020217536071032185,
2171
- "loss": 0.1122,
2172
- "step": 3140
2173
- },
2174
- {
2175
- "epoch": 5.13,
2176
- "learning_rate": 0.00020184239733629297,
2177
- "loss": 0.1202,
2178
- "step": 3150
2179
- },
2180
- {
2181
- "epoch": 5.14,
2182
- "learning_rate": 0.00020150943396226413,
2183
- "loss": 0.1222,
2184
- "step": 3160
2185
- },
2186
- {
2187
- "epoch": 5.16,
2188
- "learning_rate": 0.00020117647058823525,
2189
- "loss": 0.1442,
2190
- "step": 3170
2191
- },
2192
- {
2193
- "epoch": 5.18,
2194
- "learning_rate": 0.00020084350721420643,
2195
- "loss": 0.1197,
2196
- "step": 3180
2197
- },
2198
- {
2199
- "epoch": 5.19,
2200
- "learning_rate": 0.00020051054384017758,
2201
- "loss": 0.1124,
2202
- "step": 3190
2203
- },
2204
- {
2205
- "epoch": 5.21,
2206
- "learning_rate": 0.0002001775804661487,
2207
- "loss": 0.1267,
2208
- "step": 3200
2209
- },
2210
- {
2211
- "epoch": 5.21,
2212
- "eval_loss": 0.11833450943231583,
2213
- "eval_runtime": 1070.4879,
2214
- "eval_samples_per_second": 10.845,
2215
- "eval_steps_per_second": 0.678,
2216
- "eval_wer": 0.33128449573507274,
2217
- "step": 3200
2218
- },
2219
- {
2220
- "epoch": 5.23,
2221
- "learning_rate": 0.00019984461709211985,
2222
- "loss": 0.1266,
2223
- "step": 3210
2224
- },
2225
- {
2226
- "epoch": 5.24,
2227
- "learning_rate": 0.00019951165371809098,
2228
- "loss": 0.1213,
2229
- "step": 3220
2230
- },
2231
- {
2232
- "epoch": 5.26,
2233
- "learning_rate": 0.00019917869034406213,
2234
- "loss": 0.1246,
2235
- "step": 3230
2236
- },
2237
- {
2238
- "epoch": 5.27,
2239
- "learning_rate": 0.00019884572697003328,
2240
- "loss": 0.1117,
2241
- "step": 3240
2242
- },
2243
- {
2244
- "epoch": 5.29,
2245
- "learning_rate": 0.0001985127635960044,
2246
- "loss": 0.1127,
2247
- "step": 3250
2248
- },
2249
- {
2250
- "epoch": 5.31,
2251
- "learning_rate": 0.00019817980022197556,
2252
- "loss": 0.1134,
2253
- "step": 3260
2254
- },
2255
- {
2256
- "epoch": 5.32,
2257
- "learning_rate": 0.0001978468368479467,
2258
- "loss": 0.1505,
2259
- "step": 3270
2260
- },
2261
- {
2262
- "epoch": 5.34,
2263
- "learning_rate": 0.00019751387347391786,
2264
- "loss": 0.1252,
2265
- "step": 3280
2266
- },
2267
- {
2268
- "epoch": 5.36,
2269
- "learning_rate": 0.000197180910099889,
2270
- "loss": 0.1227,
2271
- "step": 3290
2272
- },
2273
- {
2274
- "epoch": 5.37,
2275
- "learning_rate": 0.00019684794672586014,
2276
- "loss": 0.1126,
2277
- "step": 3300
2278
- },
2279
- {
2280
- "epoch": 5.37,
2281
- "eval_loss": 0.11798416078090668,
2282
- "eval_runtime": 1068.0787,
2283
- "eval_samples_per_second": 10.869,
2284
- "eval_steps_per_second": 0.68,
2285
- "eval_wer": 0.32992747342973133,
2286
- "step": 3300
2287
- },
2288
- {
2289
- "epoch": 5.39,
2290
- "learning_rate": 0.0001965149833518313,
2291
- "loss": 0.1238,
2292
- "step": 3310
2293
- },
2294
- {
2295
- "epoch": 5.4,
2296
- "learning_rate": 0.0001961820199778024,
2297
- "loss": 0.1361,
2298
- "step": 3320
2299
- },
2300
- {
2301
- "epoch": 5.42,
2302
- "learning_rate": 0.00019584905660377356,
2303
- "loss": 0.1354,
2304
- "step": 3330
2305
- },
2306
- {
2307
- "epoch": 5.44,
2308
- "learning_rate": 0.0001955160932297447,
2309
- "loss": 0.1225,
2310
- "step": 3340
2311
- },
2312
- {
2313
- "epoch": 5.45,
2314
- "learning_rate": 0.00019518312985571587,
2315
- "loss": 0.1127,
2316
- "step": 3350
2317
- },
2318
- {
2319
- "epoch": 5.47,
2320
- "learning_rate": 0.00019485016648168702,
2321
- "loss": 0.1162,
2322
- "step": 3360
2323
- },
2324
- {
2325
- "epoch": 5.49,
2326
- "learning_rate": 0.00019451720310765814,
2327
- "loss": 0.142,
2328
- "step": 3370
2329
- },
2330
- {
2331
- "epoch": 5.5,
2332
- "learning_rate": 0.0001941842397336293,
2333
- "loss": 0.1239,
2334
- "step": 3380
2335
- },
2336
- {
2337
- "epoch": 5.52,
2338
- "learning_rate": 0.00019385127635960042,
2339
- "loss": 0.1173,
2340
- "step": 3390
2341
- },
2342
- {
2343
- "epoch": 5.53,
2344
- "learning_rate": 0.00019351831298557157,
2345
- "loss": 0.1127,
2346
- "step": 3400
2347
- },
2348
- {
2349
- "epoch": 5.53,
2350
- "eval_loss": 0.11773715913295746,
2351
- "eval_runtime": 1066.7509,
2352
- "eval_samples_per_second": 10.883,
2353
- "eval_steps_per_second": 0.681,
2354
- "eval_wer": 0.330486247320166,
2355
- "step": 3400
2356
- },
2357
- {
2358
- "epoch": 5.55,
2359
- "learning_rate": 0.0001931853496115427,
2360
- "loss": 0.1243,
2361
- "step": 3410
2362
- },
2363
- {
2364
- "epoch": 5.57,
2365
- "learning_rate": 0.00019285238623751384,
2366
- "loss": 0.1427,
2367
- "step": 3420
2368
- },
2369
- {
2370
- "epoch": 5.58,
2371
- "learning_rate": 0.000192519422863485,
2372
- "loss": 0.1182,
2373
- "step": 3430
2374
- },
2375
- {
2376
- "epoch": 5.6,
2377
- "learning_rate": 0.00019218645948945615,
2378
- "loss": 0.1189,
2379
- "step": 3440
2380
- },
2381
- {
2382
- "epoch": 5.62,
2383
- "learning_rate": 0.0001918534961154273,
2384
- "loss": 0.1062,
2385
- "step": 3450
2386
- },
2387
- {
2388
- "epoch": 5.63,
2389
- "learning_rate": 0.00019152053274139842,
2390
- "loss": 0.1192,
2391
- "step": 3460
2392
- },
2393
- {
2394
- "epoch": 5.65,
2395
- "learning_rate": 0.00019118756936736957,
2396
- "loss": 0.1308,
2397
- "step": 3470
2398
- },
2399
- {
2400
- "epoch": 5.66,
2401
- "learning_rate": 0.00019085460599334073,
2402
- "loss": 0.1137,
2403
- "step": 3480
2404
- },
2405
- {
2406
- "epoch": 5.68,
2407
- "learning_rate": 0.00019052164261931185,
2408
- "loss": 0.1087,
2409
- "step": 3490
2410
- },
2411
- {
2412
- "epoch": 5.7,
2413
- "learning_rate": 0.000190188679245283,
2414
- "loss": 0.1155,
2415
- "step": 3500
2416
- },
2417
- {
2418
- "epoch": 5.7,
2419
- "eval_loss": 0.11851849406957626,
2420
- "eval_runtime": 1077.6014,
2421
- "eval_samples_per_second": 10.773,
2422
- "eval_steps_per_second": 0.674,
2423
- "eval_wer": 0.33171783058887927,
2424
- "step": 3500
2425
- },
2426
- {
2427
- "epoch": 5.71,
2428
- "learning_rate": 0.00018985571587125413,
2429
- "loss": 0.1336,
2430
- "step": 3510
2431
- },
2432
- {
2433
- "epoch": 5.73,
2434
- "learning_rate": 0.00018952275249722528,
2435
- "loss": 0.1248,
2436
- "step": 3520
2437
- },
2438
- {
2439
- "epoch": 5.75,
2440
- "learning_rate": 0.00018918978912319646,
2441
- "loss": 0.1247,
2442
- "step": 3530
2443
- },
2444
- {
2445
- "epoch": 5.76,
2446
- "learning_rate": 0.00018885682574916758,
2447
- "loss": 0.1057,
2448
- "step": 3540
2449
- },
2450
- {
2451
- "epoch": 5.78,
2452
- "learning_rate": 0.00018852386237513873,
2453
- "loss": 0.1213,
2454
- "step": 3550
2455
- },
2456
- {
2457
- "epoch": 5.79,
2458
- "learning_rate": 0.00018819089900110986,
2459
- "loss": 0.1122,
2460
- "step": 3560
2461
- },
2462
- {
2463
- "epoch": 5.81,
2464
- "learning_rate": 0.000187857935627081,
2465
- "loss": 0.1251,
2466
- "step": 3570
2467
- },
2468
- {
2469
- "epoch": 5.83,
2470
- "learning_rate": 0.00018752497225305213,
2471
- "loss": 0.1164,
2472
- "step": 3580
2473
- },
2474
- {
2475
- "epoch": 5.84,
2476
- "learning_rate": 0.00018719200887902328,
2477
- "loss": 0.1161,
2478
- "step": 3590
2479
- },
2480
- {
2481
- "epoch": 5.86,
2482
- "learning_rate": 0.00018685904550499443,
2483
- "loss": 0.1086,
2484
- "step": 3600
2485
- },
2486
- {
2487
- "epoch": 5.86,
2488
- "eval_loss": 0.11293730139732361,
2489
- "eval_runtime": 1066.1285,
2490
- "eval_samples_per_second": 10.889,
2491
- "eval_steps_per_second": 0.681,
2492
- "eval_wer": 0.322686219951649,
2493
- "step": 3600
2494
- },
2495
- {
2496
- "epoch": 5.88,
2497
- "learning_rate": 0.00018652608213096559,
2498
- "loss": 0.1214,
2499
- "step": 3610
2500
- },
2501
- {
2502
- "epoch": 5.89,
2503
- "learning_rate": 0.00018619311875693674,
2504
- "loss": 0.1279,
2505
- "step": 3620
2506
- },
2507
- {
2508
- "epoch": 5.91,
2509
- "learning_rate": 0.00018586015538290786,
2510
- "loss": 0.1201,
2511
- "step": 3630
2512
- },
2513
- {
2514
- "epoch": 5.93,
2515
- "learning_rate": 0.000185527192008879,
2516
- "loss": 0.1016,
2517
- "step": 3640
2518
- },
2519
- {
2520
- "epoch": 5.94,
2521
- "learning_rate": 0.00018519422863485016,
2522
- "loss": 0.1116,
2523
- "step": 3650
2524
- },
2525
- {
2526
- "epoch": 5.96,
2527
- "learning_rate": 0.0001848612652608213,
2528
- "loss": 0.1134,
2529
- "step": 3660
2530
- },
2531
- {
2532
- "epoch": 5.97,
2533
- "learning_rate": 0.00018452830188679244,
2534
- "loss": 0.1374,
2535
- "step": 3670
2536
- },
2537
- {
2538
- "epoch": 5.99,
2539
- "learning_rate": 0.00018419533851276356,
2540
- "loss": 0.1045,
2541
- "step": 3680
2542
- },
2543
- {
2544
- "epoch": 6.01,
2545
- "learning_rate": 0.00018386237513873471,
2546
- "loss": 0.1122,
2547
- "step": 3690
2548
- },
2549
- {
2550
- "epoch": 6.02,
2551
- "learning_rate": 0.0001835294117647059,
2552
- "loss": 0.1135,
2553
- "step": 3700
2554
- },
2555
- {
2556
- "epoch": 6.02,
2557
- "eval_loss": 0.11180546879768372,
2558
- "eval_runtime": 1077.7461,
2559
- "eval_samples_per_second": 10.772,
2560
- "eval_steps_per_second": 0.674,
2561
- "eval_wer": 0.32663184783104504,
2562
- "step": 3700
2563
- },
2564
- {
2565
- "epoch": 6.04,
2566
- "learning_rate": 0.00018319644839067702,
2567
- "loss": 0.0972,
2568
- "step": 3710
2569
- },
2570
- {
2571
- "epoch": 6.06,
2572
- "learning_rate": 0.00018286348501664817,
2573
- "loss": 0.1149,
2574
- "step": 3720
2575
- },
2576
- {
2577
- "epoch": 6.07,
2578
- "learning_rate": 0.0001825305216426193,
2579
- "loss": 0.1192,
2580
- "step": 3730
2581
- },
2582
- {
2583
- "epoch": 6.09,
2584
- "learning_rate": 0.00018219755826859044,
2585
- "loss": 0.1311,
2586
- "step": 3740
2587
- },
2588
- {
2589
- "epoch": 6.1,
2590
- "learning_rate": 0.00018186459489456157,
2591
- "loss": 0.1149,
2592
- "step": 3750
2593
- },
2594
- {
2595
- "epoch": 6.12,
2596
- "learning_rate": 0.00018153163152053272,
2597
- "loss": 0.0989,
2598
- "step": 3760
2599
- },
2600
- {
2601
- "epoch": 6.14,
2602
- "learning_rate": 0.00018119866814650384,
2603
- "loss": 0.1192,
2604
- "step": 3770
2605
- },
2606
- {
2607
- "epoch": 6.15,
2608
- "learning_rate": 0.000180865704772475,
2609
- "loss": 0.1011,
2610
- "step": 3780
2611
- },
2612
- {
2613
- "epoch": 6.17,
2614
- "learning_rate": 0.00018053274139844617,
2615
- "loss": 0.1315,
2616
- "step": 3790
2617
- },
2618
- {
2619
- "epoch": 6.19,
2620
- "learning_rate": 0.0001801997780244173,
2621
- "loss": 0.1112,
2622
- "step": 3800
2623
- },
2624
- {
2625
- "epoch": 6.19,
2626
- "eval_loss": 0.1141621470451355,
2627
- "eval_runtime": 1071.2114,
2628
- "eval_samples_per_second": 10.837,
2629
- "eval_steps_per_second": 0.678,
2630
- "eval_wer": 0.32282306253706156,
2631
- "step": 3800
2632
- },
2633
- {
2634
- "epoch": 6.2,
2635
- "learning_rate": 0.00017986681465038845,
2636
- "loss": 0.12,
2637
- "step": 3810
2638
- },
2639
- {
2640
- "epoch": 6.22,
2641
- "learning_rate": 0.00017953385127635957,
2642
- "loss": 0.1038,
2643
- "step": 3820
2644
- },
2645
- {
2646
- "epoch": 6.23,
2647
- "learning_rate": 0.00017920088790233073,
2648
- "loss": 0.1138,
2649
- "step": 3830
2650
- },
2651
- {
2652
- "epoch": 6.25,
2653
- "learning_rate": 0.00017886792452830188,
2654
- "loss": 0.1376,
2655
- "step": 3840
2656
- },
2657
- {
2658
- "epoch": 6.27,
2659
- "learning_rate": 0.000178534961154273,
2660
- "loss": 0.1072,
2661
- "step": 3850
2662
- },
2663
- {
2664
- "epoch": 6.28,
2665
- "learning_rate": 0.00017820199778024415,
2666
- "loss": 0.1063,
2667
- "step": 3860
2668
- },
2669
- {
2670
- "epoch": 6.3,
2671
- "learning_rate": 0.0001778690344062153,
2672
- "loss": 0.1071,
2673
- "step": 3870
2674
- },
2675
- {
2676
- "epoch": 6.32,
2677
- "learning_rate": 0.00017753607103218646,
2678
- "loss": 0.1016,
2679
- "step": 3880
2680
- },
2681
- {
2682
- "epoch": 6.33,
2683
- "learning_rate": 0.0001772031076581576,
2684
- "loss": 0.1246,
2685
- "step": 3890
2686
- },
2687
- {
2688
- "epoch": 6.35,
2689
- "learning_rate": 0.00017687014428412873,
2690
- "loss": 0.0866,
2691
- "step": 3900
2692
- },
2693
- {
2694
- "epoch": 6.35,
2695
- "eval_loss": 0.11722169816493988,
2696
- "eval_runtime": 1077.3899,
2697
- "eval_samples_per_second": 10.775,
2698
- "eval_steps_per_second": 0.674,
2699
- "eval_wer": 0.32837659079505543,
2700
- "step": 3900
2701
- },
2702
- {
2703
- "epoch": 6.36,
2704
- "learning_rate": 0.00017653718091009988,
2705
- "loss": 0.1062,
2706
- "step": 3910
2707
- },
2708
- {
2709
- "epoch": 6.38,
2710
- "learning_rate": 0.000176204217536071,
2711
- "loss": 0.1214,
2712
- "step": 3920
2713
- },
2714
- {
2715
- "epoch": 6.4,
2716
- "learning_rate": 0.00017587125416204216,
2717
- "loss": 0.1138,
2718
- "step": 3930
2719
- },
2720
- {
2721
- "epoch": 6.41,
2722
- "learning_rate": 0.00017553829078801328,
2723
- "loss": 0.1216,
2724
- "step": 3940
2725
- },
2726
- {
2727
- "epoch": 6.43,
2728
- "learning_rate": 0.00017520532741398443,
2729
- "loss": 0.1111,
2730
- "step": 3950
2731
- },
2732
- {
2733
- "epoch": 6.45,
2734
- "learning_rate": 0.0001748723640399556,
2735
- "loss": 0.1044,
2736
- "step": 3960
2737
- },
2738
- {
2739
- "epoch": 6.46,
2740
- "learning_rate": 0.00017453940066592674,
2741
- "loss": 0.1108,
2742
- "step": 3970
2743
- },
2744
- {
2745
- "epoch": 6.48,
2746
- "learning_rate": 0.0001742064372918979,
2747
- "loss": 0.1083,
2748
- "step": 3980
2749
- },
2750
- {
2751
- "epoch": 6.49,
2752
- "learning_rate": 0.000173873473917869,
2753
- "loss": 0.136,
2754
- "step": 3990
2755
- },
2756
- {
2757
- "epoch": 6.51,
2758
- "learning_rate": 0.00017354051054384016,
2759
- "loss": 0.1003,
2760
- "step": 4000
2761
- },
2762
- {
2763
- "epoch": 6.51,
2764
- "eval_loss": 0.11332289129495621,
2765
- "eval_runtime": 1068.9642,
2766
- "eval_samples_per_second": 10.86,
2767
- "eval_steps_per_second": 0.679,
2768
- "eval_wer": 0.32444236646444374,
2769
- "step": 4000
2770
- },
2771
- {
2772
- "epoch": 6.53,
2773
- "learning_rate": 0.0001732075471698113,
2774
- "loss": 6.155,
2775
- "step": 4010
2776
- },
2777
- {
2778
- "epoch": 6.55,
2779
- "learning_rate": 0.00017287458379578244,
2780
- "loss": 3.1349,
2781
- "step": 4020
2782
- },
2783
- {
2784
- "epoch": 6.56,
2785
- "learning_rate": 0.0001725416204217536,
2786
- "loss": 3.0542,
2787
- "step": 4030
2788
- },
2789
- {
2790
- "epoch": 6.58,
2791
- "learning_rate": 0.00017220865704772474,
2792
- "loss": 3.0084,
2793
- "step": 4040
2794
- },
2795
- {
2796
- "epoch": 6.6,
2797
- "learning_rate": 0.0001718756936736959,
2798
- "loss": 2.9937,
2799
- "step": 4050
2800
- },
2801
- {
2802
- "epoch": 6.61,
2803
- "learning_rate": 0.00017154273029966702,
2804
- "loss": 2.9765,
2805
- "step": 4060
2806
- },
2807
- {
2808
- "epoch": 6.63,
2809
- "learning_rate": 0.00017120976692563817,
2810
- "loss": 2.9408,
2811
- "step": 4070
2812
- },
2813
- {
2814
- "epoch": 6.64,
2815
- "learning_rate": 0.00017087680355160932,
2816
- "loss": 2.6233,
2817
- "step": 4080
2818
- },
2819
- {
2820
- "epoch": 6.66,
2821
- "learning_rate": 0.00017054384017758045,
2822
- "loss": 1.4399,
2823
- "step": 4090
2824
- },
2825
- {
2826
- "epoch": 6.68,
2827
- "learning_rate": 0.0001702108768035516,
2828
- "loss": 0.4366,
2829
- "step": 4100
2830
- },
2831
- {
2832
- "epoch": 6.68,
2833
- "eval_loss": 0.24362534284591675,
2834
- "eval_runtime": 1064.9477,
2835
- "eval_samples_per_second": 10.901,
2836
- "eval_steps_per_second": 0.682,
2837
- "eval_wer": 0.4587077498517539,
2838
- "step": 4100
2839
- },
2840
- {
2841
- "epoch": 6.69,
2842
- "learning_rate": 0.00016987791342952272,
2843
- "loss": 0.2313,
2844
- "step": 4110
2845
- },
2846
- {
2847
- "epoch": 6.71,
2848
- "learning_rate": 0.00016954495005549387,
2849
- "loss": 0.1763,
2850
- "step": 4120
2851
- },
2852
- {
2853
- "epoch": 6.73,
2854
- "learning_rate": 0.00016921198668146505,
2855
- "loss": 0.1956,
2856
- "step": 4130
2857
- },
2858
- {
2859
- "epoch": 6.74,
2860
- "learning_rate": 0.00016887902330743617,
2861
- "loss": 0.1825,
2862
- "step": 4140
2863
- },
2864
- {
2865
- "epoch": 6.76,
2866
- "learning_rate": 0.00016854605993340733,
2867
- "loss": 0.1533,
2868
- "step": 4150
2869
- },
2870
- {
2871
- "epoch": 6.77,
2872
- "learning_rate": 0.00016821309655937845,
2873
- "loss": 0.1459,
2874
- "step": 4160
2875
- },
2876
- {
2877
- "epoch": 6.79,
2878
- "learning_rate": 0.0001678801331853496,
2879
- "loss": 0.1547,
2880
- "step": 4170
2881
- },
2882
- {
2883
- "epoch": 6.81,
2884
- "learning_rate": 0.00016754716981132073,
2885
- "loss": 0.1347,
2886
- "step": 4180
2887
- },
2888
- {
2889
- "epoch": 6.82,
2890
- "learning_rate": 0.00016721420643729188,
2891
- "loss": 0.1387,
2892
- "step": 4190
2893
- },
2894
- {
2895
- "epoch": 6.84,
2896
- "learning_rate": 0.000166881243063263,
2897
- "loss": 0.1216,
2898
- "step": 4200
2899
- },
2900
- {
2901
- "epoch": 6.84,
2902
- "eval_loss": 0.1344495564699173,
2903
- "eval_runtime": 1042.0044,
2904
- "eval_samples_per_second": 11.141,
2905
- "eval_steps_per_second": 0.697,
2906
- "eval_wer": 0.33859417050586144,
2907
- "step": 4200
2908
- },
2909
- {
2910
- "epoch": 6.86,
2911
- "learning_rate": 0.00016654827968923415,
2912
- "loss": 0.1096,
2913
- "step": 4210
2914
- },
2915
- {
2916
- "epoch": 6.87,
2917
- "learning_rate": 0.00016621531631520533,
2918
- "loss": 0.1429,
2919
- "step": 4220
2920
- },
2921
- {
2922
- "epoch": 6.89,
2923
- "learning_rate": 0.00016588235294117646,
2924
- "loss": 0.139,
2925
- "step": 4230
2926
- },
2927
- {
2928
- "epoch": 6.9,
2929
- "learning_rate": 0.0001655493895671476,
2930
- "loss": 0.1343,
2931
- "step": 4240
2932
- },
2933
- {
2934
- "epoch": 6.92,
2935
- "learning_rate": 0.00016521642619311873,
2936
- "loss": 0.1223,
2937
- "step": 4250
2938
- },
2939
- {
2940
- "epoch": 6.94,
2941
- "learning_rate": 0.00016488346281908988,
2942
- "loss": 0.1298,
2943
- "step": 4260
2944
- },
2945
- {
2946
- "epoch": 6.95,
2947
- "learning_rate": 0.00016455049944506103,
2948
- "loss": 0.1549,
2949
- "step": 4270
2950
- },
2951
- {
2952
- "epoch": 6.97,
2953
- "learning_rate": 0.00016421753607103216,
2954
- "loss": 0.1378,
2955
- "step": 4280
2956
- },
2957
- {
2958
- "epoch": 6.99,
2959
- "learning_rate": 0.0001638845726970033,
2960
- "loss": 0.1492,
2961
- "step": 4290
2962
- },
2963
- {
2964
- "epoch": 7.0,
2965
- "learning_rate": 0.00016355160932297446,
2966
- "loss": 0.1165,
2967
- "step": 4300
2968
- },
2969
- {
2970
- "epoch": 7.0,
2971
- "eval_loss": 0.12804068624973297,
2972
- "eval_runtime": 1059.2138,
2973
- "eval_samples_per_second": 10.96,
2974
- "eval_steps_per_second": 0.685,
2975
- "eval_wer": 0.3323564293208046,
2976
- "step": 4300
2977
- },
2978
- {
2979
- "epoch": 7.02,
2980
- "learning_rate": 0.0001632186459489456,
2981
- "loss": 0.1136,
2982
- "step": 4310
2983
- },
2984
- {
2985
- "epoch": 7.03,
2986
- "learning_rate": 0.00016288568257491676,
2987
- "loss": 0.1041,
2988
- "step": 4320
2989
- },
2990
- {
2991
- "epoch": 7.05,
2992
- "learning_rate": 0.0001625527192008879,
2993
- "loss": 0.1216,
2994
- "step": 4330
2995
- },
2996
- {
2997
- "epoch": 7.07,
2998
- "learning_rate": 0.00016221975582685904,
2999
- "loss": 0.1156,
3000
- "step": 4340
3001
- },
3002
- {
3003
- "epoch": 7.08,
3004
- "learning_rate": 0.00016188679245283016,
3005
- "loss": 0.1433,
3006
- "step": 4350
3007
- },
3008
- {
3009
- "epoch": 7.1,
3010
- "learning_rate": 0.00016155382907880132,
3011
- "loss": 0.0961,
3012
- "step": 4360
3013
- },
3014
- {
3015
- "epoch": 7.12,
3016
- "learning_rate": 0.00016122086570477244,
3017
- "loss": 0.1292,
3018
- "step": 4370
3019
- },
3020
- {
3021
- "epoch": 7.13,
3022
- "learning_rate": 0.0001608879023307436,
3023
- "loss": 0.1118,
3024
- "step": 4380
3025
- },
3026
- {
3027
- "epoch": 7.15,
3028
- "learning_rate": 0.00016055493895671477,
3029
- "loss": 0.1076,
3030
- "step": 4390
3031
- },
3032
- {
3033
- "epoch": 7.17,
3034
- "learning_rate": 0.0001602219755826859,
3035
- "loss": 0.131,
3036
- "step": 4400
3037
- },
3038
- {
3039
- "epoch": 7.17,
3040
- "eval_loss": 0.12520039081573486,
3041
- "eval_runtime": 1045.0008,
3042
- "eval_samples_per_second": 11.109,
3043
- "eval_steps_per_second": 0.695,
3044
- "eval_wer": 0.3244879806595813,
3045
- "step": 4400
3046
- },
3047
- {
3048
- "epoch": 7.18,
3049
- "learning_rate": 0.00015988901220865705,
3050
- "loss": 0.1108,
3051
- "step": 4410
3052
- },
3053
- {
3054
- "epoch": 7.2,
3055
- "learning_rate": 0.00015955604883462817,
3056
- "loss": 0.1011,
3057
- "step": 4420
3058
- },
3059
- {
3060
- "epoch": 7.21,
3061
- "learning_rate": 0.00015922308546059932,
3062
- "loss": 0.1123,
3063
- "step": 4430
3064
- },
3065
- {
3066
- "epoch": 7.23,
3067
- "learning_rate": 0.00015889012208657047,
3068
- "loss": 0.119,
3069
- "step": 4440
3070
- },
3071
- {
3072
- "epoch": 7.25,
3073
- "learning_rate": 0.0001585571587125416,
3074
- "loss": 0.1318,
3075
- "step": 4450
3076
- },
3077
- {
3078
- "epoch": 7.26,
3079
- "learning_rate": 0.00015822419533851275,
3080
- "loss": 0.1212,
3081
- "step": 4460
3082
- },
3083
- {
3084
- "epoch": 7.28,
3085
- "learning_rate": 0.00015789123196448387,
3086
- "loss": 0.1036,
3087
- "step": 4470
3088
- },
3089
- {
3090
- "epoch": 7.3,
3091
- "learning_rate": 0.00015755826859045505,
3092
- "loss": 0.1113,
3093
- "step": 4480
3094
- },
3095
- {
3096
- "epoch": 7.31,
3097
- "learning_rate": 0.0001572253052164262,
3098
- "loss": 0.1028,
3099
- "step": 4490
3100
- },
3101
- {
3102
- "epoch": 7.33,
3103
- "learning_rate": 0.00015689234184239733,
3104
- "loss": 0.1407,
3105
- "step": 4500
3106
- },
3107
- {
3108
- "epoch": 7.33,
3109
- "eval_loss": 0.12339464575052261,
3110
- "eval_runtime": 1067.1403,
3111
- "eval_samples_per_second": 10.879,
3112
- "eval_steps_per_second": 0.68,
3113
- "eval_wer": 0.32521780778178166,
3114
- "step": 4500
3115
- },
3116
- {
3117
- "epoch": 7.34,
3118
- "learning_rate": 0.00015655937846836848,
3119
- "loss": 0.1006,
3120
- "step": 4510
3121
- },
3122
- {
3123
- "epoch": 7.36,
3124
- "learning_rate": 0.0001562264150943396,
3125
- "loss": 0.099,
3126
- "step": 4520
3127
- },
3128
- {
3129
- "epoch": 7.38,
3130
- "learning_rate": 0.00015589345172031075,
3131
- "loss": 0.1091,
3132
- "step": 4530
3133
- },
3134
- {
3135
- "epoch": 7.39,
3136
- "learning_rate": 0.00015556048834628188,
3137
- "loss": 0.1087,
3138
- "step": 4540
3139
- },
3140
- {
3141
- "epoch": 7.41,
3142
- "learning_rate": 0.00015522752497225303,
3143
- "loss": 0.1429,
3144
- "step": 4550
3145
- },
3146
- {
3147
- "epoch": 7.43,
3148
- "learning_rate": 0.0001548945615982242,
3149
- "loss": 0.1038,
3150
- "step": 4560
3151
- },
3152
- {
3153
- "epoch": 7.44,
3154
- "learning_rate": 0.00015456159822419533,
3155
- "loss": 0.1086,
3156
- "step": 4570
3157
- },
3158
- {
3159
- "epoch": 7.46,
3160
- "learning_rate": 0.00015422863485016648,
3161
- "loss": 0.1144,
3162
- "step": 4580
3163
- },
3164
- {
3165
- "epoch": 7.47,
3166
- "learning_rate": 0.0001538956714761376,
3167
- "loss": 0.1135,
3168
- "step": 4590
3169
- },
3170
- {
3171
- "epoch": 7.49,
3172
- "learning_rate": 0.00015356270810210876,
3173
- "loss": 0.1394,
3174
- "step": 4600
3175
- },
3176
- {
3177
- "epoch": 7.49,
3178
- "eval_loss": 0.12083031237125397,
3179
- "eval_runtime": 1054.1862,
3180
- "eval_samples_per_second": 11.012,
3181
- "eval_steps_per_second": 0.689,
3182
- "eval_wer": 0.3176800620353054,
3183
- "step": 4600
3184
- },
3185
- {
3186
- "epoch": 7.51,
3187
- "learning_rate": 0.00015322974472807988,
3188
- "loss": 0.0979,
3189
- "step": 4610
3190
- },
3191
- {
3192
- "epoch": 7.52,
3193
- "learning_rate": 0.00015289678135405103,
3194
- "loss": 0.1036,
3195
- "step": 4620
3196
- },
3197
- {
3198
- "epoch": 7.54,
3199
- "learning_rate": 0.00015256381798002219,
3200
- "loss": 0.1145,
3201
- "step": 4630
3202
- },
3203
- {
3204
- "epoch": 7.56,
3205
- "learning_rate": 0.0001522308546059933,
3206
- "loss": 0.1173,
3207
- "step": 4640
3208
- },
3209
- {
3210
- "epoch": 7.57,
3211
- "learning_rate": 0.0001518978912319645,
3212
- "loss": 0.1222,
3213
- "step": 4650
3214
- },
3215
- {
3216
- "epoch": 7.59,
3217
- "learning_rate": 0.0001515649278579356,
3218
- "loss": 0.0967,
3219
- "step": 4660
3220
- },
3221
- {
3222
- "epoch": 7.6,
3223
- "learning_rate": 0.00015123196448390676,
3224
- "loss": 0.1034,
3225
- "step": 4670
3226
- },
3227
- {
3228
- "epoch": 7.62,
3229
- "learning_rate": 0.00015089900110987792,
3230
- "loss": 0.1085,
3231
- "step": 4680
3232
- },
3233
- {
3234
- "epoch": 7.64,
3235
- "learning_rate": 0.00015056603773584904,
3236
- "loss": 0.1081,
3237
- "step": 4690
3238
- },
3239
- {
3240
- "epoch": 7.65,
3241
- "learning_rate": 0.0001502330743618202,
3242
- "loss": 0.1449,
3243
- "step": 4700
3244
- },
3245
- {
3246
- "epoch": 7.65,
3247
- "eval_loss": 0.11798720061779022,
3248
- "eval_runtime": 1055.6152,
3249
- "eval_samples_per_second": 10.997,
3250
- "eval_steps_per_second": 0.688,
3251
- "eval_wer": 0.31648268941294533,
3252
- "step": 4700
3253
- },
3254
- {
3255
- "epoch": 7.67,
3256
- "learning_rate": 0.00014990011098779132,
3257
- "loss": 0.0958,
3258
- "step": 4710
3259
- },
3260
- {
3261
- "epoch": 7.69,
3262
- "learning_rate": 0.00014956714761376247,
3263
- "loss": 0.0984,
3264
- "step": 4720
3265
- },
3266
- {
3267
- "epoch": 7.7,
3268
- "learning_rate": 0.00014923418423973362,
3269
- "loss": 0.11,
3270
- "step": 4730
3271
- },
3272
- {
3273
- "epoch": 7.72,
3274
- "learning_rate": 0.00014890122086570477,
3275
- "loss": 0.1064,
3276
- "step": 4740
3277
- },
3278
- {
3279
- "epoch": 7.73,
3280
- "learning_rate": 0.0001485682574916759,
3281
- "loss": 0.1332,
3282
- "step": 4750
3283
- },
3284
- {
3285
- "epoch": 7.75,
3286
- "learning_rate": 0.00014823529411764705,
3287
- "loss": 0.1068,
3288
- "step": 4760
3289
- },
3290
- {
3291
- "epoch": 7.77,
3292
- "learning_rate": 0.0001479023307436182,
3293
- "loss": 0.097,
3294
- "step": 4770
3295
- },
3296
- {
3297
- "epoch": 7.78,
3298
- "learning_rate": 0.00014756936736958932,
3299
- "loss": 0.1058,
3300
- "step": 4780
3301
- },
3302
- {
3303
- "epoch": 7.8,
3304
- "learning_rate": 0.00014723640399556047,
3305
- "loss": 0.1074,
3306
- "step": 4790
3307
- },
3308
- {
3309
- "epoch": 7.82,
3310
- "learning_rate": 0.00014690344062153162,
3311
- "loss": 0.1295,
3312
- "step": 4800
3313
- },
3314
- {
3315
- "epoch": 7.82,
3316
- "eval_loss": 0.11703231930732727,
3317
- "eval_runtime": 1061.0638,
3318
- "eval_samples_per_second": 10.941,
3319
- "eval_steps_per_second": 0.684,
3320
- "eval_wer": 0.3151712813027414,
3321
- "step": 4800
3322
- }
3323
- ],
3324
- "logging_steps": 10,
3325
- "max_steps": 9210,
3326
- "num_train_epochs": 15,
3327
- "save_steps": 200,
3328
- "total_flos": 5.031639452063693e+19,
3329
- "trial_name": null,
3330
- "trial_params": null
3331
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-4800/training_args.bin DELETED
Binary file (4.6 kB)