Amiran13 commited on
Commit
c97563f
1 Parent(s): 05bdf71

Delete checkpoint-1400

Browse files
checkpoint-1400/config.json DELETED
@@ -1,117 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
- "activation_dropout": 0.0,
4
- "adapter_attn_dim": null,
5
- "adapter_kernel_size": 3,
6
- "adapter_stride": 2,
7
- "add_adapter": false,
8
- "apply_spec_augment": true,
9
- "architectures": [
10
- "Wav2Vec2ForCTC"
11
- ],
12
- "attention_dropout": 0.1,
13
- "bos_token_id": 1,
14
- "classifier_proj_size": 256,
15
- "codevector_dim": 768,
16
- "contrastive_logits_temperature": 0.1,
17
- "conv_bias": true,
18
- "conv_dim": [
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512,
25
- 512
26
- ],
27
- "conv_kernel": [
28
- 10,
29
- 3,
30
- 3,
31
- 3,
32
- 3,
33
- 2,
34
- 2
35
- ],
36
- "conv_stride": [
37
- 5,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2,
43
- 2
44
- ],
45
- "ctc_loss_reduction": "mean",
46
- "ctc_zero_infinity": false,
47
- "diversity_loss_weight": 0.1,
48
- "do_stable_layer_norm": true,
49
- "eos_token_id": 2,
50
- "feat_extract_activation": "gelu",
51
- "feat_extract_dropout": 0.0,
52
- "feat_extract_norm": "layer",
53
- "feat_proj_dropout": 0.0,
54
- "feat_quantizer_dropout": 0.0,
55
- "final_dropout": 0.0,
56
- "gradient_checkpointing": false,
57
- "hidden_act": "gelu",
58
- "hidden_dropout": 0.1,
59
- "hidden_size": 1024,
60
- "initializer_range": 0.02,
61
- "intermediate_size": 4096,
62
- "layer_norm_eps": 1e-05,
63
- "layerdrop": 0.1,
64
- "mask_channel_length": 10,
65
- "mask_channel_min_space": 1,
66
- "mask_channel_other": 0.0,
67
- "mask_channel_prob": 0.0,
68
- "mask_channel_selection": "static",
69
- "mask_feature_length": 10,
70
- "mask_feature_min_masks": 0,
71
- "mask_feature_prob": 0.0,
72
- "mask_time_length": 10,
73
- "mask_time_min_masks": 2,
74
- "mask_time_min_space": 1,
75
- "mask_time_other": 0.0,
76
- "mask_time_prob": 0.05,
77
- "mask_time_selection": "static",
78
- "model_type": "wav2vec2",
79
- "num_adapter_layers": 3,
80
- "num_attention_heads": 16,
81
- "num_codevector_groups": 2,
82
- "num_codevectors_per_group": 320,
83
- "num_conv_pos_embedding_groups": 16,
84
- "num_conv_pos_embeddings": 128,
85
- "num_feat_extract_layers": 7,
86
- "num_hidden_layers": 24,
87
- "num_negatives": 100,
88
- "output_hidden_size": 1024,
89
- "pad_token_id": 38,
90
- "proj_codevector_dim": 768,
91
- "tdnn_dilation": [
92
- 1,
93
- 2,
94
- 3,
95
- 1,
96
- 1
97
- ],
98
- "tdnn_dim": [
99
- 512,
100
- 512,
101
- 512,
102
- 512,
103
- 1500
104
- ],
105
- "tdnn_kernel": [
106
- 5,
107
- 3,
108
- 3,
109
- 1,
110
- 1
111
- ],
112
- "torch_dtype": "float32",
113
- "transformers_version": "4.35.2",
114
- "use_weighted_layer_sum": false,
115
- "vocab_size": 41,
116
- "xvector_output_dim": 512
117
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1400/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a357ea7d3c4a3d555041a441b5492c6783a8221d59e04ec0d6907c6f1e8ef2d
3
- size 1261975580
 
 
 
 
checkpoint-1400/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:be42baca93fcb1d825980ae2e5f32bdaee712d1316bf47bd24a302535eddddde
3
- size 2490495926
 
 
 
 
checkpoint-1400/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-1400/rng_state.pth DELETED
Binary file (14.3 kB)
 
checkpoint-1400/scheduler.pt DELETED
Binary file (1.06 kB)
 
checkpoint-1400/trainer_state.json DELETED
@@ -1,985 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.2782750203417415,
5
- "eval_steps": 100,
6
- "global_step": 1400,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.02,
13
- "learning_rate": 1.4999999999999999e-05,
14
- "loss": 11.8163,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.03,
19
- "learning_rate": 2.8499999999999998e-05,
20
- "loss": 11.8929,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.05,
25
- "learning_rate": 4.3499999999999993e-05,
26
- "loss": 12.0064,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.07,
31
- "learning_rate": 5.6999999999999996e-05,
32
- "loss": 10.5132,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.08,
37
- "learning_rate": 7.199999999999999e-05,
38
- "loss": 7.9699,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.1,
43
- "learning_rate": 8.699999999999999e-05,
44
- "loss": 4.7703,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.11,
49
- "learning_rate": 0.000102,
50
- "loss": 3.7982,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.13,
55
- "learning_rate": 0.000117,
56
- "loss": 3.436,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.15,
61
- "learning_rate": 0.00013199999999999998,
62
- "loss": 3.2573,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.16,
67
- "learning_rate": 0.000147,
68
- "loss": 3.1412,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.16,
73
- "eval_loss": 3.0855324268341064,
74
- "eval_runtime": 1038.5545,
75
- "eval_samples_per_second": 11.178,
76
- "eval_steps_per_second": 0.699,
77
- "eval_wer": 1.0,
78
- "step": 100
79
- },
80
- {
81
- "epoch": 0.18,
82
- "learning_rate": 0.000162,
83
- "loss": 3.0577,
84
- "step": 110
85
- },
86
- {
87
- "epoch": 0.2,
88
- "learning_rate": 0.00017699999999999997,
89
- "loss": 3.0443,
90
- "step": 120
91
- },
92
- {
93
- "epoch": 0.21,
94
- "learning_rate": 0.00019199999999999998,
95
- "loss": 3.0597,
96
- "step": 130
97
- },
98
- {
99
- "epoch": 0.23,
100
- "learning_rate": 0.00020699999999999996,
101
- "loss": 3.061,
102
- "step": 140
103
- },
104
- {
105
- "epoch": 0.24,
106
- "learning_rate": 0.00022199999999999998,
107
- "loss": 3.0717,
108
- "step": 150
109
- },
110
- {
111
- "epoch": 0.26,
112
- "learning_rate": 0.000237,
113
- "loss": 3.0299,
114
- "step": 160
115
- },
116
- {
117
- "epoch": 0.28,
118
- "learning_rate": 0.00025199999999999995,
119
- "loss": 3.0295,
120
- "step": 170
121
- },
122
- {
123
- "epoch": 0.29,
124
- "learning_rate": 0.000267,
125
- "loss": 3.0423,
126
- "step": 180
127
- },
128
- {
129
- "epoch": 0.31,
130
- "learning_rate": 0.00028199999999999997,
131
- "loss": 3.0437,
132
- "step": 190
133
- },
134
- {
135
- "epoch": 0.33,
136
- "learning_rate": 0.00029699999999999996,
137
- "loss": 3.0569,
138
- "step": 200
139
- },
140
- {
141
- "epoch": 0.33,
142
- "eval_loss": 3.036912679672241,
143
- "eval_runtime": 1030.0275,
144
- "eval_samples_per_second": 11.271,
145
- "eval_steps_per_second": 0.705,
146
- "eval_wer": 1.0,
147
- "step": 200
148
- },
149
- {
150
- "epoch": 0.34,
151
- "learning_rate": 0.00029973362930077685,
152
- "loss": 3.0243,
153
- "step": 210
154
- },
155
- {
156
- "epoch": 0.36,
157
- "learning_rate": 0.000299400665926748,
158
- "loss": 3.0236,
159
- "step": 220
160
- },
161
- {
162
- "epoch": 0.37,
163
- "learning_rate": 0.0002990677025527192,
164
- "loss": 3.0316,
165
- "step": 230
166
- },
167
- {
168
- "epoch": 0.39,
169
- "learning_rate": 0.0002987347391786903,
170
- "loss": 3.0363,
171
- "step": 240
172
- },
173
- {
174
- "epoch": 0.41,
175
- "learning_rate": 0.00029840177580466146,
176
- "loss": 3.0582,
177
- "step": 250
178
- },
179
- {
180
- "epoch": 0.42,
181
- "learning_rate": 0.0002980688124306326,
182
- "loss": 3.0144,
183
- "step": 260
184
- },
185
- {
186
- "epoch": 0.44,
187
- "learning_rate": 0.00029773584905660376,
188
- "loss": 3.0126,
189
- "step": 270
190
- },
191
- {
192
- "epoch": 0.46,
193
- "learning_rate": 0.0002974028856825749,
194
- "loss": 3.0035,
195
- "step": 280
196
- },
197
- {
198
- "epoch": 0.47,
199
- "learning_rate": 0.000297069922308546,
200
- "loss": 2.9913,
201
- "step": 290
202
- },
203
- {
204
- "epoch": 0.49,
205
- "learning_rate": 0.00029673695893451716,
206
- "loss": 2.9625,
207
- "step": 300
208
- },
209
- {
210
- "epoch": 0.49,
211
- "eval_loss": 2.977771520614624,
212
- "eval_runtime": 1042.3792,
213
- "eval_samples_per_second": 11.137,
214
- "eval_steps_per_second": 0.696,
215
- "eval_wer": 1.0,
216
- "step": 300
217
- },
218
- {
219
- "epoch": 0.5,
220
- "learning_rate": 0.0002964039955604883,
221
- "loss": 2.9686,
222
- "step": 310
223
- },
224
- {
225
- "epoch": 0.52,
226
- "learning_rate": 0.00029607103218645946,
227
- "loss": 2.8773,
228
- "step": 320
229
- },
230
- {
231
- "epoch": 0.54,
232
- "learning_rate": 0.0002957380688124306,
233
- "loss": 2.726,
234
- "step": 330
235
- },
236
- {
237
- "epoch": 0.55,
238
- "learning_rate": 0.00029540510543840177,
239
- "loss": 2.3671,
240
- "step": 340
241
- },
242
- {
243
- "epoch": 0.57,
244
- "learning_rate": 0.0002950721420643729,
245
- "loss": 1.9479,
246
- "step": 350
247
- },
248
- {
249
- "epoch": 0.59,
250
- "learning_rate": 0.000294739178690344,
251
- "loss": 1.5549,
252
- "step": 360
253
- },
254
- {
255
- "epoch": 0.6,
256
- "learning_rate": 0.00029440621531631517,
257
- "loss": 1.1765,
258
- "step": 370
259
- },
260
- {
261
- "epoch": 0.62,
262
- "learning_rate": 0.0002940732519422863,
263
- "loss": 0.9155,
264
- "step": 380
265
- },
266
- {
267
- "epoch": 0.63,
268
- "learning_rate": 0.00029374028856825747,
269
- "loss": 0.7823,
270
- "step": 390
271
- },
272
- {
273
- "epoch": 0.65,
274
- "learning_rate": 0.0002934073251942286,
275
- "loss": 0.7715,
276
- "step": 400
277
- },
278
- {
279
- "epoch": 0.65,
280
- "eval_loss": 0.5112669467926025,
281
- "eval_runtime": 1048.8544,
282
- "eval_samples_per_second": 11.068,
283
- "eval_steps_per_second": 0.692,
284
- "eval_wer": 0.718480591159969,
285
- "step": 400
286
- },
287
- {
288
- "epoch": 0.67,
289
- "learning_rate": 0.00029307436182019977,
290
- "loss": 0.6635,
291
- "step": 410
292
- },
293
- {
294
- "epoch": 0.68,
295
- "learning_rate": 0.0002927413984461709,
296
- "loss": 0.6032,
297
- "step": 420
298
- },
299
- {
300
- "epoch": 0.7,
301
- "learning_rate": 0.000292408435072142,
302
- "loss": 0.5401,
303
- "step": 430
304
- },
305
- {
306
- "epoch": 0.72,
307
- "learning_rate": 0.00029207547169811317,
308
- "loss": 0.5276,
309
- "step": 440
310
- },
311
- {
312
- "epoch": 0.73,
313
- "learning_rate": 0.0002917425083240843,
314
- "loss": 0.5713,
315
- "step": 450
316
- },
317
- {
318
- "epoch": 0.75,
319
- "learning_rate": 0.0002914095449500555,
320
- "loss": 0.46,
321
- "step": 460
322
- },
323
- {
324
- "epoch": 0.76,
325
- "learning_rate": 0.0002910765815760266,
326
- "loss": 0.4508,
327
- "step": 470
328
- },
329
- {
330
- "epoch": 0.78,
331
- "learning_rate": 0.0002907436182019977,
332
- "loss": 0.447,
333
- "step": 480
334
- },
335
- {
336
- "epoch": 0.8,
337
- "learning_rate": 0.00029041065482796893,
338
- "loss": 0.4454,
339
- "step": 490
340
- },
341
- {
342
- "epoch": 0.81,
343
- "learning_rate": 0.0002900776914539401,
344
- "loss": 0.4725,
345
- "step": 500
346
- },
347
- {
348
- "epoch": 0.81,
349
- "eval_loss": 0.3072386682033539,
350
- "eval_runtime": 1051.5656,
351
- "eval_samples_per_second": 11.04,
352
- "eval_steps_per_second": 0.69,
353
- "eval_wer": 0.5137640833827487,
354
- "step": 500
355
- },
356
- {
357
- "epoch": 0.83,
358
- "learning_rate": 0.0002897447280799112,
359
- "loss": 0.3822,
360
- "step": 510
361
- },
362
- {
363
- "epoch": 0.85,
364
- "learning_rate": 0.00028941176470588233,
365
- "loss": 0.3807,
366
- "step": 520
367
- },
368
- {
369
- "epoch": 0.86,
370
- "learning_rate": 0.0002890788013318535,
371
- "loss": 0.3827,
372
- "step": 530
373
- },
374
- {
375
- "epoch": 0.88,
376
- "learning_rate": 0.00028874583795782463,
377
- "loss": 0.3945,
378
- "step": 540
379
- },
380
- {
381
- "epoch": 0.9,
382
- "learning_rate": 0.00028841287458379573,
383
- "loss": 0.4519,
384
- "step": 550
385
- },
386
- {
387
- "epoch": 0.91,
388
- "learning_rate": 0.0002880799112097669,
389
- "loss": 0.3534,
390
- "step": 560
391
- },
392
- {
393
- "epoch": 0.93,
394
- "learning_rate": 0.00028774694783573803,
395
- "loss": 0.3731,
396
- "step": 570
397
- },
398
- {
399
- "epoch": 0.94,
400
- "learning_rate": 0.0002874139844617092,
401
- "loss": 0.3433,
402
- "step": 580
403
- },
404
- {
405
- "epoch": 0.96,
406
- "learning_rate": 0.00028708102108768033,
407
- "loss": 0.3547,
408
- "step": 590
409
- },
410
- {
411
- "epoch": 0.98,
412
- "learning_rate": 0.0002867480577136515,
413
- "loss": 0.4103,
414
- "step": 600
415
- },
416
- {
417
- "epoch": 0.98,
418
- "eval_loss": 0.24468904733657837,
419
- "eval_runtime": 1043.2136,
420
- "eval_samples_per_second": 11.128,
421
- "eval_steps_per_second": 0.696,
422
- "eval_wer": 0.4336997673676048,
423
- "step": 600
424
- },
425
- {
426
- "epoch": 0.99,
427
- "learning_rate": 0.00028641509433962264,
428
- "loss": 0.3484,
429
- "step": 610
430
- },
431
- {
432
- "epoch": 1.01,
433
- "learning_rate": 0.00028608213096559373,
434
- "loss": 0.3775,
435
- "step": 620
436
- },
437
- {
438
- "epoch": 1.03,
439
- "learning_rate": 0.0002857491675915649,
440
- "loss": 0.3207,
441
- "step": 630
442
- },
443
- {
444
- "epoch": 1.04,
445
- "learning_rate": 0.00028541620421753604,
446
- "loss": 0.2971,
447
- "step": 640
448
- },
449
- {
450
- "epoch": 1.06,
451
- "learning_rate": 0.0002850832408435072,
452
- "loss": 0.3129,
453
- "step": 650
454
- },
455
- {
456
- "epoch": 1.07,
457
- "learning_rate": 0.00028475027746947834,
458
- "loss": 0.3143,
459
- "step": 660
460
- },
461
- {
462
- "epoch": 1.09,
463
- "learning_rate": 0.0002844173140954495,
464
- "loss": 0.3179,
465
- "step": 670
466
- },
467
- {
468
- "epoch": 1.11,
469
- "learning_rate": 0.00028408435072142064,
470
- "loss": 0.299,
471
- "step": 680
472
- },
473
- {
474
- "epoch": 1.12,
475
- "learning_rate": 0.0002837513873473918,
476
- "loss": 0.2734,
477
- "step": 690
478
- },
479
- {
480
- "epoch": 1.14,
481
- "learning_rate": 0.0002834184239733629,
482
- "loss": 0.2775,
483
- "step": 700
484
- },
485
- {
486
- "epoch": 1.14,
487
- "eval_loss": 0.2055242359638214,
488
- "eval_runtime": 1053.2634,
489
- "eval_samples_per_second": 11.022,
490
- "eval_steps_per_second": 0.689,
491
- "eval_wer": 0.3768644802262464,
492
- "step": 700
493
- },
494
- {
495
- "epoch": 1.16,
496
- "learning_rate": 0.00028308546059933404,
497
- "loss": 0.2878,
498
- "step": 710
499
- },
500
- {
501
- "epoch": 1.17,
502
- "learning_rate": 0.0002827524972253052,
503
- "loss": 0.307,
504
- "step": 720
505
- },
506
- {
507
- "epoch": 1.19,
508
- "learning_rate": 0.00028241953385127634,
509
- "loss": 0.2848,
510
- "step": 730
511
- },
512
- {
513
- "epoch": 1.2,
514
- "learning_rate": 0.00028208657047724744,
515
- "loss": 0.2744,
516
- "step": 740
517
- },
518
- {
519
- "epoch": 1.22,
520
- "learning_rate": 0.00028175360710321865,
521
- "loss": 0.2839,
522
- "step": 750
523
- },
524
- {
525
- "epoch": 1.24,
526
- "learning_rate": 0.0002814206437291898,
527
- "loss": 0.2888,
528
- "step": 760
529
- },
530
- {
531
- "epoch": 1.25,
532
- "learning_rate": 0.0002810876803551609,
533
- "loss": 0.288,
534
- "step": 770
535
- },
536
- {
537
- "epoch": 1.27,
538
- "learning_rate": 0.00028075471698113205,
539
- "loss": 0.2559,
540
- "step": 780
541
- },
542
- {
543
- "epoch": 1.29,
544
- "learning_rate": 0.0002804217536071032,
545
- "loss": 0.2498,
546
- "step": 790
547
- },
548
- {
549
- "epoch": 1.3,
550
- "learning_rate": 0.00028008879023307435,
551
- "loss": 0.2554,
552
- "step": 800
553
- },
554
- {
555
- "epoch": 1.3,
556
- "eval_loss": 0.19498416781425476,
557
- "eval_runtime": 1045.8615,
558
- "eval_samples_per_second": 11.1,
559
- "eval_steps_per_second": 0.694,
560
- "eval_wer": 0.360272316744971,
561
- "step": 800
562
- },
563
- {
564
- "epoch": 1.32,
565
- "learning_rate": 0.00027975582685904545,
566
- "loss": 0.3059,
567
- "step": 810
568
- },
569
- {
570
- "epoch": 1.33,
571
- "learning_rate": 0.0002794228634850166,
572
- "loss": 0.281,
573
- "step": 820
574
- },
575
- {
576
- "epoch": 1.35,
577
- "learning_rate": 0.00027908990011098775,
578
- "loss": 0.2642,
579
- "step": 830
580
- },
581
- {
582
- "epoch": 1.37,
583
- "learning_rate": 0.0002787569367369589,
584
- "loss": 0.242,
585
- "step": 840
586
- },
587
- {
588
- "epoch": 1.38,
589
- "learning_rate": 0.00027842397336293005,
590
- "loss": 0.2541,
591
- "step": 850
592
- },
593
- {
594
- "epoch": 1.4,
595
- "learning_rate": 0.0002780910099889012,
596
- "loss": 0.2614,
597
- "step": 860
598
- },
599
- {
600
- "epoch": 1.42,
601
- "learning_rate": 0.00027775804661487236,
602
- "loss": 0.3001,
603
- "step": 870
604
- },
605
- {
606
- "epoch": 1.43,
607
- "learning_rate": 0.0002774250832408435,
608
- "loss": 0.2365,
609
- "step": 880
610
- },
611
- {
612
- "epoch": 1.45,
613
- "learning_rate": 0.0002770921198668146,
614
- "loss": 0.2373,
615
- "step": 890
616
- },
617
- {
618
- "epoch": 1.46,
619
- "learning_rate": 0.00027675915649278575,
620
- "loss": 0.263,
621
- "step": 900
622
- },
623
- {
624
- "epoch": 1.46,
625
- "eval_loss": 0.1812964379787445,
626
- "eval_runtime": 1053.0871,
627
- "eval_samples_per_second": 11.024,
628
- "eval_steps_per_second": 0.689,
629
- "eval_wer": 0.3371687269078137,
630
- "step": 900
631
- },
632
- {
633
- "epoch": 1.48,
634
- "learning_rate": 0.0002764261931187569,
635
- "loss": 0.289,
636
- "step": 910
637
- },
638
- {
639
- "epoch": 1.5,
640
- "learning_rate": 0.00027609322974472806,
641
- "loss": 0.2578,
642
- "step": 920
643
- },
644
- {
645
- "epoch": 1.51,
646
- "learning_rate": 0.0002757602663706992,
647
- "loss": 0.2565,
648
- "step": 930
649
- },
650
- {
651
- "epoch": 1.53,
652
- "learning_rate": 0.00027542730299667036,
653
- "loss": 0.2543,
654
- "step": 940
655
- },
656
- {
657
- "epoch": 1.55,
658
- "learning_rate": 0.0002750943396226415,
659
- "loss": 0.2404,
660
- "step": 950
661
- },
662
- {
663
- "epoch": 1.56,
664
- "learning_rate": 0.0002747613762486126,
665
- "loss": 0.2291,
666
- "step": 960
667
- },
668
- {
669
- "epoch": 1.58,
670
- "learning_rate": 0.00027442841287458376,
671
- "loss": 0.2636,
672
- "step": 970
673
- },
674
- {
675
- "epoch": 1.59,
676
- "learning_rate": 0.0002740954495005549,
677
- "loss": 0.2222,
678
- "step": 980
679
- },
680
- {
681
- "epoch": 1.61,
682
- "learning_rate": 0.00027376248612652606,
683
- "loss": 0.2322,
684
- "step": 990
685
- },
686
- {
687
- "epoch": 1.63,
688
- "learning_rate": 0.0002734295227524972,
689
- "loss": 0.2294,
690
- "step": 1000
691
- },
692
- {
693
- "epoch": 1.63,
694
- "eval_loss": 0.1664419174194336,
695
- "eval_runtime": 1047.59,
696
- "eval_samples_per_second": 11.082,
697
- "eval_steps_per_second": 0.693,
698
- "eval_wer": 0.3131756602654746,
699
- "step": 1000
700
- },
701
- {
702
- "epoch": 1.64,
703
- "learning_rate": 0.00027309655937846837,
704
- "loss": 0.249,
705
- "step": 1010
706
- },
707
- {
708
- "epoch": 1.66,
709
- "learning_rate": 0.0002727635960044395,
710
- "loss": 0.2682,
711
- "step": 1020
712
- },
713
- {
714
- "epoch": 1.68,
715
- "learning_rate": 0.0002724306326304106,
716
- "loss": 0.2242,
717
- "step": 1030
718
- },
719
- {
720
- "epoch": 1.69,
721
- "learning_rate": 0.00027209766925638177,
722
- "loss": 0.2333,
723
- "step": 1040
724
- },
725
- {
726
- "epoch": 1.71,
727
- "learning_rate": 0.0002717647058823529,
728
- "loss": 0.2188,
729
- "step": 1050
730
- },
731
- {
732
- "epoch": 1.72,
733
- "learning_rate": 0.00027143174250832407,
734
- "loss": 0.2592,
735
- "step": 1060
736
- },
737
- {
738
- "epoch": 1.74,
739
- "learning_rate": 0.0002710987791342952,
740
- "loss": 0.268,
741
- "step": 1070
742
- },
743
- {
744
- "epoch": 1.76,
745
- "learning_rate": 0.0002707658157602663,
746
- "loss": 0.2091,
747
- "step": 1080
748
- },
749
- {
750
- "epoch": 1.77,
751
- "learning_rate": 0.00027043285238623747,
752
- "loss": 0.2069,
753
- "step": 1090
754
- },
755
- {
756
- "epoch": 1.79,
757
- "learning_rate": 0.0002700998890122087,
758
- "loss": 0.2296,
759
- "step": 1100
760
- },
761
- {
762
- "epoch": 1.79,
763
- "eval_loss": 0.15650752186775208,
764
- "eval_runtime": 1054.82,
765
- "eval_samples_per_second": 11.006,
766
- "eval_steps_per_second": 0.688,
767
- "eval_wer": 0.2962413903206678,
768
- "step": 1100
769
- },
770
- {
771
- "epoch": 1.81,
772
- "learning_rate": 0.00026976692563817977,
773
- "loss": 0.2402,
774
- "step": 1110
775
- },
776
- {
777
- "epoch": 1.82,
778
- "learning_rate": 0.0002694339622641509,
779
- "loss": 0.2258,
780
- "step": 1120
781
- },
782
- {
783
- "epoch": 1.84,
784
- "learning_rate": 0.0002691009988901221,
785
- "loss": 0.2215,
786
- "step": 1130
787
- },
788
- {
789
- "epoch": 1.86,
790
- "learning_rate": 0.0002687680355160932,
791
- "loss": 0.2059,
792
- "step": 1140
793
- },
794
- {
795
- "epoch": 1.87,
796
- "learning_rate": 0.0002684350721420643,
797
- "loss": 0.2022,
798
- "step": 1150
799
- },
800
- {
801
- "epoch": 1.89,
802
- "learning_rate": 0.0002681021087680355,
803
- "loss": 0.2261,
804
- "step": 1160
805
- },
806
- {
807
- "epoch": 1.9,
808
- "learning_rate": 0.0002677691453940066,
809
- "loss": 0.2132,
810
- "step": 1170
811
- },
812
- {
813
- "epoch": 1.92,
814
- "learning_rate": 0.0002674361820199778,
815
- "loss": 0.2245,
816
- "step": 1180
817
- },
818
- {
819
- "epoch": 1.94,
820
- "learning_rate": 0.00026710321864594893,
821
- "loss": 0.2133,
822
- "step": 1190
823
- },
824
- {
825
- "epoch": 1.95,
826
- "learning_rate": 0.0002667702552719201,
827
- "loss": 0.2183,
828
- "step": 1200
829
- },
830
- {
831
- "epoch": 1.95,
832
- "eval_loss": 0.14735129475593567,
833
- "eval_runtime": 1059.1579,
834
- "eval_samples_per_second": 10.961,
835
- "eval_steps_per_second": 0.685,
836
- "eval_wer": 0.29856771427268164,
837
- "step": 1200
838
- },
839
- {
840
- "epoch": 1.97,
841
- "learning_rate": 0.00026643729189789123,
842
- "loss": 0.2204,
843
- "step": 1210
844
- },
845
- {
846
- "epoch": 1.99,
847
- "learning_rate": 0.00026610432852386233,
848
- "loss": 0.2482,
849
- "step": 1220
850
- },
851
- {
852
- "epoch": 2.0,
853
- "learning_rate": 0.0002657713651498335,
854
- "loss": 0.2214,
855
- "step": 1230
856
- },
857
- {
858
- "epoch": 2.02,
859
- "learning_rate": 0.00026543840177580463,
860
- "loss": 0.1779,
861
- "step": 1240
862
- },
863
- {
864
- "epoch": 2.03,
865
- "learning_rate": 0.0002651054384017758,
866
- "loss": 0.1688,
867
- "step": 1250
868
- },
869
- {
870
- "epoch": 2.05,
871
- "learning_rate": 0.00026477247502774693,
872
- "loss": 0.1915,
873
- "step": 1260
874
- },
875
- {
876
- "epoch": 2.07,
877
- "learning_rate": 0.0002644395116537181,
878
- "loss": 0.1942,
879
- "step": 1270
880
- },
881
- {
882
- "epoch": 2.08,
883
- "learning_rate": 0.00026410654827968924,
884
- "loss": 0.2266,
885
- "step": 1280
886
- },
887
- {
888
- "epoch": 2.1,
889
- "learning_rate": 0.0002637735849056604,
890
- "loss": 0.1931,
891
- "step": 1290
892
- },
893
- {
894
- "epoch": 2.12,
895
- "learning_rate": 0.0002634406215316315,
896
- "loss": 0.1822,
897
- "step": 1300
898
- },
899
- {
900
- "epoch": 2.12,
901
- "eval_loss": 0.1546158790588379,
902
- "eval_runtime": 1062.5837,
903
- "eval_samples_per_second": 10.925,
904
- "eval_steps_per_second": 0.683,
905
- "eval_wer": 0.2810974775350089,
906
- "step": 1300
907
- },
908
- {
909
- "epoch": 2.13,
910
- "learning_rate": 0.00026310765815760264,
911
- "loss": 0.1735,
912
- "step": 1310
913
- },
914
- {
915
- "epoch": 2.15,
916
- "learning_rate": 0.0002627746947835738,
917
- "loss": 0.2042,
918
- "step": 1320
919
- },
920
- {
921
- "epoch": 2.16,
922
- "learning_rate": 0.00026244173140954494,
923
- "loss": 0.2347,
924
- "step": 1330
925
- },
926
- {
927
- "epoch": 2.18,
928
- "learning_rate": 0.00026210876803551604,
929
- "loss": 0.1816,
930
- "step": 1340
931
- },
932
- {
933
- "epoch": 2.2,
934
- "learning_rate": 0.0002617758046614872,
935
- "loss": 0.1838,
936
- "step": 1350
937
- },
938
- {
939
- "epoch": 2.21,
940
- "learning_rate": 0.0002614428412874584,
941
- "loss": 0.1856,
942
- "step": 1360
943
- },
944
- {
945
- "epoch": 2.23,
946
- "learning_rate": 0.0002611098779134295,
947
- "loss": 0.1866,
948
- "step": 1370
949
- },
950
- {
951
- "epoch": 2.25,
952
- "learning_rate": 0.00026077691453940064,
953
- "loss": 0.2016,
954
- "step": 1380
955
- },
956
- {
957
- "epoch": 2.26,
958
- "learning_rate": 0.0002604439511653718,
959
- "loss": 0.1813,
960
- "step": 1390
961
- },
962
- {
963
- "epoch": 2.28,
964
- "learning_rate": 0.00026011098779134294,
965
- "loss": 0.1798,
966
- "step": 1400
967
- },
968
- {
969
- "epoch": 2.28,
970
- "eval_loss": 0.14420565962791443,
971
- "eval_runtime": 1057.3911,
972
- "eval_samples_per_second": 10.979,
973
- "eval_steps_per_second": 0.687,
974
- "eval_wer": 0.28108607398622454,
975
- "step": 1400
976
- }
977
- ],
978
- "logging_steps": 10,
979
- "max_steps": 9210,
980
- "num_train_epochs": 15,
981
- "save_steps": 200,
982
- "total_flos": 1.4734730001947961e+19,
983
- "trial_name": null,
984
- "trial_params": null
985
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1400/training_args.bin DELETED
Binary file (4.6 kB)