Sercan committed on
Commit
3e825ad
1 Parent(s): 9e976f5

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.11,
3
+ "eval_cer": 4.285298281325233,
4
+ "eval_loss": 0.2575719356536865,
5
+ "eval_runtime": 3544.648,
6
+ "eval_samples_per_second": 2.861,
7
+ "eval_steps_per_second": 0.179,
8
+ "eval_wer": 16.632698616044568,
9
+ "train_loss": 0.04895092924162745,
10
+ "train_runtime": 68546.6851,
11
+ "train_samples_per_second": 2.334,
12
+ "train_steps_per_second": 0.073
13
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.11,
3
+ "eval_cer": 4.285298281325233,
4
+ "eval_loss": 0.2575719356536865,
5
+ "eval_runtime": 3544.648,
6
+ "eval_samples_per_second": 2.861,
7
+ "eval_steps_per_second": 0.179,
8
+ "eval_wer": 16.632698616044568
9
+ }
runs/Jan22_10-09-53_192-9-144-123/events.out.tfevents.1674454367.192-9-144-123.69595.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b743b2f23343d6b7eeeee69c3560fdf113c5cb3937e4debf4ac429bc625916f6
3
+ size 405
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.11,
3
+ "train_loss": 0.04895092924162745,
4
+ "train_runtime": 68546.6851,
5
+ "train_samples_per_second": 2.334,
6
+ "train_steps_per_second": 0.073
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,745 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 16.632698616044568,
3
+ "best_model_checkpoint": "./checkpoint-4000",
4
+ "epoch": 7.1138,
5
+ "global_step": 5000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 9.200000000000001e-07,
13
+ "loss": 1.1412,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 0.02,
18
+ "learning_rate": 1.9200000000000003e-06,
19
+ "loss": 0.5256,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 0.03,
24
+ "learning_rate": 2.92e-06,
25
+ "loss": 0.1965,
26
+ "step": 150
27
+ },
28
+ {
29
+ "epoch": 0.04,
30
+ "learning_rate": 3.920000000000001e-06,
31
+ "loss": 0.1692,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.05,
36
+ "learning_rate": 4.92e-06,
37
+ "loss": 0.1573,
38
+ "step": 250
39
+ },
40
+ {
41
+ "epoch": 0.06,
42
+ "learning_rate": 5.92e-06,
43
+ "loss": 0.1267,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 0.07,
48
+ "learning_rate": 6.92e-06,
49
+ "loss": 0.1248,
50
+ "step": 350
51
+ },
52
+ {
53
+ "epoch": 0.08,
54
+ "learning_rate": 7.92e-06,
55
+ "loss": 0.1412,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 0.08,
60
+ "eval_cer": 5.2393487430695105,
61
+ "eval_loss": 0.2656303644180298,
62
+ "eval_runtime": 3483.4243,
63
+ "eval_samples_per_second": 2.912,
64
+ "eval_steps_per_second": 0.182,
65
+ "eval_wer": 19.8334595003785,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.09,
70
+ "learning_rate": 8.920000000000001e-06,
71
+ "loss": 0.1807,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 0.1,
76
+ "learning_rate": 9.920000000000002e-06,
77
+ "loss": 0.1606,
78
+ "step": 500
79
+ },
80
+ {
81
+ "epoch": 0.11,
82
+ "learning_rate": 9.89777777777778e-06,
83
+ "loss": 0.1767,
84
+ "step": 550
85
+ },
86
+ {
87
+ "epoch": 0.12,
88
+ "learning_rate": 9.786666666666667e-06,
89
+ "loss": 0.1367,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.0,
94
+ "learning_rate": 9.675555555555555e-06,
95
+ "loss": 0.1498,
96
+ "step": 650
97
+ },
98
+ {
99
+ "epoch": 1.01,
100
+ "learning_rate": 9.564444444444445e-06,
101
+ "loss": 0.1635,
102
+ "step": 700
103
+ },
104
+ {
105
+ "epoch": 1.02,
106
+ "learning_rate": 9.453333333333335e-06,
107
+ "loss": 0.1518,
108
+ "step": 750
109
+ },
110
+ {
111
+ "epoch": 1.03,
112
+ "learning_rate": 9.342222222222223e-06,
113
+ "loss": 0.0851,
114
+ "step": 800
115
+ },
116
+ {
117
+ "epoch": 1.03,
118
+ "eval_cer": 4.891634975507227,
119
+ "eval_loss": 0.23817911744117737,
120
+ "eval_runtime": 3529.8171,
121
+ "eval_samples_per_second": 2.874,
122
+ "eval_steps_per_second": 0.18,
123
+ "eval_wer": 18.63001999262408,
124
+ "step": 800
125
+ },
126
+ {
127
+ "epoch": 1.04,
128
+ "learning_rate": 9.231111111111111e-06,
129
+ "loss": 0.0817,
130
+ "step": 850
131
+ },
132
+ {
133
+ "epoch": 1.05,
134
+ "learning_rate": 9.12e-06,
135
+ "loss": 0.0717,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 1.06,
140
+ "learning_rate": 9.008888888888889e-06,
141
+ "loss": 0.0514,
142
+ "step": 950
143
+ },
144
+ {
145
+ "epoch": 1.07,
146
+ "learning_rate": 8.897777777777779e-06,
147
+ "loss": 0.0455,
148
+ "step": 1000
149
+ },
150
+ {
151
+ "epoch": 1.08,
152
+ "learning_rate": 8.786666666666668e-06,
153
+ "loss": 0.0604,
154
+ "step": 1050
155
+ },
156
+ {
157
+ "epoch": 1.09,
158
+ "learning_rate": 8.675555555555556e-06,
159
+ "loss": 0.0633,
160
+ "step": 1100
161
+ },
162
+ {
163
+ "epoch": 1.1,
164
+ "learning_rate": 8.564444444444445e-06,
165
+ "loss": 0.0533,
166
+ "step": 1150
167
+ },
168
+ {
169
+ "epoch": 1.11,
170
+ "learning_rate": 8.453333333333334e-06,
171
+ "loss": 0.0525,
172
+ "step": 1200
173
+ },
174
+ {
175
+ "epoch": 1.11,
176
+ "eval_cer": 5.223779469895081,
177
+ "eval_loss": 0.253223180770874,
178
+ "eval_runtime": 3540.1304,
179
+ "eval_samples_per_second": 2.865,
180
+ "eval_steps_per_second": 0.179,
181
+ "eval_wer": 19.169626739649452,
182
+ "step": 1200
183
+ },
184
+ {
185
+ "epoch": 1.12,
186
+ "learning_rate": 8.342222222222222e-06,
187
+ "loss": 0.0427,
188
+ "step": 1250
189
+ },
190
+ {
191
+ "epoch": 2.01,
192
+ "learning_rate": 8.231111111111112e-06,
193
+ "loss": 0.0513,
194
+ "step": 1300
195
+ },
196
+ {
197
+ "epoch": 2.02,
198
+ "learning_rate": 8.120000000000002e-06,
199
+ "loss": 0.058,
200
+ "step": 1350
201
+ },
202
+ {
203
+ "epoch": 2.03,
204
+ "learning_rate": 8.00888888888889e-06,
205
+ "loss": 0.0368,
206
+ "step": 1400
207
+ },
208
+ {
209
+ "epoch": 2.04,
210
+ "learning_rate": 7.897777777777778e-06,
211
+ "loss": 0.0268,
212
+ "step": 1450
213
+ },
214
+ {
215
+ "epoch": 2.05,
216
+ "learning_rate": 7.786666666666666e-06,
217
+ "loss": 0.0265,
218
+ "step": 1500
219
+ },
220
+ {
221
+ "epoch": 2.06,
222
+ "learning_rate": 7.675555555555556e-06,
223
+ "loss": 0.0202,
224
+ "step": 1550
225
+ },
226
+ {
227
+ "epoch": 2.07,
228
+ "learning_rate": 7.564444444444446e-06,
229
+ "loss": 0.0163,
230
+ "step": 1600
231
+ },
232
+ {
233
+ "epoch": 2.07,
234
+ "eval_cer": 4.583997670375421,
235
+ "eval_loss": 0.24465897679328918,
236
+ "eval_runtime": 3488.6047,
237
+ "eval_samples_per_second": 2.907,
238
+ "eval_steps_per_second": 0.182,
239
+ "eval_wer": 17.201420835031737,
240
+ "step": 1600
241
+ },
242
+ {
243
+ "epoch": 2.08,
244
+ "learning_rate": 7.453333333333334e-06,
245
+ "loss": 0.0171,
246
+ "step": 1650
247
+ },
248
+ {
249
+ "epoch": 2.09,
250
+ "learning_rate": 7.342222222222223e-06,
251
+ "loss": 0.0244,
252
+ "step": 1700
253
+ },
254
+ {
255
+ "epoch": 2.1,
256
+ "learning_rate": 7.231111111111112e-06,
257
+ "loss": 0.0202,
258
+ "step": 1750
259
+ },
260
+ {
261
+ "epoch": 2.11,
262
+ "learning_rate": 7.1200000000000004e-06,
263
+ "loss": 0.0171,
264
+ "step": 1800
265
+ },
266
+ {
267
+ "epoch": 2.12,
268
+ "learning_rate": 7.008888888888889e-06,
269
+ "loss": 0.0175,
270
+ "step": 1850
271
+ },
272
+ {
273
+ "epoch": 3.0,
274
+ "learning_rate": 6.897777777777779e-06,
275
+ "loss": 0.0135,
276
+ "step": 1900
277
+ },
278
+ {
279
+ "epoch": 3.01,
280
+ "learning_rate": 6.786666666666667e-06,
281
+ "loss": 0.0219,
282
+ "step": 1950
283
+ },
284
+ {
285
+ "epoch": 3.02,
286
+ "learning_rate": 6.675555555555556e-06,
287
+ "loss": 0.0202,
288
+ "step": 2000
289
+ },
290
+ {
291
+ "epoch": 3.02,
292
+ "eval_cer": 4.493465230064843,
293
+ "eval_loss": 0.2471683919429779,
294
+ "eval_runtime": 3536.1026,
295
+ "eval_samples_per_second": 2.868,
296
+ "eval_steps_per_second": 0.179,
297
+ "eval_wer": 17.10631029328985,
298
+ "step": 2000
299
+ },
300
+ {
301
+ "epoch": 3.03,
302
+ "learning_rate": 6.564444444444446e-06,
303
+ "loss": 0.0114,
304
+ "step": 2050
305
+ },
306
+ {
307
+ "epoch": 3.04,
308
+ "learning_rate": 6.453333333333334e-06,
309
+ "loss": 0.01,
310
+ "step": 2100
311
+ },
312
+ {
313
+ "epoch": 3.05,
314
+ "learning_rate": 6.342222222222223e-06,
315
+ "loss": 0.0101,
316
+ "step": 2150
317
+ },
318
+ {
319
+ "epoch": 3.06,
320
+ "learning_rate": 6.231111111111111e-06,
321
+ "loss": 0.008,
322
+ "step": 2200
323
+ },
324
+ {
325
+ "epoch": 3.07,
326
+ "learning_rate": 6.120000000000001e-06,
327
+ "loss": 0.005,
328
+ "step": 2250
329
+ },
330
+ {
331
+ "epoch": 3.08,
332
+ "learning_rate": 6.00888888888889e-06,
333
+ "loss": 0.0083,
334
+ "step": 2300
335
+ },
336
+ {
337
+ "epoch": 3.09,
338
+ "learning_rate": 5.897777777777778e-06,
339
+ "loss": 0.0096,
340
+ "step": 2350
341
+ },
342
+ {
343
+ "epoch": 3.1,
344
+ "learning_rate": 5.7866666666666674e-06,
345
+ "loss": 0.0075,
346
+ "step": 2400
347
+ },
348
+ {
349
+ "epoch": 3.1,
350
+ "eval_cer": 4.4317647771143225,
351
+ "eval_loss": 0.25027787685394287,
352
+ "eval_runtime": 3586.7167,
353
+ "eval_samples_per_second": 2.828,
354
+ "eval_steps_per_second": 0.177,
355
+ "eval_wer": 17.01508181447621,
356
+ "step": 2400
357
+ },
358
+ {
359
+ "epoch": 3.11,
360
+ "learning_rate": 5.675555555555556e-06,
361
+ "loss": 0.0068,
362
+ "step": 2450
363
+ },
364
+ {
365
+ "epoch": 3.12,
366
+ "learning_rate": 5.5644444444444444e-06,
367
+ "loss": 0.0051,
368
+ "step": 2500
369
+ },
370
+ {
371
+ "epoch": 4.0,
372
+ "learning_rate": 5.453333333333334e-06,
373
+ "loss": 0.0072,
374
+ "step": 2550
375
+ },
376
+ {
377
+ "epoch": 4.01,
378
+ "learning_rate": 5.342222222222223e-06,
379
+ "loss": 0.0077,
380
+ "step": 2600
381
+ },
382
+ {
383
+ "epoch": 4.02,
384
+ "learning_rate": 5.231111111111111e-06,
385
+ "loss": 0.0062,
386
+ "step": 2650
387
+ },
388
+ {
389
+ "epoch": 4.03,
390
+ "learning_rate": 5.12e-06,
391
+ "loss": 0.0053,
392
+ "step": 2700
393
+ },
394
+ {
395
+ "epoch": 4.04,
396
+ "learning_rate": 5.00888888888889e-06,
397
+ "loss": 0.0041,
398
+ "step": 2750
399
+ },
400
+ {
401
+ "epoch": 4.05,
402
+ "learning_rate": 4.897777777777778e-06,
403
+ "loss": 0.0039,
404
+ "step": 2800
405
+ },
406
+ {
407
+ "epoch": 4.05,
408
+ "eval_cer": 4.365451206186191,
409
+ "eval_loss": 0.2513594627380371,
410
+ "eval_runtime": 3531.9344,
411
+ "eval_samples_per_second": 2.872,
412
+ "eval_steps_per_second": 0.18,
413
+ "eval_wer": 16.74333740949941,
414
+ "step": 2800
415
+ },
416
+ {
417
+ "epoch": 4.06,
418
+ "learning_rate": 4.786666666666667e-06,
419
+ "loss": 0.0033,
420
+ "step": 2850
421
+ },
422
+ {
423
+ "epoch": 4.07,
424
+ "learning_rate": 4.675555555555556e-06,
425
+ "loss": 0.0026,
426
+ "step": 2900
427
+ },
428
+ {
429
+ "epoch": 4.08,
430
+ "learning_rate": 4.564444444444445e-06,
431
+ "loss": 0.0035,
432
+ "step": 2950
433
+ },
434
+ {
435
+ "epoch": 4.09,
436
+ "learning_rate": 4.453333333333334e-06,
437
+ "loss": 0.0033,
438
+ "step": 3000
439
+ },
440
+ {
441
+ "epoch": 4.1,
442
+ "learning_rate": 4.3422222222222225e-06,
443
+ "loss": 0.0028,
444
+ "step": 3050
445
+ },
446
+ {
447
+ "epoch": 4.11,
448
+ "learning_rate": 4.2311111111111114e-06,
449
+ "loss": 0.0032,
450
+ "step": 3100
451
+ },
452
+ {
453
+ "epoch": 4.12,
454
+ "learning_rate": 4.12e-06,
455
+ "loss": 0.0023,
456
+ "step": 3150
457
+ },
458
+ {
459
+ "epoch": 5.01,
460
+ "learning_rate": 4.008888888888889e-06,
461
+ "loss": 0.0038,
462
+ "step": 3200
463
+ },
464
+ {
465
+ "epoch": 5.01,
466
+ "eval_cer": 4.358243209346177,
467
+ "eval_loss": 0.2565072774887085,
468
+ "eval_runtime": 3521.7324,
469
+ "eval_samples_per_second": 2.88,
470
+ "eval_steps_per_second": 0.18,
471
+ "eval_wer": 16.88697373784429,
472
+ "step": 3200
473
+ },
474
+ {
475
+ "epoch": 5.02,
476
+ "learning_rate": 3.897777777777778e-06,
477
+ "loss": 0.0037,
478
+ "step": 3250
479
+ },
480
+ {
481
+ "epoch": 5.03,
482
+ "learning_rate": 3.7866666666666667e-06,
483
+ "loss": 0.0025,
484
+ "step": 3300
485
+ },
486
+ {
487
+ "epoch": 5.04,
488
+ "learning_rate": 3.675555555555556e-06,
489
+ "loss": 0.002,
490
+ "step": 3350
491
+ },
492
+ {
493
+ "epoch": 5.05,
494
+ "learning_rate": 3.564444444444445e-06,
495
+ "loss": 0.0023,
496
+ "step": 3400
497
+ },
498
+ {
499
+ "epoch": 5.06,
500
+ "learning_rate": 3.4533333333333334e-06,
501
+ "loss": 0.0029,
502
+ "step": 3450
503
+ },
504
+ {
505
+ "epoch": 5.07,
506
+ "learning_rate": 3.3422222222222224e-06,
507
+ "loss": 0.0016,
508
+ "step": 3500
509
+ },
510
+ {
511
+ "epoch": 5.08,
512
+ "learning_rate": 3.2311111111111117e-06,
513
+ "loss": 0.0017,
514
+ "step": 3550
515
+ },
516
+ {
517
+ "epoch": 5.09,
518
+ "learning_rate": 3.12e-06,
519
+ "loss": 0.0023,
520
+ "step": 3600
521
+ },
522
+ {
523
+ "epoch": 5.09,
524
+ "eval_cer": 4.333736020090129,
525
+ "eval_loss": 0.2590169310569763,
526
+ "eval_runtime": 3522.239,
527
+ "eval_samples_per_second": 2.88,
528
+ "eval_steps_per_second": 0.18,
529
+ "eval_wer": 16.698693685824647,
530
+ "step": 3600
531
+ },
532
+ {
533
+ "epoch": 5.1,
534
+ "learning_rate": 3.008888888888889e-06,
535
+ "loss": 0.0018,
536
+ "step": 3650
537
+ },
538
+ {
539
+ "epoch": 5.11,
540
+ "learning_rate": 2.8977777777777785e-06,
541
+ "loss": 0.0017,
542
+ "step": 3700
543
+ },
544
+ {
545
+ "epoch": 5.12,
546
+ "learning_rate": 2.786666666666667e-06,
547
+ "loss": 0.0016,
548
+ "step": 3750
549
+ },
550
+ {
551
+ "epoch": 6.0,
552
+ "learning_rate": 2.675555555555556e-06,
553
+ "loss": 0.0014,
554
+ "step": 3800
555
+ },
556
+ {
557
+ "epoch": 6.01,
558
+ "learning_rate": 2.5644444444444444e-06,
559
+ "loss": 0.0024,
560
+ "step": 3850
561
+ },
562
+ {
563
+ "epoch": 6.02,
564
+ "learning_rate": 2.4533333333333333e-06,
565
+ "loss": 0.002,
566
+ "step": 3900
567
+ },
568
+ {
569
+ "epoch": 6.03,
570
+ "learning_rate": 2.342222222222222e-06,
571
+ "loss": 0.0014,
572
+ "step": 3950
573
+ },
574
+ {
575
+ "epoch": 6.04,
576
+ "learning_rate": 2.2311111111111115e-06,
577
+ "loss": 0.0013,
578
+ "step": 4000
579
+ },
580
+ {
581
+ "epoch": 6.04,
582
+ "eval_cer": 4.285298281325233,
583
+ "eval_loss": 0.2575719356536865,
584
+ "eval_runtime": 3539.1562,
585
+ "eval_samples_per_second": 2.866,
586
+ "eval_steps_per_second": 0.179,
587
+ "eval_wer": 16.632698616044568,
588
+ "step": 4000
589
+ },
590
+ {
591
+ "epoch": 6.05,
592
+ "learning_rate": 2.12e-06,
593
+ "loss": 0.0013,
594
+ "step": 4050
595
+ },
596
+ {
597
+ "epoch": 6.06,
598
+ "learning_rate": 2.008888888888889e-06,
599
+ "loss": 0.0011,
600
+ "step": 4100
601
+ },
602
+ {
603
+ "epoch": 6.07,
604
+ "learning_rate": 1.8977777777777779e-06,
605
+ "loss": 0.0011,
606
+ "step": 4150
607
+ },
608
+ {
609
+ "epoch": 6.08,
610
+ "learning_rate": 1.7866666666666668e-06,
611
+ "loss": 0.0013,
612
+ "step": 4200
613
+ },
614
+ {
615
+ "epoch": 6.09,
616
+ "learning_rate": 1.675555555555556e-06,
617
+ "loss": 0.0016,
618
+ "step": 4250
619
+ },
620
+ {
621
+ "epoch": 6.1,
622
+ "learning_rate": 1.5644444444444446e-06,
623
+ "loss": 0.0013,
624
+ "step": 4300
625
+ },
626
+ {
627
+ "epoch": 6.11,
628
+ "learning_rate": 1.4533333333333335e-06,
629
+ "loss": 0.0014,
630
+ "step": 4350
631
+ },
632
+ {
633
+ "epoch": 6.12,
634
+ "learning_rate": 1.3422222222222222e-06,
635
+ "loss": 0.0011,
636
+ "step": 4400
637
+ },
638
+ {
639
+ "epoch": 6.12,
640
+ "eval_cer": 4.355648330483771,
641
+ "eval_loss": 0.26468709111213684,
642
+ "eval_runtime": 3546.9734,
643
+ "eval_samples_per_second": 2.86,
644
+ "eval_steps_per_second": 0.179,
645
+ "eval_wer": 16.91220714687785,
646
+ "step": 4400
647
+ },
648
+ {
649
+ "epoch": 7.0,
650
+ "learning_rate": 1.2311111111111112e-06,
651
+ "loss": 0.0013,
652
+ "step": 4450
653
+ },
654
+ {
655
+ "epoch": 7.01,
656
+ "learning_rate": 1.12e-06,
657
+ "loss": 0.0016,
658
+ "step": 4500
659
+ },
660
+ {
661
+ "epoch": 7.02,
662
+ "learning_rate": 1.008888888888889e-06,
663
+ "loss": 0.0013,
664
+ "step": 4550
665
+ },
666
+ {
667
+ "epoch": 7.03,
668
+ "learning_rate": 8.977777777777778e-07,
669
+ "loss": 0.0011,
670
+ "step": 4600
671
+ },
672
+ {
673
+ "epoch": 7.04,
674
+ "learning_rate": 7.866666666666667e-07,
675
+ "loss": 0.0011,
676
+ "step": 4650
677
+ },
678
+ {
679
+ "epoch": 7.05,
680
+ "learning_rate": 6.755555555555555e-07,
681
+ "loss": 0.001,
682
+ "step": 4700
683
+ },
684
+ {
685
+ "epoch": 7.06,
686
+ "learning_rate": 5.644444444444445e-07,
687
+ "loss": 0.001,
688
+ "step": 4750
689
+ },
690
+ {
691
+ "epoch": 7.07,
692
+ "learning_rate": 4.533333333333334e-07,
693
+ "loss": 0.001,
694
+ "step": 4800
695
+ },
696
+ {
697
+ "epoch": 7.07,
698
+ "eval_cer": 4.28385668195723,
699
+ "eval_loss": 0.26154619455337524,
700
+ "eval_runtime": 3542.9557,
701
+ "eval_samples_per_second": 2.863,
702
+ "eval_steps_per_second": 0.179,
703
+ "eval_wer": 16.634639647508685,
704
+ "step": 4800
705
+ },
706
+ {
707
+ "epoch": 7.08,
708
+ "learning_rate": 3.422222222222223e-07,
709
+ "loss": 0.0012,
710
+ "step": 4850
711
+ },
712
+ {
713
+ "epoch": 7.09,
714
+ "learning_rate": 2.3111111111111112e-07,
715
+ "loss": 0.0011,
716
+ "step": 4900
717
+ },
718
+ {
719
+ "epoch": 7.1,
720
+ "learning_rate": 1.2000000000000002e-07,
721
+ "loss": 0.0012,
722
+ "step": 4950
723
+ },
724
+ {
725
+ "epoch": 7.11,
726
+ "learning_rate": 8.88888888888889e-09,
727
+ "loss": 0.0012,
728
+ "step": 5000
729
+ },
730
+ {
731
+ "epoch": 7.11,
732
+ "step": 5000,
733
+ "total_flos": 4.616962385559552e+19,
734
+ "train_loss": 0.04895092924162745,
735
+ "train_runtime": 68546.6851,
736
+ "train_samples_per_second": 2.334,
737
+ "train_steps_per_second": 0.073
738
+ }
739
+ ],
740
+ "max_steps": 5000,
741
+ "num_train_epochs": 9223372036854775807,
742
+ "total_flos": 4.616962385559552e+19,
743
+ "trial_name": null,
744
+ "trial_params": null
745
+ }