ylacombe HF staff commited on
Commit
9c450af
1 Parent(s): 7956637

Training in progress, step 15200

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "train_loss": 3.3253096312547563,
4
+ "train_runtime": 44962.3834,
5
+ "train_samples": 43798,
6
+ "train_samples_per_second": 14.612,
7
+ "train_steps_per_second": 0.365
8
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20040514504bc5ee5bd18d13fba1b134a10318ea7b533415256064528379cd74
3
  size 2322279712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7672da5bd09cf427bc66613581a71b449bbc210ad010a9367fe7492cdb60f19
3
  size 2322279712
runs/Jan04_20-37-14_vorace/events.out.tfevents.1704401907.vorace.1537045.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec8999353a5574d02e7f7952f14ecafa0a34a87085e23cdb71a11f482a95dcc5
3
- size 25941
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83336ed20a20bcf3e49a097cf65553b0523e13c87e33668c7a97a761d0625edb
3
+ size 26259
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "train_loss": 3.3253096312547563,
4
+ "train_runtime": 44962.3834,
5
+ "train_samples": 43798,
6
+ "train_samples_per_second": 14.612,
7
+ "train_steps_per_second": 0.365
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
+ "eval_steps": 300,
6
+ "global_step": 16425,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.27,
13
+ "eval_loss": 7.266334533691406,
14
+ "eval_runtime": 325.0132,
15
+ "eval_samples_per_second": 33.952,
16
+ "eval_steps_per_second": 1.061,
17
+ "eval_wer": 1.0,
18
+ "step": 300
19
+ },
20
+ {
21
+ "epoch": 0.46,
22
+ "learning_rate": 7.14420628442998e-05,
23
+ "loss": 10.5256,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 0.55,
28
+ "eval_loss": 3.0892837047576904,
29
+ "eval_runtime": 316.7833,
30
+ "eval_samples_per_second": 34.835,
31
+ "eval_steps_per_second": 1.089,
32
+ "eval_wer": 1.0,
33
+ "step": 600
34
+ },
35
+ {
36
+ "epoch": 0.82,
37
+ "eval_loss": 3.061225652694702,
38
+ "eval_runtime": 483.1327,
39
+ "eval_samples_per_second": 22.841,
40
+ "eval_steps_per_second": 0.714,
41
+ "eval_wer": 1.0,
42
+ "step": 900
43
+ },
44
+ {
45
+ "epoch": 0.91,
46
+ "learning_rate": 0.0001434602713566988,
47
+ "loss": 2.9795,
48
+ "step": 1000
49
+ },
50
+ {
51
+ "epoch": 1.1,
52
+ "eval_loss": 2.9936766624450684,
53
+ "eval_runtime": 320.3828,
54
+ "eval_samples_per_second": 34.443,
55
+ "eval_steps_per_second": 1.077,
56
+ "eval_wer": 1.0,
57
+ "step": 1200
58
+ },
59
+ {
60
+ "epoch": 1.37,
61
+ "learning_rate": 0.0002154784798690978,
62
+ "loss": 2.9564,
63
+ "step": 1500
64
+ },
65
+ {
66
+ "epoch": 1.37,
67
+ "eval_loss": 3.242413282394409,
68
+ "eval_runtime": 328.1378,
69
+ "eval_samples_per_second": 33.629,
70
+ "eval_steps_per_second": 1.051,
71
+ "eval_wer": 1.0,
72
+ "step": 1500
73
+ },
74
+ {
75
+ "epoch": 1.64,
76
+ "eval_loss": 3.2866387367248535,
77
+ "eval_runtime": 325.8189,
78
+ "eval_samples_per_second": 33.869,
79
+ "eval_steps_per_second": 1.059,
80
+ "eval_wer": 1.0,
81
+ "step": 1800
82
+ },
83
+ {
84
+ "epoch": 1.83,
85
+ "learning_rate": 0.0002874966883814968,
86
+ "loss": 3.1552,
87
+ "step": 2000
88
+ },
89
+ {
90
+ "epoch": 1.92,
91
+ "eval_loss": 3.6338589191436768,
92
+ "eval_runtime": 322.0052,
93
+ "eval_samples_per_second": 34.27,
94
+ "eval_steps_per_second": 1.071,
95
+ "eval_wer": 1.0,
96
+ "step": 2100
97
+ },
98
+ {
99
+ "epoch": 2.19,
100
+ "eval_loss": 3.1184866428375244,
101
+ "eval_runtime": 316.2742,
102
+ "eval_samples_per_second": 34.891,
103
+ "eval_steps_per_second": 1.091,
104
+ "eval_wer": 1.0,
105
+ "step": 2400
106
+ },
107
+ {
108
+ "epoch": 2.28,
109
+ "learning_rate": 0.00035951489689389575,
110
+ "loss": 3.2079,
111
+ "step": 2500
112
+ },
113
+ {
114
+ "epoch": 2.47,
115
+ "eval_loss": 3.183176040649414,
116
+ "eval_runtime": 316.7437,
117
+ "eval_samples_per_second": 34.839,
118
+ "eval_steps_per_second": 1.089,
119
+ "eval_wer": 1.0,
120
+ "step": 2700
121
+ },
122
+ {
123
+ "epoch": 2.74,
124
+ "learning_rate": 0.00043153310540629475,
125
+ "loss": 3.1275,
126
+ "step": 3000
127
+ },
128
+ {
129
+ "epoch": 2.74,
130
+ "eval_loss": 3.3952367305755615,
131
+ "eval_runtime": 319.4373,
132
+ "eval_samples_per_second": 34.545,
133
+ "eval_steps_per_second": 1.08,
134
+ "eval_wer": 1.0,
135
+ "step": 3000
136
+ },
137
+ {
138
+ "epoch": 3.01,
139
+ "eval_loss": 3.2981579303741455,
140
+ "eval_runtime": 327.568,
141
+ "eval_samples_per_second": 33.688,
142
+ "eval_steps_per_second": 1.053,
143
+ "eval_wer": 1.0,
144
+ "step": 3300
145
+ },
146
+ {
147
+ "epoch": 3.2,
148
+ "learning_rate": 0.0005035513139186939,
149
+ "loss": 3.0987,
150
+ "step": 3500
151
+ },
152
+ {
153
+ "epoch": 3.29,
154
+ "eval_loss": 3.103595733642578,
155
+ "eval_runtime": 327.3342,
156
+ "eval_samples_per_second": 33.712,
157
+ "eval_steps_per_second": 1.054,
158
+ "eval_wer": 1.0,
159
+ "step": 3600
160
+ },
161
+ {
162
+ "epoch": 3.56,
163
+ "eval_loss": 3.1222941875457764,
164
+ "eval_runtime": 312.8357,
165
+ "eval_samples_per_second": 35.274,
166
+ "eval_steps_per_second": 1.103,
167
+ "eval_wer": 1.0,
168
+ "step": 3900
169
+ },
170
+ {
171
+ "epoch": 3.65,
172
+ "learning_rate": 0.0005755695224310928,
173
+ "loss": 2.9301,
174
+ "step": 4000
175
+ },
176
+ {
177
+ "epoch": 3.84,
178
+ "eval_loss": 3.114525556564331,
179
+ "eval_runtime": 308.5965,
180
+ "eval_samples_per_second": 35.759,
181
+ "eval_steps_per_second": 1.118,
182
+ "eval_wer": 1.0,
183
+ "step": 4200
184
+ },
185
+ {
186
+ "epoch": 4.11,
187
+ "learning_rate": 0.0006475877309434917,
188
+ "loss": 2.9197,
189
+ "step": 4500
190
+ },
191
+ {
192
+ "epoch": 4.11,
193
+ "eval_loss": 3.0324432849884033,
194
+ "eval_runtime": 308.9122,
195
+ "eval_samples_per_second": 35.722,
196
+ "eval_steps_per_second": 1.117,
197
+ "eval_wer": 1.0,
198
+ "step": 4500
199
+ },
200
+ {
201
+ "epoch": 4.38,
202
+ "eval_loss": 2.999401807785034,
203
+ "eval_runtime": 308.612,
204
+ "eval_samples_per_second": 35.757,
205
+ "eval_steps_per_second": 1.118,
206
+ "eval_wer": 1.9598550067965563,
207
+ "step": 4800
208
+ },
209
+ {
210
+ "epoch": 4.57,
211
+ "learning_rate": 0.0007196059394558908,
212
+ "loss": 2.9023,
213
+ "step": 5000
214
+ },
215
+ {
216
+ "epoch": 4.66,
217
+ "eval_loss": 2.991722822189331,
218
+ "eval_runtime": 309.103,
219
+ "eval_samples_per_second": 35.7,
220
+ "eval_steps_per_second": 1.116,
221
+ "eval_wer": 1.8240144993203444,
222
+ "step": 5100
223
+ },
224
+ {
225
+ "epoch": 4.93,
226
+ "eval_loss": 2.9946165084838867,
227
+ "eval_runtime": 308.4148,
228
+ "eval_samples_per_second": 35.78,
229
+ "eval_steps_per_second": 1.119,
230
+ "eval_wer": 1.958948799275034,
231
+ "step": 5400
232
+ },
233
+ {
234
+ "epoch": 5.02,
235
+ "learning_rate": 0.0006889164121067909,
236
+ "loss": 2.9007,
237
+ "step": 5500
238
+ },
239
+ {
240
+ "epoch": 5.21,
241
+ "eval_loss": 3.195502519607544,
242
+ "eval_runtime": 308.3861,
243
+ "eval_samples_per_second": 35.783,
244
+ "eval_steps_per_second": 1.119,
245
+ "eval_wer": 1.0,
246
+ "step": 5700
247
+ },
248
+ {
249
+ "epoch": 5.48,
250
+ "learning_rate": 0.0006574616321963427,
251
+ "loss": 3.1887,
252
+ "step": 6000
253
+ },
254
+ {
255
+ "epoch": 5.48,
256
+ "eval_loss": 3.1901698112487793,
257
+ "eval_runtime": 308.4713,
258
+ "eval_samples_per_second": 35.773,
259
+ "eval_steps_per_second": 1.118,
260
+ "eval_wer": 1.0,
261
+ "step": 6000
262
+ },
263
+ {
264
+ "epoch": 5.75,
265
+ "eval_loss": 3.167245864868164,
266
+ "eval_runtime": 308.5576,
267
+ "eval_samples_per_second": 35.763,
268
+ "eval_steps_per_second": 1.118,
269
+ "eval_wer": 1.0,
270
+ "step": 6300
271
+ },
272
+ {
273
+ "epoch": 5.94,
274
+ "learning_rate": 0.0006259438166548115,
275
+ "loss": 3.135,
276
+ "step": 6500
277
+ },
278
+ {
279
+ "epoch": 6.03,
280
+ "eval_loss": 3.2076234817504883,
281
+ "eval_runtime": 308.784,
282
+ "eval_samples_per_second": 35.737,
283
+ "eval_steps_per_second": 1.117,
284
+ "eval_wer": 1.0,
285
+ "step": 6600
286
+ },
287
+ {
288
+ "epoch": 6.3,
289
+ "eval_loss": 3.212040424346924,
290
+ "eval_runtime": 309.3257,
291
+ "eval_samples_per_second": 35.674,
292
+ "eval_steps_per_second": 1.115,
293
+ "eval_wer": 1.0,
294
+ "step": 6900
295
+ },
296
+ {
297
+ "epoch": 6.39,
298
+ "learning_rate": 0.0005944260011132802,
299
+ "loss": 3.1482,
300
+ "step": 7000
301
+ },
302
+ {
303
+ "epoch": 6.58,
304
+ "eval_loss": 3.1832025051116943,
305
+ "eval_runtime": 308.2925,
306
+ "eval_samples_per_second": 35.794,
307
+ "eval_steps_per_second": 1.119,
308
+ "eval_wer": 1.0,
309
+ "step": 7200
310
+ },
311
+ {
312
+ "epoch": 6.85,
313
+ "learning_rate": 0.0005629081855717488,
314
+ "loss": 3.1546,
315
+ "step": 7500
316
+ },
317
+ {
318
+ "epoch": 6.85,
319
+ "eval_loss": 3.1799111366271973,
320
+ "eval_runtime": 308.7059,
321
+ "eval_samples_per_second": 35.746,
322
+ "eval_steps_per_second": 1.118,
323
+ "eval_wer": 1.0,
324
+ "step": 7500
325
+ },
326
+ {
327
+ "epoch": 7.12,
328
+ "eval_loss": 3.2451581954956055,
329
+ "eval_runtime": 307.3881,
330
+ "eval_samples_per_second": 35.899,
331
+ "eval_steps_per_second": 1.122,
332
+ "eval_wer": 1.0,
333
+ "step": 7800
334
+ },
335
+ {
336
+ "epoch": 7.31,
337
+ "learning_rate": 0.0005313903700302176,
338
+ "loss": 3.1567,
339
+ "step": 8000
340
+ },
341
+ {
342
+ "epoch": 7.4,
343
+ "eval_loss": 3.2318718433380127,
344
+ "eval_runtime": 308.2983,
345
+ "eval_samples_per_second": 35.793,
346
+ "eval_steps_per_second": 1.119,
347
+ "eval_wer": 1.0,
348
+ "step": 8100
349
+ },
350
+ {
351
+ "epoch": 7.67,
352
+ "eval_loss": 3.222830057144165,
353
+ "eval_runtime": 308.3145,
354
+ "eval_samples_per_second": 35.791,
355
+ "eval_steps_per_second": 1.119,
356
+ "eval_wer": 1.0,
357
+ "step": 8400
358
+ },
359
+ {
360
+ "epoch": 7.76,
361
+ "learning_rate": 0.0004998725544886862,
362
+ "loss": 3.1719,
363
+ "step": 8500
364
+ },
365
+ {
366
+ "epoch": 7.95,
367
+ "eval_loss": 3.2054970264434814,
368
+ "eval_runtime": 308.0159,
369
+ "eval_samples_per_second": 35.826,
370
+ "eval_steps_per_second": 1.12,
371
+ "eval_wer": 1.0,
372
+ "step": 8700
373
+ },
374
+ {
375
+ "epoch": 8.22,
376
+ "learning_rate": 0.00046835473894715497,
377
+ "loss": 3.168,
378
+ "step": 9000
379
+ },
380
+ {
381
+ "epoch": 8.22,
382
+ "eval_loss": 3.2552778720855713,
383
+ "eval_runtime": 303.2884,
384
+ "eval_samples_per_second": 36.385,
385
+ "eval_steps_per_second": 1.138,
386
+ "eval_wer": 1.0,
387
+ "step": 9000
388
+ },
389
+ {
390
+ "epoch": 8.49,
391
+ "eval_loss": 3.197523593902588,
392
+ "eval_runtime": 305.1147,
393
+ "eval_samples_per_second": 36.167,
394
+ "eval_steps_per_second": 1.131,
395
+ "eval_wer": 1.0,
396
+ "step": 9300
397
+ },
398
+ {
399
+ "epoch": 8.68,
400
+ "learning_rate": 0.0004368369234056237,
401
+ "loss": 3.1643,
402
+ "step": 9500
403
+ },
404
+ {
405
+ "epoch": 8.77,
406
+ "eval_loss": 3.2445874214172363,
407
+ "eval_runtime": 303.4296,
408
+ "eval_samples_per_second": 36.368,
409
+ "eval_steps_per_second": 1.137,
410
+ "eval_wer": 1.0,
411
+ "step": 9600
412
+ },
413
+ {
414
+ "epoch": 9.04,
415
+ "eval_loss": 3.2781076431274414,
416
+ "eval_runtime": 305.7237,
417
+ "eval_samples_per_second": 36.095,
418
+ "eval_steps_per_second": 1.128,
419
+ "eval_wer": 1.0,
420
+ "step": 9900
421
+ },
422
+ {
423
+ "epoch": 9.13,
424
+ "learning_rate": 0.0004053191078640924,
425
+ "loss": 3.169,
426
+ "step": 10000
427
+ },
428
+ {
429
+ "epoch": 9.32,
430
+ "eval_loss": 3.2596964836120605,
431
+ "eval_runtime": 306.6385,
432
+ "eval_samples_per_second": 35.987,
433
+ "eval_steps_per_second": 1.125,
434
+ "eval_wer": 1.0,
435
+ "step": 10200
436
+ },
437
+ {
438
+ "epoch": 9.59,
439
+ "learning_rate": 0.00037380129232256106,
440
+ "loss": 3.1789,
441
+ "step": 10500
442
+ },
443
+ {
444
+ "epoch": 9.59,
445
+ "eval_loss": 3.2585501670837402,
446
+ "eval_runtime": 307.6539,
447
+ "eval_samples_per_second": 35.868,
448
+ "eval_steps_per_second": 1.121,
449
+ "eval_wer": 1.0,
450
+ "step": 10500
451
+ },
452
+ {
453
+ "epoch": 9.86,
454
+ "eval_loss": 3.2689764499664307,
455
+ "eval_runtime": 307.738,
456
+ "eval_samples_per_second": 35.858,
457
+ "eval_steps_per_second": 1.121,
458
+ "eval_wer": 1.0,
459
+ "step": 10800
460
+ },
461
+ {
462
+ "epoch": 10.05,
463
+ "learning_rate": 0.0003422834767810298,
464
+ "loss": 3.1701,
465
+ "step": 11000
466
+ },
467
+ {
468
+ "epoch": 10.14,
469
+ "eval_loss": 3.273723602294922,
470
+ "eval_runtime": 308.5346,
471
+ "eval_samples_per_second": 35.766,
472
+ "eval_steps_per_second": 1.118,
473
+ "eval_wer": 1.0,
474
+ "step": 11100
475
+ },
476
+ {
477
+ "epoch": 10.41,
478
+ "eval_loss": 3.273848533630371,
479
+ "eval_runtime": 308.8614,
480
+ "eval_samples_per_second": 35.728,
481
+ "eval_steps_per_second": 1.117,
482
+ "eval_wer": 1.0,
483
+ "step": 11400
484
+ },
485
+ {
486
+ "epoch": 10.5,
487
+ "learning_rate": 0.00031076566123949855,
488
+ "loss": 3.1698,
489
+ "step": 11500
490
+ },
491
+ {
492
+ "epoch": 10.68,
493
+ "eval_loss": 3.2595293521881104,
494
+ "eval_runtime": 308.7628,
495
+ "eval_samples_per_second": 35.739,
496
+ "eval_steps_per_second": 1.117,
497
+ "eval_wer": 1.0,
498
+ "step": 11700
499
+ },
500
+ {
501
+ "epoch": 10.96,
502
+ "learning_rate": 0.00027924784569796727,
503
+ "loss": 3.1595,
504
+ "step": 12000
505
+ },
506
+ {
507
+ "epoch": 10.96,
508
+ "eval_loss": 3.2467362880706787,
509
+ "eval_runtime": 308.3094,
510
+ "eval_samples_per_second": 35.792,
511
+ "eval_steps_per_second": 1.119,
512
+ "eval_wer": 1.0,
513
+ "step": 12000
514
+ },
515
+ {
516
+ "epoch": 11.23,
517
+ "eval_loss": 3.252420663833618,
518
+ "eval_runtime": 309.0904,
519
+ "eval_samples_per_second": 35.702,
520
+ "eval_steps_per_second": 1.116,
521
+ "eval_wer": 1.0,
522
+ "step": 12300
523
+ },
524
+ {
525
+ "epoch": 11.42,
526
+ "learning_rate": 0.00024773003015643593,
527
+ "loss": 3.15,
528
+ "step": 12500
529
+ },
530
+ {
531
+ "epoch": 11.51,
532
+ "eval_loss": 3.2327377796173096,
533
+ "eval_runtime": 308.9397,
534
+ "eval_samples_per_second": 35.719,
535
+ "eval_steps_per_second": 1.117,
536
+ "eval_wer": 1.0,
537
+ "step": 12600
538
+ },
539
+ {
540
+ "epoch": 11.78,
541
+ "eval_loss": 3.219557046890259,
542
+ "eval_runtime": 309.2594,
543
+ "eval_samples_per_second": 35.682,
544
+ "eval_steps_per_second": 1.116,
545
+ "eval_wer": 1.0,
546
+ "step": 12900
547
+ },
548
+ {
549
+ "epoch": 11.87,
550
+ "learning_rate": 0.00021621221461490465,
551
+ "loss": 3.1444,
552
+ "step": 13000
553
+ },
554
+ {
555
+ "epoch": 12.05,
556
+ "eval_loss": 3.1942968368530273,
557
+ "eval_runtime": 309.871,
558
+ "eval_samples_per_second": 35.612,
559
+ "eval_steps_per_second": 1.113,
560
+ "eval_wer": 1.0,
561
+ "step": 13200
562
+ },
563
+ {
564
+ "epoch": 12.33,
565
+ "learning_rate": 0.00018469439907337336,
566
+ "loss": 3.132,
567
+ "step": 13500
568
+ },
569
+ {
570
+ "epoch": 12.33,
571
+ "eval_loss": 3.191138744354248,
572
+ "eval_runtime": 309.3206,
573
+ "eval_samples_per_second": 35.675,
574
+ "eval_steps_per_second": 1.115,
575
+ "eval_wer": 1.0,
576
+ "step": 13500
577
+ },
578
+ {
579
+ "epoch": 12.6,
580
+ "eval_loss": 3.207465648651123,
581
+ "eval_runtime": 309.5517,
582
+ "eval_samples_per_second": 35.648,
583
+ "eval_steps_per_second": 1.115,
584
+ "eval_wer": 1.0,
585
+ "step": 13800
586
+ },
587
+ {
588
+ "epoch": 12.79,
589
+ "learning_rate": 0.00015323961916292511,
590
+ "loss": 3.1153,
591
+ "step": 14000
592
+ },
593
+ {
594
+ "epoch": 12.88,
595
+ "eval_loss": 3.1938300132751465,
596
+ "eval_runtime": 310.0376,
597
+ "eval_samples_per_second": 35.592,
598
+ "eval_steps_per_second": 1.113,
599
+ "eval_wer": 1.0,
600
+ "step": 14100
601
+ },
602
+ {
603
+ "epoch": 13.15,
604
+ "eval_loss": 3.1638731956481934,
605
+ "eval_runtime": 308.9592,
606
+ "eval_samples_per_second": 35.717,
607
+ "eval_steps_per_second": 1.117,
608
+ "eval_wer": 1.0,
609
+ "step": 14400
610
+ },
611
+ {
612
+ "epoch": 13.24,
613
+ "learning_rate": 0.00012172180362139385,
614
+ "loss": 3.1039,
615
+ "step": 14500
616
+ },
617
+ {
618
+ "epoch": 13.42,
619
+ "eval_loss": 3.15146803855896,
620
+ "eval_runtime": 308.6922,
621
+ "eval_samples_per_second": 35.748,
622
+ "eval_steps_per_second": 1.118,
623
+ "eval_wer": 1.0,
624
+ "step": 14700
625
+ },
626
+ {
627
+ "epoch": 13.7,
628
+ "learning_rate": 9.020398807986256e-05,
629
+ "loss": 3.0839,
630
+ "step": 15000
631
+ },
632
+ {
633
+ "epoch": 13.7,
634
+ "eval_loss": 3.153453826904297,
635
+ "eval_runtime": 309.1197,
636
+ "eval_samples_per_second": 35.698,
637
+ "eval_steps_per_second": 1.116,
638
+ "eval_wer": 1.0,
639
+ "step": 15000
640
+ },
641
+ {
642
+ "epoch": 13.97,
643
+ "eval_loss": 3.130723237991333,
644
+ "eval_runtime": 309.6167,
645
+ "eval_samples_per_second": 35.641,
646
+ "eval_steps_per_second": 1.114,
647
+ "eval_wer": 1.0,
648
+ "step": 15300
649
+ },
650
+ {
651
+ "epoch": 14.16,
652
+ "learning_rate": 5.8686172538331265e-05,
653
+ "loss": 3.0632,
654
+ "step": 15500
655
+ },
656
+ {
657
+ "epoch": 14.25,
658
+ "eval_loss": 3.1138317584991455,
659
+ "eval_runtime": 309.4562,
660
+ "eval_samples_per_second": 35.659,
661
+ "eval_steps_per_second": 1.115,
662
+ "eval_wer": 1.0,
663
+ "step": 15600
664
+ },
665
+ {
666
+ "epoch": 14.52,
667
+ "eval_loss": 3.128912925720215,
668
+ "eval_runtime": 309.4874,
669
+ "eval_samples_per_second": 35.656,
670
+ "eval_steps_per_second": 1.115,
671
+ "eval_wer": 1.0,
672
+ "step": 15900
673
+ },
674
+ {
675
+ "epoch": 14.61,
676
+ "learning_rate": 2.7168356996799972e-05,
677
+ "loss": 3.0518,
678
+ "step": 16000
679
+ },
680
+ {
681
+ "epoch": 14.79,
682
+ "eval_loss": 3.081491708755493,
683
+ "eval_runtime": 308.8218,
684
+ "eval_samples_per_second": 35.733,
685
+ "eval_steps_per_second": 1.117,
686
+ "eval_wer": 1.0,
687
+ "step": 16200
688
+ },
689
+ {
690
+ "epoch": 15.0,
691
+ "step": 16425,
692
+ "total_flos": 6.442470243808035e+19,
693
+ "train_loss": 3.3253096312547563,
694
+ "train_runtime": 44962.3834,
695
+ "train_samples_per_second": 14.612,
696
+ "train_steps_per_second": 0.365
697
+ }
698
+ ],
699
+ "logging_steps": 500,
700
+ "max_steps": 16425,
701
+ "num_input_tokens_seen": 0,
702
+ "num_train_epochs": 15,
703
+ "save_steps": 400,
704
+ "total_flos": 6.442470243808035e+19,
705
+ "train_batch_size": 20,
706
+ "trial_name": null,
707
+ "trial_params": null
708
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44e050caa7de36d64b4d49269e32fc9881c0d2b2decc3757c23eaa931743a065
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3c53562aa99723a8907e8c35881be65f8d75203d3001a36991a6ebba0d8857
3
  size 4728