DewiBrynJones commited on
Commit
e069dc3
1 Parent(s): ff51815

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: inf
21
  - Wer: 0.5238
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: inf
23
  - Wer: 0.5238
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 2.3166023166023164,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 182.873,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 38.398,
7
- "eval_steps_per_second": 4.801,
8
- "eval_wer": 0.3289055094594419,
9
- "total_flos": 1.261526897313927e+19,
10
- "train_loss": 0.8721037826538086,
11
- "train_runtime": 9008.7975,
12
  "train_samples": 41435,
13
- "train_samples_per_second": 10.656,
14
- "train_steps_per_second": 0.666
15
  }
 
1
  {
2
+ "epoch": 4.633204633204633,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 188.2056,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.31,
7
+ "eval_steps_per_second": 4.665,
8
+ "eval_wer": 0.5237557295521535,
9
+ "total_flos": 2.5308493485736165e+19,
10
+ "train_loss": 0.9557839482625325,
11
+ "train_runtime": 12299.0724,
12
  "train_samples": 41435,
13
+ "train_samples_per_second": 15.611,
14
+ "train_steps_per_second": 0.488
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.3166023166023164,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 182.873,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 38.398,
7
- "eval_steps_per_second": 4.801,
8
- "eval_wer": 0.3289055094594419
9
  }
 
1
  {
2
+ "epoch": 4.633204633204633,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 188.2056,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.31,
7
+ "eval_steps_per_second": 4.665,
8
+ "eval_wer": 0.5237557295521535
9
  }
runs/Aug12_16-36-55_940cfb8ec62d/events.out.tfevents.1723489570.940cfb8ec62d.1888.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e21ca93c58a036acf1a23d94dade2f3d05b93b4ea1e19a6c5ffa78196b71981d
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.3166023166023164,
3
- "total_flos": 1.261526897313927e+19,
4
- "train_loss": 0.8721037826538086,
5
- "train_runtime": 9008.7975,
6
  "train_samples": 41435,
7
- "train_samples_per_second": 10.656,
8
- "train_steps_per_second": 0.666
9
  }
 
1
  {
2
+ "epoch": 4.633204633204633,
3
+ "total_flos": 2.5308493485736165e+19,
4
+ "train_loss": 0.9557839482625325,
5
+ "train_runtime": 12299.0724,
6
  "train_samples": 41435,
7
+ "train_samples_per_second": 15.611,
8
+ "train_steps_per_second": 0.488
9
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3166023166023164,
5
  "eval_steps": 200,
6
  "global_step": 6000,
7
  "is_hyper_param_search": false,
@@ -9,373 +9,373 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.07722007722007722,
13
  "eval_loss": Infinity,
14
- "eval_runtime": 199.2963,
15
- "eval_samples_per_second": 35.234,
16
- "eval_steps_per_second": 4.406,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
- "epoch": 0.15444015444015444,
22
  "eval_loss": Infinity,
23
- "eval_runtime": 187.0234,
24
- "eval_samples_per_second": 37.546,
25
- "eval_steps_per_second": 4.695,
26
- "eval_wer": 0.9937672860426162,
27
  "step": 400
28
  },
29
  {
30
- "epoch": 0.19305019305019305,
31
- "grad_norm": 4.210392951965332,
32
  "learning_rate": 0.00029699999999999996,
33
- "loss": 3.9317,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 0.23166023166023167,
38
  "eval_loss": Infinity,
39
- "eval_runtime": 180.1705,
40
- "eval_samples_per_second": 38.974,
41
- "eval_steps_per_second": 4.873,
42
- "eval_wer": 0.7576253359822368,
43
  "step": 600
44
  },
45
  {
46
- "epoch": 0.3088803088803089,
47
  "eval_loss": Infinity,
48
- "eval_runtime": 179.0087,
49
- "eval_samples_per_second": 39.227,
50
- "eval_steps_per_second": 4.905,
51
- "eval_wer": 0.6941685170036228,
52
  "step": 800
53
  },
54
  {
55
- "epoch": 0.3861003861003861,
56
- "grad_norm": 3.149083137512207,
57
- "learning_rate": 0.0002730545454545454,
58
- "loss": 0.9699,
59
  "step": 1000
60
  },
61
  {
62
- "epoch": 0.3861003861003861,
63
  "eval_loss": Infinity,
64
- "eval_runtime": 179.6063,
65
- "eval_samples_per_second": 39.097,
66
- "eval_steps_per_second": 4.888,
67
- "eval_wer": 0.5762923142846013,
68
  "step": 1000
69
  },
70
  {
71
- "epoch": 0.46332046332046334,
72
  "eval_loss": Infinity,
73
- "eval_runtime": 180.175,
74
- "eval_samples_per_second": 38.973,
75
- "eval_steps_per_second": 4.873,
76
- "eval_wer": 0.5518548816433589,
77
  "step": 1200
78
  },
79
  {
80
- "epoch": 0.5405405405405406,
81
  "eval_loss": Infinity,
82
- "eval_runtime": 180.7382,
83
- "eval_samples_per_second": 38.852,
84
- "eval_steps_per_second": 4.858,
85
- "eval_wer": 0.5173931673873242,
86
  "step": 1400
87
  },
88
  {
89
- "epoch": 0.5791505791505791,
90
- "grad_norm": 4.50093412399292,
91
  "learning_rate": 0.0002458363636363636,
92
- "loss": 0.8031,
93
  "step": 1500
94
  },
95
  {
96
- "epoch": 0.6177606177606177,
97
  "eval_loss": Infinity,
98
- "eval_runtime": 180.2438,
99
- "eval_samples_per_second": 38.958,
100
- "eval_steps_per_second": 4.871,
101
- "eval_wer": 0.5338059808084349,
102
  "step": 1600
103
  },
104
  {
105
- "epoch": 0.694980694980695,
106
  "eval_loss": Infinity,
107
- "eval_runtime": 180.7932,
108
- "eval_samples_per_second": 38.84,
109
- "eval_steps_per_second": 4.856,
110
- "eval_wer": 0.47772454001272513,
111
  "step": 1800
112
  },
113
  {
114
- "epoch": 0.7722007722007722,
115
- "grad_norm": 3.7705626487731934,
116
  "learning_rate": 0.0002186181818181818,
117
- "loss": 0.7169,
118
  "step": 2000
119
  },
120
  {
121
- "epoch": 0.7722007722007722,
122
  "eval_loss": Infinity,
123
- "eval_runtime": 179.9877,
124
- "eval_samples_per_second": 39.014,
125
- "eval_steps_per_second": 4.878,
126
- "eval_wer": 0.45044343162842637,
127
  "step": 2000
128
  },
129
  {
130
- "epoch": 0.8494208494208494,
131
  "eval_loss": Infinity,
132
- "eval_runtime": 182.9186,
133
- "eval_samples_per_second": 38.389,
134
- "eval_steps_per_second": 4.8,
135
- "eval_wer": 0.4499500084401335,
136
  "step": 2200
137
  },
138
  {
139
- "epoch": 0.9266409266409267,
140
  "eval_loss": Infinity,
141
- "eval_runtime": 180.2372,
142
- "eval_samples_per_second": 38.96,
143
- "eval_steps_per_second": 4.871,
144
- "eval_wer": 0.4431719320114786,
145
  "step": 2400
146
  },
147
  {
148
- "epoch": 0.9652509652509652,
149
- "grad_norm": 3.5582635402679443,
150
- "learning_rate": 0.00019134545454545454,
151
- "loss": 0.6687,
152
  "step": 2500
153
  },
154
  {
155
- "epoch": 1.0038610038610039,
156
  "eval_loss": Infinity,
157
- "eval_runtime": 180.3998,
158
- "eval_samples_per_second": 38.925,
159
- "eval_steps_per_second": 4.867,
160
- "eval_wer": 0.4175658655032267,
161
  "step": 2600
162
  },
163
  {
164
- "epoch": 1.0810810810810811,
165
  "eval_loss": Infinity,
166
- "eval_runtime": 180.1365,
167
- "eval_samples_per_second": 38.982,
168
- "eval_steps_per_second": 4.874,
169
- "eval_wer": 0.40537311882409466,
170
  "step": 2800
171
  },
172
  {
173
- "epoch": 1.1583011583011582,
174
- "grad_norm": 0.5179678201675415,
175
- "learning_rate": 0.0001641272727272727,
176
- "loss": 0.5609,
177
  "step": 3000
178
  },
179
  {
180
- "epoch": 1.1583011583011582,
181
  "eval_loss": Infinity,
182
- "eval_runtime": 180.8398,
183
- "eval_samples_per_second": 38.83,
184
- "eval_steps_per_second": 4.855,
185
- "eval_wer": 0.4009193253087141,
186
  "step": 3000
187
  },
188
  {
189
- "epoch": 1.2355212355212355,
190
  "eval_loss": Infinity,
191
- "eval_runtime": 181.6098,
192
- "eval_samples_per_second": 38.665,
193
- "eval_steps_per_second": 4.835,
194
- "eval_wer": 0.4022567618454027,
195
  "step": 3200
196
  },
197
  {
198
- "epoch": 1.3127413127413128,
199
  "eval_loss": Infinity,
200
- "eval_runtime": 182.1259,
201
- "eval_samples_per_second": 38.556,
202
- "eval_steps_per_second": 4.821,
203
- "eval_wer": 0.39188189007050755,
204
  "step": 3400
205
  },
206
  {
207
- "epoch": 1.3513513513513513,
208
- "grad_norm": 0.5364826321601868,
209
- "learning_rate": 0.00013696363636363636,
210
- "loss": 0.5324,
211
  "step": 3500
212
  },
213
  {
214
- "epoch": 1.3899613899613898,
215
  "eval_loss": Infinity,
216
- "eval_runtime": 181.3225,
217
- "eval_samples_per_second": 38.727,
218
- "eval_steps_per_second": 4.842,
219
- "eval_wer": 0.3794813862594627,
220
  "step": 3600
221
  },
222
  {
223
- "epoch": 1.4671814671814671,
224
  "eval_loss": Infinity,
225
- "eval_runtime": 181.5845,
226
- "eval_samples_per_second": 38.671,
227
- "eval_steps_per_second": 4.835,
228
- "eval_wer": 0.37523534987599494,
229
  "step": 3800
230
  },
231
  {
232
- "epoch": 1.5444015444015444,
233
- "grad_norm": 1.197091817855835,
234
- "learning_rate": 0.00010974545454545454,
235
- "loss": 0.5196,
236
  "step": 4000
237
  },
238
  {
239
- "epoch": 1.5444015444015444,
240
  "eval_loss": Infinity,
241
- "eval_runtime": 181.3235,
242
- "eval_samples_per_second": 38.726,
243
- "eval_steps_per_second": 4.842,
244
- "eval_wer": 0.36617194499629935,
245
  "step": 4000
246
  },
247
  {
248
- "epoch": 1.6216216216216215,
249
  "eval_loss": Infinity,
250
- "eval_runtime": 181.9653,
251
- "eval_samples_per_second": 38.59,
252
- "eval_steps_per_second": 4.825,
253
- "eval_wer": 0.37034007245529976,
254
  "step": 4200
255
  },
256
  {
257
- "epoch": 1.698841698841699,
258
  "eval_loss": Infinity,
259
- "eval_runtime": 181.5241,
260
- "eval_samples_per_second": 38.684,
261
- "eval_steps_per_second": 4.837,
262
- "eval_wer": 0.3613935309623051,
263
  "step": 4400
264
  },
265
  {
266
- "epoch": 1.7374517374517375,
267
- "grad_norm": 0.7301501631736755,
268
- "learning_rate": 8.247272727272728e-05,
269
- "loss": 0.4967,
270
  "step": 4500
271
  },
272
  {
273
- "epoch": 1.776061776061776,
274
  "eval_loss": Infinity,
275
- "eval_runtime": 181.6896,
276
- "eval_samples_per_second": 38.648,
277
- "eval_steps_per_second": 4.832,
278
- "eval_wer": 0.3530313064028151,
279
  "step": 4600
280
  },
281
  {
282
- "epoch": 1.8532818532818531,
283
  "eval_loss": Infinity,
284
- "eval_runtime": 184.0459,
285
- "eval_samples_per_second": 38.154,
286
- "eval_steps_per_second": 4.771,
287
- "eval_wer": 0.34805812005765263,
288
  "step": 4800
289
  },
290
  {
291
- "epoch": 1.9305019305019306,
292
- "grad_norm": 1.2918003797531128,
293
- "learning_rate": 5.519999999999999e-05,
294
- "loss": 0.4735,
295
  "step": 5000
296
  },
297
  {
298
- "epoch": 1.9305019305019306,
299
  "eval_loss": Infinity,
300
- "eval_runtime": 182.1737,
301
- "eval_samples_per_second": 38.546,
302
- "eval_steps_per_second": 4.82,
303
- "eval_wer": 0.35057717528209525,
304
  "step": 5000
305
  },
306
  {
307
- "epoch": 2.0077220077220077,
308
  "eval_loss": Infinity,
309
- "eval_runtime": 182.5384,
310
- "eval_samples_per_second": 38.469,
311
- "eval_steps_per_second": 4.81,
312
- "eval_wer": 0.3432277667406801,
313
  "step": 5200
314
  },
315
  {
316
- "epoch": 2.0849420849420848,
317
  "eval_loss": Infinity,
318
- "eval_runtime": 182.9254,
319
- "eval_samples_per_second": 38.387,
320
- "eval_steps_per_second": 4.8,
321
- "eval_wer": 0.33689117421733994,
322
  "step": 5400
323
  },
324
  {
325
- "epoch": 2.1235521235521237,
326
- "grad_norm": 0.6086732745170593,
327
- "learning_rate": 2.7927272727272724e-05,
328
- "loss": 0.4244,
329
  "step": 5500
330
  },
331
  {
332
- "epoch": 2.1621621621621623,
333
  "eval_loss": Infinity,
334
- "eval_runtime": 182.4452,
335
- "eval_samples_per_second": 38.488,
336
- "eval_steps_per_second": 4.812,
337
- "eval_wer": 0.32959370495890306,
338
  "step": 5600
339
  },
340
  {
341
- "epoch": 2.2393822393822393,
342
  "eval_loss": Infinity,
343
- "eval_runtime": 182.1862,
344
- "eval_samples_per_second": 38.543,
345
- "eval_steps_per_second": 4.819,
346
- "eval_wer": 0.32954176567592486,
347
  "step": 5800
348
  },
349
  {
350
- "epoch": 2.3166023166023164,
351
- "grad_norm": 1.0216798782348633,
352
- "learning_rate": 6.545454545454546e-07,
353
- "loss": 0.3674,
354
  "step": 6000
355
  },
356
  {
357
- "epoch": 2.3166023166023164,
358
  "eval_loss": Infinity,
359
- "eval_runtime": 182.1846,
360
- "eval_samples_per_second": 38.543,
361
- "eval_steps_per_second": 4.819,
362
- "eval_wer": 0.3289055094594419,
363
  "step": 6000
364
  },
365
  {
366
- "epoch": 2.3166023166023164,
367
  "step": 6000,
368
- "total_flos": 1.261526897313927e+19,
369
- "train_loss": 0.8721037826538086,
370
- "train_runtime": 9008.7975,
371
- "train_samples_per_second": 10.656,
372
- "train_steps_per_second": 0.666
373
  }
374
  ],
375
  "logging_steps": 500,
376
  "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
- "num_train_epochs": 3,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
@@ -389,8 +389,8 @@
389
  "attributes": {}
390
  }
391
  },
392
- "total_flos": 1.261526897313927e+19,
393
- "train_batch_size": 16,
394
  "trial_name": null,
395
  "trial_params": null
396
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.633204633204633,
5
  "eval_steps": 200,
6
  "global_step": 6000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.15444015444015444,
13
  "eval_loss": Infinity,
14
+ "eval_runtime": 188.4417,
15
+ "eval_samples_per_second": 37.264,
16
+ "eval_steps_per_second": 4.659,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
+ "epoch": 0.3088803088803089,
22
  "eval_loss": Infinity,
23
+ "eval_runtime": 187.0753,
24
+ "eval_samples_per_second": 37.536,
25
+ "eval_steps_per_second": 4.693,
26
+ "eval_wer": 0.8660615740199706,
27
  "step": 400
28
  },
29
  {
30
+ "epoch": 0.3861003861003861,
31
+ "grad_norm": 2.7789804935455322,
32
  "learning_rate": 0.00029699999999999996,
33
+ "loss": 3.7305,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 0.46332046332046334,
38
  "eval_loss": Infinity,
39
+ "eval_runtime": 186.9186,
40
+ "eval_samples_per_second": 37.567,
41
+ "eval_steps_per_second": 4.697,
42
+ "eval_wer": 0.7040110111279914,
43
  "step": 600
44
  },
45
  {
46
+ "epoch": 0.6177606177606177,
47
  "eval_loss": Infinity,
48
+ "eval_runtime": 187.3522,
49
+ "eval_samples_per_second": 37.48,
50
+ "eval_steps_per_second": 4.686,
51
+ "eval_wer": 0.5505953540311376,
52
  "step": 800
53
  },
54
  {
55
+ "epoch": 0.7722007722007722,
56
+ "grad_norm": 11.684309959411621,
57
+ "learning_rate": 0.0002731090909090909,
58
+ "loss": 0.8464,
59
  "step": 1000
60
  },
61
  {
62
+ "epoch": 0.7722007722007722,
63
  "eval_loss": Infinity,
64
+ "eval_runtime": 187.5603,
65
+ "eval_samples_per_second": 37.439,
66
+ "eval_steps_per_second": 4.681,
67
+ "eval_wer": 0.5168088504538195,
68
  "step": 1000
69
  },
70
  {
71
+ "epoch": 0.9266409266409267,
72
  "eval_loss": Infinity,
73
+ "eval_runtime": 187.4249,
74
+ "eval_samples_per_second": 37.466,
75
+ "eval_steps_per_second": 4.685,
76
+ "eval_wer": 0.4824769844052303,
77
  "step": 1200
78
  },
79
  {
80
+ "epoch": 1.0810810810810811,
81
  "eval_loss": Infinity,
82
+ "eval_runtime": 188.7764,
83
+ "eval_samples_per_second": 37.197,
84
+ "eval_steps_per_second": 4.651,
85
+ "eval_wer": 0.46011712308311586,
86
  "step": 1400
87
  },
88
  {
89
+ "epoch": 1.1583011583011582,
90
+ "grad_norm": 0.6005635261535645,
91
  "learning_rate": 0.0002458363636363636,
92
+ "loss": 0.6629,
93
  "step": 1500
94
  },
95
  {
96
+ "epoch": 1.2355212355212355,
97
  "eval_loss": Infinity,
98
+ "eval_runtime": 188.0077,
99
+ "eval_samples_per_second": 37.35,
100
+ "eval_steps_per_second": 4.67,
101
+ "eval_wer": 0.4445483230104008,
102
  "step": 1600
103
  },
104
  {
105
+ "epoch": 1.3899613899613898,
106
  "eval_loss": Infinity,
107
+ "eval_runtime": 188.204,
108
+ "eval_samples_per_second": 37.311,
109
+ "eval_steps_per_second": 4.665,
110
+ "eval_wer": 0.41425473621336656,
111
  "step": 1800
112
  },
113
  {
114
+ "epoch": 1.5444015444015444,
115
+ "grad_norm": 0.4440418481826782,
116
  "learning_rate": 0.0002186181818181818,
117
+ "loss": 0.5655,
118
  "step": 2000
119
  },
120
  {
121
+ "epoch": 1.5444015444015444,
122
  "eval_loss": Infinity,
123
+ "eval_runtime": 188.2865,
124
+ "eval_samples_per_second": 37.294,
125
+ "eval_steps_per_second": 4.663,
126
+ "eval_wer": 0.41700751821121107,
127
  "step": 2000
128
  },
129
  {
130
+ "epoch": 1.698841698841699,
131
  "eval_loss": Infinity,
132
+ "eval_runtime": 189.1618,
133
+ "eval_samples_per_second": 37.122,
134
+ "eval_steps_per_second": 4.642,
135
+ "eval_wer": 0.4047108929661226,
136
  "step": 2200
137
  },
138
  {
139
+ "epoch": 1.8532818532818531,
140
  "eval_loss": Infinity,
141
+ "eval_runtime": 188.2544,
142
+ "eval_samples_per_second": 37.301,
143
+ "eval_steps_per_second": 4.664,
144
+ "eval_wer": 0.3966213496422682,
145
  "step": 2400
146
  },
147
  {
148
+ "epoch": 1.9305019305019306,
149
+ "grad_norm": 0.9740249514579773,
150
+ "learning_rate": 0.00019150909090909088,
151
+ "loss": 0.5524,
152
  "step": 2500
153
  },
154
  {
155
+ "epoch": 2.0077220077220077,
156
  "eval_loss": Infinity,
157
+ "eval_runtime": 189.1265,
158
+ "eval_samples_per_second": 37.129,
159
+ "eval_steps_per_second": 4.642,
160
+ "eval_wer": 0.37794917741160583,
161
  "step": 2600
162
  },
163
  {
164
+ "epoch": 2.1621621621621623,
165
  "eval_loss": Infinity,
166
+ "eval_runtime": 188.7855,
167
+ "eval_samples_per_second": 37.196,
168
+ "eval_steps_per_second": 4.651,
169
+ "eval_wer": 0.37366418656590444,
170
  "step": 2800
171
  },
172
  {
173
+ "epoch": 2.3166023166023164,
174
+ "grad_norm": 0.5873022675514221,
175
+ "learning_rate": 0.00016429090909090907,
176
+ "loss": 0.4773,
177
  "step": 3000
178
  },
179
  {
180
+ "epoch": 2.3166023166023164,
181
  "eval_loss": Infinity,
182
+ "eval_runtime": 189.7378,
183
+ "eval_samples_per_second": 37.009,
184
+ "eval_steps_per_second": 4.627,
185
+ "eval_wer": 0.3698336644462623,
186
  "step": 3000
187
  },
188
  {
189
+ "epoch": 2.471042471042471,
190
  "eval_loss": Infinity,
191
+ "eval_runtime": 189.8812,
192
+ "eval_samples_per_second": 36.981,
193
+ "eval_steps_per_second": 4.624,
194
+ "eval_wer": 0.37235271967070493,
195
  "step": 3200
196
  },
197
  {
198
+ "epoch": 2.6254826254826256,
199
  "eval_loss": Infinity,
200
+ "eval_runtime": 189.4769,
201
+ "eval_samples_per_second": 37.06,
202
+ "eval_steps_per_second": 4.634,
203
+ "eval_wer": 0.3583940373703141,
204
  "step": 3400
205
  },
206
  {
207
+ "epoch": 2.7027027027027026,
208
+ "grad_norm": 1.4725390672683716,
209
+ "learning_rate": 0.00013712727272727272,
210
+ "loss": 0.4694,
211
  "step": 3500
212
  },
213
  {
214
+ "epoch": 2.7799227799227797,
215
  "eval_loss": Infinity,
216
+ "eval_runtime": 189.6126,
217
+ "eval_samples_per_second": 37.033,
218
+ "eval_steps_per_second": 4.63,
219
+ "eval_wer": 0.3820783504083726,
220
  "step": 3600
221
  },
222
  {
223
+ "epoch": 2.9343629343629343,
224
  "eval_loss": Infinity,
225
+ "eval_runtime": 190.0131,
226
+ "eval_samples_per_second": 36.955,
227
+ "eval_steps_per_second": 4.621,
228
+ "eval_wer": 0.4729850804409645,
229
  "step": 3800
230
  },
231
  {
232
+ "epoch": 3.088803088803089,
233
+ "grad_norm": 14.039852142333984,
234
+ "learning_rate": 0.0001099090909090909,
235
+ "loss": 0.6537,
236
  "step": 4000
237
  },
238
  {
239
+ "epoch": 3.088803088803089,
240
  "eval_loss": Infinity,
241
+ "eval_runtime": 189.7145,
242
+ "eval_samples_per_second": 37.014,
243
+ "eval_steps_per_second": 4.628,
244
+ "eval_wer": 0.4753613026372171,
245
  "step": 4000
246
  },
247
  {
248
+ "epoch": 3.2432432432432434,
249
  "eval_loss": Infinity,
250
+ "eval_runtime": 189.6267,
251
+ "eval_samples_per_second": 37.031,
252
+ "eval_steps_per_second": 4.63,
253
+ "eval_wer": 0.5899263760663784,
254
  "step": 4200
255
  },
256
  {
257
+ "epoch": 3.3976833976833976,
258
  "eval_loss": Infinity,
259
+ "eval_runtime": 189.2072,
260
+ "eval_samples_per_second": 37.113,
261
+ "eval_steps_per_second": 4.64,
262
+ "eval_wer": 0.5957565605806812,
263
  "step": 4400
264
  },
265
  {
266
+ "epoch": 3.474903474903475,
267
+ "grad_norm": 15.850507736206055,
268
+ "learning_rate": 8.269090909090907e-05,
269
+ "loss": 0.8238,
270
  "step": 4500
271
  },
272
  {
273
+ "epoch": 3.552123552123552,
274
  "eval_loss": Infinity,
275
+ "eval_runtime": 188.9868,
276
+ "eval_samples_per_second": 37.156,
277
+ "eval_steps_per_second": 4.646,
278
+ "eval_wer": 0.633646267513277,
279
  "step": 4600
280
  },
281
  {
282
+ "epoch": 3.7065637065637067,
283
  "eval_loss": Infinity,
284
+ "eval_runtime": 190.0461,
285
+ "eval_samples_per_second": 36.949,
286
+ "eval_steps_per_second": 4.62,
287
+ "eval_wer": 0.6025865762923143,
288
  "step": 4800
289
  },
290
  {
291
+ "epoch": 3.861003861003861,
292
+ "grad_norm": 6.711777687072754,
293
+ "learning_rate": 5.547272727272727e-05,
294
+ "loss": 0.8682,
295
  "step": 5000
296
  },
297
  {
298
+ "epoch": 3.861003861003861,
299
  "eval_loss": Infinity,
300
+ "eval_runtime": 189.1358,
301
+ "eval_samples_per_second": 37.127,
302
+ "eval_steps_per_second": 4.642,
303
+ "eval_wer": 0.5671380156596938,
304
  "step": 5000
305
  },
306
  {
307
+ "epoch": 4.015444015444015,
308
  "eval_loss": Infinity,
309
+ "eval_runtime": 190.8009,
310
+ "eval_samples_per_second": 36.803,
311
+ "eval_steps_per_second": 4.602,
312
+ "eval_wer": 0.5377923207770117,
313
  "step": 5200
314
  },
315
  {
316
+ "epoch": 4.1698841698841695,
317
  "eval_loss": Infinity,
318
+ "eval_runtime": 189.3706,
319
+ "eval_samples_per_second": 37.081,
320
+ "eval_steps_per_second": 4.636,
321
+ "eval_wer": 0.5373638216924416,
322
  "step": 5400
323
  },
324
  {
325
+ "epoch": 4.2471042471042475,
326
+ "grad_norm": 6.047791957855225,
327
+ "learning_rate": 2.8309090909090903e-05,
328
+ "loss": 0.855,
329
  "step": 5500
330
  },
331
  {
332
+ "epoch": 4.324324324324325,
333
  "eval_loss": Infinity,
334
+ "eval_runtime": 189.8451,
335
+ "eval_samples_per_second": 36.988,
336
+ "eval_steps_per_second": 4.625,
337
+ "eval_wer": 0.5328451040733383,
338
  "step": 5600
339
  },
340
  {
341
+ "epoch": 4.478764478764479,
342
  "eval_loss": Infinity,
343
+ "eval_runtime": 190.1917,
344
+ "eval_samples_per_second": 36.921,
345
+ "eval_steps_per_second": 4.616,
346
+ "eval_wer": 0.5225351564021659,
347
  "step": 5800
348
  },
349
  {
350
+ "epoch": 4.633204633204633,
351
+ "grad_norm": 9.171218872070312,
352
+ "learning_rate": 1.0363636363636363e-06,
353
+ "loss": 0.9644,
354
  "step": 6000
355
  },
356
  {
357
+ "epoch": 4.633204633204633,
358
  "eval_loss": Infinity,
359
+ "eval_runtime": 190.3777,
360
+ "eval_samples_per_second": 36.885,
361
+ "eval_steps_per_second": 4.612,
362
+ "eval_wer": 0.5237557295521535,
363
  "step": 6000
364
  },
365
  {
366
+ "epoch": 4.633204633204633,
367
  "step": 6000,
368
+ "total_flos": 2.5308493485736165e+19,
369
+ "train_loss": 0.9557839482625325,
370
+ "train_runtime": 12299.0724,
371
+ "train_samples_per_second": 15.611,
372
+ "train_steps_per_second": 0.488
373
  }
374
  ],
375
  "logging_steps": 500,
376
  "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
+ "num_train_epochs": 5,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
 
389
  "attributes": {}
390
  }
391
  },
392
+ "total_flos": 2.5308493485736165e+19,
393
+ "train_batch_size": 32,
394
  "trial_name": null,
395
  "trial_params": null
396
  }