DewiBrynJones committed on
Commit
da85d55
1 Parent(s): 3528274

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: nan
21
  - Wer: 1.0
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: nan
23
  - Wer: 1.0
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 0.6412722842118763,
3
- "eval_loss": 0.5324302911758423,
4
- "eval_runtime": 187.9214,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.367,
7
- "eval_steps_per_second": 0.585,
8
- "eval_wer": 0.40138720950318235,
9
- "total_flos": 1.1393778193380235e+19,
10
- "train_loss": 0.73015986328125,
11
- "train_runtime": 7697.7754,
12
  "train_samples": 124748,
13
- "train_samples_per_second": 10.393,
14
- "train_steps_per_second": 1.299
15
  }
 
1
  {
2
+ "epoch": 0.9619084263178146,
3
+ "eval_loss": NaN,
4
+ "eval_runtime": 185.8154,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.79,
7
+ "eval_steps_per_second": 0.592,
8
+ "eval_wer": 1.0,
9
+ "total_flos": 1.7109669148845115e+19,
10
+ "train_loss": 0.5128920831044514,
11
+ "train_runtime": 11433.8652,
12
  "train_samples": 124748,
13
+ "train_samples_per_second": 10.495,
14
+ "train_steps_per_second": 1.312
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.6412722842118763,
3
- "eval_loss": 0.5324302911758423,
4
- "eval_runtime": 187.9214,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.367,
7
- "eval_steps_per_second": 0.585,
8
- "eval_wer": 0.40138720950318235
9
  }
 
1
  {
2
+ "epoch": 0.9619084263178146,
3
+ "eval_loss": NaN,
4
+ "eval_runtime": 185.8154,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.79,
7
+ "eval_steps_per_second": 0.592,
8
+ "eval_wer": 1.0
9
  }
runs/Aug31_19-52-07_4b35055fdbcb/events.out.tfevents.1725144230.4b35055fdbcb.1485.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29fde314f4c8cf8aacdf9639192ada17a865f3885802902b46d5a28d6252fac
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.6412722842118763,
3
- "total_flos": 1.1393778193380235e+19,
4
- "train_loss": 0.73015986328125,
5
- "train_runtime": 7697.7754,
6
  "train_samples": 124748,
7
- "train_samples_per_second": 10.393,
8
- "train_steps_per_second": 1.299
9
  }
 
1
  {
2
+ "epoch": 0.9619084263178146,
3
+ "total_flos": 1.7109669148845115e+19,
4
+ "train_loss": 0.5128920831044514,
5
+ "train_runtime": 11433.8652,
6
  "train_samples": 124748,
7
+ "train_samples_per_second": 10.495,
8
+ "train_steps_per_second": 1.312
9
  }
trainer_state.json CHANGED
@@ -1,345 +1,505 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6412722842118763,
5
  "eval_steps": 500,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
- "grad_norm": 8.037480354309082,
14
  "learning_rate": 0.0002465,
15
- "loss": 4.7051,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
- "eval_loss": 1.7504417896270752,
21
- "eval_runtime": 184.6482,
22
- "eval_samples_per_second": 38.029,
23
- "eval_steps_per_second": 0.596,
24
- "eval_wer": 0.9570416827223323,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
- "grad_norm": 7.763198375701904,
30
- "learning_rate": 0.0002874574468085106,
31
- "loss": 1.0409,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
- "eval_loss": 1.1511156558990479,
37
- "eval_runtime": 184.729,
38
- "eval_samples_per_second": 38.012,
39
- "eval_steps_per_second": 0.595,
40
- "eval_wer": 0.776093224730597,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
- "grad_norm": 3.2267072200775146,
46
- "learning_rate": 0.0002715,
47
- "loss": 0.8183,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
- "eval_loss": 1.0506497621536255,
53
- "eval_runtime": 186.2316,
54
- "eval_samples_per_second": 37.706,
55
- "eval_steps_per_second": 0.591,
56
- "eval_wer": 0.70972242522489,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
- "grad_norm": 6.372620582580566,
62
- "learning_rate": 0.00025554255319148935,
63
- "loss": 0.7091,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
- "eval_loss": 0.9421387314796448,
69
- "eval_runtime": 186.5946,
70
- "eval_samples_per_second": 37.632,
71
- "eval_steps_per_second": 0.59,
72
- "eval_wer": 0.6609707809032807,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
- "grad_norm": 5.675894260406494,
78
- "learning_rate": 0.0002395851063829787,
79
- "loss": 0.6547,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
- "eval_loss": 0.8725515007972717,
85
- "eval_runtime": 187.2013,
86
- "eval_samples_per_second": 37.51,
87
- "eval_steps_per_second": 0.588,
88
- "eval_wer": 0.6127639219229594,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
- "grad_norm": 6.913870334625244,
94
- "learning_rate": 0.00022362765957446805,
95
- "loss": 0.6088,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
- "eval_loss": 0.8246235847473145,
101
- "eval_runtime": 188.3497,
102
- "eval_samples_per_second": 37.282,
103
- "eval_steps_per_second": 0.584,
104
- "eval_wer": 0.5989582641278784,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
- "grad_norm": 4.30249547958374,
110
- "learning_rate": 0.00020767021276595744,
111
- "loss": 0.5781,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
- "eval_loss": 0.802536129951477,
117
- "eval_runtime": 187.0791,
118
- "eval_samples_per_second": 37.535,
119
- "eval_steps_per_second": 0.588,
120
- "eval_wer": 0.5747352476115813,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
- "grad_norm": 3.4820008277893066,
126
- "learning_rate": 0.0001917446808510638,
127
- "loss": 0.5429,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
- "eval_loss": 0.7359501123428345,
133
- "eval_runtime": 186.7747,
134
- "eval_samples_per_second": 37.596,
135
- "eval_steps_per_second": 0.589,
136
- "eval_wer": 0.53048804794111,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
- "grad_norm": 11.696717262268066,
142
- "learning_rate": 0.00017585106382978722,
143
- "loss": 0.5104,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
- "eval_loss": 0.7335178852081299,
149
- "eval_runtime": 187.3685,
150
- "eval_samples_per_second": 37.477,
151
- "eval_steps_per_second": 0.587,
152
- "eval_wer": 0.5394039251119468,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
- "grad_norm": 7.053103446960449,
158
- "learning_rate": 0.00015989361702127658,
159
- "loss": 0.501,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
- "eval_loss": 0.6932825446128845,
165
- "eval_runtime": 186.2726,
166
- "eval_samples_per_second": 37.697,
167
- "eval_steps_per_second": 0.591,
168
- "eval_wer": 0.5087763589736776,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
- "grad_norm": 6.128586769104004,
174
- "learning_rate": 0.00014393617021276595,
175
- "loss": 0.4708,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
- "eval_loss": 0.6770374774932861,
181
- "eval_runtime": 188.2655,
182
- "eval_samples_per_second": 37.298,
183
- "eval_steps_per_second": 0.584,
184
- "eval_wer": 0.5112743990751937,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
- "grad_norm": 7.154539108276367,
190
- "learning_rate": 0.00012801063829787234,
191
- "loss": 0.4526,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
- "eval_loss": 0.6608560681343079,
197
- "eval_runtime": 187.3283,
198
- "eval_samples_per_second": 37.485,
199
- "eval_steps_per_second": 0.587,
200
- "eval_wer": 0.48059368314753054,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
- "grad_norm": 5.313536643981934,
206
- "learning_rate": 0.0001120531914893617,
207
- "loss": 0.4235,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
- "eval_loss": 0.637322187423706,
213
- "eval_runtime": 186.315,
214
- "eval_samples_per_second": 37.689,
215
- "eval_steps_per_second": 0.59,
216
- "eval_wer": 0.485842224850184,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
- "grad_norm": 6.399425983428955,
222
- "learning_rate": 9.612765957446806e-05,
223
- "loss": 0.4032,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
- "eval_loss": 0.6047533750534058,
229
- "eval_runtime": 186.8155,
230
- "eval_samples_per_second": 37.588,
231
- "eval_steps_per_second": 0.589,
232
- "eval_wer": 0.4466176802774419,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
- "grad_norm": 11.48141098022461,
238
- "learning_rate": 8.017021276595744e-05,
239
- "loss": 0.3863,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
- "eval_loss": 0.5946004390716553,
245
- "eval_runtime": 186.2938,
246
- "eval_samples_per_second": 37.693,
247
- "eval_steps_per_second": 0.59,
248
- "eval_wer": 0.4432160937562285,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
- "grad_norm": 32.89252471923828,
254
- "learning_rate": 6.424468085106383e-05,
255
- "loss": 0.3766,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
- "eval_loss": 0.5737225413322449,
261
- "eval_runtime": 186.9085,
262
- "eval_samples_per_second": 37.569,
263
- "eval_steps_per_second": 0.589,
264
- "eval_wer": 0.4298489217236477,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
- "grad_norm": 4.741519451141357,
270
- "learning_rate": 4.8287234042553194e-05,
271
- "loss": 0.3746,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
- "eval_loss": 0.5668203234672546,
277
- "eval_runtime": 186.8619,
278
- "eval_samples_per_second": 37.579,
279
- "eval_steps_per_second": 0.589,
280
- "eval_wer": 0.4247731168365245,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
- "grad_norm": 13.890504837036133,
286
- "learning_rate": 3.232978723404255e-05,
287
- "loss": 0.3586,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
- "eval_loss": 0.5485312342643738,
293
- "eval_runtime": 187.9252,
294
- "eval_samples_per_second": 37.366,
295
- "eval_steps_per_second": 0.585,
296
- "eval_wer": 0.4100772000690947,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
- "grad_norm": 8.073569297790527,
302
- "learning_rate": 1.6372340425531912e-05,
303
- "loss": 0.3552,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
- "eval_loss": 0.5377594828605652,
309
- "eval_runtime": 187.0305,
310
- "eval_samples_per_second": 37.545,
311
- "eval_steps_per_second": 0.588,
312
- "eval_wer": 0.40320758978992544,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
- "grad_norm": 6.519000053405762,
318
- "learning_rate": 4.1489361702127654e-07,
319
- "loss": 0.3326,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
- "eval_loss": 0.5324302911758423,
325
- "eval_runtime": 186.1276,
326
- "eval_samples_per_second": 37.727,
327
- "eval_steps_per_second": 0.591,
328
- "eval_wer": 0.40138720950318235,
329
  "step": 10000
330
  },
331
  {
332
- "epoch": 0.6412722842118763,
333
- "step": 10000,
334
- "total_flos": 1.1393778193380235e+19,
335
- "train_loss": 0.73015986328125,
336
- "train_runtime": 7697.7754,
337
- "train_samples_per_second": 10.393,
338
- "train_steps_per_second": 1.299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  }
340
  ],
341
  "logging_steps": 500,
342
- "max_steps": 10000,
343
  "num_input_tokens_seen": 0,
344
  "num_train_epochs": 1,
345
  "save_steps": 500,
@@ -355,7 +515,7 @@
355
  "attributes": {}
356
  }
357
  },
358
- "total_flos": 1.1393778193380235e+19,
359
  "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9619084263178146,
5
  "eval_steps": 500,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
+ "grad_norm": 9.123078346252441,
14
  "learning_rate": 0.0002465,
15
+ "loss": 4.6156,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
+ "eval_loss": 1.5867419242858887,
21
+ "eval_runtime": 188.7881,
22
+ "eval_samples_per_second": 37.195,
23
+ "eval_steps_per_second": 0.583,
24
+ "eval_wer": 0.9176576887814082,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
+ "grad_norm": 6.654547691345215,
30
+ "learning_rate": 0.00029181249999999997,
31
+ "loss": 1.0315,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
+ "eval_loss": 1.1748294830322266,
37
+ "eval_runtime": 190.3404,
38
+ "eval_samples_per_second": 36.892,
39
+ "eval_steps_per_second": 0.578,
40
+ "eval_wer": 0.7888358867377988,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
+ "grad_norm": 6.171149253845215,
46
+ "learning_rate": 0.0002813958333333333,
47
+ "loss": 0.834,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
+ "eval_loss": 1.0392996072769165,
53
+ "eval_runtime": 189.2832,
54
+ "eval_samples_per_second": 37.098,
55
+ "eval_steps_per_second": 0.581,
56
+ "eval_wer": 0.7219867391275462,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
+ "grad_norm": 6.896900177001953,
62
+ "learning_rate": 0.00027097916666666666,
63
+ "loss": 0.7184,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
+ "eval_loss": 0.9616143703460693,
69
+ "eval_runtime": 190.9944,
70
+ "eval_samples_per_second": 36.765,
71
+ "eval_steps_per_second": 0.576,
72
+ "eval_wer": 0.663747857399115,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
+ "grad_norm": 9.408955574035645,
78
+ "learning_rate": 0.0002605625,
79
+ "loss": 0.6655,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
+ "eval_loss": 0.9033711552619934,
85
+ "eval_runtime": 190.9851,
86
+ "eval_samples_per_second": 36.767,
87
+ "eval_steps_per_second": 0.576,
88
+ "eval_wer": 0.6331335787081944,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
+ "grad_norm": 6.4334211349487305,
94
+ "learning_rate": 0.0002501458333333333,
95
+ "loss": 0.6193,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
+ "eval_loss": 0.8614802956581116,
101
+ "eval_runtime": 191.2463,
102
+ "eval_samples_per_second": 36.717,
103
+ "eval_steps_per_second": 0.575,
104
+ "eval_wer": 0.6238988028009939,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
+ "grad_norm": 3.711681365966797,
110
+ "learning_rate": 0.00023972916666666665,
111
+ "loss": 0.5952,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
+ "eval_loss": 0.8161324858665466,
117
+ "eval_runtime": 191.2031,
118
+ "eval_samples_per_second": 36.725,
119
+ "eval_steps_per_second": 0.575,
120
+ "eval_wer": 0.5866275129884798,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
+ "grad_norm": 7.527787208557129,
126
+ "learning_rate": 0.00022933333333333332,
127
+ "loss": 0.5622,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
+ "eval_loss": 0.811023473739624,
133
+ "eval_runtime": 190.6985,
134
+ "eval_samples_per_second": 36.823,
135
+ "eval_steps_per_second": 0.577,
136
+ "eval_wer": 0.5850728816487065,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
+ "grad_norm": 11.801218032836914,
142
+ "learning_rate": 0.0002189583333333333,
143
+ "loss": 0.5341,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
+ "eval_loss": 0.757978618144989,
149
+ "eval_runtime": 192.268,
150
+ "eval_samples_per_second": 36.522,
151
+ "eval_steps_per_second": 0.572,
152
+ "eval_wer": 0.5546579146680132,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
+ "grad_norm": 9.381750106811523,
158
+ "learning_rate": 0.00020854166666666664,
159
+ "loss": 0.522,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
+ "eval_loss": 0.7397128343582153,
165
+ "eval_runtime": 191.0373,
166
+ "eval_samples_per_second": 36.757,
167
+ "eval_steps_per_second": 0.576,
168
+ "eval_wer": 0.5411711556092959,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
+ "grad_norm": 6.341240882873535,
174
+ "learning_rate": 0.00019812499999999998,
175
+ "loss": 0.5123,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
+ "eval_loss": 0.7228623628616333,
181
+ "eval_runtime": 191.6536,
182
+ "eval_samples_per_second": 36.639,
183
+ "eval_steps_per_second": 0.574,
184
+ "eval_wer": 0.531737067991868,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
+ "grad_norm": 6.53903341293335,
190
+ "learning_rate": 0.00018772916666666666,
191
+ "loss": 0.4884,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
+ "eval_loss": 0.72346431016922,
197
+ "eval_runtime": 191.4082,
198
+ "eval_samples_per_second": 36.686,
199
+ "eval_steps_per_second": 0.575,
200
+ "eval_wer": 0.5164830784358017,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
+ "grad_norm": 10.402660369873047,
206
+ "learning_rate": 0.00017731249999999998,
207
+ "loss": 0.4658,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
+ "eval_loss": 0.681357204914093,
213
+ "eval_runtime": 191.0697,
214
+ "eval_samples_per_second": 36.751,
215
+ "eval_steps_per_second": 0.576,
216
+ "eval_wer": 0.5116995973903453,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
+ "grad_norm": 11.663326263427734,
222
+ "learning_rate": 0.00016691666666666667,
223
+ "loss": 0.4471,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
+ "eval_loss": 0.662290632724762,
229
+ "eval_runtime": 191.4867,
230
+ "eval_samples_per_second": 36.671,
231
+ "eval_steps_per_second": 0.574,
232
+ "eval_wer": 0.4890577871085186,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
+ "grad_norm": 7.363061428070068,
238
+ "learning_rate": 0.00015649999999999998,
239
+ "loss": 0.4338,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
+ "eval_loss": 0.6449915170669556,
245
+ "eval_runtime": 190.9868,
246
+ "eval_samples_per_second": 36.767,
247
+ "eval_steps_per_second": 0.576,
248
+ "eval_wer": 0.4913830903945043,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
+ "grad_norm": 14.478469848632812,
254
+ "learning_rate": 0.00014610416666666667,
255
+ "loss": 0.4267,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
+ "eval_loss": 0.6256160736083984,
261
+ "eval_runtime": 190.8261,
262
+ "eval_samples_per_second": 36.798,
263
+ "eval_steps_per_second": 0.576,
264
+ "eval_wer": 0.4685419684024502,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
+ "grad_norm": 10.456161499023438,
270
+ "learning_rate": 0.00013568749999999998,
271
+ "loss": 0.4283,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
+ "eval_loss": 0.6342806816101074,
277
+ "eval_runtime": 190.609,
278
+ "eval_samples_per_second": 36.84,
279
+ "eval_steps_per_second": 0.577,
280
+ "eval_wer": 0.4710665833986633,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
+ "grad_norm": 9.847672462463379,
286
+ "learning_rate": 0.00012527083333333333,
287
+ "loss": 0.4131,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
+ "eval_loss": 0.5988845229148865,
293
+ "eval_runtime": 189.2404,
294
+ "eval_samples_per_second": 37.106,
295
+ "eval_steps_per_second": 0.581,
296
+ "eval_wer": 0.4486506597217608,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
+ "grad_norm": 7.610143661499023,
302
+ "learning_rate": 0.00011485416666666666,
303
+ "loss": 0.4317,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
+ "eval_loss": 0.7167520523071289,
309
+ "eval_runtime": 189.8256,
310
+ "eval_samples_per_second": 36.992,
311
+ "eval_steps_per_second": 0.579,
312
+ "eval_wer": 0.4919677380778379,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
+ "grad_norm": NaN,
318
+ "learning_rate": 0.00010691666666666665,
319
+ "loss": 0.5904,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
+ "eval_loss": NaN,
325
+ "eval_runtime": 190.1563,
326
+ "eval_samples_per_second": 36.928,
327
+ "eval_steps_per_second": 0.578,
328
+ "eval_wer": 0.7309956284298224,
329
  "step": 10000
330
  },
331
  {
332
+ "epoch": 0.6733358984224702,
333
+ "grad_norm": NaN,
334
+ "learning_rate": 0.000106875,
335
+ "loss": 0.0513,
336
+ "step": 10500
337
+ },
338
+ {
339
+ "epoch": 0.6733358984224702,
340
+ "eval_loss": NaN,
341
+ "eval_runtime": 185.5416,
342
+ "eval_samples_per_second": 37.846,
343
+ "eval_steps_per_second": 0.593,
344
+ "eval_wer": 1.0,
345
+ "step": 10500
346
+ },
347
+ {
348
+ "epoch": 0.705399512633064,
349
+ "grad_norm": NaN,
350
+ "learning_rate": 0.000106875,
351
+ "loss": 0.0,
352
+ "step": 11000
353
+ },
354
+ {
355
+ "epoch": 0.705399512633064,
356
+ "eval_loss": NaN,
357
+ "eval_runtime": 185.2695,
358
+ "eval_samples_per_second": 37.902,
359
+ "eval_steps_per_second": 0.594,
360
+ "eval_wer": 1.0,
361
+ "step": 11000
362
+ },
363
+ {
364
+ "epoch": 0.7374631268436578,
365
+ "grad_norm": NaN,
366
+ "learning_rate": 0.000106875,
367
+ "loss": 0.0,
368
+ "step": 11500
369
+ },
370
+ {
371
+ "epoch": 0.7374631268436578,
372
+ "eval_loss": NaN,
373
+ "eval_runtime": 185.1794,
374
+ "eval_samples_per_second": 37.92,
375
+ "eval_steps_per_second": 0.594,
376
+ "eval_wer": 1.0,
377
+ "step": 11500
378
+ },
379
+ {
380
+ "epoch": 0.7695267410542517,
381
+ "grad_norm": NaN,
382
+ "learning_rate": 0.000106875,
383
+ "loss": 0.0,
384
+ "step": 12000
385
+ },
386
+ {
387
+ "epoch": 0.7695267410542517,
388
+ "eval_loss": NaN,
389
+ "eval_runtime": 184.8489,
390
+ "eval_samples_per_second": 37.988,
391
+ "eval_steps_per_second": 0.595,
392
+ "eval_wer": 1.0,
393
+ "step": 12000
394
+ },
395
+ {
396
+ "epoch": 0.8015903552648455,
397
+ "grad_norm": NaN,
398
+ "learning_rate": 0.000106875,
399
+ "loss": 0.0,
400
+ "step": 12500
401
+ },
402
+ {
403
+ "epoch": 0.8015903552648455,
404
+ "eval_loss": NaN,
405
+ "eval_runtime": 184.8249,
406
+ "eval_samples_per_second": 37.993,
407
+ "eval_steps_per_second": 0.595,
408
+ "eval_wer": 1.0,
409
+ "step": 12500
410
+ },
411
+ {
412
+ "epoch": 0.8336539694754392,
413
+ "grad_norm": NaN,
414
+ "learning_rate": 0.000106875,
415
+ "loss": 0.0,
416
+ "step": 13000
417
+ },
418
+ {
419
+ "epoch": 0.8336539694754392,
420
+ "eval_loss": NaN,
421
+ "eval_runtime": 185.2964,
422
+ "eval_samples_per_second": 37.896,
423
+ "eval_steps_per_second": 0.594,
424
+ "eval_wer": 1.0,
425
+ "step": 13000
426
+ },
427
+ {
428
+ "epoch": 0.8657175836860331,
429
+ "grad_norm": NaN,
430
+ "learning_rate": 0.000106875,
431
+ "loss": 0.0,
432
+ "step": 13500
433
+ },
434
+ {
435
+ "epoch": 0.8657175836860331,
436
+ "eval_loss": NaN,
437
+ "eval_runtime": 184.7613,
438
+ "eval_samples_per_second": 38.006,
439
+ "eval_steps_per_second": 0.595,
440
+ "eval_wer": 1.0,
441
+ "step": 13500
442
+ },
443
+ {
444
+ "epoch": 0.8977811978966269,
445
+ "grad_norm": NaN,
446
+ "learning_rate": 0.000106875,
447
+ "loss": 0.0,
448
+ "step": 14000
449
+ },
450
+ {
451
+ "epoch": 0.8977811978966269,
452
+ "eval_loss": NaN,
453
+ "eval_runtime": 184.7837,
454
+ "eval_samples_per_second": 38.001,
455
+ "eval_steps_per_second": 0.595,
456
+ "eval_wer": 1.0,
457
+ "step": 14000
458
+ },
459
+ {
460
+ "epoch": 0.9298448121072207,
461
+ "grad_norm": NaN,
462
+ "learning_rate": 0.000106875,
463
+ "loss": 0.0,
464
+ "step": 14500
465
+ },
466
+ {
467
+ "epoch": 0.9298448121072207,
468
+ "eval_loss": NaN,
469
+ "eval_runtime": 184.6054,
470
+ "eval_samples_per_second": 38.038,
471
+ "eval_steps_per_second": 0.596,
472
+ "eval_wer": 1.0,
473
+ "step": 14500
474
+ },
475
+ {
476
+ "epoch": 0.9619084263178146,
477
+ "grad_norm": NaN,
478
+ "learning_rate": 0.000106875,
479
+ "loss": 0.0,
480
+ "step": 15000
481
+ },
482
+ {
483
+ "epoch": 0.9619084263178146,
484
+ "eval_loss": NaN,
485
+ "eval_runtime": 184.8182,
486
+ "eval_samples_per_second": 37.994,
487
+ "eval_steps_per_second": 0.595,
488
+ "eval_wer": 1.0,
489
+ "step": 15000
490
+ },
491
+ {
492
+ "epoch": 0.9619084263178146,
493
+ "step": 15000,
494
+ "total_flos": 1.7109669148845115e+19,
495
+ "train_loss": 0.5128920831044514,
496
+ "train_runtime": 11433.8652,
497
+ "train_samples_per_second": 10.495,
498
+ "train_steps_per_second": 1.312
499
  }
500
  ],
501
  "logging_steps": 500,
502
+ "max_steps": 15000,
503
  "num_input_tokens_seen": 0,
504
  "num_train_epochs": 1,
505
  "save_steps": 500,
 
515
  "attributes": {}
516
  }
517
  },
518
+ "total_flos": 1.7109669148845115e+19,
519
  "train_batch_size": 8,
520
  "trial_name": null,
521
  "trial_params": null