vasilis commited on
Commit
4b628c5
1 Parent(s): 0ec1229

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 139.66,
3
+ "eval_loss": 0.882430911064148,
4
+ "eval_runtime": 187.1455,
5
+ "eval_samples": 2613,
6
+ "eval_samples_per_second": 13.962,
7
+ "eval_steps_per_second": 0.438,
8
+ "eval_wer": 0.5246322597007355,
9
+ "train_loss": 0.3391121254348755,
10
+ "train_runtime": 108834.0383,
11
+ "train_samples": 5716,
12
+ "train_samples_per_second": 7.351,
13
+ "train_steps_per_second": 0.23
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 139.66,
3
+ "eval_loss": 0.882430911064148,
4
+ "eval_runtime": 187.1455,
5
+ "eval_samples": 2613,
6
+ "eval_samples_per_second": 13.962,
7
+ "eval_steps_per_second": 0.438,
8
+ "eval_wer": 0.5246322597007355
9
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b77bf08ed1374620a18a053b6b80a707cacb9a14178b336b4e5dd602d4f08a60
3
  size 3850508017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f528ae94ef35f4d4dc5f7e24c9587eb4daa7a69077f892c8d22f1c203cdff9
3
  size 3850508017
runs/Jan29_08-32-21_job-52fc6e89-fa32-4d14-ae2b-5b667d8aed8a/events.out.tfevents.1643445166.job-52fc6e89-fa32-4d14-ae2b-5b667d8aed8a.1551559.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e795a84b9138aeab17fde535f60f83d55e49e5c4c52d732305c9fd1558c6589b
3
- size 57952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e57be450d7b1f4a02448f5225660a1f498bfca389233b3b378c775c79dcc71
3
+ size 60560
runs/Jan29_08-32-21_job-52fc6e89-fa32-4d14-ae2b-5b667d8aed8a/events.out.tfevents.1643554195.job-52fc6e89-fa32-4d14-ae2b-5b667d8aed8a.1551559.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e94ae459ee58126d0e5154130f9c8f597c279c5977b0558e28269ada2706087d
3
+ size 364
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 139.66,
3
+ "train_loss": 0.3391121254348755,
4
+ "train_runtime": 108834.0383,
5
+ "train_samples": 5716,
6
+ "train_samples_per_second": 7.351,
7
+ "train_steps_per_second": 0.23
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1975 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 139.66480446927375,
5
+ "global_step": 25000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.56,
12
+ "learning_rate": 0.000194,
13
+ "loss": 3.3358,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 1.12,
18
+ "learning_rate": 0.00039400000000000004,
19
+ "loss": 0.9471,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 1.68,
24
+ "learning_rate": 0.000594,
25
+ "loss": 0.9236,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 2.23,
30
+ "learning_rate": 0.0007940000000000001,
31
+ "loss": 0.996,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 2.79,
36
+ "learning_rate": 0.000994,
37
+ "loss": 1.0296,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 2.79,
42
+ "eval_loss": 0.8106316328048706,
43
+ "eval_runtime": 194.0347,
44
+ "eval_samples_per_second": 13.467,
45
+ "eval_steps_per_second": 0.423,
46
+ "eval_wer": 0.8029419223941161,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 3.35,
51
+ "learning_rate": 0.0009960408163265306,
52
+ "loss": 1.0459,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 3.91,
57
+ "learning_rate": 0.0009919591836734694,
58
+ "loss": 0.9901,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 4.47,
63
+ "learning_rate": 0.0009878775510204082,
64
+ "loss": 0.9455,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 5.03,
69
+ "learning_rate": 0.000983795918367347,
70
+ "loss": 0.9814,
71
+ "step": 900
72
+ },
73
+ {
74
+ "epoch": 5.59,
75
+ "learning_rate": 0.0009797142857142858,
76
+ "loss": 0.9339,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 5.59,
81
+ "eval_loss": 0.7419061064720154,
82
+ "eval_runtime": 190.1458,
83
+ "eval_samples_per_second": 13.742,
84
+ "eval_steps_per_second": 0.431,
85
+ "eval_wer": 0.7931777834136444,
86
+ "step": 1000
87
+ },
88
+ {
89
+ "epoch": 6.15,
90
+ "learning_rate": 0.0009756326530612245,
91
+ "loss": 0.9541,
92
+ "step": 1100
93
+ },
94
+ {
95
+ "epoch": 6.7,
96
+ "learning_rate": 0.0009715510204081632,
97
+ "loss": 0.8954,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 7.26,
102
+ "learning_rate": 0.0009674693877551021,
103
+ "loss": 0.9209,
104
+ "step": 1300
105
+ },
106
+ {
107
+ "epoch": 7.82,
108
+ "learning_rate": 0.0009633877551020408,
109
+ "loss": 0.9078,
110
+ "step": 1400
111
+ },
112
+ {
113
+ "epoch": 8.38,
114
+ "learning_rate": 0.0009593061224489796,
115
+ "loss": 0.8925,
116
+ "step": 1500
117
+ },
118
+ {
119
+ "epoch": 8.38,
120
+ "eval_loss": 0.7136574983596802,
121
+ "eval_runtime": 190.5136,
122
+ "eval_samples_per_second": 13.716,
123
+ "eval_steps_per_second": 0.43,
124
+ "eval_wer": 0.7706061374587877,
125
+ "step": 1500
126
+ },
127
+ {
128
+ "epoch": 8.94,
129
+ "learning_rate": 0.0009552244897959184,
130
+ "loss": 0.8785,
131
+ "step": 1600
132
+ },
133
+ {
134
+ "epoch": 9.5,
135
+ "learning_rate": 0.0009511428571428572,
136
+ "loss": 0.8793,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 10.06,
141
+ "learning_rate": 0.0009470612244897959,
142
+ "loss": 0.8766,
143
+ "step": 1800
144
+ },
145
+ {
146
+ "epoch": 10.61,
147
+ "learning_rate": 0.0009429795918367347,
148
+ "loss": 0.8601,
149
+ "step": 1900
150
+ },
151
+ {
152
+ "epoch": 11.17,
153
+ "learning_rate": 0.0009388979591836735,
154
+ "loss": 0.8484,
155
+ "step": 2000
156
+ },
157
+ {
158
+ "epoch": 11.17,
159
+ "eval_loss": 0.7019568681716919,
160
+ "eval_runtime": 193.4802,
161
+ "eval_samples_per_second": 13.505,
162
+ "eval_steps_per_second": 0.424,
163
+ "eval_wer": 0.7676895764646209,
164
+ "step": 2000
165
+ },
166
+ {
167
+ "epoch": 11.73,
168
+ "learning_rate": 0.0009348163265306122,
169
+ "loss": 0.8123,
170
+ "step": 2100
171
+ },
172
+ {
173
+ "epoch": 12.29,
174
+ "learning_rate": 0.000930734693877551,
175
+ "loss": 0.7892,
176
+ "step": 2200
177
+ },
178
+ {
179
+ "epoch": 12.85,
180
+ "learning_rate": 0.0009266530612244898,
181
+ "loss": 0.7836,
182
+ "step": 2300
183
+ },
184
+ {
185
+ "epoch": 13.41,
186
+ "learning_rate": 0.0009225714285714286,
187
+ "loss": 0.7694,
188
+ "step": 2400
189
+ },
190
+ {
191
+ "epoch": 13.97,
192
+ "learning_rate": 0.0009184897959183673,
193
+ "loss": 0.7521,
194
+ "step": 2500
195
+ },
196
+ {
197
+ "epoch": 13.97,
198
+ "eval_loss": 0.7042976021766663,
199
+ "eval_runtime": 193.425,
200
+ "eval_samples_per_second": 13.509,
201
+ "eval_steps_per_second": 0.424,
202
+ "eval_wer": 0.7375095105249809,
203
+ "step": 2500
204
+ },
205
+ {
206
+ "epoch": 14.53,
207
+ "learning_rate": 0.0009144081632653061,
208
+ "loss": 0.7472,
209
+ "step": 2600
210
+ },
211
+ {
212
+ "epoch": 15.08,
213
+ "learning_rate": 0.0009103265306122449,
214
+ "loss": 0.7254,
215
+ "step": 2700
216
+ },
217
+ {
218
+ "epoch": 15.64,
219
+ "learning_rate": 0.0009062448979591837,
220
+ "loss": 0.7541,
221
+ "step": 2800
222
+ },
223
+ {
224
+ "epoch": 16.2,
225
+ "learning_rate": 0.0009021632653061224,
226
+ "loss": 0.7361,
227
+ "step": 2900
228
+ },
229
+ {
230
+ "epoch": 16.76,
231
+ "learning_rate": 0.0008980816326530612,
232
+ "loss": 0.719,
233
+ "step": 3000
234
+ },
235
+ {
236
+ "epoch": 16.76,
237
+ "eval_loss": 0.6617498397827148,
238
+ "eval_runtime": 191.0945,
239
+ "eval_samples_per_second": 13.674,
240
+ "eval_steps_per_second": 0.429,
241
+ "eval_wer": 0.742772001014456,
242
+ "step": 3000
243
+ },
244
+ {
245
+ "epoch": 17.32,
246
+ "learning_rate": 0.000894,
247
+ "loss": 0.6862,
248
+ "step": 3100
249
+ },
250
+ {
251
+ "epoch": 17.88,
252
+ "learning_rate": 0.0008899183673469387,
253
+ "loss": 0.6599,
254
+ "step": 3200
255
+ },
256
+ {
257
+ "epoch": 18.44,
258
+ "learning_rate": 0.0008858367346938775,
259
+ "loss": 0.6363,
260
+ "step": 3300
261
+ },
262
+ {
263
+ "epoch": 18.99,
264
+ "learning_rate": 0.0008817551020408164,
265
+ "loss": 0.6861,
266
+ "step": 3400
267
+ },
268
+ {
269
+ "epoch": 19.55,
270
+ "learning_rate": 0.0008776734693877552,
271
+ "loss": 0.656,
272
+ "step": 3500
273
+ },
274
+ {
275
+ "epoch": 19.55,
276
+ "eval_loss": 0.6388168334960938,
277
+ "eval_runtime": 190.9254,
278
+ "eval_samples_per_second": 13.686,
279
+ "eval_steps_per_second": 0.429,
280
+ "eval_wer": 0.7201686533096627,
281
+ "step": 3500
282
+ },
283
+ {
284
+ "epoch": 20.11,
285
+ "learning_rate": 0.0008735918367346939,
286
+ "loss": 0.6348,
287
+ "step": 3600
288
+ },
289
+ {
290
+ "epoch": 20.67,
291
+ "learning_rate": 0.0008695102040816327,
292
+ "loss": 0.5911,
293
+ "step": 3700
294
+ },
295
+ {
296
+ "epoch": 21.23,
297
+ "learning_rate": 0.0008654285714285715,
298
+ "loss": 0.605,
299
+ "step": 3800
300
+ },
301
+ {
302
+ "epoch": 21.79,
303
+ "learning_rate": 0.0008613469387755102,
304
+ "loss": 0.6051,
305
+ "step": 3900
306
+ },
307
+ {
308
+ "epoch": 22.35,
309
+ "learning_rate": 0.0008572653061224491,
310
+ "loss": 0.6085,
311
+ "step": 4000
312
+ },
313
+ {
314
+ "epoch": 22.35,
315
+ "eval_loss": 0.6211119890213013,
316
+ "eval_runtime": 188.9632,
317
+ "eval_samples_per_second": 13.828,
318
+ "eval_steps_per_second": 0.434,
319
+ "eval_wer": 0.6960119198579762,
320
+ "step": 4000
321
+ },
322
+ {
323
+ "epoch": 22.91,
324
+ "learning_rate": 0.0008531836734693878,
325
+ "loss": 0.5777,
326
+ "step": 4100
327
+ },
328
+ {
329
+ "epoch": 23.46,
330
+ "learning_rate": 0.0008491020408163266,
331
+ "loss": 0.5582,
332
+ "step": 4200
333
+ },
334
+ {
335
+ "epoch": 24.02,
336
+ "learning_rate": 0.0008450204081632653,
337
+ "loss": 0.5859,
338
+ "step": 4300
339
+ },
340
+ {
341
+ "epoch": 24.58,
342
+ "learning_rate": 0.0008409387755102042,
343
+ "loss": 0.5368,
344
+ "step": 4400
345
+ },
346
+ {
347
+ "epoch": 25.14,
348
+ "learning_rate": 0.0008368571428571429,
349
+ "loss": 0.5598,
350
+ "step": 4500
351
+ },
352
+ {
353
+ "epoch": 25.14,
354
+ "eval_loss": 0.6131592988967896,
355
+ "eval_runtime": 191.2607,
356
+ "eval_samples_per_second": 13.662,
357
+ "eval_steps_per_second": 0.429,
358
+ "eval_wer": 0.664436976921126,
359
+ "step": 4500
360
+ },
361
+ {
362
+ "epoch": 25.7,
363
+ "learning_rate": 0.0008327755102040816,
364
+ "loss": 0.5139,
365
+ "step": 4600
366
+ },
367
+ {
368
+ "epoch": 26.26,
369
+ "learning_rate": 0.0008286938775510205,
370
+ "loss": 0.5455,
371
+ "step": 4700
372
+ },
373
+ {
374
+ "epoch": 26.82,
375
+ "learning_rate": 0.0008246122448979592,
376
+ "loss": 0.5448,
377
+ "step": 4800
378
+ },
379
+ {
380
+ "epoch": 27.37,
381
+ "learning_rate": 0.000820530612244898,
382
+ "loss": 0.5316,
383
+ "step": 4900
384
+ },
385
+ {
386
+ "epoch": 27.93,
387
+ "learning_rate": 0.0008164489795918367,
388
+ "loss": 0.4969,
389
+ "step": 5000
390
+ },
391
+ {
392
+ "epoch": 27.93,
393
+ "eval_loss": 0.6065218448638916,
394
+ "eval_runtime": 194.8518,
395
+ "eval_samples_per_second": 13.41,
396
+ "eval_steps_per_second": 0.421,
397
+ "eval_wer": 0.65210499619579,
398
+ "step": 5000
399
+ },
400
+ {
401
+ "epoch": 28.49,
402
+ "learning_rate": 0.0008123673469387756,
403
+ "loss": 0.48,
404
+ "step": 5100
405
+ },
406
+ {
407
+ "epoch": 29.05,
408
+ "learning_rate": 0.0008082857142857143,
409
+ "loss": 0.5028,
410
+ "step": 5200
411
+ },
412
+ {
413
+ "epoch": 29.61,
414
+ "learning_rate": 0.0008042040816326531,
415
+ "loss": 0.4577,
416
+ "step": 5300
417
+ },
418
+ {
419
+ "epoch": 30.17,
420
+ "learning_rate": 0.0008001224489795919,
421
+ "loss": 0.4636,
422
+ "step": 5400
423
+ },
424
+ {
425
+ "epoch": 30.73,
426
+ "learning_rate": 0.0007960408163265306,
427
+ "loss": 0.4638,
428
+ "step": 5500
429
+ },
430
+ {
431
+ "epoch": 30.73,
432
+ "eval_loss": 0.697826623916626,
433
+ "eval_runtime": 189.0268,
434
+ "eval_samples_per_second": 13.823,
435
+ "eval_steps_per_second": 0.434,
436
+ "eval_wer": 0.6577479076845042,
437
+ "step": 5500
438
+ },
439
+ {
440
+ "epoch": 31.28,
441
+ "learning_rate": 0.0007919591836734694,
442
+ "loss": 0.46,
443
+ "step": 5600
444
+ },
445
+ {
446
+ "epoch": 31.84,
447
+ "learning_rate": 0.0007878775510204081,
448
+ "loss": 0.4577,
449
+ "step": 5700
450
+ },
451
+ {
452
+ "epoch": 32.4,
453
+ "learning_rate": 0.000783795918367347,
454
+ "loss": 0.4502,
455
+ "step": 5800
456
+ },
457
+ {
458
+ "epoch": 32.96,
459
+ "learning_rate": 0.0007797142857142857,
460
+ "loss": 0.4395,
461
+ "step": 5900
462
+ },
463
+ {
464
+ "epoch": 33.52,
465
+ "learning_rate": 0.0007756326530612245,
466
+ "loss": 0.4385,
467
+ "step": 6000
468
+ },
469
+ {
470
+ "epoch": 33.52,
471
+ "eval_loss": 0.5994424223899841,
472
+ "eval_runtime": 190.5342,
473
+ "eval_samples_per_second": 13.714,
474
+ "eval_steps_per_second": 0.43,
475
+ "eval_wer": 0.6565115394369769,
476
+ "step": 6000
477
+ },
478
+ {
479
+ "epoch": 34.08,
480
+ "learning_rate": 0.0007715510204081633,
481
+ "loss": 0.4173,
482
+ "step": 6100
483
+ },
484
+ {
485
+ "epoch": 34.64,
486
+ "learning_rate": 0.0007674693877551021,
487
+ "loss": 0.4144,
488
+ "step": 6200
489
+ },
490
+ {
491
+ "epoch": 35.2,
492
+ "learning_rate": 0.0007633877551020408,
493
+ "loss": 0.3906,
494
+ "step": 6300
495
+ },
496
+ {
497
+ "epoch": 35.75,
498
+ "learning_rate": 0.0007593061224489795,
499
+ "loss": 0.4085,
500
+ "step": 6400
501
+ },
502
+ {
503
+ "epoch": 36.31,
504
+ "learning_rate": 0.0007552244897959184,
505
+ "loss": 0.396,
506
+ "step": 6500
507
+ },
508
+ {
509
+ "epoch": 36.31,
510
+ "eval_loss": 0.6170300841331482,
511
+ "eval_runtime": 190.0522,
512
+ "eval_samples_per_second": 13.749,
513
+ "eval_steps_per_second": 0.431,
514
+ "eval_wer": 0.6257608419984784,
515
+ "step": 6500
516
+ },
517
+ {
518
+ "epoch": 36.87,
519
+ "learning_rate": 0.0007511428571428571,
520
+ "loss": 0.3864,
521
+ "step": 6600
522
+ },
523
+ {
524
+ "epoch": 37.43,
525
+ "learning_rate": 0.0007470612244897959,
526
+ "loss": 0.3864,
527
+ "step": 6700
528
+ },
529
+ {
530
+ "epoch": 37.99,
531
+ "learning_rate": 0.0007429795918367347,
532
+ "loss": 0.3975,
533
+ "step": 6800
534
+ },
535
+ {
536
+ "epoch": 38.55,
537
+ "learning_rate": 0.0007388979591836735,
538
+ "loss": 0.3873,
539
+ "step": 6900
540
+ },
541
+ {
542
+ "epoch": 39.11,
543
+ "learning_rate": 0.0007348163265306122,
544
+ "loss": 0.3861,
545
+ "step": 7000
546
+ },
547
+ {
548
+ "epoch": 39.11,
549
+ "eval_loss": 0.6486256718635559,
550
+ "eval_runtime": 191.5175,
551
+ "eval_samples_per_second": 13.644,
552
+ "eval_steps_per_second": 0.428,
553
+ "eval_wer": 0.6216713162566574,
554
+ "step": 7000
555
+ },
556
+ {
557
+ "epoch": 39.66,
558
+ "learning_rate": 0.0007307346938775509,
559
+ "loss": 0.3629,
560
+ "step": 7100
561
+ },
562
+ {
563
+ "epoch": 40.22,
564
+ "learning_rate": 0.0007266530612244898,
565
+ "loss": 0.3545,
566
+ "step": 7200
567
+ },
568
+ {
569
+ "epoch": 40.78,
570
+ "learning_rate": 0.0007225714285714285,
571
+ "loss": 0.381,
572
+ "step": 7300
573
+ },
574
+ {
575
+ "epoch": 41.34,
576
+ "learning_rate": 0.0007184897959183673,
577
+ "loss": 0.3679,
578
+ "step": 7400
579
+ },
580
+ {
581
+ "epoch": 41.9,
582
+ "learning_rate": 0.0007144081632653061,
583
+ "loss": 0.3602,
584
+ "step": 7500
585
+ },
586
+ {
587
+ "epoch": 41.9,
588
+ "eval_loss": 0.6508038640022278,
589
+ "eval_runtime": 188.3071,
590
+ "eval_samples_per_second": 13.876,
591
+ "eval_steps_per_second": 0.435,
592
+ "eval_wer": 0.6114633527770733,
593
+ "step": 7500
594
+ },
595
+ {
596
+ "epoch": 42.46,
597
+ "learning_rate": 0.000710326530612245,
598
+ "loss": 0.3431,
599
+ "step": 7600
600
+ },
601
+ {
602
+ "epoch": 43.02,
603
+ "learning_rate": 0.0007062448979591836,
604
+ "loss": 0.3555,
605
+ "step": 7700
606
+ },
607
+ {
608
+ "epoch": 43.58,
609
+ "learning_rate": 0.0007021632653061226,
610
+ "loss": 0.3327,
611
+ "step": 7800
612
+ },
613
+ {
614
+ "epoch": 44.13,
615
+ "learning_rate": 0.0006980816326530613,
616
+ "loss": 0.3327,
617
+ "step": 7900
618
+ },
619
+ {
620
+ "epoch": 44.69,
621
+ "learning_rate": 0.000694,
622
+ "loss": 0.3251,
623
+ "step": 8000
624
+ },
625
+ {
626
+ "epoch": 44.69,
627
+ "eval_loss": 0.7021898031234741,
628
+ "eval_runtime": 189.1301,
629
+ "eval_samples_per_second": 13.816,
630
+ "eval_steps_per_second": 0.434,
631
+ "eval_wer": 0.625317017499366,
632
+ "step": 8000
633
+ },
634
+ {
635
+ "epoch": 45.25,
636
+ "learning_rate": 0.0006899183673469388,
637
+ "loss": 0.3403,
638
+ "step": 8100
639
+ },
640
+ {
641
+ "epoch": 45.81,
642
+ "learning_rate": 0.0006858367346938776,
643
+ "loss": 0.3242,
644
+ "step": 8200
645
+ },
646
+ {
647
+ "epoch": 46.37,
648
+ "learning_rate": 0.0006817551020408164,
649
+ "loss": 0.316,
650
+ "step": 8300
651
+ },
652
+ {
653
+ "epoch": 46.93,
654
+ "learning_rate": 0.0006776734693877551,
655
+ "loss": 0.3113,
656
+ "step": 8400
657
+ },
658
+ {
659
+ "epoch": 47.49,
660
+ "learning_rate": 0.000673591836734694,
661
+ "loss": 0.3197,
662
+ "step": 8500
663
+ },
664
+ {
665
+ "epoch": 47.49,
666
+ "eval_loss": 0.7705556750297546,
667
+ "eval_runtime": 188.475,
668
+ "eval_samples_per_second": 13.864,
669
+ "eval_steps_per_second": 0.435,
670
+ "eval_wer": 0.621544509256911,
671
+ "step": 8500
672
+ },
673
+ {
674
+ "epoch": 48.04,
675
+ "learning_rate": 0.0006695102040816327,
676
+ "loss": 0.3188,
677
+ "step": 8600
678
+ },
679
+ {
680
+ "epoch": 48.6,
681
+ "learning_rate": 0.0006654285714285715,
682
+ "loss": 0.3134,
683
+ "step": 8700
684
+ },
685
+ {
686
+ "epoch": 49.16,
687
+ "learning_rate": 0.0006613469387755102,
688
+ "loss": 0.3093,
689
+ "step": 8800
690
+ },
691
+ {
692
+ "epoch": 49.72,
693
+ "learning_rate": 0.000657265306122449,
694
+ "loss": 0.2872,
695
+ "step": 8900
696
+ },
697
+ {
698
+ "epoch": 50.28,
699
+ "learning_rate": 0.0006531836734693878,
700
+ "loss": 0.3013,
701
+ "step": 9000
702
+ },
703
+ {
704
+ "epoch": 50.28,
705
+ "eval_loss": 0.6418728232383728,
706
+ "eval_runtime": 189.3407,
707
+ "eval_samples_per_second": 13.801,
708
+ "eval_steps_per_second": 0.433,
709
+ "eval_wer": 0.5998922140502155,
710
+ "step": 9000
711
+ },
712
+ {
713
+ "epoch": 50.84,
714
+ "learning_rate": 0.0006491020408163265,
715
+ "loss": 0.2866,
716
+ "step": 9100
717
+ },
718
+ {
719
+ "epoch": 51.4,
720
+ "learning_rate": 0.0006450204081632654,
721
+ "loss": 0.2794,
722
+ "step": 9200
723
+ },
724
+ {
725
+ "epoch": 51.96,
726
+ "learning_rate": 0.0006409387755102041,
727
+ "loss": 0.2842,
728
+ "step": 9300
729
+ },
730
+ {
731
+ "epoch": 52.51,
732
+ "learning_rate": 0.0006368571428571429,
733
+ "loss": 0.2894,
734
+ "step": 9400
735
+ },
736
+ {
737
+ "epoch": 53.07,
738
+ "learning_rate": 0.0006327755102040816,
739
+ "loss": 0.2813,
740
+ "step": 9500
741
+ },
742
+ {
743
+ "epoch": 53.07,
744
+ "eval_loss": 0.6907714009284973,
745
+ "eval_runtime": 188.5038,
746
+ "eval_samples_per_second": 13.862,
747
+ "eval_steps_per_second": 0.435,
748
+ "eval_wer": 0.595929495308141,
749
+ "step": 9500
750
+ },
751
+ {
752
+ "epoch": 53.63,
753
+ "learning_rate": 0.0006286938775510205,
754
+ "loss": 0.2846,
755
+ "step": 9600
756
+ },
757
+ {
758
+ "epoch": 54.19,
759
+ "learning_rate": 0.0006246122448979592,
760
+ "loss": 0.2784,
761
+ "step": 9700
762
+ },
763
+ {
764
+ "epoch": 54.75,
765
+ "learning_rate": 0.0006205306122448979,
766
+ "loss": 0.2841,
767
+ "step": 9800
768
+ },
769
+ {
770
+ "epoch": 55.31,
771
+ "learning_rate": 0.0006164489795918368,
772
+ "loss": 0.2838,
773
+ "step": 9900
774
+ },
775
+ {
776
+ "epoch": 55.87,
777
+ "learning_rate": 0.0006123673469387755,
778
+ "loss": 0.286,
779
+ "step": 10000
780
+ },
781
+ {
782
+ "epoch": 55.87,
783
+ "eval_loss": 0.7150660157203674,
784
+ "eval_runtime": 188.1197,
785
+ "eval_samples_per_second": 13.89,
786
+ "eval_steps_per_second": 0.436,
787
+ "eval_wer": 0.5916180573167639,
788
+ "step": 10000
789
+ },
790
+ {
791
+ "epoch": 56.42,
792
+ "learning_rate": 0.0006082857142857143,
793
+ "loss": 0.2831,
794
+ "step": 10100
795
+ },
796
+ {
797
+ "epoch": 56.98,
798
+ "learning_rate": 0.000604204081632653,
799
+ "loss": 0.2738,
800
+ "step": 10200
801
+ },
802
+ {
803
+ "epoch": 57.54,
804
+ "learning_rate": 0.0006001224489795919,
805
+ "loss": 0.2613,
806
+ "step": 10300
807
+ },
808
+ {
809
+ "epoch": 58.1,
810
+ "learning_rate": 0.0005960816326530613,
811
+ "loss": 0.2645,
812
+ "step": 10400
813
+ },
814
+ {
815
+ "epoch": 58.66,
816
+ "learning_rate": 0.000592,
817
+ "loss": 0.2645,
818
+ "step": 10500
819
+ },
820
+ {
821
+ "epoch": 58.66,
822
+ "eval_loss": 0.7181155681610107,
823
+ "eval_runtime": 192.5668,
824
+ "eval_samples_per_second": 13.569,
825
+ "eval_steps_per_second": 0.426,
826
+ "eval_wer": 0.5860068475779863,
827
+ "step": 10500
828
+ },
829
+ {
830
+ "epoch": 59.22,
831
+ "learning_rate": 0.0005879183673469388,
832
+ "loss": 0.2501,
833
+ "step": 10600
834
+ },
835
+ {
836
+ "epoch": 59.78,
837
+ "learning_rate": 0.0005838367346938776,
838
+ "loss": 0.2612,
839
+ "step": 10700
840
+ },
841
+ {
842
+ "epoch": 60.34,
843
+ "learning_rate": 0.0005797551020408164,
844
+ "loss": 0.2541,
845
+ "step": 10800
846
+ },
847
+ {
848
+ "epoch": 60.89,
849
+ "learning_rate": 0.0005756734693877551,
850
+ "loss": 0.2496,
851
+ "step": 10900
852
+ },
853
+ {
854
+ "epoch": 61.45,
855
+ "learning_rate": 0.0005715918367346939,
856
+ "loss": 0.2535,
857
+ "step": 11000
858
+ },
859
+ {
860
+ "epoch": 61.45,
861
+ "eval_loss": 0.7877444624900818,
862
+ "eval_runtime": 190.8548,
863
+ "eval_samples_per_second": 13.691,
864
+ "eval_steps_per_second": 0.43,
865
+ "eval_wer": 0.5979267055541466,
866
+ "step": 11000
867
+ },
868
+ {
869
+ "epoch": 62.01,
870
+ "learning_rate": 0.0005675102040816327,
871
+ "loss": 0.2619,
872
+ "step": 11100
873
+ },
874
+ {
875
+ "epoch": 62.57,
876
+ "learning_rate": 0.0005634285714285714,
877
+ "loss": 0.2495,
878
+ "step": 11200
879
+ },
880
+ {
881
+ "epoch": 63.13,
882
+ "learning_rate": 0.0005593469387755102,
883
+ "loss": 0.2521,
884
+ "step": 11300
885
+ },
886
+ {
887
+ "epoch": 63.69,
888
+ "learning_rate": 0.000555265306122449,
889
+ "loss": 0.2326,
890
+ "step": 11400
891
+ },
892
+ {
893
+ "epoch": 64.25,
894
+ "learning_rate": 0.0005511836734693878,
895
+ "loss": 0.247,
896
+ "step": 11500
897
+ },
898
+ {
899
+ "epoch": 64.25,
900
+ "eval_loss": 0.8198513984680176,
901
+ "eval_runtime": 187.6839,
902
+ "eval_samples_per_second": 13.922,
903
+ "eval_steps_per_second": 0.437,
904
+ "eval_wer": 0.6128899315242201,
905
+ "step": 11500
906
+ },
907
+ {
908
+ "epoch": 64.8,
909
+ "learning_rate": 0.0005471020408163265,
910
+ "loss": 0.249,
911
+ "step": 11600
912
+ },
913
+ {
914
+ "epoch": 65.36,
915
+ "learning_rate": 0.0005430204081632654,
916
+ "loss": 0.2463,
917
+ "step": 11700
918
+ },
919
+ {
920
+ "epoch": 65.92,
921
+ "learning_rate": 0.0005389387755102041,
922
+ "loss": 0.2333,
923
+ "step": 11800
924
+ },
925
+ {
926
+ "epoch": 66.48,
927
+ "learning_rate": 0.0005348571428571428,
928
+ "loss": 0.2381,
929
+ "step": 11900
930
+ },
931
+ {
932
+ "epoch": 67.04,
933
+ "learning_rate": 0.0005307755102040816,
934
+ "loss": 0.2412,
935
+ "step": 12000
936
+ },
937
+ {
938
+ "epoch": 67.04,
939
+ "eval_loss": 0.767884373664856,
940
+ "eval_runtime": 188.3627,
941
+ "eval_samples_per_second": 13.872,
942
+ "eval_steps_per_second": 0.435,
943
+ "eval_wer": 0.5884161805731677,
944
+ "step": 12000
945
+ },
946
+ {
947
+ "epoch": 67.6,
948
+ "learning_rate": 0.0005266938775510204,
949
+ "loss": 0.2285,
950
+ "step": 12100
951
+ },
952
+ {
953
+ "epoch": 68.16,
954
+ "learning_rate": 0.0005226122448979592,
955
+ "loss": 0.241,
956
+ "step": 12200
957
+ },
958
+ {
959
+ "epoch": 68.72,
960
+ "learning_rate": 0.0005185306122448979,
961
+ "loss": 0.229,
962
+ "step": 12300
963
+ },
964
+ {
965
+ "epoch": 69.27,
966
+ "learning_rate": 0.0005144489795918368,
967
+ "loss": 0.2371,
968
+ "step": 12400
969
+ },
970
+ {
971
+ "epoch": 69.83,
972
+ "learning_rate": 0.0005103673469387755,
973
+ "loss": 0.2404,
974
+ "step": 12500
975
+ },
976
+ {
977
+ "epoch": 69.83,
978
+ "eval_loss": 0.7266025543212891,
979
+ "eval_runtime": 190.7393,
980
+ "eval_samples_per_second": 13.699,
981
+ "eval_steps_per_second": 0.43,
982
+ "eval_wer": 0.581632006086736,
983
+ "step": 12500
984
+ },
985
+ {
986
+ "epoch": 70.39,
987
+ "learning_rate": 0.0005062857142857143,
988
+ "loss": 0.2315,
989
+ "step": 12600
990
+ },
991
+ {
992
+ "epoch": 70.95,
993
+ "learning_rate": 0.0005022040816326531,
994
+ "loss": 0.2305,
995
+ "step": 12700
996
+ },
997
+ {
998
+ "epoch": 71.51,
999
+ "learning_rate": 0.0004981224489795918,
1000
+ "loss": 0.2307,
1001
+ "step": 12800
1002
+ },
1003
+ {
1004
+ "epoch": 72.07,
1005
+ "learning_rate": 0.0004940408163265306,
1006
+ "loss": 0.2204,
1007
+ "step": 12900
1008
+ },
1009
+ {
1010
+ "epoch": 72.63,
1011
+ "learning_rate": 0.00049,
1012
+ "loss": 0.2293,
1013
+ "step": 13000
1014
+ },
1015
+ {
1016
+ "epoch": 72.63,
1017
+ "eval_loss": 0.792820394039154,
1018
+ "eval_runtime": 188.6653,
1019
+ "eval_samples_per_second": 13.85,
1020
+ "eval_steps_per_second": 0.435,
1021
+ "eval_wer": 0.5794762870910474,
1022
+ "step": 13000
1023
+ },
1024
+ {
1025
+ "epoch": 73.18,
1026
+ "learning_rate": 0.0004859183673469388,
1027
+ "loss": 0.2185,
1028
+ "step": 13100
1029
+ },
1030
+ {
1031
+ "epoch": 73.74,
1032
+ "learning_rate": 0.00048183673469387754,
1033
+ "loss": 0.2126,
1034
+ "step": 13200
1035
+ },
1036
+ {
1037
+ "epoch": 74.3,
1038
+ "learning_rate": 0.00047775510204081634,
1039
+ "loss": 0.2177,
1040
+ "step": 13300
1041
+ },
1042
+ {
1043
+ "epoch": 74.86,
1044
+ "learning_rate": 0.00047367346938775515,
1045
+ "loss": 0.2204,
1046
+ "step": 13400
1047
+ },
1048
+ {
1049
+ "epoch": 75.42,
1050
+ "learning_rate": 0.0004695918367346939,
1051
+ "loss": 0.2176,
1052
+ "step": 13500
1053
+ },
1054
+ {
1055
+ "epoch": 75.42,
1056
+ "eval_loss": 0.7916468381881714,
1057
+ "eval_runtime": 189.1525,
1058
+ "eval_samples_per_second": 13.814,
1059
+ "eval_steps_per_second": 0.434,
1060
+ "eval_wer": 0.5845802688308395,
1061
+ "step": 13500
1062
+ },
1063
+ {
1064
+ "epoch": 75.98,
1065
+ "learning_rate": 0.00046551020408163265,
1066
+ "loss": 0.2119,
1067
+ "step": 13600
1068
+ },
1069
+ {
1070
+ "epoch": 76.54,
1071
+ "learning_rate": 0.0004614285714285714,
1072
+ "loss": 0.2149,
1073
+ "step": 13700
1074
+ },
1075
+ {
1076
+ "epoch": 77.09,
1077
+ "learning_rate": 0.0004573469387755102,
1078
+ "loss": 0.214,
1079
+ "step": 13800
1080
+ },
1081
+ {
1082
+ "epoch": 77.65,
1083
+ "learning_rate": 0.000453265306122449,
1084
+ "loss": 0.2095,
1085
+ "step": 13900
1086
+ },
1087
+ {
1088
+ "epoch": 78.21,
1089
+ "learning_rate": 0.00044918367346938776,
1090
+ "loss": 0.2143,
1091
+ "step": 14000
1092
+ },
1093
+ {
1094
+ "epoch": 78.21,
1095
+ "eval_loss": 0.7954298853874207,
1096
+ "eval_runtime": 187.5182,
1097
+ "eval_samples_per_second": 13.935,
1098
+ "eval_steps_per_second": 0.437,
1099
+ "eval_wer": 0.5764646208470707,
1100
+ "step": 14000
1101
+ },
1102
+ {
1103
+ "epoch": 78.77,
1104
+ "learning_rate": 0.00044510204081632656,
1105
+ "loss": 0.2182,
1106
+ "step": 14100
1107
+ },
1108
+ {
1109
+ "epoch": 79.33,
1110
+ "learning_rate": 0.0004410204081632653,
1111
+ "loss": 0.2042,
1112
+ "step": 14200
1113
+ },
1114
+ {
1115
+ "epoch": 79.89,
1116
+ "learning_rate": 0.0004369387755102041,
1117
+ "loss": 0.2184,
1118
+ "step": 14300
1119
+ },
1120
+ {
1121
+ "epoch": 80.45,
1122
+ "learning_rate": 0.00043285714285714287,
1123
+ "loss": 0.2104,
1124
+ "step": 14400
1125
+ },
1126
+ {
1127
+ "epoch": 81.01,
1128
+ "learning_rate": 0.0004287755102040816,
1129
+ "loss": 0.2185,
1130
+ "step": 14500
1131
+ },
1132
+ {
1133
+ "epoch": 81.01,
1134
+ "eval_loss": 0.8317196369171143,
1135
+ "eval_runtime": 188.0902,
1136
+ "eval_samples_per_second": 13.892,
1137
+ "eval_steps_per_second": 0.436,
1138
+ "eval_wer": 0.5906987065686026,
1139
+ "step": 14500
1140
+ },
1141
+ {
1142
+ "epoch": 81.56,
1143
+ "learning_rate": 0.0004246938775510204,
1144
+ "loss": 0.2026,
1145
+ "step": 14600
1146
+ },
1147
+ {
1148
+ "epoch": 82.12,
1149
+ "learning_rate": 0.0004206122448979592,
1150
+ "loss": 0.1972,
1151
+ "step": 14700
1152
+ },
1153
+ {
1154
+ "epoch": 82.68,
1155
+ "learning_rate": 0.000416530612244898,
1156
+ "loss": 0.2083,
1157
+ "step": 14800
1158
+ },
1159
+ {
1160
+ "epoch": 83.24,
1161
+ "learning_rate": 0.00041244897959183673,
1162
+ "loss": 0.2132,
1163
+ "step": 14900
1164
+ },
1165
+ {
1166
+ "epoch": 83.8,
1167
+ "learning_rate": 0.00040836734693877553,
1168
+ "loss": 0.2057,
1169
+ "step": 15000
1170
+ },
1171
+ {
1172
+ "epoch": 83.8,
1173
+ "eval_loss": 0.8015716075897217,
1174
+ "eval_runtime": 188.5358,
1175
+ "eval_samples_per_second": 13.859,
1176
+ "eval_steps_per_second": 0.435,
1177
+ "eval_wer": 0.5850557950798884,
1178
+ "step": 15000
1179
+ },
1180
+ {
1181
+ "epoch": 84.36,
1182
+ "learning_rate": 0.0004042857142857143,
1183
+ "loss": 0.2027,
1184
+ "step": 15100
1185
+ },
1186
+ {
1187
+ "epoch": 84.92,
1188
+ "learning_rate": 0.0004002040816326531,
1189
+ "loss": 0.2016,
1190
+ "step": 15200
1191
+ },
1192
+ {
1193
+ "epoch": 85.47,
1194
+ "learning_rate": 0.00039612244897959184,
1195
+ "loss": 0.1935,
1196
+ "step": 15300
1197
+ },
1198
+ {
1199
+ "epoch": 86.03,
1200
+ "learning_rate": 0.0003920408163265306,
1201
+ "loss": 0.1948,
1202
+ "step": 15400
1203
+ },
1204
+ {
1205
+ "epoch": 86.59,
1206
+ "learning_rate": 0.0003879591836734694,
1207
+ "loss": 0.1895,
1208
+ "step": 15500
1209
+ },
1210
+ {
1211
+ "epoch": 86.59,
1212
+ "eval_loss": 0.8080111145973206,
1213
+ "eval_runtime": 192.0878,
1214
+ "eval_samples_per_second": 13.603,
1215
+ "eval_steps_per_second": 0.427,
1216
+ "eval_wer": 0.5679368501141263,
1217
+ "step": 15500
1218
+ },
1219
+ {
1220
+ "epoch": 87.15,
1221
+ "learning_rate": 0.00038387755102040815,
1222
+ "loss": 0.1894,
1223
+ "step": 15600
1224
+ },
1225
+ {
1226
+ "epoch": 87.71,
1227
+ "learning_rate": 0.00037979591836734695,
1228
+ "loss": 0.1798,
1229
+ "step": 15700
1230
+ },
1231
+ {
1232
+ "epoch": 88.27,
1233
+ "learning_rate": 0.00037571428571428575,
1234
+ "loss": 0.1891,
1235
+ "step": 15800
1236
+ },
1237
+ {
1238
+ "epoch": 88.83,
1239
+ "learning_rate": 0.0003716326530612245,
1240
+ "loss": 0.1916,
1241
+ "step": 15900
1242
+ },
1243
+ {
1244
+ "epoch": 89.39,
1245
+ "learning_rate": 0.0003675510204081633,
1246
+ "loss": 0.1883,
1247
+ "step": 16000
1248
+ },
1249
+ {
1250
+ "epoch": 89.39,
1251
+ "eval_loss": 0.8103044033050537,
1252
+ "eval_runtime": 192.0932,
1253
+ "eval_samples_per_second": 13.603,
1254
+ "eval_steps_per_second": 0.427,
1255
+ "eval_wer": 0.5712338321075323,
1256
+ "step": 16000
1257
+ },
1258
+ {
1259
+ "epoch": 89.94,
1260
+ "learning_rate": 0.000363469387755102,
1261
+ "loss": 0.1774,
1262
+ "step": 16100
1263
+ },
1264
+ {
1265
+ "epoch": 90.5,
1266
+ "learning_rate": 0.0003593877551020408,
1267
+ "loss": 0.1831,
1268
+ "step": 16200
1269
+ },
1270
+ {
1271
+ "epoch": 91.06,
1272
+ "learning_rate": 0.00035530612244897956,
1273
+ "loss": 0.1858,
1274
+ "step": 16300
1275
+ },
1276
+ {
1277
+ "epoch": 91.62,
1278
+ "learning_rate": 0.00035122448979591837,
1279
+ "loss": 0.1863,
1280
+ "step": 16400
1281
+ },
1282
+ {
1283
+ "epoch": 92.18,
1284
+ "learning_rate": 0.00034714285714285717,
1285
+ "loss": 0.1802,
1286
+ "step": 16500
1287
+ },
1288
+ {
1289
+ "epoch": 92.18,
1290
+ "eval_loss": 0.8383206129074097,
1291
+ "eval_runtime": 187.0696,
1292
+ "eval_samples_per_second": 13.968,
1293
+ "eval_steps_per_second": 0.438,
1294
+ "eval_wer": 0.5644496576211007,
1295
+ "step": 16500
1296
+ },
1297
+ {
1298
+ "epoch": 92.74,
1299
+ "learning_rate": 0.0003430612244897959,
1300
+ "loss": 0.1818,
1301
+ "step": 16600
1302
+ },
1303
+ {
1304
+ "epoch": 93.3,
1305
+ "learning_rate": 0.0003389795918367347,
1306
+ "loss": 0.1805,
1307
+ "step": 16700
1308
+ },
1309
+ {
1310
+ "epoch": 93.85,
1311
+ "learning_rate": 0.0003348979591836735,
1312
+ "loss": 0.1784,
1313
+ "step": 16800
1314
+ },
1315
+ {
1316
+ "epoch": 94.41,
1317
+ "learning_rate": 0.0003308163265306123,
1318
+ "loss": 0.1731,
1319
+ "step": 16900
1320
+ },
1321
+ {
1322
+ "epoch": 94.97,
1323
+ "learning_rate": 0.000326734693877551,
1324
+ "loss": 0.1826,
1325
+ "step": 17000
1326
+ },
1327
+ {
1328
+ "epoch": 94.97,
1329
+ "eval_loss": 0.8798549771308899,
1330
+ "eval_runtime": 188.9431,
1331
+ "eval_samples_per_second": 13.83,
1332
+ "eval_steps_per_second": 0.434,
1333
+ "eval_wer": 0.5657494293685011,
1334
+ "step": 17000
1335
+ },
1336
+ {
1337
+ "epoch": 95.53,
1338
+ "learning_rate": 0.0003226530612244898,
1339
+ "loss": 0.1733,
1340
+ "step": 17100
1341
+ },
1342
+ {
1343
+ "epoch": 96.09,
1344
+ "learning_rate": 0.0003185714285714286,
1345
+ "loss": 0.1723,
1346
+ "step": 17200
1347
+ },
1348
+ {
1349
+ "epoch": 96.65,
1350
+ "learning_rate": 0.00031448979591836734,
1351
+ "loss": 0.1693,
1352
+ "step": 17300
1353
+ },
1354
+ {
1355
+ "epoch": 97.21,
1356
+ "learning_rate": 0.00031044897959183674,
1357
+ "loss": 0.1705,
1358
+ "step": 17400
1359
+ },
1360
+ {
1361
+ "epoch": 97.77,
1362
+ "learning_rate": 0.0003063673469387755,
1363
+ "loss": 0.1717,
1364
+ "step": 17500
1365
+ },
1366
+ {
1367
+ "epoch": 97.77,
1368
+ "eval_loss": 0.8619566559791565,
1369
+ "eval_runtime": 188.0319,
1370
+ "eval_samples_per_second": 13.897,
1371
+ "eval_steps_per_second": 0.436,
1372
+ "eval_wer": 0.5709168146081663,
1373
+ "step": 17500
1374
+ },
1375
+ {
1376
+ "epoch": 98.32,
1377
+ "learning_rate": 0.0003022857142857143,
1378
+ "loss": 0.1616,
1379
+ "step": 17600
1380
+ },
1381
+ {
1382
+ "epoch": 98.88,
1383
+ "learning_rate": 0.00029820408163265305,
1384
+ "loss": 0.1653,
1385
+ "step": 17700
1386
+ },
1387
+ {
1388
+ "epoch": 99.44,
1389
+ "learning_rate": 0.00029412244897959185,
1390
+ "loss": 0.1708,
1391
+ "step": 17800
1392
+ },
1393
+ {
1394
+ "epoch": 100.0,
1395
+ "learning_rate": 0.0002900408163265306,
1396
+ "loss": 0.1681,
1397
+ "step": 17900
1398
+ },
1399
+ {
1400
+ "epoch": 100.56,
1401
+ "learning_rate": 0.00028599999999999996,
1402
+ "loss": 0.1701,
1403
+ "step": 18000
1404
+ },
1405
+ {
1406
+ "epoch": 100.56,
1407
+ "eval_loss": 0.8717033267021179,
1408
+ "eval_runtime": 192.2586,
1409
+ "eval_samples_per_second": 13.591,
1410
+ "eval_steps_per_second": 0.427,
1411
+ "eval_wer": 0.5661932538676135,
1412
+ "step": 18000
1413
+ },
1414
+ {
1415
+ "epoch": 101.12,
1416
+ "learning_rate": 0.00028191836734693876,
1417
+ "loss": 0.1614,
1418
+ "step": 18100
1419
+ },
1420
+ {
1421
+ "epoch": 101.68,
1422
+ "learning_rate": 0.00027783673469387757,
1423
+ "loss": 0.1641,
1424
+ "step": 18200
1425
+ },
1426
+ {
1427
+ "epoch": 102.23,
1428
+ "learning_rate": 0.0002737551020408163,
1429
+ "loss": 0.1615,
1430
+ "step": 18300
1431
+ },
1432
+ {
1433
+ "epoch": 102.79,
1434
+ "learning_rate": 0.0002696734693877551,
1435
+ "loss": 0.1588,
1436
+ "step": 18400
1437
+ },
1438
+ {
1439
+ "epoch": 103.35,
1440
+ "learning_rate": 0.0002655918367346939,
1441
+ "loss": 0.1623,
1442
+ "step": 18500
1443
+ },
1444
+ {
1445
+ "epoch": 103.35,
1446
+ "eval_loss": 0.8533861041069031,
1447
+ "eval_runtime": 187.659,
1448
+ "eval_samples_per_second": 13.924,
1449
+ "eval_steps_per_second": 0.437,
1450
+ "eval_wer": 0.5594090793811819,
1451
+ "step": 18500
1452
+ },
1453
+ {
1454
+ "epoch": 103.91,
1455
+ "learning_rate": 0.0002615102040816327,
1456
+ "loss": 0.1566,
1457
+ "step": 18600
1458
+ },
1459
+ {
1460
+ "epoch": 104.47,
1461
+ "learning_rate": 0.0002574285714285715,
1462
+ "loss": 0.1528,
1463
+ "step": 18700
1464
+ },
1465
+ {
1466
+ "epoch": 105.03,
1467
+ "learning_rate": 0.0002533469387755102,
1468
+ "loss": 0.1538,
1469
+ "step": 18800
1470
+ },
1471
+ {
1472
+ "epoch": 105.59,
1473
+ "learning_rate": 0.000249265306122449,
1474
+ "loss": 0.1557,
1475
+ "step": 18900
1476
+ },
1477
+ {
1478
+ "epoch": 106.15,
1479
+ "learning_rate": 0.00024518367346938773,
1480
+ "loss": 0.158,
1481
+ "step": 19000
1482
+ },
1483
+ {
1484
+ "epoch": 106.15,
1485
+ "eval_loss": 0.8595470786094666,
1486
+ "eval_runtime": 194.975,
1487
+ "eval_samples_per_second": 13.402,
1488
+ "eval_steps_per_second": 0.421,
1489
+ "eval_wer": 0.5546221151407558,
1490
+ "step": 19000
1491
+ },
1492
+ {
1493
+ "epoch": 106.7,
1494
+ "learning_rate": 0.00024110204081632654,
1495
+ "loss": 0.1459,
1496
+ "step": 19100
1497
+ },
1498
+ {
1499
+ "epoch": 107.26,
1500
+ "learning_rate": 0.00023702040816326532,
1501
+ "loss": 0.152,
1502
+ "step": 19200
1503
+ },
1504
+ {
1505
+ "epoch": 107.82,
1506
+ "learning_rate": 0.0002329387755102041,
1507
+ "loss": 0.1533,
1508
+ "step": 19300
1509
+ },
1510
+ {
1511
+ "epoch": 108.38,
1512
+ "learning_rate": 0.00022885714285714287,
1513
+ "loss": 0.146,
1514
+ "step": 19400
1515
+ },
1516
+ {
1517
+ "epoch": 108.94,
1518
+ "learning_rate": 0.00022477551020408162,
1519
+ "loss": 0.1508,
1520
+ "step": 19500
1521
+ },
1522
+ {
1523
+ "epoch": 108.94,
1524
+ "eval_loss": 0.8573695421218872,
1525
+ "eval_runtime": 191.3527,
1526
+ "eval_samples_per_second": 13.655,
1527
+ "eval_steps_per_second": 0.429,
1528
+ "eval_wer": 0.5544636063910728,
1529
+ "step": 19500
1530
+ },
1531
+ {
1532
+ "epoch": 109.5,
1533
+ "learning_rate": 0.0002206938775510204,
1534
+ "loss": 0.1454,
1535
+ "step": 19600
1536
+ },
1537
+ {
1538
+ "epoch": 110.06,
1539
+ "learning_rate": 0.0002166122448979592,
1540
+ "loss": 0.1466,
1541
+ "step": 19700
1542
+ },
1543
+ {
1544
+ "epoch": 110.61,
1545
+ "learning_rate": 0.00021253061224489798,
1546
+ "loss": 0.1416,
1547
+ "step": 19800
1548
+ },
1549
+ {
1550
+ "epoch": 111.17,
1551
+ "learning_rate": 0.00020844897959183673,
1552
+ "loss": 0.1444,
1553
+ "step": 19900
1554
+ },
1555
+ {
1556
+ "epoch": 111.73,
1557
+ "learning_rate": 0.0002043673469387755,
1558
+ "loss": 0.142,
1559
+ "step": 20000
1560
+ },
1561
+ {
1562
+ "epoch": 111.73,
1563
+ "eval_loss": 0.867087721824646,
1564
+ "eval_runtime": 187.948,
1565
+ "eval_samples_per_second": 13.903,
1566
+ "eval_steps_per_second": 0.436,
1567
+ "eval_wer": 0.5536710626426579,
1568
+ "step": 20000
1569
+ },
1570
+ {
1571
+ "epoch": 112.29,
1572
+ "learning_rate": 0.0002002857142857143,
1573
+ "loss": 0.1485,
1574
+ "step": 20100
1575
+ },
1576
+ {
1577
+ "epoch": 112.85,
1578
+ "learning_rate": 0.00019620408163265306,
1579
+ "loss": 0.1466,
1580
+ "step": 20200
1581
+ },
1582
+ {
1583
+ "epoch": 113.41,
1584
+ "learning_rate": 0.00019212244897959184,
1585
+ "loss": 0.1364,
1586
+ "step": 20300
1587
+ },
1588
+ {
1589
+ "epoch": 113.97,
1590
+ "learning_rate": 0.00018804081632653062,
1591
+ "loss": 0.1421,
1592
+ "step": 20400
1593
+ },
1594
+ {
1595
+ "epoch": 114.53,
1596
+ "learning_rate": 0.0001839591836734694,
1597
+ "loss": 0.1395,
1598
+ "step": 20500
1599
+ },
1600
+ {
1601
+ "epoch": 114.53,
1602
+ "eval_loss": 0.843588650226593,
1603
+ "eval_runtime": 191.7789,
1604
+ "eval_samples_per_second": 13.625,
1605
+ "eval_steps_per_second": 0.428,
1606
+ "eval_wer": 0.5524663961450672,
1607
+ "step": 20500
1608
+ },
1609
+ {
1610
+ "epoch": 115.08,
1611
+ "learning_rate": 0.00017987755102040817,
1612
+ "loss": 0.1409,
1613
+ "step": 20600
1614
+ },
1615
+ {
1616
+ "epoch": 115.64,
1617
+ "learning_rate": 0.00017579591836734692,
1618
+ "loss": 0.1356,
1619
+ "step": 20700
1620
+ },
1621
+ {
1622
+ "epoch": 116.2,
1623
+ "learning_rate": 0.0001717142857142857,
1624
+ "loss": 0.1364,
1625
+ "step": 20800
1626
+ },
1627
+ {
1628
+ "epoch": 116.76,
1629
+ "learning_rate": 0.0001676326530612245,
1630
+ "loss": 0.1328,
1631
+ "step": 20900
1632
+ },
1633
+ {
1634
+ "epoch": 117.32,
1635
+ "learning_rate": 0.0001635918367346939,
1636
+ "loss": 0.1373,
1637
+ "step": 21000
1638
+ },
1639
+ {
1640
+ "epoch": 117.32,
1641
+ "eval_loss": 0.8807795643806458,
1642
+ "eval_runtime": 186.8391,
1643
+ "eval_samples_per_second": 13.985,
1644
+ "eval_steps_per_second": 0.439,
1645
+ "eval_wer": 0.5481866599036267,
1646
+ "step": 21000
1647
+ },
1648
+ {
1649
+ "epoch": 117.88,
1650
+ "learning_rate": 0.00015951020408163267,
1651
+ "loss": 0.1316,
1652
+ "step": 21100
1653
+ },
1654
+ {
1655
+ "epoch": 118.44,
1656
+ "learning_rate": 0.00015542857142857142,
1657
+ "loss": 0.133,
1658
+ "step": 21200
1659
+ },
1660
+ {
1661
+ "epoch": 118.99,
1662
+ "learning_rate": 0.0001513469387755102,
1663
+ "loss": 0.1348,
1664
+ "step": 21300
1665
+ },
1666
+ {
1667
+ "epoch": 119.55,
1668
+ "learning_rate": 0.00014726530612244897,
1669
+ "loss": 0.1321,
1670
+ "step": 21400
1671
+ },
1672
+ {
1673
+ "epoch": 120.11,
1674
+ "learning_rate": 0.00014318367346938778,
1675
+ "loss": 0.1338,
1676
+ "step": 21500
1677
+ },
1678
+ {
1679
+ "epoch": 120.11,
1680
+ "eval_loss": 0.902350127696991,
1681
+ "eval_runtime": 188.1097,
1682
+ "eval_samples_per_second": 13.891,
1683
+ "eval_steps_per_second": 0.436,
1684
+ "eval_wer": 0.5418146081663708,
1685
+ "step": 21500
1686
+ },
1687
+ {
1688
+ "epoch": 120.67,
1689
+ "learning_rate": 0.00013910204081632655,
1690
+ "loss": 0.1248,
1691
+ "step": 21600
1692
+ },
1693
+ {
1694
+ "epoch": 121.23,
1695
+ "learning_rate": 0.0001350204081632653,
1696
+ "loss": 0.1267,
1697
+ "step": 21700
1698
+ },
1699
+ {
1700
+ "epoch": 121.79,
1701
+ "learning_rate": 0.00013093877551020408,
1702
+ "loss": 0.1282,
1703
+ "step": 21800
1704
+ },
1705
+ {
1706
+ "epoch": 122.35,
1707
+ "learning_rate": 0.00012685714285714286,
1708
+ "loss": 0.1284,
1709
+ "step": 21900
1710
+ },
1711
+ {
1712
+ "epoch": 122.91,
1713
+ "learning_rate": 0.00012277551020408164,
1714
+ "loss": 0.1278,
1715
+ "step": 22000
1716
+ },
1717
+ {
1718
+ "epoch": 122.91,
1719
+ "eval_loss": 0.9142583608627319,
1720
+ "eval_runtime": 187.6194,
1721
+ "eval_samples_per_second": 13.927,
1722
+ "eval_steps_per_second": 0.437,
1723
+ "eval_wer": 0.5408952574182094,
1724
+ "step": 22000
1725
+ },
1726
+ {
1727
+ "epoch": 123.46,
1728
+ "learning_rate": 0.00011869387755102041,
1729
+ "loss": 0.1292,
1730
+ "step": 22100
1731
+ },
1732
+ {
1733
+ "epoch": 124.02,
1734
+ "learning_rate": 0.00011461224489795918,
1735
+ "loss": 0.1201,
1736
+ "step": 22200
1737
+ },
1738
+ {
1739
+ "epoch": 124.58,
1740
+ "learning_rate": 0.00011053061224489797,
1741
+ "loss": 0.1249,
1742
+ "step": 22300
1743
+ },
1744
+ {
1745
+ "epoch": 125.14,
1746
+ "learning_rate": 0.00010644897959183673,
1747
+ "loss": 0.1251,
1748
+ "step": 22400
1749
+ },
1750
+ {
1751
+ "epoch": 125.7,
1752
+ "learning_rate": 0.00010236734693877551,
1753
+ "loss": 0.1207,
1754
+ "step": 22500
1755
+ },
1756
+ {
1757
+ "epoch": 125.7,
1758
+ "eval_loss": 0.8916703462600708,
1759
+ "eval_runtime": 187.4806,
1760
+ "eval_samples_per_second": 13.937,
1761
+ "eval_steps_per_second": 0.437,
1762
+ "eval_wer": 0.5357912756784174,
1763
+ "step": 22500
1764
+ },
1765
+ {
1766
+ "epoch": 126.26,
1767
+ "learning_rate": 9.828571428571429e-05,
1768
+ "loss": 0.1233,
1769
+ "step": 22600
1770
+ },
1771
+ {
1772
+ "epoch": 126.82,
1773
+ "learning_rate": 9.420408163265307e-05,
1774
+ "loss": 0.1217,
1775
+ "step": 22700
1776
+ },
1777
+ {
1778
+ "epoch": 127.37,
1779
+ "learning_rate": 9.012244897959183e-05,
1780
+ "loss": 0.1197,
1781
+ "step": 22800
1782
+ },
1783
+ {
1784
+ "epoch": 127.93,
1785
+ "learning_rate": 8.604081632653062e-05,
1786
+ "loss": 0.1167,
1787
+ "step": 22900
1788
+ },
1789
+ {
1790
+ "epoch": 128.49,
1791
+ "learning_rate": 8.195918367346938e-05,
1792
+ "loss": 0.1203,
1793
+ "step": 23000
1794
+ },
1795
+ {
1796
+ "epoch": 128.49,
1797
+ "eval_loss": 0.9041045904159546,
1798
+ "eval_runtime": 186.9588,
1799
+ "eval_samples_per_second": 13.976,
1800
+ "eval_steps_per_second": 0.439,
1801
+ "eval_wer": 0.5341110829317778,
1802
+ "step": 23000
1803
+ },
1804
+ {
1805
+ "epoch": 129.05,
1806
+ "learning_rate": 7.791836734693878e-05,
1807
+ "loss": 0.1199,
1808
+ "step": 23100
1809
+ },
1810
+ {
1811
+ "epoch": 129.61,
1812
+ "learning_rate": 7.383673469387756e-05,
1813
+ "loss": 0.1171,
1814
+ "step": 23200
1815
+ },
1816
+ {
1817
+ "epoch": 130.17,
1818
+ "learning_rate": 6.975510204081632e-05,
1819
+ "loss": 0.1136,
1820
+ "step": 23300
1821
+ },
1822
+ {
1823
+ "epoch": 130.73,
1824
+ "learning_rate": 6.567346938775511e-05,
1825
+ "loss": 0.1133,
1826
+ "step": 23400
1827
+ },
1828
+ {
1829
+ "epoch": 131.28,
1830
+ "learning_rate": 6.159183673469388e-05,
1831
+ "loss": 0.1083,
1832
+ "step": 23500
1833
+ },
1834
+ {
1835
+ "epoch": 131.28,
1836
+ "eval_loss": 0.8883697986602783,
1837
+ "eval_runtime": 186.6245,
1838
+ "eval_samples_per_second": 14.001,
1839
+ "eval_steps_per_second": 0.439,
1840
+ "eval_wer": 0.5340793811818413,
1841
+ "step": 23500
1842
+ },
1843
+ {
1844
+ "epoch": 131.84,
1845
+ "learning_rate": 5.751020408163265e-05,
1846
+ "loss": 0.115,
1847
+ "step": 23600
1848
+ },
1849
+ {
1850
+ "epoch": 132.4,
1851
+ "learning_rate": 5.342857142857143e-05,
1852
+ "loss": 0.1132,
1853
+ "step": 23700
1854
+ },
1855
+ {
1856
+ "epoch": 132.96,
1857
+ "learning_rate": 4.934693877551021e-05,
1858
+ "loss": 0.1111,
1859
+ "step": 23800
1860
+ },
1861
+ {
1862
+ "epoch": 133.52,
1863
+ "learning_rate": 4.526530612244898e-05,
1864
+ "loss": 0.1079,
1865
+ "step": 23900
1866
+ },
1867
+ {
1868
+ "epoch": 134.08,
1869
+ "learning_rate": 4.1183673469387756e-05,
1870
+ "loss": 0.1147,
1871
+ "step": 24000
1872
+ },
1873
+ {
1874
+ "epoch": 134.08,
1875
+ "eval_loss": 0.891002357006073,
1876
+ "eval_runtime": 186.8507,
1877
+ "eval_samples_per_second": 13.984,
1878
+ "eval_steps_per_second": 0.439,
1879
+ "eval_wer": 0.525456505199087,
1880
+ "step": 24000
1881
+ },
1882
+ {
1883
+ "epoch": 134.64,
1884
+ "learning_rate": 3.7102040816326533e-05,
1885
+ "loss": 0.1085,
1886
+ "step": 24100
1887
+ },
1888
+ {
1889
+ "epoch": 135.2,
1890
+ "learning_rate": 3.3020408163265304e-05,
1891
+ "loss": 0.1104,
1892
+ "step": 24200
1893
+ },
1894
+ {
1895
+ "epoch": 135.75,
1896
+ "learning_rate": 2.8938775510204082e-05,
1897
+ "loss": 0.1096,
1898
+ "step": 24300
1899
+ },
1900
+ {
1901
+ "epoch": 136.31,
1902
+ "learning_rate": 2.4857142857142856e-05,
1903
+ "loss": 0.1064,
1904
+ "step": 24400
1905
+ },
1906
+ {
1907
+ "epoch": 136.87,
1908
+ "learning_rate": 2.0775510204081633e-05,
1909
+ "loss": 0.1129,
1910
+ "step": 24500
1911
+ },
1912
+ {
1913
+ "epoch": 136.87,
1914
+ "eval_loss": 0.8826149106025696,
1915
+ "eval_runtime": 187.6626,
1916
+ "eval_samples_per_second": 13.924,
1917
+ "eval_steps_per_second": 0.437,
1918
+ "eval_wer": 0.52412503170175,
1919
+ "step": 24500
1920
+ },
1921
+ {
1922
+ "epoch": 137.43,
1923
+ "learning_rate": 1.669387755102041e-05,
1924
+ "loss": 0.1086,
1925
+ "step": 24600
1926
+ },
1927
+ {
1928
+ "epoch": 137.99,
1929
+ "learning_rate": 1.2612244897959185e-05,
1930
+ "loss": 0.1061,
1931
+ "step": 24700
1932
+ },
1933
+ {
1934
+ "epoch": 138.55,
1935
+ "learning_rate": 8.53061224489796e-06,
1936
+ "loss": 0.1104,
1937
+ "step": 24800
1938
+ },
1939
+ {
1940
+ "epoch": 139.11,
1941
+ "learning_rate": 4.448979591836735e-06,
1942
+ "loss": 0.1095,
1943
+ "step": 24900
1944
+ },
1945
+ {
1946
+ "epoch": 139.66,
1947
+ "learning_rate": 3.673469387755102e-07,
1948
+ "loss": 0.1029,
1949
+ "step": 25000
1950
+ },
1951
+ {
1952
+ "epoch": 139.66,
1953
+ "eval_loss": 0.882430911064148,
1954
+ "eval_runtime": 187.8168,
1955
+ "eval_samples_per_second": 13.912,
1956
+ "eval_steps_per_second": 0.437,
1957
+ "eval_wer": 0.5246322597007355,
1958
+ "step": 25000
1959
+ },
1960
+ {
1961
+ "epoch": 139.66,
1962
+ "step": 25000,
1963
+ "total_flos": 4.393390203049772e+20,
1964
+ "train_loss": 0.3391121254348755,
1965
+ "train_runtime": 108834.0383,
1966
+ "train_samples_per_second": 7.351,
1967
+ "train_steps_per_second": 0.23
1968
+ }
1969
+ ],
1970
+ "max_steps": 25000,
1971
+ "num_train_epochs": 140,
1972
+ "total_flos": 4.393390203049772e+20,
1973
+ "trial_name": null,
1974
+ "trial_params": null
1975
+ }