Tiago Barbosa de Lima committed on
Commit
ac27b2e
1 Parent(s): 60727ef

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +15 -0
  2. eval_results.json +10 -0
  3. train_results.json +8 -0
  4. trainer_state.json +351 -0
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_bleu": 90.1821,
4
+ "eval_gen_len": 45.9318,
5
+ "eval_loss": 0.043235890567302704,
6
+ "eval_runtime": 489.576,
7
+ "eval_samples": 1041,
8
+ "eval_samples_per_second": 2.126,
9
+ "eval_steps_per_second": 1.064,
10
+ "train_loss": 0.047856709064706696,
11
+ "train_runtime": 5577.5722,
12
+ "train_samples": 9371,
13
+ "train_samples_per_second": 8.401,
14
+ "train_steps_per_second": 4.201
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_bleu": 90.1821,
4
+ "eval_gen_len": 45.9318,
5
+ "eval_loss": 0.043235890567302704,
6
+ "eval_runtime": 489.576,
7
+ "eval_samples": 1041,
8
+ "eval_samples_per_second": 2.126,
9
+ "eval_steps_per_second": 1.064
10
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.047856709064706696,
4
+ "train_runtime": 5577.5722,
5
+ "train_samples": 9371,
6
+ "train_samples_per_second": 8.401,
7
+ "train_steps_per_second": 4.201
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 23430,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 4.893299189073837e-05,
13
+ "loss": 0.7701,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.21,
18
+ "learning_rate": 4.786598378147674e-05,
19
+ "loss": 0.1282,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.32,
24
+ "learning_rate": 4.6798975672215114e-05,
25
+ "loss": 0.0914,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.43,
30
+ "learning_rate": 4.5731967562953484e-05,
31
+ "loss": 0.0791,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.53,
36
+ "learning_rate": 4.4664959453691854e-05,
37
+ "loss": 0.0656,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.64,
42
+ "learning_rate": 4.359795134443022e-05,
43
+ "loss": 0.0645,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.75,
48
+ "learning_rate": 4.253094323516859e-05,
49
+ "loss": 0.0573,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.85,
54
+ "learning_rate": 4.146393512590696e-05,
55
+ "loss": 0.0566,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.96,
60
+ "learning_rate": 4.039692701664533e-05,
61
+ "loss": 0.0544,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 1.0,
66
+ "eval_bleu": 25.989,
67
+ "eval_gen_len": 18.8655,
68
+ "eval_loss": 0.044239919632673264,
69
+ "eval_runtime": 180.9818,
70
+ "eval_samples_per_second": 5.752,
71
+ "eval_steps_per_second": 2.879,
72
+ "step": 4686
73
+ },
74
+ {
75
+ "epoch": 1.07,
76
+ "learning_rate": 3.93299189073837e-05,
77
+ "loss": 0.0476,
78
+ "step": 5000
79
+ },
80
+ {
81
+ "epoch": 1.17,
82
+ "learning_rate": 3.826291079812207e-05,
83
+ "loss": 0.0442,
84
+ "step": 5500
85
+ },
86
+ {
87
+ "epoch": 1.28,
88
+ "learning_rate": 3.719590268886043e-05,
89
+ "loss": 0.0375,
90
+ "step": 6000
91
+ },
92
+ {
93
+ "epoch": 1.39,
94
+ "learning_rate": 3.6128894579598804e-05,
95
+ "loss": 0.0408,
96
+ "step": 6500
97
+ },
98
+ {
99
+ "epoch": 1.49,
100
+ "learning_rate": 3.5061886470337174e-05,
101
+ "loss": 0.0351,
102
+ "step": 7000
103
+ },
104
+ {
105
+ "epoch": 1.6,
106
+ "learning_rate": 3.3994878361075544e-05,
107
+ "loss": 0.0355,
108
+ "step": 7500
109
+ },
110
+ {
111
+ "epoch": 1.71,
112
+ "learning_rate": 3.2927870251813915e-05,
113
+ "loss": 0.0379,
114
+ "step": 8000
115
+ },
116
+ {
117
+ "epoch": 1.81,
118
+ "learning_rate": 3.1860862142552285e-05,
119
+ "loss": 0.0331,
120
+ "step": 8500
121
+ },
122
+ {
123
+ "epoch": 1.92,
124
+ "learning_rate": 3.0793854033290656e-05,
125
+ "loss": 0.0367,
126
+ "step": 9000
127
+ },
128
+ {
129
+ "epoch": 2.0,
130
+ "eval_bleu": 25.9358,
131
+ "eval_gen_len": 18.8713,
132
+ "eval_loss": 0.03710315003991127,
133
+ "eval_runtime": 179.552,
134
+ "eval_samples_per_second": 5.798,
135
+ "eval_steps_per_second": 2.902,
136
+ "step": 9372
137
+ },
138
+ {
139
+ "epoch": 2.03,
140
+ "learning_rate": 2.9726845924029023e-05,
141
+ "loss": 0.0307,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 2.13,
146
+ "learning_rate": 2.8659837814767393e-05,
147
+ "loss": 0.0218,
148
+ "step": 10000
149
+ },
150
+ {
151
+ "epoch": 2.24,
152
+ "learning_rate": 2.7592829705505763e-05,
153
+ "loss": 0.0234,
154
+ "step": 10500
155
+ },
156
+ {
157
+ "epoch": 2.35,
158
+ "learning_rate": 2.6525821596244134e-05,
159
+ "loss": 0.0265,
160
+ "step": 11000
161
+ },
162
+ {
163
+ "epoch": 2.45,
164
+ "learning_rate": 2.54588134869825e-05,
165
+ "loss": 0.0239,
166
+ "step": 11500
167
+ },
168
+ {
169
+ "epoch": 2.56,
170
+ "learning_rate": 2.439180537772087e-05,
171
+ "loss": 0.0261,
172
+ "step": 12000
173
+ },
174
+ {
175
+ "epoch": 2.67,
176
+ "learning_rate": 2.332479726845924e-05,
177
+ "loss": 0.0247,
178
+ "step": 12500
179
+ },
180
+ {
181
+ "epoch": 2.77,
182
+ "learning_rate": 2.2257789159197612e-05,
183
+ "loss": 0.0228,
184
+ "step": 13000
185
+ },
186
+ {
187
+ "epoch": 2.88,
188
+ "learning_rate": 2.1190781049935982e-05,
189
+ "loss": 0.0233,
190
+ "step": 13500
191
+ },
192
+ {
193
+ "epoch": 2.99,
194
+ "learning_rate": 2.0123772940674353e-05,
195
+ "loss": 0.0222,
196
+ "step": 14000
197
+ },
198
+ {
199
+ "epoch": 3.0,
200
+ "eval_bleu": 25.8976,
201
+ "eval_gen_len": 18.8694,
202
+ "eval_loss": 0.037382081151008606,
203
+ "eval_runtime": 178.2936,
204
+ "eval_samples_per_second": 5.839,
205
+ "eval_steps_per_second": 2.922,
206
+ "step": 14058
207
+ },
208
+ {
209
+ "epoch": 3.09,
210
+ "learning_rate": 1.905676483141272e-05,
211
+ "loss": 0.0156,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 3.2,
216
+ "learning_rate": 1.7989756722151087e-05,
217
+ "loss": 0.0153,
218
+ "step": 15000
219
+ },
220
+ {
221
+ "epoch": 3.31,
222
+ "learning_rate": 1.6922748612889457e-05,
223
+ "loss": 0.0174,
224
+ "step": 15500
225
+ },
226
+ {
227
+ "epoch": 3.41,
228
+ "learning_rate": 1.5855740503627827e-05,
229
+ "loss": 0.0171,
230
+ "step": 16000
231
+ },
232
+ {
233
+ "epoch": 3.52,
234
+ "learning_rate": 1.4788732394366198e-05,
235
+ "loss": 0.0176,
236
+ "step": 16500
237
+ },
238
+ {
239
+ "epoch": 3.63,
240
+ "learning_rate": 1.3721724285104568e-05,
241
+ "loss": 0.0187,
242
+ "step": 17000
243
+ },
244
+ {
245
+ "epoch": 3.73,
246
+ "learning_rate": 1.2654716175842937e-05,
247
+ "loss": 0.0157,
248
+ "step": 17500
249
+ },
250
+ {
251
+ "epoch": 3.84,
252
+ "learning_rate": 1.1587708066581307e-05,
253
+ "loss": 0.0169,
254
+ "step": 18000
255
+ },
256
+ {
257
+ "epoch": 3.95,
258
+ "learning_rate": 1.0520699957319676e-05,
259
+ "loss": 0.0152,
260
+ "step": 18500
261
+ },
262
+ {
263
+ "epoch": 4.0,
264
+ "eval_bleu": 26.1575,
265
+ "eval_gen_len": 18.8694,
266
+ "eval_loss": 0.040931396186351776,
267
+ "eval_runtime": 182.3446,
268
+ "eval_samples_per_second": 5.709,
269
+ "eval_steps_per_second": 2.857,
270
+ "step": 18744
271
+ },
272
+ {
273
+ "epoch": 4.05,
274
+ "learning_rate": 9.453691848058044e-06,
275
+ "loss": 0.0148,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 4.16,
280
+ "learning_rate": 8.386683738796415e-06,
281
+ "loss": 0.0124,
282
+ "step": 19500
283
+ },
284
+ {
285
+ "epoch": 4.27,
286
+ "learning_rate": 7.319675629534785e-06,
287
+ "loss": 0.0148,
288
+ "step": 20000
289
+ },
290
+ {
291
+ "epoch": 4.37,
292
+ "learning_rate": 6.252667520273155e-06,
293
+ "loss": 0.0123,
294
+ "step": 20500
295
+ },
296
+ {
297
+ "epoch": 4.48,
298
+ "learning_rate": 5.1856594110115235e-06,
299
+ "loss": 0.0139,
300
+ "step": 21000
301
+ },
302
+ {
303
+ "epoch": 4.59,
304
+ "learning_rate": 4.118651301749894e-06,
305
+ "loss": 0.0116,
306
+ "step": 21500
307
+ },
308
+ {
309
+ "epoch": 4.69,
310
+ "learning_rate": 3.051643192488263e-06,
311
+ "loss": 0.014,
312
+ "step": 22000
313
+ },
314
+ {
315
+ "epoch": 4.8,
316
+ "learning_rate": 1.9846350832266325e-06,
317
+ "loss": 0.0143,
318
+ "step": 22500
319
+ },
320
+ {
321
+ "epoch": 4.91,
322
+ "learning_rate": 9.176269739650021e-07,
323
+ "loss": 0.0147,
324
+ "step": 23000
325
+ },
326
+ {
327
+ "epoch": 5.0,
328
+ "eval_bleu": 26.0973,
329
+ "eval_gen_len": 18.8694,
330
+ "eval_loss": 0.043235890567302704,
331
+ "eval_runtime": 184.0328,
332
+ "eval_samples_per_second": 5.657,
333
+ "eval_steps_per_second": 2.831,
334
+ "step": 23430
335
+ },
336
+ {
337
+ "epoch": 5.0,
338
+ "step": 23430,
339
+ "total_flos": 2019762272885760.0,
340
+ "train_loss": 0.047856709064706696,
341
+ "train_runtime": 5577.5722,
342
+ "train_samples_per_second": 8.401,
343
+ "train_steps_per_second": 4.201
344
+ }
345
+ ],
346
+ "max_steps": 23430,
347
+ "num_train_epochs": 5,
348
+ "total_flos": 2019762272885760.0,
349
+ "trial_name": null,
350
+ "trial_params": null
351
+ }