DunnBC22 commited on
Commit
88d61ac
1 Parent(s): a6eb8b5

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +769 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 17.86,
3
+ "total_flos": 5.536126404560683e+18,
4
+ "train_loss": 1.1784635154149865,
5
+ "train_runtime": 5483.2083,
6
+ "train_samples_per_second": 13.131,
7
+ "train_steps_per_second": 0.102
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 17.86,
3
+ "total_flos": 5.536126404560683e+18,
4
+ "train_loss": 1.1784635154149865,
5
+ "train_runtime": 5483.2083,
6
+ "train_samples_per_second": 13.131,
7
+ "train_steps_per_second": 0.102
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,769 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 17.856,
5
+ "global_step": 558,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 8.928571428571428e-07,
13
+ "loss": 2.9786,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.26,
18
+ "learning_rate": 7.142857142857143e-06,
19
+ "loss": 3.0172,
20
+ "step": 8
21
+ },
22
+ {
23
+ "epoch": 0.51,
24
+ "learning_rate": 1.4285714285714285e-05,
25
+ "loss": 2.9016,
26
+ "step": 16
27
+ },
28
+ {
29
+ "epoch": 0.77,
30
+ "learning_rate": 2.1428571428571428e-05,
31
+ "loss": 2.7266,
32
+ "step": 24
33
+ },
34
+ {
35
+ "epoch": 0.99,
36
+ "eval_Macro F1": 0.18274435256625565,
37
+ "eval_Macro Precision": 0.22464899203701116,
38
+ "eval_Macro Recall": 0.21014375581515982,
39
+ "eval_Micro F1": 0.208,
40
+ "eval_Micro Precision": 0.208,
41
+ "eval_Micro Recall": 0.208,
42
+ "eval_Weighted F1": 0.18112534226726865,
43
+ "eval_Weighted Precision": 0.21432408746391318,
44
+ "eval_Weighted Recall": 0.208,
45
+ "eval_accuracy": 0.208,
46
+ "eval_loss": 2.473806381225586,
47
+ "eval_runtime": 340.1389,
48
+ "eval_samples_per_second": 2.94,
49
+ "eval_steps_per_second": 0.094,
50
+ "step": 31
51
+ },
52
+ {
53
+ "epoch": 1.02,
54
+ "learning_rate": 2.857142857142857e-05,
55
+ "loss": 2.5835,
56
+ "step": 32
57
+ },
58
+ {
59
+ "epoch": 1.28,
60
+ "learning_rate": 3.571428571428572e-05,
61
+ "loss": 2.4181,
62
+ "step": 40
63
+ },
64
+ {
65
+ "epoch": 1.54,
66
+ "learning_rate": 4.2857142857142856e-05,
67
+ "loss": 2.2732,
68
+ "step": 48
69
+ },
70
+ {
71
+ "epoch": 1.79,
72
+ "learning_rate": 5e-05,
73
+ "loss": 2.171,
74
+ "step": 56
75
+ },
76
+ {
77
+ "epoch": 1.98,
78
+ "eval_Macro F1": 0.39250727765144267,
79
+ "eval_Macro Precision": 0.4446397468859132,
80
+ "eval_Macro Recall": 0.42427064875651993,
81
+ "eval_Micro F1": 0.42299999999999993,
82
+ "eval_Micro Precision": 0.423,
83
+ "eval_Micro Recall": 0.423,
84
+ "eval_Weighted F1": 0.39356324128346765,
85
+ "eval_Weighted Precision": 0.45034474867370766,
86
+ "eval_Weighted Recall": 0.423,
87
+ "eval_accuracy": 0.423,
88
+ "eval_loss": 1.8510247468948364,
89
+ "eval_runtime": 23.3293,
90
+ "eval_samples_per_second": 42.864,
91
+ "eval_steps_per_second": 1.372,
92
+ "step": 62
93
+ },
94
+ {
95
+ "epoch": 2.05,
96
+ "learning_rate": 4.9203187250996016e-05,
97
+ "loss": 2.0057,
98
+ "step": 64
99
+ },
100
+ {
101
+ "epoch": 2.3,
102
+ "learning_rate": 4.840637450199204e-05,
103
+ "loss": 1.8883,
104
+ "step": 72
105
+ },
106
+ {
107
+ "epoch": 2.56,
108
+ "learning_rate": 4.760956175298805e-05,
109
+ "loss": 1.7538,
110
+ "step": 80
111
+ },
112
+ {
113
+ "epoch": 2.82,
114
+ "learning_rate": 4.6812749003984064e-05,
115
+ "loss": 1.6525,
116
+ "step": 88
117
+ },
118
+ {
119
+ "epoch": 2.98,
120
+ "eval_Macro F1": 0.5854998981328875,
121
+ "eval_Macro Precision": 0.6283041062692261,
122
+ "eval_Macro Recall": 0.6124154907646651,
123
+ "eval_Micro F1": 0.61,
124
+ "eval_Micro Precision": 0.61,
125
+ "eval_Micro Recall": 0.61,
126
+ "eval_Weighted F1": 0.5883778569595041,
127
+ "eval_Weighted Precision": 0.6376755054054996,
128
+ "eval_Weighted Recall": 0.61,
129
+ "eval_accuracy": 0.61,
130
+ "eval_loss": 1.2633185386657715,
131
+ "eval_runtime": 23.4866,
132
+ "eval_samples_per_second": 42.577,
133
+ "eval_steps_per_second": 1.362,
134
+ "step": 93
135
+ },
136
+ {
137
+ "epoch": 3.07,
138
+ "learning_rate": 4.601593625498008e-05,
139
+ "loss": 1.4741,
140
+ "step": 96
141
+ },
142
+ {
143
+ "epoch": 3.33,
144
+ "learning_rate": 4.52191235059761e-05,
145
+ "loss": 1.4995,
146
+ "step": 104
147
+ },
148
+ {
149
+ "epoch": 3.58,
150
+ "learning_rate": 4.442231075697211e-05,
151
+ "loss": 1.3476,
152
+ "step": 112
153
+ },
154
+ {
155
+ "epoch": 3.84,
156
+ "learning_rate": 4.362549800796813e-05,
157
+ "loss": 1.346,
158
+ "step": 120
159
+ },
160
+ {
161
+ "epoch": 4.0,
162
+ "eval_Macro F1": 0.6991540885965197,
163
+ "eval_Macro Precision": 0.7033970309397422,
164
+ "eval_Macro Recall": 0.705813786350264,
165
+ "eval_Micro F1": 0.706,
166
+ "eval_Micro Precision": 0.706,
167
+ "eval_Micro Recall": 0.706,
168
+ "eval_Weighted F1": 0.7023066954415087,
169
+ "eval_Weighted Precision": 0.7095418750091215,
170
+ "eval_Weighted Recall": 0.706,
171
+ "eval_accuracy": 0.706,
172
+ "eval_loss": 1.025865077972412,
173
+ "eval_runtime": 25.2258,
174
+ "eval_samples_per_second": 39.642,
175
+ "eval_steps_per_second": 1.269,
176
+ "step": 125
177
+ },
178
+ {
179
+ "epoch": 4.1,
180
+ "learning_rate": 4.2828685258964146e-05,
181
+ "loss": 1.361,
182
+ "step": 128
183
+ },
184
+ {
185
+ "epoch": 4.35,
186
+ "learning_rate": 4.203187250996016e-05,
187
+ "loss": 1.2724,
188
+ "step": 136
189
+ },
190
+ {
191
+ "epoch": 4.61,
192
+ "learning_rate": 4.123505976095618e-05,
193
+ "loss": 1.2634,
194
+ "step": 144
195
+ },
196
+ {
197
+ "epoch": 4.86,
198
+ "learning_rate": 4.043824701195219e-05,
199
+ "loss": 1.253,
200
+ "step": 152
201
+ },
202
+ {
203
+ "epoch": 4.99,
204
+ "eval_Macro F1": 0.7239315752163233,
205
+ "eval_Macro Precision": 0.7261038012066161,
206
+ "eval_Macro Recall": 0.7290773391581917,
207
+ "eval_Micro F1": 0.729,
208
+ "eval_Micro Precision": 0.729,
209
+ "eval_Micro Recall": 0.729,
210
+ "eval_Weighted F1": 0.7276898829930567,
211
+ "eval_Weighted Precision": 0.7340103739336716,
212
+ "eval_Weighted Recall": 0.729,
213
+ "eval_accuracy": 0.729,
214
+ "eval_loss": 0.9180329442024231,
215
+ "eval_runtime": 24.244,
216
+ "eval_samples_per_second": 41.247,
217
+ "eval_steps_per_second": 1.32,
218
+ "step": 156
219
+ },
220
+ {
221
+ "epoch": 5.12,
222
+ "learning_rate": 3.964143426294821e-05,
223
+ "loss": 1.1127,
224
+ "step": 160
225
+ },
226
+ {
227
+ "epoch": 5.38,
228
+ "learning_rate": 3.884462151394422e-05,
229
+ "loss": 1.2072,
230
+ "step": 168
231
+ },
232
+ {
233
+ "epoch": 5.63,
234
+ "learning_rate": 3.804780876494024e-05,
235
+ "loss": 1.1051,
236
+ "step": 176
237
+ },
238
+ {
239
+ "epoch": 5.89,
240
+ "learning_rate": 3.7250996015936256e-05,
241
+ "loss": 1.0975,
242
+ "step": 184
243
+ },
244
+ {
245
+ "epoch": 5.98,
246
+ "eval_Macro F1": 0.7437283226344332,
247
+ "eval_Macro Precision": 0.7525965488125351,
248
+ "eval_Macro Recall": 0.7471731443855207,
249
+ "eval_Micro F1": 0.747,
250
+ "eval_Micro Precision": 0.747,
251
+ "eval_Micro Recall": 0.747,
252
+ "eval_Weighted F1": 0.7479974803294792,
253
+ "eval_Weighted Precision": 0.7608663358741853,
254
+ "eval_Weighted Recall": 0.747,
255
+ "eval_accuracy": 0.747,
256
+ "eval_loss": 0.8858795166015625,
257
+ "eval_runtime": 24.5565,
258
+ "eval_samples_per_second": 40.722,
259
+ "eval_steps_per_second": 1.303,
260
+ "step": 187
261
+ },
262
+ {
263
+ "epoch": 6.14,
264
+ "learning_rate": 3.6454183266932277e-05,
265
+ "loss": 1.0922,
266
+ "step": 192
267
+ },
268
+ {
269
+ "epoch": 6.4,
270
+ "learning_rate": 3.565737051792829e-05,
271
+ "loss": 1.0797,
272
+ "step": 200
273
+ },
274
+ {
275
+ "epoch": 6.66,
276
+ "learning_rate": 3.4860557768924304e-05,
277
+ "loss": 1.0066,
278
+ "step": 208
279
+ },
280
+ {
281
+ "epoch": 6.91,
282
+ "learning_rate": 3.406374501992032e-05,
283
+ "loss": 1.1122,
284
+ "step": 216
285
+ },
286
+ {
287
+ "epoch": 6.98,
288
+ "eval_Macro F1": 0.7577762006111012,
289
+ "eval_Macro Precision": 0.7726630864911376,
290
+ "eval_Macro Recall": 0.7594437988791525,
291
+ "eval_Micro F1": 0.76,
292
+ "eval_Micro Precision": 0.76,
293
+ "eval_Micro Recall": 0.76,
294
+ "eval_Weighted F1": 0.7606134776349917,
295
+ "eval_Weighted Precision": 0.7772442524935343,
296
+ "eval_Weighted Recall": 0.76,
297
+ "eval_accuracy": 0.76,
298
+ "eval_loss": 0.8269779086112976,
299
+ "eval_runtime": 24.3399,
300
+ "eval_samples_per_second": 41.085,
301
+ "eval_steps_per_second": 1.315,
302
+ "step": 218
303
+ },
304
+ {
305
+ "epoch": 7.17,
306
+ "learning_rate": 3.326693227091633e-05,
307
+ "loss": 0.988,
308
+ "step": 224
309
+ },
310
+ {
311
+ "epoch": 7.42,
312
+ "learning_rate": 3.247011952191235e-05,
313
+ "loss": 1.0301,
314
+ "step": 232
315
+ },
316
+ {
317
+ "epoch": 7.68,
318
+ "learning_rate": 3.1673306772908366e-05,
319
+ "loss": 0.9724,
320
+ "step": 240
321
+ },
322
+ {
323
+ "epoch": 7.94,
324
+ "learning_rate": 3.0876494023904386e-05,
325
+ "loss": 1.0365,
326
+ "step": 248
327
+ },
328
+ {
329
+ "epoch": 8.0,
330
+ "eval_Macro F1": 0.7730173451661722,
331
+ "eval_Macro Precision": 0.7919871344293006,
332
+ "eval_Macro Recall": 0.7734992733449892,
333
+ "eval_Micro F1": 0.775,
334
+ "eval_Micro Precision": 0.775,
335
+ "eval_Micro Recall": 0.775,
336
+ "eval_Weighted F1": 0.7759067917731501,
337
+ "eval_Weighted Precision": 0.7957122736768526,
338
+ "eval_Weighted Recall": 0.775,
339
+ "eval_accuracy": 0.775,
340
+ "eval_loss": 0.7806075215339661,
341
+ "eval_runtime": 23.5653,
342
+ "eval_samples_per_second": 42.435,
343
+ "eval_steps_per_second": 1.358,
344
+ "step": 250
345
+ },
346
+ {
347
+ "epoch": 8.19,
348
+ "learning_rate": 3.00796812749004e-05,
349
+ "loss": 0.9482,
350
+ "step": 256
351
+ },
352
+ {
353
+ "epoch": 8.45,
354
+ "learning_rate": 2.9282868525896417e-05,
355
+ "loss": 0.9364,
356
+ "step": 264
357
+ },
358
+ {
359
+ "epoch": 8.7,
360
+ "learning_rate": 2.8486055776892434e-05,
361
+ "loss": 0.9417,
362
+ "step": 272
363
+ },
364
+ {
365
+ "epoch": 8.96,
366
+ "learning_rate": 2.7689243027888445e-05,
367
+ "loss": 1.004,
368
+ "step": 280
369
+ },
370
+ {
371
+ "epoch": 8.99,
372
+ "eval_Macro F1": 0.7956663948657638,
373
+ "eval_Macro Precision": 0.8150981558104353,
374
+ "eval_Macro Recall": 0.7955573765342054,
375
+ "eval_Micro F1": 0.796,
376
+ "eval_Micro Precision": 0.796,
377
+ "eval_Micro Recall": 0.796,
378
+ "eval_Weighted F1": 0.7977477337842563,
379
+ "eval_Weighted Precision": 0.8193174046095921,
380
+ "eval_Weighted Recall": 0.796,
381
+ "eval_accuracy": 0.796,
382
+ "eval_loss": 0.7471871972084045,
383
+ "eval_runtime": 24.2779,
384
+ "eval_samples_per_second": 41.19,
385
+ "eval_steps_per_second": 1.318,
386
+ "step": 281
387
+ },
388
+ {
389
+ "epoch": 9.22,
390
+ "learning_rate": 2.6892430278884462e-05,
391
+ "loss": 0.8964,
392
+ "step": 288
393
+ },
394
+ {
395
+ "epoch": 9.47,
396
+ "learning_rate": 2.609561752988048e-05,
397
+ "loss": 0.9098,
398
+ "step": 296
399
+ },
400
+ {
401
+ "epoch": 9.73,
402
+ "learning_rate": 2.5298804780876496e-05,
403
+ "loss": 0.9576,
404
+ "step": 304
405
+ },
406
+ {
407
+ "epoch": 9.98,
408
+ "learning_rate": 2.4501992031872513e-05,
409
+ "loss": 0.9278,
410
+ "step": 312
411
+ },
412
+ {
413
+ "epoch": 9.98,
414
+ "eval_Macro F1": 0.7956928280629123,
415
+ "eval_Macro Precision": 0.8114507473411248,
416
+ "eval_Macro Recall": 0.7953090431389529,
417
+ "eval_Micro F1": 0.795,
418
+ "eval_Micro Precision": 0.795,
419
+ "eval_Micro Recall": 0.795,
420
+ "eval_Weighted F1": 0.7974177696936208,
421
+ "eval_Weighted Precision": 0.8157219938612603,
422
+ "eval_Weighted Recall": 0.795,
423
+ "eval_accuracy": 0.795,
424
+ "eval_loss": 0.7296148538589478,
425
+ "eval_runtime": 24.012,
426
+ "eval_samples_per_second": 41.646,
427
+ "eval_steps_per_second": 1.333,
428
+ "step": 312
429
+ },
430
+ {
431
+ "epoch": 10.24,
432
+ "learning_rate": 2.3705179282868527e-05,
433
+ "loss": 0.8623,
434
+ "step": 320
435
+ },
436
+ {
437
+ "epoch": 10.5,
438
+ "learning_rate": 2.290836653386454e-05,
439
+ "loss": 0.9,
440
+ "step": 328
441
+ },
442
+ {
443
+ "epoch": 10.75,
444
+ "learning_rate": 2.2111553784860558e-05,
445
+ "loss": 0.8767,
446
+ "step": 336
447
+ },
448
+ {
449
+ "epoch": 10.98,
450
+ "eval_Macro F1": 0.8077597575155505,
451
+ "eval_Macro Precision": 0.8135983545887402,
452
+ "eval_Macro Recall": 0.8090640618814349,
453
+ "eval_Micro F1": 0.809,
454
+ "eval_Micro Precision": 0.809,
455
+ "eval_Micro Recall": 0.809,
456
+ "eval_Weighted F1": 0.8101031910520137,
457
+ "eval_Weighted Precision": 0.8181958157506283,
458
+ "eval_Weighted Recall": 0.809,
459
+ "eval_accuracy": 0.809,
460
+ "eval_loss": 0.7256603240966797,
461
+ "eval_runtime": 23.8159,
462
+ "eval_samples_per_second": 41.989,
463
+ "eval_steps_per_second": 1.344,
464
+ "step": 343
465
+ },
466
+ {
467
+ "epoch": 11.01,
468
+ "learning_rate": 2.1314741035856575e-05,
469
+ "loss": 0.9239,
470
+ "step": 344
471
+ },
472
+ {
473
+ "epoch": 11.26,
474
+ "learning_rate": 2.0517928286852592e-05,
475
+ "loss": 0.8971,
476
+ "step": 352
477
+ },
478
+ {
479
+ "epoch": 11.52,
480
+ "learning_rate": 1.9721115537848606e-05,
481
+ "loss": 0.9387,
482
+ "step": 360
483
+ },
484
+ {
485
+ "epoch": 11.78,
486
+ "learning_rate": 1.8924302788844623e-05,
487
+ "loss": 0.8656,
488
+ "step": 368
489
+ },
490
+ {
491
+ "epoch": 12.0,
492
+ "eval_Macro F1": 0.8106317572025369,
493
+ "eval_Macro Precision": 0.8164411111931356,
494
+ "eval_Macro Recall": 0.812159053433045,
495
+ "eval_Micro F1": 0.8140000000000001,
496
+ "eval_Micro Precision": 0.814,
497
+ "eval_Micro Recall": 0.814,
498
+ "eval_Weighted F1": 0.813675273019787,
499
+ "eval_Weighted Precision": 0.8206734023674667,
500
+ "eval_Weighted Recall": 0.814,
501
+ "eval_accuracy": 0.814,
502
+ "eval_loss": 0.687544584274292,
503
+ "eval_runtime": 24.187,
504
+ "eval_samples_per_second": 41.344,
505
+ "eval_steps_per_second": 1.323,
506
+ "step": 375
507
+ },
508
+ {
509
+ "epoch": 12.03,
510
+ "learning_rate": 1.812749003984064e-05,
511
+ "loss": 0.8568,
512
+ "step": 376
513
+ },
514
+ {
515
+ "epoch": 12.29,
516
+ "learning_rate": 1.7330677290836657e-05,
517
+ "loss": 0.9105,
518
+ "step": 384
519
+ },
520
+ {
521
+ "epoch": 12.54,
522
+ "learning_rate": 1.653386454183267e-05,
523
+ "loss": 0.7687,
524
+ "step": 392
525
+ },
526
+ {
527
+ "epoch": 12.8,
528
+ "learning_rate": 1.5737051792828685e-05,
529
+ "loss": 0.7905,
530
+ "step": 400
531
+ },
532
+ {
533
+ "epoch": 12.99,
534
+ "eval_Macro F1": 0.807144557531445,
535
+ "eval_Macro Precision": 0.8145226206650988,
536
+ "eval_Macro Recall": 0.8067641953560594,
537
+ "eval_Micro F1": 0.808,
538
+ "eval_Micro Precision": 0.808,
539
+ "eval_Micro Recall": 0.808,
540
+ "eval_Weighted F1": 0.8093388838073848,
541
+ "eval_Weighted Precision": 0.8181875581570741,
542
+ "eval_Weighted Recall": 0.808,
543
+ "eval_accuracy": 0.808,
544
+ "eval_loss": 0.7060463428497314,
545
+ "eval_runtime": 23.4383,
546
+ "eval_samples_per_second": 42.665,
547
+ "eval_steps_per_second": 1.365,
548
+ "step": 406
549
+ },
550
+ {
551
+ "epoch": 13.06,
552
+ "learning_rate": 1.4940239043824702e-05,
553
+ "loss": 0.8453,
554
+ "step": 408
555
+ },
556
+ {
557
+ "epoch": 13.31,
558
+ "learning_rate": 1.4143426294820719e-05,
559
+ "loss": 0.7926,
560
+ "step": 416
561
+ },
562
+ {
563
+ "epoch": 13.57,
564
+ "learning_rate": 1.3346613545816733e-05,
565
+ "loss": 0.8721,
566
+ "step": 424
567
+ },
568
+ {
569
+ "epoch": 13.82,
570
+ "learning_rate": 1.254980079681275e-05,
571
+ "loss": 0.8804,
572
+ "step": 432
573
+ },
574
+ {
575
+ "epoch": 13.98,
576
+ "eval_Macro F1": 0.8182926573190057,
577
+ "eval_Macro Precision": 0.8215050680984892,
578
+ "eval_Macro Recall": 0.8183483781309105,
579
+ "eval_Micro F1": 0.82,
580
+ "eval_Micro Precision": 0.82,
581
+ "eval_Micro Recall": 0.82,
582
+ "eval_Weighted F1": 0.8213528638428045,
583
+ "eval_Weighted Precision": 0.8260224129157536,
584
+ "eval_Weighted Recall": 0.82,
585
+ "eval_accuracy": 0.82,
586
+ "eval_loss": 0.6848881244659424,
587
+ "eval_runtime": 24.2686,
588
+ "eval_samples_per_second": 41.206,
589
+ "eval_steps_per_second": 1.319,
590
+ "step": 437
591
+ },
592
+ {
593
+ "epoch": 14.08,
594
+ "learning_rate": 1.1752988047808767e-05,
595
+ "loss": 0.8041,
596
+ "step": 440
597
+ },
598
+ {
599
+ "epoch": 14.34,
600
+ "learning_rate": 1.095617529880478e-05,
601
+ "loss": 0.8291,
602
+ "step": 448
603
+ },
604
+ {
605
+ "epoch": 14.59,
606
+ "learning_rate": 1.0159362549800798e-05,
607
+ "loss": 0.8109,
608
+ "step": 456
609
+ },
610
+ {
611
+ "epoch": 14.85,
612
+ "learning_rate": 9.362549800796813e-06,
613
+ "loss": 0.8265,
614
+ "step": 464
615
+ },
616
+ {
617
+ "epoch": 14.98,
618
+ "eval_Macro F1": 0.814299562240979,
619
+ "eval_Macro Precision": 0.8205669101031257,
620
+ "eval_Macro Recall": 0.8141933591904861,
621
+ "eval_Micro F1": 0.816,
622
+ "eval_Micro Precision": 0.816,
623
+ "eval_Micro Recall": 0.816,
624
+ "eval_Weighted F1": 0.8171172793618807,
625
+ "eval_Weighted Precision": 0.8242012281620977,
626
+ "eval_Weighted Recall": 0.816,
627
+ "eval_accuracy": 0.816,
628
+ "eval_loss": 0.6820688247680664,
629
+ "eval_runtime": 24.1434,
630
+ "eval_samples_per_second": 41.419,
631
+ "eval_steps_per_second": 1.325,
632
+ "step": 468
633
+ },
634
+ {
635
+ "epoch": 15.1,
636
+ "learning_rate": 8.565737051792829e-06,
637
+ "loss": 0.7845,
638
+ "step": 472
639
+ },
640
+ {
641
+ "epoch": 15.36,
642
+ "learning_rate": 7.768924302788846e-06,
643
+ "loss": 0.8545,
644
+ "step": 480
645
+ },
646
+ {
647
+ "epoch": 15.62,
648
+ "learning_rate": 6.97211155378486e-06,
649
+ "loss": 0.7685,
650
+ "step": 488
651
+ },
652
+ {
653
+ "epoch": 15.87,
654
+ "learning_rate": 6.175298804780877e-06,
655
+ "loss": 0.7929,
656
+ "step": 496
657
+ },
658
+ {
659
+ "epoch": 16.0,
660
+ "eval_Macro F1": 0.8151613063095204,
661
+ "eval_Macro Precision": 0.818634892770315,
662
+ "eval_Macro Recall": 0.8167488060127654,
663
+ "eval_Micro F1": 0.818,
664
+ "eval_Micro Precision": 0.818,
665
+ "eval_Micro Recall": 0.818,
666
+ "eval_Weighted F1": 0.8184268497331145,
667
+ "eval_Weighted Precision": 0.8239745058078853,
668
+ "eval_Weighted Recall": 0.818,
669
+ "eval_accuracy": 0.818,
670
+ "eval_loss": 0.6877326369285583,
671
+ "eval_runtime": 23.4065,
672
+ "eval_samples_per_second": 42.723,
673
+ "eval_steps_per_second": 1.367,
674
+ "step": 500
675
+ },
676
+ {
677
+ "epoch": 16.13,
678
+ "learning_rate": 5.378486055776893e-06,
679
+ "loss": 0.8338,
680
+ "step": 504
681
+ },
682
+ {
683
+ "epoch": 16.38,
684
+ "learning_rate": 4.581673306772908e-06,
685
+ "loss": 0.8154,
686
+ "step": 512
687
+ },
688
+ {
689
+ "epoch": 16.64,
690
+ "learning_rate": 3.7848605577689246e-06,
691
+ "loss": 0.8054,
692
+ "step": 520
693
+ },
694
+ {
695
+ "epoch": 16.9,
696
+ "learning_rate": 2.9880478087649404e-06,
697
+ "loss": 0.7993,
698
+ "step": 528
699
+ },
700
+ {
701
+ "epoch": 16.99,
702
+ "eval_Macro F1": 0.8233893661888394,
703
+ "eval_Macro Precision": 0.8281793690152945,
704
+ "eval_Macro Recall": 0.822655931109436,
705
+ "eval_Micro F1": 0.825,
706
+ "eval_Micro Precision": 0.825,
707
+ "eval_Micro Recall": 0.825,
708
+ "eval_Weighted F1": 0.8258766455781327,
709
+ "eval_Weighted Precision": 0.8305915821251276,
710
+ "eval_Weighted Recall": 0.825,
711
+ "eval_accuracy": 0.825,
712
+ "eval_loss": 0.6717957854270935,
713
+ "eval_runtime": 24.415,
714
+ "eval_samples_per_second": 40.958,
715
+ "eval_steps_per_second": 1.311,
716
+ "step": 531
717
+ },
718
+ {
719
+ "epoch": 17.15,
720
+ "learning_rate": 2.1912350597609563e-06,
721
+ "loss": 0.7572,
722
+ "step": 536
723
+ },
724
+ {
725
+ "epoch": 17.41,
726
+ "learning_rate": 1.3944223107569721e-06,
727
+ "loss": 0.8022,
728
+ "step": 544
729
+ },
730
+ {
731
+ "epoch": 17.66,
732
+ "learning_rate": 5.976095617529881e-07,
733
+ "loss": 0.7954,
734
+ "step": 552
735
+ },
736
+ {
737
+ "epoch": 17.86,
738
+ "eval_Macro F1": 0.8242217019056312,
739
+ "eval_Macro Precision": 0.8293094252297472,
740
+ "eval_Macro Recall": 0.8236754312981294,
741
+ "eval_Micro F1": 0.826,
742
+ "eval_Micro Precision": 0.826,
743
+ "eval_Micro Recall": 0.826,
744
+ "eval_Weighted F1": 0.827162008102242,
745
+ "eval_Weighted Precision": 0.8326500761444383,
746
+ "eval_Weighted Recall": 0.826,
747
+ "eval_accuracy": 0.826,
748
+ "eval_loss": 0.6715443134307861,
749
+ "eval_runtime": 24.1692,
750
+ "eval_samples_per_second": 41.375,
751
+ "eval_steps_per_second": 1.324,
752
+ "step": 558
753
+ },
754
+ {
755
+ "epoch": 17.86,
756
+ "step": 558,
757
+ "total_flos": 5.536126404560683e+18,
758
+ "train_loss": 1.1784635154149865,
759
+ "train_runtime": 5483.2083,
760
+ "train_samples_per_second": 13.131,
761
+ "train_steps_per_second": 0.102
762
+ }
763
+ ],
764
+ "max_steps": 558,
765
+ "num_train_epochs": 18,
766
+ "total_flos": 5.536126404560683e+18,
767
+ "trial_name": null,
768
+ "trial_params": null
769
+ }