DunnBC22 committed on
Commit
5068037
1 Parent(s): 20c1e24

All Dunn!!!

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +715 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.5799241374931839,
4
+ "train_runtime": 32322.6365,
5
+ "train_samples_per_second": 12.916,
6
+ "train_steps_per_second": 1.615
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.5799241374931839,
4
+ "train_runtime": 32322.6365,
5
+ "train_samples_per_second": 12.916,
6
+ "train_steps_per_second": 1.615
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,715 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 52190,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 0.000999980839241234,
13
+ "loss": 0.7834,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
+ "learning_rate": 0.0009899980839241235,
19
+ "loss": 0.7384,
20
+ "step": 522
21
+ },
22
+ {
23
+ "epoch": 0.1,
24
+ "learning_rate": 0.000979996167848247,
25
+ "loss": 0.7597,
26
+ "step": 1044
27
+ },
28
+ {
29
+ "epoch": 0.15,
30
+ "learning_rate": 0.0009699942517723703,
31
+ "loss": 0.7401,
32
+ "step": 1566
33
+ },
34
+ {
35
+ "epoch": 0.2,
36
+ "learning_rate": 0.0009599923356964936,
37
+ "loss": 0.7495,
38
+ "step": 2088
39
+ },
40
+ {
41
+ "epoch": 0.25,
42
+ "learning_rate": 0.000949990419620617,
43
+ "loss": 0.7017,
44
+ "step": 2610
45
+ },
46
+ {
47
+ "epoch": 0.3,
48
+ "learning_rate": 0.0009399885035447404,
49
+ "loss": 0.7398,
50
+ "step": 3132
51
+ },
52
+ {
53
+ "epoch": 0.35,
54
+ "learning_rate": 0.0009299865874688638,
55
+ "loss": 0.6949,
56
+ "step": 3654
57
+ },
58
+ {
59
+ "epoch": 0.4,
60
+ "learning_rate": 0.0009199846713929871,
61
+ "loss": 0.6335,
62
+ "step": 4176
63
+ },
64
+ {
65
+ "epoch": 0.45,
66
+ "learning_rate": 0.0009099827553171107,
67
+ "loss": 0.648,
68
+ "step": 4698
69
+ },
70
+ {
71
+ "epoch": 0.5,
72
+ "learning_rate": 0.000899980839241234,
73
+ "loss": 0.6445,
74
+ "step": 5220
75
+ },
76
+ {
77
+ "epoch": 0.55,
78
+ "learning_rate": 0.0008899789231653573,
79
+ "loss": 0.6429,
80
+ "step": 5742
81
+ },
82
+ {
83
+ "epoch": 0.6,
84
+ "learning_rate": 0.0008799770070894808,
85
+ "loss": 0.6316,
86
+ "step": 6264
87
+ },
88
+ {
89
+ "epoch": 0.65,
90
+ "learning_rate": 0.0008699750910136041,
91
+ "loss": 0.6066,
92
+ "step": 6786
93
+ },
94
+ {
95
+ "epoch": 0.7,
96
+ "learning_rate": 0.0008599731749377275,
97
+ "loss": 0.6397,
98
+ "step": 7308
99
+ },
100
+ {
101
+ "epoch": 0.75,
102
+ "learning_rate": 0.0008499712588618509,
103
+ "loss": 0.6243,
104
+ "step": 7830
105
+ },
106
+ {
107
+ "epoch": 0.8,
108
+ "learning_rate": 0.0008399693427859744,
109
+ "loss": 0.6271,
110
+ "step": 8352
111
+ },
112
+ {
113
+ "epoch": 0.85,
114
+ "learning_rate": 0.0008299674267100977,
115
+ "loss": 0.614,
116
+ "step": 8874
117
+ },
118
+ {
119
+ "epoch": 0.9,
120
+ "learning_rate": 0.0008199655106342211,
121
+ "loss": 0.6358,
122
+ "step": 9396
123
+ },
124
+ {
125
+ "epoch": 0.95,
126
+ "learning_rate": 0.0008099635945583445,
127
+ "loss": 0.633,
128
+ "step": 9918
129
+ },
130
+ {
131
+ "epoch": 1.0,
132
+ "eval_Macro F1": 0.5744881635875178,
133
+ "eval_Macro Precision": 0.6541570568719827,
134
+ "eval_Macro Recall": 0.564346253205228,
135
+ "eval_Micro F1": 0.826105701864693,
136
+ "eval_Micro Precision": 0.826105701864693,
137
+ "eval_Micro Recall": 0.826105701864693,
138
+ "eval_Weighted F1": 0.7913544313349856,
139
+ "eval_Weighted Precision": 0.7843732666581203,
140
+ "eval_Weighted Recall": 0.826105701864693,
141
+ "eval_accuracy": 0.826105701864693,
142
+ "eval_loss": 0.5608153343200684,
143
+ "eval_runtime": 2136.2944,
144
+ "eval_samples_per_second": 13.029,
145
+ "eval_steps_per_second": 1.629,
146
+ "step": 10438
147
+ },
148
+ {
149
+ "epoch": 1.0,
150
+ "learning_rate": 0.0007999616784824679,
151
+ "loss": 0.6139,
152
+ "step": 10440
153
+ },
154
+ {
155
+ "epoch": 1.05,
156
+ "learning_rate": 0.0007899597624065913,
157
+ "loss": 0.5945,
158
+ "step": 10962
159
+ },
160
+ {
161
+ "epoch": 1.1,
162
+ "learning_rate": 0.0007799578463307148,
163
+ "loss": 0.6018,
164
+ "step": 11484
165
+ },
166
+ {
167
+ "epoch": 1.15,
168
+ "learning_rate": 0.0007699559302548381,
169
+ "loss": 0.597,
170
+ "step": 12006
171
+ },
172
+ {
173
+ "epoch": 1.2,
174
+ "learning_rate": 0.0007599540141789615,
175
+ "loss": 0.5891,
176
+ "step": 12528
177
+ },
178
+ {
179
+ "epoch": 1.25,
180
+ "learning_rate": 0.0007499520981030848,
181
+ "loss": 0.6139,
182
+ "step": 13050
183
+ },
184
+ {
185
+ "epoch": 1.3,
186
+ "learning_rate": 0.0007399501820272084,
187
+ "loss": 0.5878,
188
+ "step": 13572
189
+ },
190
+ {
191
+ "epoch": 1.35,
192
+ "learning_rate": 0.0007299482659513317,
193
+ "loss": 0.6183,
194
+ "step": 14094
195
+ },
196
+ {
197
+ "epoch": 1.4,
198
+ "learning_rate": 0.0007199463498754551,
199
+ "loss": 0.6038,
200
+ "step": 14616
201
+ },
202
+ {
203
+ "epoch": 1.45,
204
+ "learning_rate": 0.0007099444337995785,
205
+ "loss": 0.6506,
206
+ "step": 15138
207
+ },
208
+ {
209
+ "epoch": 1.5,
210
+ "learning_rate": 0.0006999425177237019,
211
+ "loss": 0.6063,
212
+ "step": 15660
213
+ },
214
+ {
215
+ "epoch": 1.55,
216
+ "learning_rate": 0.0006899406016478252,
217
+ "loss": 0.5931,
218
+ "step": 16182
219
+ },
220
+ {
221
+ "epoch": 1.6,
222
+ "learning_rate": 0.0006799386855719488,
223
+ "loss": 0.5828,
224
+ "step": 16704
225
+ },
226
+ {
227
+ "epoch": 1.65,
228
+ "learning_rate": 0.0006699367694960721,
229
+ "loss": 0.5912,
230
+ "step": 17226
231
+ },
232
+ {
233
+ "epoch": 1.7,
234
+ "learning_rate": 0.0006599348534201954,
235
+ "loss": 0.6105,
236
+ "step": 17748
237
+ },
238
+ {
239
+ "epoch": 1.75,
240
+ "learning_rate": 0.0006499329373443188,
241
+ "loss": 0.5893,
242
+ "step": 18270
243
+ },
244
+ {
245
+ "epoch": 1.8,
246
+ "learning_rate": 0.0006399310212684422,
247
+ "loss": 0.611,
248
+ "step": 18792
249
+ },
250
+ {
251
+ "epoch": 1.85,
252
+ "learning_rate": 0.0006299291051925656,
253
+ "loss": 0.6371,
254
+ "step": 19314
255
+ },
256
+ {
257
+ "epoch": 1.9,
258
+ "learning_rate": 0.000619927189116689,
259
+ "loss": 0.5925,
260
+ "step": 19836
261
+ },
262
+ {
263
+ "epoch": 1.95,
264
+ "learning_rate": 0.0006099252730408125,
265
+ "loss": 0.6029,
266
+ "step": 20358
267
+ },
268
+ {
269
+ "epoch": 2.0,
270
+ "eval_Macro F1": 0.5060450221643912,
271
+ "eval_Macro Precision": 0.6929448637916937,
272
+ "eval_Macro Recall": 0.5239477514921818,
273
+ "eval_Micro F1": 0.8330758452197032,
274
+ "eval_Micro Precision": 0.8330758452197032,
275
+ "eval_Micro Recall": 0.8330758452197032,
276
+ "eval_Weighted F1": 0.7724263350491593,
277
+ "eval_Weighted Precision": 0.7892310862523967,
278
+ "eval_Weighted Recall": 0.8330758452197032,
279
+ "eval_accuracy": 0.8330758452197032,
280
+ "eval_loss": 0.6489848494529724,
281
+ "eval_runtime": 1320.9895,
282
+ "eval_samples_per_second": 21.07,
283
+ "eval_steps_per_second": 2.634,
284
+ "step": 20876
285
+ },
286
+ {
287
+ "epoch": 2.0,
288
+ "learning_rate": 0.0005999233569649358,
289
+ "loss": 0.5882,
290
+ "step": 20880
291
+ },
292
+ {
293
+ "epoch": 2.05,
294
+ "learning_rate": 0.0005899214408890592,
295
+ "loss": 0.5831,
296
+ "step": 21402
297
+ },
298
+ {
299
+ "epoch": 2.1,
300
+ "learning_rate": 0.0005799195248131826,
301
+ "loss": 0.5755,
302
+ "step": 21924
303
+ },
304
+ {
305
+ "epoch": 2.15,
306
+ "learning_rate": 0.000569917608737306,
307
+ "loss": 0.5822,
308
+ "step": 22446
309
+ },
310
+ {
311
+ "epoch": 2.2,
312
+ "learning_rate": 0.0005599156926614294,
313
+ "loss": 0.5632,
314
+ "step": 22968
315
+ },
316
+ {
317
+ "epoch": 2.25,
318
+ "learning_rate": 0.0005499137765855528,
319
+ "loss": 0.5756,
320
+ "step": 23490
321
+ },
322
+ {
323
+ "epoch": 2.3,
324
+ "learning_rate": 0.0005399118605096762,
325
+ "loss": 0.6018,
326
+ "step": 24012
327
+ },
328
+ {
329
+ "epoch": 2.35,
330
+ "learning_rate": 0.0005299099444337996,
331
+ "loss": 0.5978,
332
+ "step": 24534
333
+ },
334
+ {
335
+ "epoch": 2.4,
336
+ "learning_rate": 0.0005199080283579229,
337
+ "loss": 0.5767,
338
+ "step": 25056
339
+ },
340
+ {
341
+ "epoch": 2.45,
342
+ "learning_rate": 0.0005099061122820464,
343
+ "loss": 0.5538,
344
+ "step": 25578
345
+ },
346
+ {
347
+ "epoch": 2.5,
348
+ "learning_rate": 0.0004999041962061698,
349
+ "loss": 0.5621,
350
+ "step": 26100
351
+ },
352
+ {
353
+ "epoch": 2.55,
354
+ "learning_rate": 0.0004899022801302932,
355
+ "loss": 0.594,
356
+ "step": 26622
357
+ },
358
+ {
359
+ "epoch": 2.6,
360
+ "learning_rate": 0.00047990036405441656,
361
+ "loss": 0.5616,
362
+ "step": 27144
363
+ },
364
+ {
365
+ "epoch": 2.65,
366
+ "learning_rate": 0.00046989844797853996,
367
+ "loss": 0.5444,
368
+ "step": 27666
369
+ },
370
+ {
371
+ "epoch": 2.7,
372
+ "learning_rate": 0.00045989653190266336,
373
+ "loss": 0.5558,
374
+ "step": 28188
375
+ },
376
+ {
377
+ "epoch": 2.75,
378
+ "learning_rate": 0.00044989461582678675,
379
+ "loss": 0.5415,
380
+ "step": 28710
381
+ },
382
+ {
383
+ "epoch": 2.8,
384
+ "learning_rate": 0.0004398926997509101,
385
+ "loss": 0.5452,
386
+ "step": 29232
387
+ },
388
+ {
389
+ "epoch": 2.85,
390
+ "learning_rate": 0.00042989078367503355,
391
+ "loss": 0.5595,
392
+ "step": 29754
393
+ },
394
+ {
395
+ "epoch": 2.9,
396
+ "learning_rate": 0.00041988886759915695,
397
+ "loss": 0.565,
398
+ "step": 30276
399
+ },
400
+ {
401
+ "epoch": 2.95,
402
+ "learning_rate": 0.0004098869515232803,
403
+ "loss": 0.5478,
404
+ "step": 30798
405
+ },
406
+ {
407
+ "epoch": 3.0,
408
+ "eval_Macro F1": 0.6188634906559709,
409
+ "eval_Macro Precision": 0.6783819638851676,
410
+ "eval_Macro Recall": 0.6003398559173716,
411
+ "eval_Micro F1": 0.8304530593180757,
412
+ "eval_Micro Precision": 0.8304530593180757,
413
+ "eval_Micro Recall": 0.8304530593180757,
414
+ "eval_Weighted F1": 0.8070780134214183,
415
+ "eval_Weighted Precision": 0.8001960025950923,
416
+ "eval_Weighted Recall": 0.8304530593180757,
417
+ "eval_accuracy": 0.8304530593180757,
418
+ "eval_loss": 0.5508156418800354,
419
+ "eval_runtime": 1285.8765,
420
+ "eval_samples_per_second": 21.645,
421
+ "eval_steps_per_second": 2.706,
422
+ "step": 31314
423
+ },
424
+ {
425
+ "epoch": 3.0,
426
+ "learning_rate": 0.00039988503544740374,
427
+ "loss": 0.5593,
428
+ "step": 31320
429
+ },
430
+ {
431
+ "epoch": 3.05,
432
+ "learning_rate": 0.0003898831193715271,
433
+ "loss": 0.5348,
434
+ "step": 31842
435
+ },
436
+ {
437
+ "epoch": 3.1,
438
+ "learning_rate": 0.0003798812032956505,
439
+ "loss": 0.5459,
440
+ "step": 32364
441
+ },
442
+ {
443
+ "epoch": 3.15,
444
+ "learning_rate": 0.00036987928721977394,
445
+ "loss": 0.5544,
446
+ "step": 32886
447
+ },
448
+ {
449
+ "epoch": 3.2,
450
+ "learning_rate": 0.0003598773711438973,
451
+ "loss": 0.5442,
452
+ "step": 33408
453
+ },
454
+ {
455
+ "epoch": 3.25,
456
+ "learning_rate": 0.00034987545506802074,
457
+ "loss": 0.542,
458
+ "step": 33930
459
+ },
460
+ {
461
+ "epoch": 3.3,
462
+ "learning_rate": 0.0003398735389921441,
463
+ "loss": 0.5325,
464
+ "step": 34452
465
+ },
466
+ {
467
+ "epoch": 3.35,
468
+ "learning_rate": 0.0003298716229162675,
469
+ "loss": 0.5525,
470
+ "step": 34974
471
+ },
472
+ {
473
+ "epoch": 3.4,
474
+ "learning_rate": 0.00031986970684039093,
475
+ "loss": 0.5547,
476
+ "step": 35496
477
+ },
478
+ {
479
+ "epoch": 3.45,
480
+ "learning_rate": 0.0003098677907645143,
481
+ "loss": 0.5484,
482
+ "step": 36018
483
+ },
484
+ {
485
+ "epoch": 3.5,
486
+ "learning_rate": 0.00029986587468863767,
487
+ "loss": 0.5368,
488
+ "step": 36540
489
+ },
490
+ {
491
+ "epoch": 3.55,
492
+ "learning_rate": 0.00028986395861276107,
493
+ "loss": 0.5487,
494
+ "step": 37062
495
+ },
496
+ {
497
+ "epoch": 3.6,
498
+ "learning_rate": 0.00027986204253688447,
499
+ "loss": 0.5403,
500
+ "step": 37584
501
+ },
502
+ {
503
+ "epoch": 3.65,
504
+ "learning_rate": 0.00026986012646100787,
505
+ "loss": 0.5459,
506
+ "step": 38106
507
+ },
508
+ {
509
+ "epoch": 3.7,
510
+ "learning_rate": 0.00025985821038513126,
511
+ "loss": 0.5376,
512
+ "step": 38628
513
+ },
514
+ {
515
+ "epoch": 3.75,
516
+ "learning_rate": 0.00024985629430925466,
517
+ "loss": 0.5186,
518
+ "step": 39150
519
+ },
520
+ {
521
+ "epoch": 3.8,
522
+ "learning_rate": 0.00023985437823337806,
523
+ "loss": 0.5371,
524
+ "step": 39672
525
+ },
526
+ {
527
+ "epoch": 3.85,
528
+ "learning_rate": 0.00022985246215750143,
529
+ "loss": 0.5413,
530
+ "step": 40194
531
+ },
532
+ {
533
+ "epoch": 3.9,
534
+ "learning_rate": 0.00021985054608162483,
535
+ "loss": 0.5328,
536
+ "step": 40716
537
+ },
538
+ {
539
+ "epoch": 3.95,
540
+ "learning_rate": 0.00020984863000574823,
541
+ "loss": 0.513,
542
+ "step": 41238
543
+ },
544
+ {
545
+ "epoch": 4.0,
546
+ "eval_Macro F1": 0.6224111150035051,
547
+ "eval_Macro Precision": 0.6915564589610266,
548
+ "eval_Macro Recall": 0.60231662265339,
549
+ "eval_Micro F1": 0.8347285596234686,
550
+ "eval_Micro Precision": 0.8347285596234686,
551
+ "eval_Micro Recall": 0.8347285596234686,
552
+ "eval_Weighted F1": 0.8100695837354521,
553
+ "eval_Weighted Precision": 0.8049325371288767,
554
+ "eval_Weighted Recall": 0.8347285596234686,
555
+ "eval_accuracy": 0.8347285596234686,
556
+ "eval_loss": 0.5459285974502563,
557
+ "eval_runtime": 1277.4842,
558
+ "eval_samples_per_second": 21.787,
559
+ "eval_steps_per_second": 2.724,
560
+ "step": 41752
561
+ },
562
+ {
563
+ "epoch": 4.0,
564
+ "learning_rate": 0.00019984671392987163,
565
+ "loss": 0.5375,
566
+ "step": 41760
567
+ },
568
+ {
569
+ "epoch": 4.05,
570
+ "learning_rate": 0.00018984479785399502,
571
+ "loss": 0.5505,
572
+ "step": 42282
573
+ },
574
+ {
575
+ "epoch": 4.1,
576
+ "learning_rate": 0.00017984288177811842,
577
+ "loss": 0.5258,
578
+ "step": 42804
579
+ },
580
+ {
581
+ "epoch": 4.15,
582
+ "learning_rate": 0.00016984096570224182,
583
+ "loss": 0.5132,
584
+ "step": 43326
585
+ },
586
+ {
587
+ "epoch": 4.2,
588
+ "learning_rate": 0.0001598390496263652,
589
+ "loss": 0.5182,
590
+ "step": 43848
591
+ },
592
+ {
593
+ "epoch": 4.25,
594
+ "learning_rate": 0.00014983713355048862,
595
+ "loss": 0.5147,
596
+ "step": 44370
597
+ },
598
+ {
599
+ "epoch": 4.3,
600
+ "learning_rate": 0.00013983521747461201,
601
+ "loss": 0.5364,
602
+ "step": 44892
603
+ },
604
+ {
605
+ "epoch": 4.35,
606
+ "learning_rate": 0.00012983330139873538,
607
+ "loss": 0.53,
608
+ "step": 45414
609
+ },
610
+ {
611
+ "epoch": 4.4,
612
+ "learning_rate": 0.00011983138532285878,
613
+ "loss": 0.5288,
614
+ "step": 45936
615
+ },
616
+ {
617
+ "epoch": 4.45,
618
+ "learning_rate": 0.00010982946924698218,
619
+ "loss": 0.5087,
620
+ "step": 46458
621
+ },
622
+ {
623
+ "epoch": 4.5,
624
+ "learning_rate": 9.982755317110558e-05,
625
+ "loss": 0.5101,
626
+ "step": 46980
627
+ },
628
+ {
629
+ "epoch": 4.55,
630
+ "learning_rate": 8.982563709522898e-05,
631
+ "loss": 0.5235,
632
+ "step": 47502
633
+ },
634
+ {
635
+ "epoch": 4.6,
636
+ "learning_rate": 7.982372101935238e-05,
637
+ "loss": 0.5143,
638
+ "step": 48024
639
+ },
640
+ {
641
+ "epoch": 4.65,
642
+ "learning_rate": 6.982180494347576e-05,
643
+ "loss": 0.5299,
644
+ "step": 48546
645
+ },
646
+ {
647
+ "epoch": 4.7,
648
+ "learning_rate": 5.981988886759916e-05,
649
+ "loss": 0.5362,
650
+ "step": 49068
651
+ },
652
+ {
653
+ "epoch": 4.75,
654
+ "learning_rate": 4.981797279172255e-05,
655
+ "loss": 0.5186,
656
+ "step": 49590
657
+ },
658
+ {
659
+ "epoch": 4.8,
660
+ "learning_rate": 3.981605671584595e-05,
661
+ "loss": 0.5256,
662
+ "step": 50112
663
+ },
664
+ {
665
+ "epoch": 4.85,
666
+ "learning_rate": 2.9814140639969346e-05,
667
+ "loss": 0.5233,
668
+ "step": 50634
669
+ },
670
+ {
671
+ "epoch": 4.9,
672
+ "learning_rate": 1.9812224564092737e-05,
673
+ "loss": 0.5263,
674
+ "step": 51156
675
+ },
676
+ {
677
+ "epoch": 4.95,
678
+ "learning_rate": 9.810308488216133e-06,
679
+ "loss": 0.5288,
680
+ "step": 51678
681
+ },
682
+ {
683
+ "epoch": 5.0,
684
+ "eval_Macro F1": 0.6307998158823078,
685
+ "eval_Macro Precision": 0.7029273840489014,
686
+ "eval_Macro Recall": 0.6089557840149206,
687
+ "eval_Micro F1": 0.8381417741529839,
688
+ "eval_Micro Precision": 0.8381417741529839,
689
+ "eval_Micro Recall": 0.8381417741529839,
690
+ "eval_Weighted F1": 0.8141748808079556,
691
+ "eval_Weighted Precision": 0.810121498718634,
692
+ "eval_Weighted Recall": 0.8381417741529839,
693
+ "eval_accuracy": 0.8381417741529839,
694
+ "eval_loss": 0.5336272716522217,
695
+ "eval_runtime": 1277.7048,
696
+ "eval_samples_per_second": 21.784,
697
+ "eval_steps_per_second": 2.724,
698
+ "step": 52190
699
+ },
700
+ {
701
+ "epoch": 5.0,
702
+ "step": 52190,
703
+ "total_flos": 2.1040687845486864e+16,
704
+ "train_loss": 0.5799241374931839,
705
+ "train_runtime": 32322.6365,
706
+ "train_samples_per_second": 12.916,
707
+ "train_steps_per_second": 1.615
708
+ }
709
+ ],
710
+ "max_steps": 52190,
711
+ "num_train_epochs": 5,
712
+ "total_flos": 2.1040687845486864e+16,
713
+ "trial_name": null,
714
+ "trial_params": null
715
+ }