ayjays132 commited on
Commit
7ba3759
1 Parent(s): 1a8a45f

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -3141
trainer_state.json DELETED
@@ -1,3141 +0,0 @@
1
- {
2
- "best_metric": 0.7981651376146789,
3
- "best_model_checkpoint": "./results\\checkpoint-3000",
4
- "epoch": 0.9501187648456056,
5
- "eval_steps": 50,
6
- "global_step": 4000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 2.3752969121140145e-07,
14
- "loss": 1.1402,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 4.750593824228029e-07,
20
- "loss": 1.0834,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.01,
25
- "learning_rate": 7.125890736342043e-07,
26
- "loss": 0.9403,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.01,
31
- "learning_rate": 9.501187648456058e-07,
32
- "loss": 0.8238,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.01,
37
- "learning_rate": 1.187648456057007e-06,
38
- "loss": 0.7737,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.01,
43
- "eval_accuracy": 0.5091743119266054,
44
- "eval_loss": 0.7308847904205322,
45
- "eval_runtime": 10.532,
46
- "eval_samples_per_second": 82.796,
47
- "eval_steps_per_second": 5.222,
48
- "step": 50
49
- },
50
- {
51
- "epoch": 0.01,
52
- "learning_rate": 1.4251781472684086e-06,
53
- "loss": 0.7321,
54
- "step": 60
55
- },
56
- {
57
- "epoch": 0.02,
58
- "learning_rate": 1.6627078384798101e-06,
59
- "loss": 0.7221,
60
- "step": 70
61
- },
62
- {
63
- "epoch": 0.02,
64
- "learning_rate": 1.9002375296912116e-06,
65
- "loss": 0.7081,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.02,
70
- "learning_rate": 2.137767220902613e-06,
71
- "loss": 0.7255,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.02,
76
- "learning_rate": 2.375296912114014e-06,
77
- "loss": 0.7094,
78
- "step": 100
79
- },
80
- {
81
- "epoch": 0.02,
82
- "eval_accuracy": 0.5091743119266054,
83
- "eval_loss": 0.7017449736595154,
84
- "eval_runtime": 10.4451,
85
- "eval_samples_per_second": 83.484,
86
- "eval_steps_per_second": 5.266,
87
- "step": 100
88
- },
89
- {
90
- "epoch": 0.03,
91
- "learning_rate": 2.612826603325416e-06,
92
- "loss": 0.6745,
93
- "step": 110
94
- },
95
- {
96
- "epoch": 0.03,
97
- "learning_rate": 2.850356294536817e-06,
98
- "loss": 0.674,
99
- "step": 120
100
- },
101
- {
102
- "epoch": 0.03,
103
- "learning_rate": 3.0878859857482185e-06,
104
- "loss": 0.7235,
105
- "step": 130
106
- },
107
- {
108
- "epoch": 0.03,
109
- "learning_rate": 3.3254156769596202e-06,
110
- "loss": 0.7192,
111
- "step": 140
112
- },
113
- {
114
- "epoch": 0.04,
115
- "learning_rate": 3.5629453681710215e-06,
116
- "loss": 0.7033,
117
- "step": 150
118
- },
119
- {
120
- "epoch": 0.04,
121
- "eval_accuracy": 0.4908256880733945,
122
- "eval_loss": 0.7025471329689026,
123
- "eval_runtime": 10.4492,
124
- "eval_samples_per_second": 83.451,
125
- "eval_steps_per_second": 5.264,
126
- "step": 150
127
- },
128
- {
129
- "epoch": 0.04,
130
- "learning_rate": 3.8004750593824232e-06,
131
- "loss": 0.7037,
132
- "step": 160
133
- },
134
- {
135
- "epoch": 0.04,
136
- "learning_rate": 4.038004750593825e-06,
137
- "loss": 0.6754,
138
- "step": 170
139
- },
140
- {
141
- "epoch": 0.04,
142
- "learning_rate": 4.275534441805226e-06,
143
- "loss": 0.7119,
144
- "step": 180
145
- },
146
- {
147
- "epoch": 0.05,
148
- "learning_rate": 4.513064133016627e-06,
149
- "loss": 0.7077,
150
- "step": 190
151
- },
152
- {
153
- "epoch": 0.05,
154
- "learning_rate": 4.750593824228028e-06,
155
- "loss": 0.698,
156
- "step": 200
157
- },
158
- {
159
- "epoch": 0.05,
160
- "eval_accuracy": 0.4873853211009174,
161
- "eval_loss": 0.7025485038757324,
162
- "eval_runtime": 10.6349,
163
- "eval_samples_per_second": 81.994,
164
- "eval_steps_per_second": 5.172,
165
- "step": 200
166
- },
167
- {
168
- "epoch": 0.05,
169
- "learning_rate": 4.98812351543943e-06,
170
- "loss": 0.6954,
171
- "step": 210
172
- },
173
- {
174
- "epoch": 0.05,
175
- "learning_rate": 5.225653206650832e-06,
176
- "loss": 0.7078,
177
- "step": 220
178
- },
179
- {
180
- "epoch": 0.05,
181
- "learning_rate": 5.4631828978622335e-06,
182
- "loss": 0.6521,
183
- "step": 230
184
- },
185
- {
186
- "epoch": 0.06,
187
- "learning_rate": 5.700712589073634e-06,
188
- "loss": 0.6937,
189
- "step": 240
190
- },
191
- {
192
- "epoch": 0.06,
193
- "learning_rate": 5.938242280285035e-06,
194
- "loss": 0.6965,
195
- "step": 250
196
- },
197
- {
198
- "epoch": 0.06,
199
- "eval_accuracy": 0.5229357798165137,
200
- "eval_loss": 0.6914423704147339,
201
- "eval_runtime": 10.7015,
202
- "eval_samples_per_second": 81.484,
203
- "eval_steps_per_second": 5.139,
204
- "step": 250
205
- },
206
- {
207
- "epoch": 0.06,
208
- "learning_rate": 6.175771971496437e-06,
209
- "loss": 0.679,
210
- "step": 260
211
- },
212
- {
213
- "epoch": 0.06,
214
- "learning_rate": 6.4133016627078396e-06,
215
- "loss": 0.68,
216
- "step": 270
217
- },
218
- {
219
- "epoch": 0.07,
220
- "learning_rate": 6.6508313539192404e-06,
221
- "loss": 0.679,
222
- "step": 280
223
- },
224
- {
225
- "epoch": 0.07,
226
- "learning_rate": 6.888361045130641e-06,
227
- "loss": 0.7055,
228
- "step": 290
229
- },
230
- {
231
- "epoch": 0.07,
232
- "learning_rate": 7.125890736342043e-06,
233
- "loss": 0.6866,
234
- "step": 300
235
- },
236
- {
237
- "epoch": 0.07,
238
- "eval_accuracy": 0.5091743119266054,
239
- "eval_loss": 0.7029032707214355,
240
- "eval_runtime": 10.4489,
241
- "eval_samples_per_second": 83.453,
242
- "eval_steps_per_second": 5.264,
243
- "step": 300
244
- },
245
- {
246
- "epoch": 0.07,
247
- "learning_rate": 7.363420427553444e-06,
248
- "loss": 0.6868,
249
- "step": 310
250
- },
251
- {
252
- "epoch": 0.08,
253
- "learning_rate": 7.6009501187648464e-06,
254
- "loss": 0.6827,
255
- "step": 320
256
- },
257
- {
258
- "epoch": 0.08,
259
- "learning_rate": 7.838479809976247e-06,
260
- "loss": 0.6765,
261
- "step": 330
262
- },
263
- {
264
- "epoch": 0.08,
265
- "learning_rate": 8.07600950118765e-06,
266
- "loss": 0.6836,
267
- "step": 340
268
- },
269
- {
270
- "epoch": 0.08,
271
- "learning_rate": 8.31353919239905e-06,
272
- "loss": 0.6894,
273
- "step": 350
274
- },
275
- {
276
- "epoch": 0.08,
277
- "eval_accuracy": 0.5068807339449541,
278
- "eval_loss": 0.6934388875961304,
279
- "eval_runtime": 9.7535,
280
- "eval_samples_per_second": 89.404,
281
- "eval_steps_per_second": 5.639,
282
- "step": 350
283
- },
284
- {
285
- "epoch": 0.09,
286
- "learning_rate": 8.551068883610452e-06,
287
- "loss": 0.6939,
288
- "step": 360
289
- },
290
- {
291
- "epoch": 0.09,
292
- "learning_rate": 8.788598574821852e-06,
293
- "loss": 0.6903,
294
- "step": 370
295
- },
296
- {
297
- "epoch": 0.09,
298
- "learning_rate": 9.026128266033253e-06,
299
- "loss": 0.6508,
300
- "step": 380
301
- },
302
- {
303
- "epoch": 0.09,
304
- "learning_rate": 9.263657957244656e-06,
305
- "loss": 0.7377,
306
- "step": 390
307
- },
308
- {
309
- "epoch": 0.1,
310
- "learning_rate": 9.501187648456057e-06,
311
- "loss": 0.7484,
312
- "step": 400
313
- },
314
- {
315
- "epoch": 0.1,
316
- "eval_accuracy": 0.4908256880733945,
317
- "eval_loss": 0.7315114140510559,
318
- "eval_runtime": 11.129,
319
- "eval_samples_per_second": 78.354,
320
- "eval_steps_per_second": 4.942,
321
- "step": 400
322
- },
323
- {
324
- "epoch": 0.1,
325
- "learning_rate": 9.73871733966746e-06,
326
- "loss": 0.6887,
327
- "step": 410
328
- },
329
- {
330
- "epoch": 0.1,
331
- "learning_rate": 9.97624703087886e-06,
332
- "loss": 0.6562,
333
- "step": 420
334
- },
335
- {
336
- "epoch": 0.1,
337
- "learning_rate": 1.0213776722090261e-05,
338
- "loss": 0.6696,
339
- "step": 430
340
- },
341
- {
342
- "epoch": 0.1,
343
- "learning_rate": 1.0451306413301664e-05,
344
- "loss": 0.7045,
345
- "step": 440
346
- },
347
- {
348
- "epoch": 0.11,
349
- "learning_rate": 1.0688836104513065e-05,
350
- "loss": 0.6498,
351
- "step": 450
352
- },
353
- {
354
- "epoch": 0.11,
355
- "eval_accuracy": 0.5997706422018348,
356
- "eval_loss": 0.6686387658119202,
357
- "eval_runtime": 18.4439,
358
- "eval_samples_per_second": 47.278,
359
- "eval_steps_per_second": 2.982,
360
- "step": 450
361
- },
362
- {
363
- "epoch": 0.11,
364
- "learning_rate": 1.0926365795724467e-05,
365
- "loss": 0.67,
366
- "step": 460
367
- },
368
- {
369
- "epoch": 0.11,
370
- "learning_rate": 1.1163895486935868e-05,
371
- "loss": 0.6821,
372
- "step": 470
373
- },
374
- {
375
- "epoch": 0.11,
376
- "learning_rate": 1.1401425178147269e-05,
377
- "loss": 0.6477,
378
- "step": 480
379
- },
380
- {
381
- "epoch": 0.12,
382
- "learning_rate": 1.163895486935867e-05,
383
- "loss": 0.6935,
384
- "step": 490
385
- },
386
- {
387
- "epoch": 0.12,
388
- "learning_rate": 1.187648456057007e-05,
389
- "loss": 0.6687,
390
- "step": 500
391
- },
392
- {
393
- "epoch": 0.12,
394
- "eval_accuracy": 0.5745412844036697,
395
- "eval_loss": 0.6807542443275452,
396
- "eval_runtime": 24.027,
397
- "eval_samples_per_second": 36.293,
398
- "eval_steps_per_second": 2.289,
399
- "step": 500
400
- },
401
- {
402
- "epoch": 0.12,
403
- "learning_rate": 1.2114014251781473e-05,
404
- "loss": 0.6542,
405
- "step": 510
406
- },
407
- {
408
- "epoch": 0.12,
409
- "learning_rate": 1.2351543942992874e-05,
410
- "loss": 0.7159,
411
- "step": 520
412
- },
413
- {
414
- "epoch": 0.13,
415
- "learning_rate": 1.2589073634204277e-05,
416
- "loss": 0.7332,
417
- "step": 530
418
- },
419
- {
420
- "epoch": 0.13,
421
- "learning_rate": 1.2826603325415679e-05,
422
- "loss": 0.6521,
423
- "step": 540
424
- },
425
- {
426
- "epoch": 0.13,
427
- "learning_rate": 1.3064133016627078e-05,
428
- "loss": 0.673,
429
- "step": 550
430
- },
431
- {
432
- "epoch": 0.13,
433
- "eval_accuracy": 0.6594036697247706,
434
- "eval_loss": 0.6414512395858765,
435
- "eval_runtime": 24.4855,
436
- "eval_samples_per_second": 35.613,
437
- "eval_steps_per_second": 2.246,
438
- "step": 550
439
- },
440
- {
441
- "epoch": 0.13,
442
- "learning_rate": 1.3301662707838481e-05,
443
- "loss": 0.6498,
444
- "step": 560
445
- },
446
- {
447
- "epoch": 0.14,
448
- "learning_rate": 1.3539192399049882e-05,
449
- "loss": 0.6634,
450
- "step": 570
451
- },
452
- {
453
- "epoch": 0.14,
454
- "learning_rate": 1.3776722090261283e-05,
455
- "loss": 0.6557,
456
- "step": 580
457
- },
458
- {
459
- "epoch": 0.14,
460
- "learning_rate": 1.4014251781472683e-05,
461
- "loss": 0.6924,
462
- "step": 590
463
- },
464
- {
465
- "epoch": 0.14,
466
- "learning_rate": 1.4251781472684086e-05,
467
- "loss": 0.629,
468
- "step": 600
469
- },
470
- {
471
- "epoch": 0.14,
472
- "eval_accuracy": 0.6192660550458715,
473
- "eval_loss": 0.6346095204353333,
474
- "eval_runtime": 21.937,
475
- "eval_samples_per_second": 39.75,
476
- "eval_steps_per_second": 2.507,
477
- "step": 600
478
- },
479
- {
480
- "epoch": 0.14,
481
- "learning_rate": 1.4489311163895489e-05,
482
- "loss": 0.6623,
483
- "step": 610
484
- },
485
- {
486
- "epoch": 0.15,
487
- "learning_rate": 1.4726840855106888e-05,
488
- "loss": 0.6252,
489
- "step": 620
490
- },
491
- {
492
- "epoch": 0.15,
493
- "learning_rate": 1.496437054631829e-05,
494
- "loss": 0.6662,
495
- "step": 630
496
- },
497
- {
498
- "epoch": 0.15,
499
- "learning_rate": 1.5201900237529693e-05,
500
- "loss": 0.6038,
501
- "step": 640
502
- },
503
- {
504
- "epoch": 0.15,
505
- "learning_rate": 1.5439429928741092e-05,
506
- "loss": 0.611,
507
- "step": 650
508
- },
509
- {
510
- "epoch": 0.15,
511
- "eval_accuracy": 0.6915137614678899,
512
- "eval_loss": 0.5941868424415588,
513
- "eval_runtime": 23.4369,
514
- "eval_samples_per_second": 37.206,
515
- "eval_steps_per_second": 2.347,
516
- "step": 650
517
- },
518
- {
519
- "epoch": 0.16,
520
- "learning_rate": 1.5676959619952495e-05,
521
- "loss": 0.5919,
522
- "step": 660
523
- },
524
- {
525
- "epoch": 0.16,
526
- "learning_rate": 1.5914489311163897e-05,
527
- "loss": 0.6268,
528
- "step": 670
529
- },
530
- {
531
- "epoch": 0.16,
532
- "learning_rate": 1.61520190023753e-05,
533
- "loss": 0.5221,
534
- "step": 680
535
- },
536
- {
537
- "epoch": 0.16,
538
- "learning_rate": 1.63895486935867e-05,
539
- "loss": 0.7314,
540
- "step": 690
541
- },
542
- {
543
- "epoch": 0.17,
544
- "learning_rate": 1.66270783847981e-05,
545
- "loss": 0.7231,
546
- "step": 700
547
- },
548
- {
549
- "epoch": 0.17,
550
- "eval_accuracy": 0.6410550458715596,
551
- "eval_loss": 0.6234958171844482,
552
- "eval_runtime": 17.243,
553
- "eval_samples_per_second": 50.571,
554
- "eval_steps_per_second": 3.19,
555
- "step": 700
556
- },
557
- {
558
- "epoch": 0.17,
559
- "learning_rate": 1.6864608076009504e-05,
560
- "loss": 0.6203,
561
- "step": 710
562
- },
563
- {
564
- "epoch": 0.17,
565
- "learning_rate": 1.7102137767220903e-05,
566
- "loss": 0.6168,
567
- "step": 720
568
- },
569
- {
570
- "epoch": 0.17,
571
- "learning_rate": 1.7339667458432306e-05,
572
- "loss": 0.6334,
573
- "step": 730
574
- },
575
- {
576
- "epoch": 0.18,
577
- "learning_rate": 1.7577197149643705e-05,
578
- "loss": 0.6718,
579
- "step": 740
580
- },
581
- {
582
- "epoch": 0.18,
583
- "learning_rate": 1.7814726840855108e-05,
584
- "loss": 0.6445,
585
- "step": 750
586
- },
587
- {
588
- "epoch": 0.18,
589
- "eval_accuracy": 0.7075688073394495,
590
- "eval_loss": 0.5777224898338318,
591
- "eval_runtime": 24.0528,
592
- "eval_samples_per_second": 36.254,
593
- "eval_steps_per_second": 2.287,
594
- "step": 750
595
- },
596
- {
597
- "epoch": 0.18,
598
- "learning_rate": 1.8052256532066507e-05,
599
- "loss": 0.6013,
600
- "step": 760
601
- },
602
- {
603
- "epoch": 0.18,
604
- "learning_rate": 1.828978622327791e-05,
605
- "loss": 0.5739,
606
- "step": 770
607
- },
608
- {
609
- "epoch": 0.19,
610
- "learning_rate": 1.8527315914489312e-05,
611
- "loss": 0.6245,
612
- "step": 780
613
- },
614
- {
615
- "epoch": 0.19,
616
- "learning_rate": 1.876484560570071e-05,
617
- "loss": 0.5358,
618
- "step": 790
619
- },
620
- {
621
- "epoch": 0.19,
622
- "learning_rate": 1.9002375296912114e-05,
623
- "loss": 0.5986,
624
- "step": 800
625
- },
626
- {
627
- "epoch": 0.19,
628
- "eval_accuracy": 0.6685779816513762,
629
- "eval_loss": 0.6269965767860413,
630
- "eval_runtime": 24.3364,
631
- "eval_samples_per_second": 35.831,
632
- "eval_steps_per_second": 2.26,
633
- "step": 800
634
- },
635
- {
636
- "epoch": 0.19,
637
- "learning_rate": 1.9239904988123516e-05,
638
- "loss": 0.6833,
639
- "step": 810
640
- },
641
- {
642
- "epoch": 0.19,
643
- "learning_rate": 1.947743467933492e-05,
644
- "loss": 0.6864,
645
- "step": 820
646
- },
647
- {
648
- "epoch": 0.2,
649
- "learning_rate": 1.9714964370546318e-05,
650
- "loss": 0.5795,
651
- "step": 830
652
- },
653
- {
654
- "epoch": 0.2,
655
- "learning_rate": 1.995249406175772e-05,
656
- "loss": 0.6274,
657
- "step": 840
658
- },
659
- {
660
- "epoch": 0.2,
661
- "learning_rate": 2.0190023752969123e-05,
662
- "loss": 0.663,
663
- "step": 850
664
- },
665
- {
666
- "epoch": 0.2,
667
- "eval_accuracy": 0.6972477064220184,
668
- "eval_loss": 0.5657657384872437,
669
- "eval_runtime": 26.7355,
670
- "eval_samples_per_second": 32.616,
671
- "eval_steps_per_second": 2.057,
672
- "step": 850
673
- },
674
- {
675
- "epoch": 0.2,
676
- "learning_rate": 2.0427553444180522e-05,
677
- "loss": 0.5579,
678
- "step": 860
679
- },
680
- {
681
- "epoch": 0.21,
682
- "learning_rate": 2.0665083135391925e-05,
683
- "loss": 0.5483,
684
- "step": 870
685
- },
686
- {
687
- "epoch": 0.21,
688
- "learning_rate": 2.0902612826603327e-05,
689
- "loss": 0.5827,
690
- "step": 880
691
- },
692
- {
693
- "epoch": 0.21,
694
- "learning_rate": 2.114014251781473e-05,
695
- "loss": 0.5757,
696
- "step": 890
697
- },
698
- {
699
- "epoch": 0.21,
700
- "learning_rate": 2.137767220902613e-05,
701
- "loss": 0.6553,
702
- "step": 900
703
- },
704
- {
705
- "epoch": 0.21,
706
- "eval_accuracy": 0.6112385321100917,
707
- "eval_loss": 0.639819860458374,
708
- "eval_runtime": 23.3332,
709
- "eval_samples_per_second": 37.372,
710
- "eval_steps_per_second": 2.357,
711
- "step": 900
712
- },
713
- {
714
- "epoch": 0.22,
715
- "learning_rate": 2.161520190023753e-05,
716
- "loss": 0.5793,
717
- "step": 910
718
- },
719
- {
720
- "epoch": 0.22,
721
- "learning_rate": 2.1852731591448934e-05,
722
- "loss": 0.5839,
723
- "step": 920
724
- },
725
- {
726
- "epoch": 0.22,
727
- "learning_rate": 2.2090261282660333e-05,
728
- "loss": 0.5909,
729
- "step": 930
730
- },
731
- {
732
- "epoch": 0.22,
733
- "learning_rate": 2.2327790973871736e-05,
734
- "loss": 0.5239,
735
- "step": 940
736
- },
737
- {
738
- "epoch": 0.23,
739
- "learning_rate": 2.2565320665083135e-05,
740
- "loss": 0.5402,
741
- "step": 950
742
- },
743
- {
744
- "epoch": 0.23,
745
- "eval_accuracy": 0.6915137614678899,
746
- "eval_loss": 0.5853348970413208,
747
- "eval_runtime": 28.0695,
748
- "eval_samples_per_second": 31.066,
749
- "eval_steps_per_second": 1.959,
750
- "step": 950
751
- },
752
- {
753
- "epoch": 0.23,
754
- "learning_rate": 2.2802850356294538e-05,
755
- "loss": 0.5396,
756
- "step": 960
757
- },
758
- {
759
- "epoch": 0.23,
760
- "learning_rate": 2.3040380047505937e-05,
761
- "loss": 0.5771,
762
- "step": 970
763
- },
764
- {
765
- "epoch": 0.23,
766
- "learning_rate": 2.327790973871734e-05,
767
- "loss": 0.5087,
768
- "step": 980
769
- },
770
- {
771
- "epoch": 0.24,
772
- "learning_rate": 2.3515439429928742e-05,
773
- "loss": 0.5752,
774
- "step": 990
775
- },
776
- {
777
- "epoch": 0.24,
778
- "learning_rate": 2.375296912114014e-05,
779
- "loss": 0.7053,
780
- "step": 1000
781
- },
782
- {
783
- "epoch": 0.24,
784
- "eval_accuracy": 0.5779816513761468,
785
- "eval_loss": 0.7715405225753784,
786
- "eval_runtime": 22.4698,
787
- "eval_samples_per_second": 38.808,
788
- "eval_steps_per_second": 2.448,
789
- "step": 1000
790
- },
791
- {
792
- "epoch": 0.24,
793
- "learning_rate": 2.3990498812351544e-05,
794
- "loss": 0.5453,
795
- "step": 1010
796
- },
797
- {
798
- "epoch": 0.24,
799
- "learning_rate": 2.4228028503562946e-05,
800
- "loss": 0.5469,
801
- "step": 1020
802
- },
803
- {
804
- "epoch": 0.24,
805
- "learning_rate": 2.446555819477435e-05,
806
- "loss": 0.5702,
807
- "step": 1030
808
- },
809
- {
810
- "epoch": 0.25,
811
- "learning_rate": 2.4703087885985748e-05,
812
- "loss": 0.519,
813
- "step": 1040
814
- },
815
- {
816
- "epoch": 0.25,
817
- "learning_rate": 2.494061757719715e-05,
818
- "loss": 0.613,
819
- "step": 1050
820
- },
821
- {
822
- "epoch": 0.25,
823
- "eval_accuracy": 0.6112385321100917,
824
- "eval_loss": 0.713302493095398,
825
- "eval_runtime": 22.1389,
826
- "eval_samples_per_second": 39.388,
827
- "eval_steps_per_second": 2.484,
828
- "step": 1050
829
- },
830
- {
831
- "epoch": 0.25,
832
- "learning_rate": 2.5178147268408553e-05,
833
- "loss": 0.5345,
834
- "step": 1060
835
- },
836
- {
837
- "epoch": 0.25,
838
- "learning_rate": 2.5415676959619956e-05,
839
- "loss": 0.5408,
840
- "step": 1070
841
- },
842
- {
843
- "epoch": 0.26,
844
- "learning_rate": 2.5653206650831358e-05,
845
- "loss": 0.5472,
846
- "step": 1080
847
- },
848
- {
849
- "epoch": 0.26,
850
- "learning_rate": 2.5890736342042754e-05,
851
- "loss": 0.6234,
852
- "step": 1090
853
- },
854
- {
855
- "epoch": 0.26,
856
- "learning_rate": 2.6128266033254157e-05,
857
- "loss": 0.4965,
858
- "step": 1100
859
- },
860
- {
861
- "epoch": 0.26,
862
- "eval_accuracy": 0.7385321100917431,
863
- "eval_loss": 0.5148417353630066,
864
- "eval_runtime": 11.4511,
865
- "eval_samples_per_second": 76.15,
866
- "eval_steps_per_second": 4.803,
867
- "step": 1100
868
- },
869
- {
870
- "epoch": 0.26,
871
- "learning_rate": 2.636579572446556e-05,
872
- "loss": 0.5747,
873
- "step": 1110
874
- },
875
- {
876
- "epoch": 0.27,
877
- "learning_rate": 2.6603325415676962e-05,
878
- "loss": 0.8352,
879
- "step": 1120
880
- },
881
- {
882
- "epoch": 0.27,
883
- "learning_rate": 2.6840855106888364e-05,
884
- "loss": 0.6406,
885
- "step": 1130
886
- },
887
- {
888
- "epoch": 0.27,
889
- "learning_rate": 2.7078384798099763e-05,
890
- "loss": 0.5701,
891
- "step": 1140
892
- },
893
- {
894
- "epoch": 0.27,
895
- "learning_rate": 2.7315914489311166e-05,
896
- "loss": 0.5114,
897
- "step": 1150
898
- },
899
- {
900
- "epoch": 0.27,
901
- "eval_accuracy": 0.6467889908256881,
902
- "eval_loss": 0.6622269749641418,
903
- "eval_runtime": 11.4464,
904
- "eval_samples_per_second": 76.181,
905
- "eval_steps_per_second": 4.805,
906
- "step": 1150
907
- },
908
- {
909
- "epoch": 0.28,
910
- "learning_rate": 2.7553444180522565e-05,
911
- "loss": 0.6769,
912
- "step": 1160
913
- },
914
- {
915
- "epoch": 0.28,
916
- "learning_rate": 2.7790973871733968e-05,
917
- "loss": 0.5533,
918
- "step": 1170
919
- },
920
- {
921
- "epoch": 0.28,
922
- "learning_rate": 2.8028503562945367e-05,
923
- "loss": 0.4968,
924
- "step": 1180
925
- },
926
- {
927
- "epoch": 0.28,
928
- "learning_rate": 2.826603325415677e-05,
929
- "loss": 0.4944,
930
- "step": 1190
931
- },
932
- {
933
- "epoch": 0.29,
934
- "learning_rate": 2.8503562945368172e-05,
935
- "loss": 0.4682,
936
- "step": 1200
937
- },
938
- {
939
- "epoch": 0.29,
940
- "eval_accuracy": 0.7213302752293578,
941
- "eval_loss": 0.5471405386924744,
942
- "eval_runtime": 11.4757,
943
- "eval_samples_per_second": 75.987,
944
- "eval_steps_per_second": 4.793,
945
- "step": 1200
946
- },
947
- {
948
- "epoch": 0.29,
949
- "learning_rate": 2.8741092636579575e-05,
950
- "loss": 0.5635,
951
- "step": 1210
952
- },
953
- {
954
- "epoch": 0.29,
955
- "learning_rate": 2.8978622327790977e-05,
956
- "loss": 0.4861,
957
- "step": 1220
958
- },
959
- {
960
- "epoch": 0.29,
961
- "learning_rate": 2.9216152019002373e-05,
962
- "loss": 0.5109,
963
- "step": 1230
964
- },
965
- {
966
- "epoch": 0.29,
967
- "learning_rate": 2.9453681710213776e-05,
968
- "loss": 0.5228,
969
- "step": 1240
970
- },
971
- {
972
- "epoch": 0.3,
973
- "learning_rate": 2.9691211401425178e-05,
974
- "loss": 0.4801,
975
- "step": 1250
976
- },
977
- {
978
- "epoch": 0.3,
979
- "eval_accuracy": 0.7545871559633027,
980
- "eval_loss": 0.4974513649940491,
981
- "eval_runtime": 11.4419,
982
- "eval_samples_per_second": 76.211,
983
- "eval_steps_per_second": 4.807,
984
- "step": 1250
985
- },
986
- {
987
- "epoch": 0.3,
988
- "learning_rate": 2.992874109263658e-05,
989
- "loss": 0.3829,
990
- "step": 1260
991
- },
992
- {
993
- "epoch": 0.3,
994
- "learning_rate": 3.0166270783847983e-05,
995
- "loss": 0.527,
996
- "step": 1270
997
- },
998
- {
999
- "epoch": 0.3,
1000
- "learning_rate": 3.0403800475059386e-05,
1001
- "loss": 0.5883,
1002
- "step": 1280
1003
- },
1004
- {
1005
- "epoch": 0.31,
1006
- "learning_rate": 3.064133016627079e-05,
1007
- "loss": 0.5133,
1008
- "step": 1290
1009
- },
1010
- {
1011
- "epoch": 0.31,
1012
- "learning_rate": 3.0878859857482184e-05,
1013
- "loss": 0.5178,
1014
- "step": 1300
1015
- },
1016
- {
1017
- "epoch": 0.31,
1018
- "eval_accuracy": 0.7236238532110092,
1019
- "eval_loss": 0.5414833426475525,
1020
- "eval_runtime": 11.5239,
1021
- "eval_samples_per_second": 75.669,
1022
- "eval_steps_per_second": 4.773,
1023
- "step": 1300
1024
- },
1025
- {
1026
- "epoch": 0.31,
1027
- "learning_rate": 3.111638954869359e-05,
1028
- "loss": 0.4607,
1029
- "step": 1310
1030
- },
1031
- {
1032
- "epoch": 0.31,
1033
- "learning_rate": 3.135391923990499e-05,
1034
- "loss": 0.4809,
1035
- "step": 1320
1036
- },
1037
- {
1038
- "epoch": 0.32,
1039
- "learning_rate": 3.159144893111639e-05,
1040
- "loss": 0.5025,
1041
- "step": 1330
1042
- },
1043
- {
1044
- "epoch": 0.32,
1045
- "learning_rate": 3.1828978622327794e-05,
1046
- "loss": 0.514,
1047
- "step": 1340
1048
- },
1049
- {
1050
- "epoch": 0.32,
1051
- "learning_rate": 3.20665083135392e-05,
1052
- "loss": 0.581,
1053
- "step": 1350
1054
- },
1055
- {
1056
- "epoch": 0.32,
1057
- "eval_accuracy": 0.7614678899082569,
1058
- "eval_loss": 0.5108276009559631,
1059
- "eval_runtime": 11.4369,
1060
- "eval_samples_per_second": 76.245,
1061
- "eval_steps_per_second": 4.809,
1062
- "step": 1350
1063
- },
1064
- {
1065
- "epoch": 0.32,
1066
- "learning_rate": 3.23040380047506e-05,
1067
- "loss": 0.5912,
1068
- "step": 1360
1069
- },
1070
- {
1071
- "epoch": 0.33,
1072
- "learning_rate": 3.2541567695961995e-05,
1073
- "loss": 0.5702,
1074
- "step": 1370
1075
- },
1076
- {
1077
- "epoch": 0.33,
1078
- "learning_rate": 3.27790973871734e-05,
1079
- "loss": 0.5757,
1080
- "step": 1380
1081
- },
1082
- {
1083
- "epoch": 0.33,
1084
- "learning_rate": 3.30166270783848e-05,
1085
- "loss": 0.5328,
1086
- "step": 1390
1087
- },
1088
- {
1089
- "epoch": 0.33,
1090
- "learning_rate": 3.32541567695962e-05,
1091
- "loss": 0.5175,
1092
- "step": 1400
1093
- },
1094
- {
1095
- "epoch": 0.33,
1096
- "eval_accuracy": 0.6651376146788991,
1097
- "eval_loss": 0.5825160145759583,
1098
- "eval_runtime": 12.4677,
1099
- "eval_samples_per_second": 69.941,
1100
- "eval_steps_per_second": 4.411,
1101
- "step": 1400
1102
- },
1103
- {
1104
- "epoch": 0.33,
1105
- "learning_rate": 3.3491686460807606e-05,
1106
- "loss": 0.4831,
1107
- "step": 1410
1108
- },
1109
- {
1110
- "epoch": 0.34,
1111
- "learning_rate": 3.372921615201901e-05,
1112
- "loss": 0.3822,
1113
- "step": 1420
1114
- },
1115
- {
1116
- "epoch": 0.34,
1117
- "learning_rate": 3.396674584323041e-05,
1118
- "loss": 0.4679,
1119
- "step": 1430
1120
- },
1121
- {
1122
- "epoch": 0.34,
1123
- "learning_rate": 3.4204275534441806e-05,
1124
- "loss": 0.4313,
1125
- "step": 1440
1126
- },
1127
- {
1128
- "epoch": 0.34,
1129
- "learning_rate": 3.444180522565321e-05,
1130
- "loss": 0.4897,
1131
- "step": 1450
1132
- },
1133
- {
1134
- "epoch": 0.34,
1135
- "eval_accuracy": 0.7362385321100917,
1136
- "eval_loss": 0.5593736171722412,
1137
- "eval_runtime": 26.9471,
1138
- "eval_samples_per_second": 32.36,
1139
- "eval_steps_per_second": 2.041,
1140
- "step": 1450
1141
- },
1142
- {
1143
- "epoch": 0.35,
1144
- "learning_rate": 3.467933491686461e-05,
1145
- "loss": 0.4222,
1146
- "step": 1460
1147
- },
1148
- {
1149
- "epoch": 0.35,
1150
- "learning_rate": 3.4916864608076014e-05,
1151
- "loss": 0.4754,
1152
- "step": 1470
1153
- },
1154
- {
1155
- "epoch": 0.35,
1156
- "learning_rate": 3.515439429928741e-05,
1157
- "loss": 0.5092,
1158
- "step": 1480
1159
- },
1160
- {
1161
- "epoch": 0.35,
1162
- "learning_rate": 3.539192399049881e-05,
1163
- "loss": 0.4992,
1164
- "step": 1490
1165
- },
1166
- {
1167
- "epoch": 0.36,
1168
- "learning_rate": 3.5629453681710215e-05,
1169
- "loss": 0.5653,
1170
- "step": 1500
1171
- },
1172
- {
1173
- "epoch": 0.36,
1174
- "eval_accuracy": 0.7672018348623854,
1175
- "eval_loss": 0.48585110902786255,
1176
- "eval_runtime": 24.8178,
1177
- "eval_samples_per_second": 35.136,
1178
- "eval_steps_per_second": 2.216,
1179
- "step": 1500
1180
- },
1181
- {
1182
- "epoch": 0.36,
1183
- "learning_rate": 3.586698337292162e-05,
1184
- "loss": 0.4569,
1185
- "step": 1510
1186
- },
1187
- {
1188
- "epoch": 0.36,
1189
- "learning_rate": 3.6104513064133013e-05,
1190
- "loss": 0.4708,
1191
- "step": 1520
1192
- },
1193
- {
1194
- "epoch": 0.36,
1195
- "learning_rate": 3.6342042755344416e-05,
1196
- "loss": 0.5629,
1197
- "step": 1530
1198
- },
1199
- {
1200
- "epoch": 0.37,
1201
- "learning_rate": 3.657957244655582e-05,
1202
- "loss": 0.4748,
1203
- "step": 1540
1204
- },
1205
- {
1206
- "epoch": 0.37,
1207
- "learning_rate": 3.681710213776722e-05,
1208
- "loss": 0.4647,
1209
- "step": 1550
1210
- },
1211
- {
1212
- "epoch": 0.37,
1213
- "eval_accuracy": 0.7511467889908257,
1214
- "eval_loss": 0.5034508109092712,
1215
- "eval_runtime": 25.1309,
1216
- "eval_samples_per_second": 34.698,
1217
- "eval_steps_per_second": 2.189,
1218
- "step": 1550
1219
- },
1220
- {
1221
- "epoch": 0.37,
1222
- "learning_rate": 3.7054631828978624e-05,
1223
- "loss": 0.3937,
1224
- "step": 1560
1225
- },
1226
- {
1227
- "epoch": 0.37,
1228
- "learning_rate": 3.7292161520190026e-05,
1229
- "loss": 0.4446,
1230
- "step": 1570
1231
- },
1232
- {
1233
- "epoch": 0.38,
1234
- "learning_rate": 3.752969121140142e-05,
1235
- "loss": 0.4019,
1236
- "step": 1580
1237
- },
1238
- {
1239
- "epoch": 0.38,
1240
- "learning_rate": 3.7767220902612825e-05,
1241
- "loss": 0.4452,
1242
- "step": 1590
1243
- },
1244
- {
1245
- "epoch": 0.38,
1246
- "learning_rate": 3.800475059382423e-05,
1247
- "loss": 0.5062,
1248
- "step": 1600
1249
- },
1250
- {
1251
- "epoch": 0.38,
1252
- "eval_accuracy": 0.7775229357798165,
1253
- "eval_loss": 0.4737890362739563,
1254
- "eval_runtime": 28.0162,
1255
- "eval_samples_per_second": 31.125,
1256
- "eval_steps_per_second": 1.963,
1257
- "step": 1600
1258
- },
1259
- {
1260
- "epoch": 0.38,
1261
- "learning_rate": 3.824228028503563e-05,
1262
- "loss": 0.5397,
1263
- "step": 1610
1264
- },
1265
- {
1266
- "epoch": 0.38,
1267
- "learning_rate": 3.847980997624703e-05,
1268
- "loss": 0.3397,
1269
- "step": 1620
1270
- },
1271
- {
1272
- "epoch": 0.39,
1273
- "learning_rate": 3.8717339667458435e-05,
1274
- "loss": 0.474,
1275
- "step": 1630
1276
- },
1277
- {
1278
- "epoch": 0.39,
1279
- "learning_rate": 3.895486935866984e-05,
1280
- "loss": 0.4831,
1281
- "step": 1640
1282
- },
1283
- {
1284
- "epoch": 0.39,
1285
- "learning_rate": 3.919239904988123e-05,
1286
- "loss": 0.4812,
1287
- "step": 1650
1288
- },
1289
- {
1290
- "epoch": 0.39,
1291
- "eval_accuracy": 0.7362385321100917,
1292
- "eval_loss": 0.5663760304450989,
1293
- "eval_runtime": 23.8733,
1294
- "eval_samples_per_second": 36.526,
1295
- "eval_steps_per_second": 2.304,
1296
- "step": 1650
1297
- },
1298
- {
1299
- "epoch": 0.39,
1300
- "learning_rate": 3.9429928741092636e-05,
1301
- "loss": 0.4661,
1302
- "step": 1660
1303
- },
1304
- {
1305
- "epoch": 0.4,
1306
- "learning_rate": 3.966745843230404e-05,
1307
- "loss": 0.5724,
1308
- "step": 1670
1309
- },
1310
- {
1311
- "epoch": 0.4,
1312
- "learning_rate": 3.990498812351544e-05,
1313
- "loss": 0.5851,
1314
- "step": 1680
1315
- },
1316
- {
1317
- "epoch": 0.4,
1318
- "learning_rate": 4.0142517814726843e-05,
1319
- "loss": 0.5317,
1320
- "step": 1690
1321
- },
1322
- {
1323
- "epoch": 0.4,
1324
- "learning_rate": 4.0380047505938246e-05,
1325
- "loss": 0.4891,
1326
- "step": 1700
1327
- },
1328
- {
1329
- "epoch": 0.4,
1330
- "eval_accuracy": 0.6123853211009175,
1331
- "eval_loss": 0.7159540057182312,
1332
- "eval_runtime": 21.4544,
1333
- "eval_samples_per_second": 40.644,
1334
- "eval_steps_per_second": 2.564,
1335
- "step": 1700
1336
- },
1337
- {
1338
- "epoch": 0.41,
1339
- "learning_rate": 4.061757719714965e-05,
1340
- "loss": 0.5505,
1341
- "step": 1710
1342
- },
1343
- {
1344
- "epoch": 0.41,
1345
- "learning_rate": 4.0855106888361044e-05,
1346
- "loss": 0.4684,
1347
- "step": 1720
1348
- },
1349
- {
1350
- "epoch": 0.41,
1351
- "learning_rate": 4.109263657957245e-05,
1352
- "loss": 0.3916,
1353
- "step": 1730
1354
- },
1355
- {
1356
- "epoch": 0.41,
1357
- "learning_rate": 4.133016627078385e-05,
1358
- "loss": 0.48,
1359
- "step": 1740
1360
- },
1361
- {
1362
- "epoch": 0.42,
1363
- "learning_rate": 4.156769596199525e-05,
1364
- "loss": 0.494,
1365
- "step": 1750
1366
- },
1367
- {
1368
- "epoch": 0.42,
1369
- "eval_accuracy": 0.6961009174311926,
1370
- "eval_loss": 0.6904969811439514,
1371
- "eval_runtime": 12.0521,
1372
- "eval_samples_per_second": 72.352,
1373
- "eval_steps_per_second": 4.564,
1374
- "step": 1750
1375
- },
1376
- {
1377
- "epoch": 0.42,
1378
- "learning_rate": 4.1805225653206655e-05,
1379
- "loss": 0.4914,
1380
- "step": 1760
1381
- },
1382
- {
1383
- "epoch": 0.42,
1384
- "learning_rate": 4.204275534441806e-05,
1385
- "loss": 0.4142,
1386
- "step": 1770
1387
- },
1388
- {
1389
- "epoch": 0.42,
1390
- "learning_rate": 4.228028503562946e-05,
1391
- "loss": 0.3954,
1392
- "step": 1780
1393
- },
1394
- {
1395
- "epoch": 0.43,
1396
- "learning_rate": 4.2517814726840856e-05,
1397
- "loss": 0.4304,
1398
- "step": 1790
1399
- },
1400
- {
1401
- "epoch": 0.43,
1402
- "learning_rate": 4.275534441805226e-05,
1403
- "loss": 0.4446,
1404
- "step": 1800
1405
- },
1406
- {
1407
- "epoch": 0.43,
1408
- "eval_accuracy": 0.7603211009174312,
1409
- "eval_loss": 0.470760315656662,
1410
- "eval_runtime": 11.4415,
1411
- "eval_samples_per_second": 76.214,
1412
- "eval_steps_per_second": 4.807,
1413
- "step": 1800
1414
- },
1415
- {
1416
- "epoch": 0.43,
1417
- "learning_rate": 4.299287410926366e-05,
1418
- "loss": 0.4779,
1419
- "step": 1810
1420
- },
1421
- {
1422
- "epoch": 0.43,
1423
- "learning_rate": 4.323040380047506e-05,
1424
- "loss": 0.4142,
1425
- "step": 1820
1426
- },
1427
- {
1428
- "epoch": 0.43,
1429
- "learning_rate": 4.3467933491686466e-05,
1430
- "loss": 0.4997,
1431
- "step": 1830
1432
- },
1433
- {
1434
- "epoch": 0.44,
1435
- "learning_rate": 4.370546318289787e-05,
1436
- "loss": 0.456,
1437
- "step": 1840
1438
- },
1439
- {
1440
- "epoch": 0.44,
1441
- "learning_rate": 4.394299287410927e-05,
1442
- "loss": 0.3551,
1443
- "step": 1850
1444
- },
1445
- {
1446
- "epoch": 0.44,
1447
- "eval_accuracy": 0.6720183486238532,
1448
- "eval_loss": 0.9761282801628113,
1449
- "eval_runtime": 11.4416,
1450
- "eval_samples_per_second": 76.213,
1451
- "eval_steps_per_second": 4.807,
1452
- "step": 1850
1453
- },
1454
- {
1455
- "epoch": 0.44,
1456
- "learning_rate": 4.418052256532067e-05,
1457
- "loss": 0.5181,
1458
- "step": 1860
1459
- },
1460
- {
1461
- "epoch": 0.44,
1462
- "learning_rate": 4.441805225653207e-05,
1463
- "loss": 0.4539,
1464
- "step": 1870
1465
- },
1466
- {
1467
- "epoch": 0.45,
1468
- "learning_rate": 4.465558194774347e-05,
1469
- "loss": 0.5408,
1470
- "step": 1880
1471
- },
1472
- {
1473
- "epoch": 0.45,
1474
- "learning_rate": 4.4893111638954874e-05,
1475
- "loss": 0.4418,
1476
- "step": 1890
1477
- },
1478
- {
1479
- "epoch": 0.45,
1480
- "learning_rate": 4.513064133016627e-05,
1481
- "loss": 0.4393,
1482
- "step": 1900
1483
- },
1484
- {
1485
- "epoch": 0.45,
1486
- "eval_accuracy": 0.7488532110091743,
1487
- "eval_loss": 0.5114963054656982,
1488
- "eval_runtime": 11.4593,
1489
- "eval_samples_per_second": 76.095,
1490
- "eval_steps_per_second": 4.8,
1491
- "step": 1900
1492
- },
1493
- {
1494
- "epoch": 0.45,
1495
- "learning_rate": 4.536817102137767e-05,
1496
- "loss": 0.3827,
1497
- "step": 1910
1498
- },
1499
- {
1500
- "epoch": 0.46,
1501
- "learning_rate": 4.5605700712589075e-05,
1502
- "loss": 0.3863,
1503
- "step": 1920
1504
- },
1505
- {
1506
- "epoch": 0.46,
1507
- "learning_rate": 4.584323040380048e-05,
1508
- "loss": 0.3994,
1509
- "step": 1930
1510
- },
1511
- {
1512
- "epoch": 0.46,
1513
- "learning_rate": 4.6080760095011874e-05,
1514
- "loss": 0.498,
1515
- "step": 1940
1516
- },
1517
- {
1518
- "epoch": 0.46,
1519
- "learning_rate": 4.6318289786223276e-05,
1520
- "loss": 0.4129,
1521
- "step": 1950
1522
- },
1523
- {
1524
- "epoch": 0.46,
1525
- "eval_accuracy": 0.7924311926605505,
1526
- "eval_loss": 0.4416314661502838,
1527
- "eval_runtime": 27.8567,
1528
- "eval_samples_per_second": 31.303,
1529
- "eval_steps_per_second": 1.974,
1530
- "step": 1950
1531
- },
1532
- {
1533
- "epoch": 0.47,
1534
- "learning_rate": 4.655581947743468e-05,
1535
- "loss": 0.305,
1536
- "step": 1960
1537
- },
1538
- {
1539
- "epoch": 0.47,
1540
- "learning_rate": 4.679334916864608e-05,
1541
- "loss": 0.3489,
1542
- "step": 1970
1543
- },
1544
- {
1545
- "epoch": 0.47,
1546
- "learning_rate": 4.7030878859857484e-05,
1547
- "loss": 0.5725,
1548
- "step": 1980
1549
- },
1550
- {
1551
- "epoch": 0.47,
1552
- "learning_rate": 4.7268408551068886e-05,
1553
- "loss": 0.3962,
1554
- "step": 1990
1555
- },
1556
- {
1557
- "epoch": 0.48,
1558
- "learning_rate": 4.750593824228028e-05,
1559
- "loss": 0.428,
1560
- "step": 2000
1561
- },
1562
- {
1563
- "epoch": 0.48,
1564
- "eval_accuracy": 0.713302752293578,
1565
- "eval_loss": 0.5976735949516296,
1566
- "eval_runtime": 26.9577,
1567
- "eval_samples_per_second": 32.347,
1568
- "eval_steps_per_second": 2.04,
1569
- "step": 2000
1570
- },
1571
- {
1572
- "epoch": 0.48,
1573
- "learning_rate": 4.7743467933491685e-05,
1574
- "loss": 0.4047,
1575
- "step": 2010
1576
- },
1577
- {
1578
- "epoch": 0.48,
1579
- "learning_rate": 4.798099762470309e-05,
1580
- "loss": 0.4644,
1581
- "step": 2020
1582
- },
1583
- {
1584
- "epoch": 0.48,
1585
- "learning_rate": 4.821852731591449e-05,
1586
- "loss": 0.5173,
1587
- "step": 2030
1588
- },
1589
- {
1590
- "epoch": 0.48,
1591
- "learning_rate": 4.845605700712589e-05,
1592
- "loss": 0.4207,
1593
- "step": 2040
1594
- },
1595
- {
1596
- "epoch": 0.49,
1597
- "learning_rate": 4.8693586698337295e-05,
1598
- "loss": 0.6847,
1599
- "step": 2050
1600
- },
1601
- {
1602
- "epoch": 0.49,
1603
- "eval_accuracy": 0.7522935779816514,
1604
- "eval_loss": 0.4740794599056244,
1605
- "eval_runtime": 26.5007,
1606
- "eval_samples_per_second": 32.905,
1607
- "eval_steps_per_second": 2.075,
1608
- "step": 2050
1609
- },
1610
- {
1611
- "epoch": 0.49,
1612
- "learning_rate": 4.89311163895487e-05,
1613
- "loss": 0.4262,
1614
- "step": 2060
1615
- },
1616
- {
1617
- "epoch": 0.49,
1618
- "learning_rate": 4.9168646080760093e-05,
1619
- "loss": 0.3127,
1620
- "step": 2070
1621
- },
1622
- {
1623
- "epoch": 0.49,
1624
- "learning_rate": 4.9406175771971496e-05,
1625
- "loss": 0.4341,
1626
- "step": 2080
1627
- },
1628
- {
1629
- "epoch": 0.5,
1630
- "learning_rate": 4.96437054631829e-05,
1631
- "loss": 0.3944,
1632
- "step": 2090
1633
- },
1634
- {
1635
- "epoch": 0.5,
1636
- "learning_rate": 4.98812351543943e-05,
1637
- "loss": 0.4921,
1638
- "step": 2100
1639
- },
1640
- {
1641
- "epoch": 0.5,
1642
- "eval_accuracy": 0.731651376146789,
1643
- "eval_loss": 0.5092917680740356,
1644
- "eval_runtime": 29.3929,
1645
- "eval_samples_per_second": 29.667,
1646
- "eval_steps_per_second": 1.871,
1647
- "step": 2100
1648
- },
1649
- {
1650
- "epoch": 0.5,
1651
- "learning_rate": 4.998680390604381e-05,
1652
- "loss": 0.4166,
1653
- "step": 2110
1654
- },
1655
- {
1656
- "epoch": 0.5,
1657
- "learning_rate": 4.996041171813143e-05,
1658
- "loss": 0.3623,
1659
- "step": 2120
1660
- },
1661
- {
1662
- "epoch": 0.51,
1663
- "learning_rate": 4.9934019530219056e-05,
1664
- "loss": 0.4034,
1665
- "step": 2130
1666
- },
1667
- {
1668
- "epoch": 0.51,
1669
- "learning_rate": 4.990762734230668e-05,
1670
- "loss": 0.4566,
1671
- "step": 2140
1672
- },
1673
- {
1674
- "epoch": 0.51,
1675
- "learning_rate": 4.98812351543943e-05,
1676
- "loss": 0.4414,
1677
- "step": 2150
1678
- },
1679
- {
1680
- "epoch": 0.51,
1681
- "eval_accuracy": 0.713302752293578,
1682
- "eval_loss": 0.6652459502220154,
1683
- "eval_runtime": 25.6161,
1684
- "eval_samples_per_second": 34.041,
1685
- "eval_steps_per_second": 2.147,
1686
- "step": 2150
1687
- },
1688
- {
1689
- "epoch": 0.51,
1690
- "learning_rate": 4.9854842966481924e-05,
1691
- "loss": 0.4862,
1692
- "step": 2160
1693
- },
1694
- {
1695
- "epoch": 0.52,
1696
- "learning_rate": 4.9831089997360785e-05,
1697
- "loss": 0.5651,
1698
- "step": 2170
1699
- },
1700
- {
1701
- "epoch": 0.52,
1702
- "learning_rate": 4.98046978094484e-05,
1703
- "loss": 0.4251,
1704
- "step": 2180
1705
- },
1706
- {
1707
- "epoch": 0.52,
1708
- "learning_rate": 4.9778305621536024e-05,
1709
- "loss": 0.514,
1710
- "step": 2190
1711
- },
1712
- {
1713
- "epoch": 0.52,
1714
- "learning_rate": 4.9751913433623646e-05,
1715
- "loss": 0.3697,
1716
- "step": 2200
1717
- },
1718
- {
1719
- "epoch": 0.52,
1720
- "eval_accuracy": 0.7350917431192661,
1721
- "eval_loss": 0.5495473146438599,
1722
- "eval_runtime": 27.2319,
1723
- "eval_samples_per_second": 32.021,
1724
- "eval_steps_per_second": 2.02,
1725
- "step": 2200
1726
- },
1727
- {
1728
- "epoch": 0.52,
1729
- "learning_rate": 4.972552124571127e-05,
1730
- "loss": 0.3555,
1731
- "step": 2210
1732
- },
1733
- {
1734
- "epoch": 0.53,
1735
- "learning_rate": 4.969912905779889e-05,
1736
- "loss": 0.3627,
1737
- "step": 2220
1738
- },
1739
- {
1740
- "epoch": 0.53,
1741
- "learning_rate": 4.9672736869886514e-05,
1742
- "loss": 0.353,
1743
- "step": 2230
1744
- },
1745
- {
1746
- "epoch": 0.53,
1747
- "learning_rate": 4.964634468197414e-05,
1748
- "loss": 0.3561,
1749
- "step": 2240
1750
- },
1751
- {
1752
- "epoch": 0.53,
1753
- "learning_rate": 4.961995249406176e-05,
1754
- "loss": 0.3599,
1755
- "step": 2250
1756
- },
1757
- {
1758
- "epoch": 0.53,
1759
- "eval_accuracy": 0.783256880733945,
1760
- "eval_loss": 0.4456700384616852,
1761
- "eval_runtime": 27.007,
1762
- "eval_samples_per_second": 32.288,
1763
- "eval_steps_per_second": 2.037,
1764
- "step": 2250
1765
- },
1766
- {
1767
- "epoch": 0.54,
1768
- "learning_rate": 4.959356030614938e-05,
1769
- "loss": 0.3917,
1770
- "step": 2260
1771
- },
1772
- {
1773
- "epoch": 0.54,
1774
- "learning_rate": 4.9567168118237005e-05,
1775
- "loss": 0.3342,
1776
- "step": 2270
1777
- },
1778
- {
1779
- "epoch": 0.54,
1780
- "learning_rate": 4.954077593032463e-05,
1781
- "loss": 0.3964,
1782
- "step": 2280
1783
- },
1784
- {
1785
- "epoch": 0.54,
1786
- "learning_rate": 4.9514383742412244e-05,
1787
- "loss": 0.4588,
1788
- "step": 2290
1789
- },
1790
- {
1791
- "epoch": 0.55,
1792
- "learning_rate": 4.9487991554499866e-05,
1793
- "loss": 0.4021,
1794
- "step": 2300
1795
- },
1796
- {
1797
- "epoch": 0.55,
1798
- "eval_accuracy": 0.7924311926605505,
1799
- "eval_loss": 0.43415939807891846,
1800
- "eval_runtime": 28.1746,
1801
- "eval_samples_per_second": 30.95,
1802
- "eval_steps_per_second": 1.952,
1803
- "step": 2300
1804
- },
1805
- {
1806
- "epoch": 0.55,
1807
- "learning_rate": 4.946159936658749e-05,
1808
- "loss": 0.3698,
1809
- "step": 2310
1810
- },
1811
- {
1812
- "epoch": 0.55,
1813
- "learning_rate": 4.943520717867511e-05,
1814
- "loss": 0.4352,
1815
- "step": 2320
1816
- },
1817
- {
1818
- "epoch": 0.55,
1819
- "learning_rate": 4.9408814990762734e-05,
1820
- "loss": 0.4253,
1821
- "step": 2330
1822
- },
1823
- {
1824
- "epoch": 0.56,
1825
- "learning_rate": 4.938242280285036e-05,
1826
- "loss": 0.3442,
1827
- "step": 2340
1828
- },
1829
- {
1830
- "epoch": 0.56,
1831
- "learning_rate": 4.935603061493798e-05,
1832
- "loss": 0.4341,
1833
- "step": 2350
1834
- },
1835
- {
1836
- "epoch": 0.56,
1837
- "eval_accuracy": 0.7626146788990825,
1838
- "eval_loss": 0.4870525598526001,
1839
- "eval_runtime": 27.9701,
1840
- "eval_samples_per_second": 31.176,
1841
- "eval_steps_per_second": 1.966,
1842
- "step": 2350
1843
- },
1844
- {
1845
- "epoch": 0.56,
1846
- "learning_rate": 4.93296384270256e-05,
1847
- "loss": 0.3156,
1848
- "step": 2360
1849
- },
1850
- {
1851
- "epoch": 0.56,
1852
- "learning_rate": 4.9303246239113225e-05,
1853
- "loss": 0.3465,
1854
- "step": 2370
1855
- },
1856
- {
1857
- "epoch": 0.57,
1858
- "learning_rate": 4.927685405120085e-05,
1859
- "loss": 0.3555,
1860
- "step": 2380
1861
- },
1862
- {
1863
- "epoch": 0.57,
1864
- "learning_rate": 4.9250461863288464e-05,
1865
- "loss": 0.4034,
1866
- "step": 2390
1867
- },
1868
- {
1869
- "epoch": 0.57,
1870
- "learning_rate": 4.9224069675376086e-05,
1871
- "loss": 0.4811,
1872
- "step": 2400
1873
- },
1874
- {
1875
- "epoch": 0.57,
1876
- "eval_accuracy": 0.694954128440367,
1877
- "eval_loss": 1.0977351665496826,
1878
- "eval_runtime": 25.7159,
1879
- "eval_samples_per_second": 33.909,
1880
- "eval_steps_per_second": 2.139,
1881
- "step": 2400
1882
- },
1883
- {
1884
- "epoch": 0.57,
1885
- "learning_rate": 4.919767748746371e-05,
1886
- "loss": 0.7109,
1887
- "step": 2410
1888
- },
1889
- {
1890
- "epoch": 0.57,
1891
- "learning_rate": 4.917128529955133e-05,
1892
- "loss": 0.4311,
1893
- "step": 2420
1894
- },
1895
- {
1896
- "epoch": 0.58,
1897
- "learning_rate": 4.9144893111638955e-05,
1898
- "loss": 0.4666,
1899
- "step": 2430
1900
- },
1901
- {
1902
- "epoch": 0.58,
1903
- "learning_rate": 4.911850092372658e-05,
1904
- "loss": 0.6718,
1905
- "step": 2440
1906
- },
1907
- {
1908
- "epoch": 0.58,
1909
- "learning_rate": 4.90921087358142e-05,
1910
- "loss": 0.417,
1911
- "step": 2450
1912
- },
1913
- {
1914
- "epoch": 0.58,
1915
- "eval_accuracy": 0.7637614678899083,
1916
- "eval_loss": 0.4990720748901367,
1917
- "eval_runtime": 32.7311,
1918
- "eval_samples_per_second": 26.641,
1919
- "eval_steps_per_second": 1.68,
1920
- "step": 2450
1921
- },
1922
- {
1923
- "epoch": 0.58,
1924
- "learning_rate": 4.906571654790182e-05,
1925
- "loss": 0.4022,
1926
- "step": 2460
1927
- },
1928
- {
1929
- "epoch": 0.59,
1930
- "learning_rate": 4.9039324359989445e-05,
1931
- "loss": 0.3948,
1932
- "step": 2470
1933
- },
1934
- {
1935
- "epoch": 0.59,
1936
- "learning_rate": 4.901293217207707e-05,
1937
- "loss": 0.4361,
1938
- "step": 2480
1939
- },
1940
- {
1941
- "epoch": 0.59,
1942
- "learning_rate": 4.898653998416469e-05,
1943
- "loss": 0.3763,
1944
- "step": 2490
1945
- },
1946
- {
1947
- "epoch": 0.59,
1948
- "learning_rate": 4.8960147796252307e-05,
1949
- "loss": 0.4257,
1950
- "step": 2500
1951
- },
1952
- {
1953
- "epoch": 0.59,
1954
- "eval_accuracy": 0.7626146788990825,
1955
- "eval_loss": 0.6092020869255066,
1956
- "eval_runtime": 17.7696,
1957
- "eval_samples_per_second": 49.073,
1958
- "eval_steps_per_second": 3.095,
1959
- "step": 2500
1960
- },
1961
- {
1962
- "epoch": 0.6,
1963
- "learning_rate": 4.893375560833993e-05,
1964
- "loss": 0.3702,
1965
- "step": 2510
1966
- },
1967
- {
1968
- "epoch": 0.6,
1969
- "learning_rate": 4.890736342042755e-05,
1970
- "loss": 0.3374,
1971
- "step": 2520
1972
- },
1973
- {
1974
- "epoch": 0.6,
1975
- "learning_rate": 4.8880971232515175e-05,
1976
- "loss": 0.4812,
1977
- "step": 2530
1978
- },
1979
- {
1980
- "epoch": 0.6,
1981
- "learning_rate": 4.88545790446028e-05,
1982
- "loss": 0.3349,
1983
- "step": 2540
1984
- },
1985
- {
1986
- "epoch": 0.61,
1987
- "learning_rate": 4.882818685669042e-05,
1988
- "loss": 0.4071,
1989
- "step": 2550
1990
- },
1991
- {
1992
- "epoch": 0.61,
1993
- "eval_accuracy": 0.8084862385321101,
1994
- "eval_loss": 0.44936081767082214,
1995
- "eval_runtime": 11.5824,
1996
- "eval_samples_per_second": 75.287,
1997
- "eval_steps_per_second": 4.749,
1998
- "step": 2550
1999
- },
2000
- {
2001
- "epoch": 0.61,
2002
- "learning_rate": 4.880179466877804e-05,
2003
- "loss": 0.4774,
2004
- "step": 2560
2005
- },
2006
- {
2007
- "epoch": 0.61,
2008
- "learning_rate": 4.8775402480865665e-05,
2009
- "loss": 0.3806,
2010
- "step": 2570
2011
- },
2012
- {
2013
- "epoch": 0.61,
2014
- "learning_rate": 4.874901029295329e-05,
2015
- "loss": 0.38,
2016
- "step": 2580
2017
- },
2018
- {
2019
- "epoch": 0.62,
2020
- "learning_rate": 4.872261810504091e-05,
2021
- "loss": 0.4337,
2022
- "step": 2590
2023
- },
2024
- {
2025
- "epoch": 0.62,
2026
- "learning_rate": 4.869622591712853e-05,
2027
- "loss": 0.3033,
2028
- "step": 2600
2029
- },
2030
- {
2031
- "epoch": 0.62,
2032
- "eval_accuracy": 0.783256880733945,
2033
- "eval_loss": 0.4898684322834015,
2034
- "eval_runtime": 28.0155,
2035
- "eval_samples_per_second": 31.126,
2036
- "eval_steps_per_second": 1.963,
2037
- "step": 2600
2038
- },
2039
- {
2040
- "epoch": 0.62,
2041
- "learning_rate": 4.866983372921615e-05,
2042
- "loss": 0.3711,
2043
- "step": 2610
2044
- },
2045
- {
2046
- "epoch": 0.62,
2047
- "learning_rate": 4.864344154130377e-05,
2048
- "loss": 0.3867,
2049
- "step": 2620
2050
- },
2051
- {
2052
- "epoch": 0.62,
2053
- "learning_rate": 4.8617049353391395e-05,
2054
- "loss": 0.4458,
2055
- "step": 2630
2056
- },
2057
- {
2058
- "epoch": 0.63,
2059
- "learning_rate": 4.859065716547902e-05,
2060
- "loss": 0.3669,
2061
- "step": 2640
2062
- },
2063
- {
2064
- "epoch": 0.63,
2065
- "learning_rate": 4.856426497756664e-05,
2066
- "loss": 0.4616,
2067
- "step": 2650
2068
- },
2069
- {
2070
- "epoch": 0.63,
2071
- "eval_accuracy": 0.7844036697247706,
2072
- "eval_loss": 0.46433430910110474,
2073
- "eval_runtime": 21.9181,
2074
- "eval_samples_per_second": 39.784,
2075
- "eval_steps_per_second": 2.509,
2076
- "step": 2650
2077
- },
2078
- {
2079
- "epoch": 0.63,
2080
- "learning_rate": 4.853787278965426e-05,
2081
- "loss": 0.5175,
2082
- "step": 2660
2083
- },
2084
- {
2085
- "epoch": 0.63,
2086
- "learning_rate": 4.8511480601741886e-05,
2087
- "loss": 0.4101,
2088
- "step": 2670
2089
- },
2090
- {
2091
- "epoch": 0.64,
2092
- "learning_rate": 4.848508841382951e-05,
2093
- "loss": 0.3916,
2094
- "step": 2680
2095
- },
2096
- {
2097
- "epoch": 0.64,
2098
- "learning_rate": 4.845869622591713e-05,
2099
- "loss": 0.3209,
2100
- "step": 2690
2101
- },
2102
- {
2103
- "epoch": 0.64,
2104
- "learning_rate": 4.8432304038004754e-05,
2105
- "loss": 0.4432,
2106
- "step": 2700
2107
- },
2108
- {
2109
- "epoch": 0.64,
2110
- "eval_accuracy": 0.7981651376146789,
2111
- "eval_loss": 0.46843382716178894,
2112
- "eval_runtime": 21.7437,
2113
- "eval_samples_per_second": 40.104,
2114
- "eval_steps_per_second": 2.529,
2115
- "step": 2700
2116
- },
2117
- {
2118
- "epoch": 0.64,
2119
- "learning_rate": 4.840591185009237e-05,
2120
- "loss": 0.33,
2121
- "step": 2710
2122
- },
2123
- {
2124
- "epoch": 0.65,
2125
- "learning_rate": 4.837951966217999e-05,
2126
- "loss": 0.3966,
2127
- "step": 2720
2128
- },
2129
- {
2130
- "epoch": 0.65,
2131
- "learning_rate": 4.8353127474267615e-05,
2132
- "loss": 0.312,
2133
- "step": 2730
2134
- },
2135
- {
2136
- "epoch": 0.65,
2137
- "learning_rate": 4.832673528635524e-05,
2138
- "loss": 0.3508,
2139
- "step": 2740
2140
- },
2141
- {
2142
- "epoch": 0.65,
2143
- "learning_rate": 4.830034309844286e-05,
2144
- "loss": 0.3636,
2145
- "step": 2750
2146
- },
2147
- {
2148
- "epoch": 0.65,
2149
- "eval_accuracy": 0.7694954128440367,
2150
- "eval_loss": 0.6283801198005676,
2151
- "eval_runtime": 20.1708,
2152
- "eval_samples_per_second": 43.231,
2153
- "eval_steps_per_second": 2.727,
2154
- "step": 2750
2155
- },
2156
- {
2157
- "epoch": 0.66,
2158
- "learning_rate": 4.827395091053048e-05,
2159
- "loss": 0.5102,
2160
- "step": 2760
2161
- },
2162
- {
2163
- "epoch": 0.66,
2164
- "learning_rate": 4.8247558722618106e-05,
2165
- "loss": 0.4305,
2166
- "step": 2770
2167
- },
2168
- {
2169
- "epoch": 0.66,
2170
- "learning_rate": 4.822116653470573e-05,
2171
- "loss": 0.3684,
2172
- "step": 2780
2173
- },
2174
- {
2175
- "epoch": 0.66,
2176
- "learning_rate": 4.819477434679335e-05,
2177
- "loss": 0.3314,
2178
- "step": 2790
2179
- },
2180
- {
2181
- "epoch": 0.67,
2182
- "learning_rate": 4.8168382158880974e-05,
2183
- "loss": 0.4871,
2184
- "step": 2800
2185
- },
2186
- {
2187
- "epoch": 0.67,
2188
- "eval_accuracy": 0.7729357798165137,
2189
- "eval_loss": 0.5208825469017029,
2190
- "eval_runtime": 21.7303,
2191
- "eval_samples_per_second": 40.128,
2192
- "eval_steps_per_second": 2.531,
2193
- "step": 2800
2194
- },
2195
- {
2196
- "epoch": 0.67,
2197
- "learning_rate": 4.814198997096859e-05,
2198
- "loss": 0.3994,
2199
- "step": 2810
2200
- },
2201
- {
2202
- "epoch": 0.67,
2203
- "learning_rate": 4.811559778305621e-05,
2204
- "loss": 0.3938,
2205
- "step": 2820
2206
- },
2207
- {
2208
- "epoch": 0.67,
2209
- "learning_rate": 4.8089205595143835e-05,
2210
- "loss": 0.361,
2211
- "step": 2830
2212
- },
2213
- {
2214
- "epoch": 0.67,
2215
- "learning_rate": 4.806281340723146e-05,
2216
- "loss": 0.3131,
2217
- "step": 2840
2218
- },
2219
- {
2220
- "epoch": 0.68,
2221
- "learning_rate": 4.803642121931908e-05,
2222
- "loss": 0.4091,
2223
- "step": 2850
2224
- },
2225
- {
2226
- "epoch": 0.68,
2227
- "eval_accuracy": 0.8027522935779816,
2228
- "eval_loss": 0.43396520614624023,
2229
- "eval_runtime": 21.2308,
2230
- "eval_samples_per_second": 41.072,
2231
- "eval_steps_per_second": 2.591,
2232
- "step": 2850
2233
- },
2234
- {
2235
- "epoch": 0.68,
2236
- "learning_rate": 4.80100290314067e-05,
2237
- "loss": 0.386,
2238
- "step": 2860
2239
- },
2240
- {
2241
- "epoch": 0.68,
2242
- "learning_rate": 4.7983636843494326e-05,
2243
- "loss": 0.3953,
2244
- "step": 2870
2245
- },
2246
- {
2247
- "epoch": 0.68,
2248
- "learning_rate": 4.795724465558195e-05,
2249
- "loss": 0.4312,
2250
- "step": 2880
2251
- },
2252
- {
2253
- "epoch": 0.69,
2254
- "learning_rate": 4.793085246766957e-05,
2255
- "loss": 0.3507,
2256
- "step": 2890
2257
- },
2258
- {
2259
- "epoch": 0.69,
2260
- "learning_rate": 4.7904460279757194e-05,
2261
- "loss": 0.2085,
2262
- "step": 2900
2263
- },
2264
- {
2265
- "epoch": 0.69,
2266
- "eval_accuracy": 0.8004587155963303,
2267
- "eval_loss": 0.5883902311325073,
2268
- "eval_runtime": 21.6546,
2269
- "eval_samples_per_second": 40.269,
2270
- "eval_steps_per_second": 2.54,
2271
- "step": 2900
2272
- },
2273
- {
2274
- "epoch": 0.69,
2275
- "learning_rate": 4.7878068091844817e-05,
2276
- "loss": 0.5439,
2277
- "step": 2910
2278
- },
2279
- {
2280
- "epoch": 0.69,
2281
- "learning_rate": 4.785167590393243e-05,
2282
- "loss": 0.4659,
2283
- "step": 2920
2284
- },
2285
- {
2286
- "epoch": 0.7,
2287
- "learning_rate": 4.7825283716020055e-05,
2288
- "loss": 0.495,
2289
- "step": 2930
2290
- },
2291
- {
2292
- "epoch": 0.7,
2293
- "learning_rate": 4.779889152810768e-05,
2294
- "loss": 0.4629,
2295
- "step": 2940
2296
- },
2297
- {
2298
- "epoch": 0.7,
2299
- "learning_rate": 4.77724993401953e-05,
2300
- "loss": 0.3517,
2301
- "step": 2950
2302
- },
2303
- {
2304
- "epoch": 0.7,
2305
- "eval_accuracy": 0.7844036697247706,
2306
- "eval_loss": 0.5798487067222595,
2307
- "eval_runtime": 22.0365,
2308
- "eval_samples_per_second": 39.571,
2309
- "eval_steps_per_second": 2.496,
2310
- "step": 2950
2311
- },
2312
- {
2313
- "epoch": 0.7,
2314
- "learning_rate": 4.774610715228292e-05,
2315
- "loss": 0.5166,
2316
- "step": 2960
2317
- },
2318
- {
2319
- "epoch": 0.71,
2320
- "learning_rate": 4.7719714964370546e-05,
2321
- "loss": 0.4298,
2322
- "step": 2970
2323
- },
2324
- {
2325
- "epoch": 0.71,
2326
- "learning_rate": 4.769332277645817e-05,
2327
- "loss": 0.3885,
2328
- "step": 2980
2329
- },
2330
- {
2331
- "epoch": 0.71,
2332
- "learning_rate": 4.766693058854579e-05,
2333
- "loss": 0.4401,
2334
- "step": 2990
2335
- },
2336
- {
2337
- "epoch": 0.71,
2338
- "learning_rate": 4.7640538400633414e-05,
2339
- "loss": 0.37,
2340
- "step": 3000
2341
- },
2342
- {
2343
- "epoch": 0.71,
2344
- "eval_accuracy": 0.7981651376146789,
2345
- "eval_loss": 0.5207229852676392,
2346
- "eval_runtime": 11.4929,
2347
- "eval_samples_per_second": 75.873,
2348
- "eval_steps_per_second": 4.786,
2349
- "step": 3000
2350
- },
2351
- {
2352
- "epoch": 0.71,
2353
- "learning_rate": 4.761414621272104e-05,
2354
- "loss": 0.434,
2355
- "step": 3010
2356
- },
2357
- {
2358
- "epoch": 0.72,
2359
- "learning_rate": 4.758775402480866e-05,
2360
- "loss": 0.3609,
2361
- "step": 3020
2362
- },
2363
- {
2364
- "epoch": 0.72,
2365
- "learning_rate": 4.756136183689628e-05,
2366
- "loss": 0.4148,
2367
- "step": 3030
2368
- },
2369
- {
2370
- "epoch": 0.72,
2371
- "learning_rate": 4.7534969648983905e-05,
2372
- "loss": 0.3513,
2373
- "step": 3040
2374
- },
2375
- {
2376
- "epoch": 0.72,
2377
- "learning_rate": 4.750857746107152e-05,
2378
- "loss": 0.4267,
2379
- "step": 3050
2380
- },
2381
- {
2382
- "epoch": 0.72,
2383
- "eval_accuracy": 0.7752293577981652,
2384
- "eval_loss": 0.4631665349006653,
2385
- "eval_runtime": 11.4748,
2386
- "eval_samples_per_second": 75.993,
2387
- "eval_steps_per_second": 4.793,
2388
- "step": 3050
2389
- },
2390
- {
2391
- "epoch": 0.73,
2392
- "learning_rate": 4.748218527315914e-05,
2393
- "loss": 0.3484,
2394
- "step": 3060
2395
- },
2396
- {
2397
- "epoch": 0.73,
2398
- "learning_rate": 4.7455793085246766e-05,
2399
- "loss": 0.2985,
2400
- "step": 3070
2401
- },
2402
- {
2403
- "epoch": 0.73,
2404
- "learning_rate": 4.742940089733439e-05,
2405
- "loss": 0.4244,
2406
- "step": 3080
2407
- },
2408
- {
2409
- "epoch": 0.73,
2410
- "learning_rate": 4.740300870942201e-05,
2411
- "loss": 0.3836,
2412
- "step": 3090
2413
- },
2414
- {
2415
- "epoch": 0.74,
2416
- "learning_rate": 4.7376616521509634e-05,
2417
- "loss": 0.4646,
2418
- "step": 3100
2419
- },
2420
- {
2421
- "epoch": 0.74,
2422
- "eval_accuracy": 0.7591743119266054,
2423
- "eval_loss": 0.5199323296546936,
2424
- "eval_runtime": 11.5496,
2425
- "eval_samples_per_second": 75.5,
2426
- "eval_steps_per_second": 4.762,
2427
- "step": 3100
2428
- },
2429
- {
2430
- "epoch": 0.74,
2431
- "learning_rate": 4.735022433359726e-05,
2432
- "loss": 0.3064,
2433
- "step": 3110
2434
- },
2435
- {
2436
- "epoch": 0.74,
2437
- "learning_rate": 4.732383214568488e-05,
2438
- "loss": 0.3248,
2439
- "step": 3120
2440
- },
2441
- {
2442
- "epoch": 0.74,
2443
- "learning_rate": 4.72974399577725e-05,
2444
- "loss": 0.3718,
2445
- "step": 3130
2446
- },
2447
- {
2448
- "epoch": 0.75,
2449
- "learning_rate": 4.7271047769860125e-05,
2450
- "loss": 0.3535,
2451
- "step": 3140
2452
- },
2453
- {
2454
- "epoch": 0.75,
2455
- "learning_rate": 4.724465558194775e-05,
2456
- "loss": 0.3569,
2457
- "step": 3150
2458
- },
2459
- {
2460
- "epoch": 0.75,
2461
- "eval_accuracy": 0.7672018348623854,
2462
- "eval_loss": 0.4929494559764862,
2463
- "eval_runtime": 12.339,
2464
- "eval_samples_per_second": 70.67,
2465
- "eval_steps_per_second": 4.457,
2466
- "step": 3150
2467
- },
2468
- {
2469
- "epoch": 0.75,
2470
- "learning_rate": 4.721826339403537e-05,
2471
- "loss": 0.5133,
2472
- "step": 3160
2473
- },
2474
- {
2475
- "epoch": 0.75,
2476
- "learning_rate": 4.7191871206122986e-05,
2477
- "loss": 0.3535,
2478
- "step": 3170
2479
- },
2480
- {
2481
- "epoch": 0.76,
2482
- "learning_rate": 4.716547901821061e-05,
2483
- "loss": 0.4051,
2484
- "step": 3180
2485
- },
2486
- {
2487
- "epoch": 0.76,
2488
- "learning_rate": 4.713908683029823e-05,
2489
- "loss": 0.341,
2490
- "step": 3190
2491
- },
2492
- {
2493
- "epoch": 0.76,
2494
- "learning_rate": 4.7112694642385854e-05,
2495
- "loss": 0.3356,
2496
- "step": 3200
2497
- },
2498
- {
2499
- "epoch": 0.76,
2500
- "eval_accuracy": 0.7844036697247706,
2501
- "eval_loss": 0.4769574701786041,
2502
- "eval_runtime": 11.5756,
2503
- "eval_samples_per_second": 75.331,
2504
- "eval_steps_per_second": 4.751,
2505
- "step": 3200
2506
- },
2507
- {
2508
- "epoch": 0.76,
2509
- "learning_rate": 4.708630245447348e-05,
2510
- "loss": 0.3053,
2511
- "step": 3210
2512
- },
2513
- {
2514
- "epoch": 0.76,
2515
- "learning_rate": 4.70599102665611e-05,
2516
- "loss": 0.3905,
2517
- "step": 3220
2518
- },
2519
- {
2520
- "epoch": 0.77,
2521
- "learning_rate": 4.703351807864872e-05,
2522
- "loss": 0.4397,
2523
- "step": 3230
2524
- },
2525
- {
2526
- "epoch": 0.77,
2527
- "learning_rate": 4.7007125890736345e-05,
2528
- "loss": 0.4199,
2529
- "step": 3240
2530
- },
2531
- {
2532
- "epoch": 0.77,
2533
- "learning_rate": 4.698073370282397e-05,
2534
- "loss": 0.3777,
2535
- "step": 3250
2536
- },
2537
- {
2538
- "epoch": 0.77,
2539
- "eval_accuracy": 0.786697247706422,
2540
- "eval_loss": 0.4674142897129059,
2541
- "eval_runtime": 25.3754,
2542
- "eval_samples_per_second": 34.364,
2543
- "eval_steps_per_second": 2.167,
2544
- "step": 3250
2545
- },
2546
- {
2547
- "epoch": 0.77,
2548
- "learning_rate": 4.695434151491159e-05,
2549
- "loss": 0.3039,
2550
- "step": 3260
2551
- },
2552
- {
2553
- "epoch": 0.78,
2554
- "learning_rate": 4.692794932699921e-05,
2555
- "loss": 0.3881,
2556
- "step": 3270
2557
- },
2558
- {
2559
- "epoch": 0.78,
2560
- "learning_rate": 4.6901557139086836e-05,
2561
- "loss": 0.2602,
2562
- "step": 3280
2563
- },
2564
- {
2565
- "epoch": 0.78,
2566
- "learning_rate": 4.687516495117445e-05,
2567
- "loss": 0.3553,
2568
- "step": 3290
2569
- },
2570
- {
2571
- "epoch": 0.78,
2572
- "learning_rate": 4.6848772763262074e-05,
2573
- "loss": 0.3472,
2574
- "step": 3300
2575
- },
2576
- {
2577
- "epoch": 0.78,
2578
- "eval_accuracy": 0.7373853211009175,
2579
- "eval_loss": 0.5634092688560486,
2580
- "eval_runtime": 26.2811,
2581
- "eval_samples_per_second": 33.18,
2582
- "eval_steps_per_second": 2.093,
2583
- "step": 3300
2584
- },
2585
- {
2586
- "epoch": 0.79,
2587
- "learning_rate": 4.68223805753497e-05,
2588
- "loss": 0.4972,
2589
- "step": 3310
2590
- },
2591
- {
2592
- "epoch": 0.79,
2593
- "learning_rate": 4.679598838743732e-05,
2594
- "loss": 0.3577,
2595
- "step": 3320
2596
- },
2597
- {
2598
- "epoch": 0.79,
2599
- "learning_rate": 4.676959619952494e-05,
2600
- "loss": 0.3653,
2601
- "step": 3330
2602
- },
2603
- {
2604
- "epoch": 0.79,
2605
- "learning_rate": 4.6743204011612565e-05,
2606
- "loss": 0.2476,
2607
- "step": 3340
2608
- },
2609
- {
2610
- "epoch": 0.8,
2611
- "learning_rate": 4.671681182370019e-05,
2612
- "loss": 0.4177,
2613
- "step": 3350
2614
- },
2615
- {
2616
- "epoch": 0.8,
2617
- "eval_accuracy": 0.783256880733945,
2618
- "eval_loss": 0.5188720226287842,
2619
- "eval_runtime": 24.6069,
2620
- "eval_samples_per_second": 35.437,
2621
- "eval_steps_per_second": 2.235,
2622
- "step": 3350
2623
- },
2624
- {
2625
- "epoch": 0.8,
2626
- "learning_rate": 4.669041963578781e-05,
2627
- "loss": 0.3181,
2628
- "step": 3360
2629
- },
2630
- {
2631
- "epoch": 0.8,
2632
- "learning_rate": 4.666402744787543e-05,
2633
- "loss": 0.3579,
2634
- "step": 3370
2635
- },
2636
- {
2637
- "epoch": 0.8,
2638
- "learning_rate": 4.6637635259963056e-05,
2639
- "loss": 0.5733,
2640
- "step": 3380
2641
- },
2642
- {
2643
- "epoch": 0.81,
2644
- "learning_rate": 4.661124307205068e-05,
2645
- "loss": 0.3851,
2646
- "step": 3390
2647
- },
2648
- {
2649
- "epoch": 0.81,
2650
- "learning_rate": 4.65848508841383e-05,
2651
- "loss": 0.4028,
2652
- "step": 3400
2653
- },
2654
- {
2655
- "epoch": 0.81,
2656
- "eval_accuracy": 0.7844036697247706,
2657
- "eval_loss": 0.4956331253051758,
2658
- "eval_runtime": 24.5882,
2659
- "eval_samples_per_second": 35.464,
2660
- "eval_steps_per_second": 2.237,
2661
- "step": 3400
2662
- },
2663
- {
2664
- "epoch": 0.81,
2665
- "learning_rate": 4.6558458696225924e-05,
2666
- "loss": 0.3687,
2667
- "step": 3410
2668
- },
2669
- {
2670
- "epoch": 0.81,
2671
- "learning_rate": 4.653206650831354e-05,
2672
- "loss": 0.353,
2673
- "step": 3420
2674
- },
2675
- {
2676
- "epoch": 0.81,
2677
- "learning_rate": 4.650567432040116e-05,
2678
- "loss": 0.3628,
2679
- "step": 3430
2680
- },
2681
- {
2682
- "epoch": 0.82,
2683
- "learning_rate": 4.6479282132488785e-05,
2684
- "loss": 0.5389,
2685
- "step": 3440
2686
- },
2687
- {
2688
- "epoch": 0.82,
2689
- "learning_rate": 4.645288994457641e-05,
2690
- "loss": 0.483,
2691
- "step": 3450
2692
- },
2693
- {
2694
- "epoch": 0.82,
2695
- "eval_accuracy": 0.8084862385321101,
2696
- "eval_loss": 0.43807780742645264,
2697
- "eval_runtime": 24.4347,
2698
- "eval_samples_per_second": 35.687,
2699
- "eval_steps_per_second": 2.251,
2700
- "step": 3450
2701
- },
2702
- {
2703
- "epoch": 0.82,
2704
- "learning_rate": 4.642649775666403e-05,
2705
- "loss": 0.3414,
2706
- "step": 3460
2707
- },
2708
- {
2709
- "epoch": 0.82,
2710
- "learning_rate": 4.640010556875165e-05,
2711
- "loss": 0.4314,
2712
- "step": 3470
2713
- },
2714
- {
2715
- "epoch": 0.83,
2716
- "learning_rate": 4.6373713380839276e-05,
2717
- "loss": 0.2962,
2718
- "step": 3480
2719
- },
2720
- {
2721
- "epoch": 0.83,
2722
- "learning_rate": 4.63473211929269e-05,
2723
- "loss": 0.2627,
2724
- "step": 3490
2725
- },
2726
- {
2727
- "epoch": 0.83,
2728
- "learning_rate": 4.632092900501452e-05,
2729
- "loss": 0.3413,
2730
- "step": 3500
2731
- },
2732
- {
2733
- "epoch": 0.83,
2734
- "eval_accuracy": 0.7935779816513762,
2735
- "eval_loss": 0.5697915554046631,
2736
- "eval_runtime": 14.9148,
2737
- "eval_samples_per_second": 58.465,
2738
- "eval_steps_per_second": 3.688,
2739
- "step": 3500
2740
- },
2741
- {
2742
- "epoch": 0.83,
2743
- "learning_rate": 4.6294536817102144e-05,
2744
- "loss": 0.4393,
2745
- "step": 3510
2746
- },
2747
- {
2748
- "epoch": 0.84,
2749
- "learning_rate": 4.626814462918977e-05,
2750
- "loss": 0.4142,
2751
- "step": 3520
2752
- },
2753
- {
2754
- "epoch": 0.84,
2755
- "learning_rate": 4.624175244127739e-05,
2756
- "loss": 0.3803,
2757
- "step": 3530
2758
- },
2759
- {
2760
- "epoch": 0.84,
2761
- "learning_rate": 4.6215360253365005e-05,
2762
- "loss": 0.4087,
2763
- "step": 3540
2764
- },
2765
- {
2766
- "epoch": 0.84,
2767
- "learning_rate": 4.618896806545263e-05,
2768
- "loss": 0.3966,
2769
- "step": 3550
2770
- },
2771
- {
2772
- "epoch": 0.84,
2773
- "eval_accuracy": 0.786697247706422,
2774
- "eval_loss": 0.47139275074005127,
2775
- "eval_runtime": 23.4839,
2776
- "eval_samples_per_second": 37.132,
2777
- "eval_steps_per_second": 2.342,
2778
- "step": 3550
2779
- },
2780
- {
2781
- "epoch": 0.85,
2782
- "learning_rate": 4.616257587754025e-05,
2783
- "loss": 0.3998,
2784
- "step": 3560
2785
- },
2786
- {
2787
- "epoch": 0.85,
2788
- "learning_rate": 4.6136183689627873e-05,
2789
- "loss": 0.3903,
2790
- "step": 3570
2791
- },
2792
- {
2793
- "epoch": 0.85,
2794
- "learning_rate": 4.6109791501715496e-05,
2795
- "loss": 0.3425,
2796
- "step": 3580
2797
- },
2798
- {
2799
- "epoch": 0.85,
2800
- "learning_rate": 4.608339931380312e-05,
2801
- "loss": 0.321,
2802
- "step": 3590
2803
- },
2804
- {
2805
- "epoch": 0.86,
2806
- "learning_rate": 4.605700712589074e-05,
2807
- "loss": 0.3299,
2808
- "step": 3600
2809
- },
2810
- {
2811
- "epoch": 0.86,
2812
- "eval_accuracy": 0.783256880733945,
2813
- "eval_loss": 0.46382883191108704,
2814
- "eval_runtime": 24.48,
2815
- "eval_samples_per_second": 35.621,
2816
- "eval_steps_per_second": 2.247,
2817
- "step": 3600
2818
- },
2819
- {
2820
- "epoch": 0.86,
2821
- "learning_rate": 4.6030614937978364e-05,
2822
- "loss": 0.4465,
2823
- "step": 3610
2824
- },
2825
- {
2826
- "epoch": 0.86,
2827
- "learning_rate": 4.600422275006599e-05,
2828
- "loss": 0.3761,
2829
- "step": 3620
2830
- },
2831
- {
2832
- "epoch": 0.86,
2833
- "learning_rate": 4.597783056215361e-05,
2834
- "loss": 0.346,
2835
- "step": 3630
2836
- },
2837
- {
2838
- "epoch": 0.86,
2839
- "learning_rate": 4.595143837424123e-05,
2840
- "loss": 0.3839,
2841
- "step": 3640
2842
- },
2843
- {
2844
- "epoch": 0.87,
2845
- "learning_rate": 4.592504618632885e-05,
2846
- "loss": 0.4783,
2847
- "step": 3650
2848
- },
2849
- {
2850
- "epoch": 0.87,
2851
- "eval_accuracy": 0.7844036697247706,
2852
- "eval_loss": 0.49812304973602295,
2853
- "eval_runtime": 24.7287,
2854
- "eval_samples_per_second": 35.263,
2855
- "eval_steps_per_second": 2.224,
2856
- "step": 3650
2857
- },
2858
- {
2859
- "epoch": 0.87,
2860
- "learning_rate": 4.589865399841647e-05,
2861
- "loss": 0.3789,
2862
- "step": 3660
2863
- },
2864
- {
2865
- "epoch": 0.87,
2866
- "learning_rate": 4.5872261810504094e-05,
2867
- "loss": 0.4411,
2868
- "step": 3670
2869
- },
2870
- {
2871
- "epoch": 0.87,
2872
- "learning_rate": 4.5845869622591716e-05,
2873
- "loss": 0.4694,
2874
- "step": 3680
2875
- },
2876
- {
2877
- "epoch": 0.88,
2878
- "learning_rate": 4.581947743467934e-05,
2879
- "loss": 0.2994,
2880
- "step": 3690
2881
- },
2882
- {
2883
- "epoch": 0.88,
2884
- "learning_rate": 4.579308524676696e-05,
2885
- "loss": 0.4475,
2886
- "step": 3700
2887
- },
2888
- {
2889
- "epoch": 0.88,
2890
- "eval_accuracy": 0.8027522935779816,
2891
- "eval_loss": 0.4598585367202759,
2892
- "eval_runtime": 24.9853,
2893
- "eval_samples_per_second": 34.901,
2894
- "eval_steps_per_second": 2.201,
2895
- "step": 3700
2896
- },
2897
- {
2898
- "epoch": 0.88,
2899
- "learning_rate": 4.5766693058854584e-05,
2900
- "loss": 0.3204,
2901
- "step": 3710
2902
- },
2903
- {
2904
- "epoch": 0.88,
2905
- "learning_rate": 4.574030087094221e-05,
2906
- "loss": 0.3833,
2907
- "step": 3720
2908
- },
2909
- {
2910
- "epoch": 0.89,
2911
- "learning_rate": 4.571390868302983e-05,
2912
- "loss": 0.2844,
2913
- "step": 3730
2914
- },
2915
- {
2916
- "epoch": 0.89,
2917
- "learning_rate": 4.568751649511745e-05,
2918
- "loss": 0.3206,
2919
- "step": 3740
2920
- },
2921
- {
2922
- "epoch": 0.89,
2923
- "learning_rate": 4.5661124307205075e-05,
2924
- "loss": 0.3527,
2925
- "step": 3750
2926
- },
2927
- {
2928
- "epoch": 0.89,
2929
- "eval_accuracy": 0.7981651376146789,
2930
- "eval_loss": 0.5331198573112488,
2931
- "eval_runtime": 24.8534,
2932
- "eval_samples_per_second": 35.086,
2933
- "eval_steps_per_second": 2.213,
2934
- "step": 3750
2935
- },
2936
- {
2937
- "epoch": 0.89,
2938
- "learning_rate": 4.563473211929269e-05,
2939
- "loss": 0.3891,
2940
- "step": 3760
2941
- },
2942
- {
2943
- "epoch": 0.9,
2944
- "learning_rate": 4.5608339931380314e-05,
2945
- "loss": 0.324,
2946
- "step": 3770
2947
- },
2948
- {
2949
- "epoch": 0.9,
2950
- "learning_rate": 4.5581947743467936e-05,
2951
- "loss": 0.3532,
2952
- "step": 3780
2953
- },
2954
- {
2955
- "epoch": 0.9,
2956
- "learning_rate": 4.555555555555556e-05,
2957
- "loss": 0.3021,
2958
- "step": 3790
2959
- },
2960
- {
2961
- "epoch": 0.9,
2962
- "learning_rate": 4.552916336764318e-05,
2963
- "loss": 0.4124,
2964
- "step": 3800
2965
- },
2966
- {
2967
- "epoch": 0.9,
2968
- "eval_accuracy": 0.7626146788990825,
2969
- "eval_loss": 0.5969462394714355,
2970
- "eval_runtime": 19.8238,
2971
- "eval_samples_per_second": 43.988,
2972
- "eval_steps_per_second": 2.774,
2973
- "step": 3800
2974
- },
2975
- {
2976
- "epoch": 0.9,
2977
- "learning_rate": 4.5502771179730804e-05,
2978
- "loss": 0.5429,
2979
- "step": 3810
2980
- },
2981
- {
2982
- "epoch": 0.91,
2983
- "learning_rate": 4.547637899181843e-05,
2984
- "loss": 0.4458,
2985
- "step": 3820
2986
- },
2987
- {
2988
- "epoch": 0.91,
2989
- "learning_rate": 4.544998680390605e-05,
2990
- "loss": 0.3272,
2991
- "step": 3830
2992
- },
2993
- {
2994
- "epoch": 0.91,
2995
- "learning_rate": 4.542359461599367e-05,
2996
- "loss": 0.3482,
2997
- "step": 3840
2998
- },
2999
- {
3000
- "epoch": 0.91,
3001
- "learning_rate": 4.5397202428081295e-05,
3002
- "loss": 0.3683,
3003
- "step": 3850
3004
- },
3005
- {
3006
- "epoch": 0.91,
3007
- "eval_accuracy": 0.7775229357798165,
3008
- "eval_loss": 0.5118904113769531,
3009
- "eval_runtime": 11.5129,
3010
- "eval_samples_per_second": 75.741,
3011
- "eval_steps_per_second": 4.777,
3012
- "step": 3850
3013
- },
3014
- {
3015
- "epoch": 0.92,
3016
- "learning_rate": 4.537081024016891e-05,
3017
- "loss": 0.2501,
3018
- "step": 3860
3019
- },
3020
- {
3021
- "epoch": 0.92,
3022
- "learning_rate": 4.5344418052256534e-05,
3023
- "loss": 0.3256,
3024
- "step": 3870
3025
- },
3026
- {
3027
- "epoch": 0.92,
3028
- "learning_rate": 4.5318025864344157e-05,
3029
- "loss": 0.4049,
3030
- "step": 3880
3031
- },
3032
- {
3033
- "epoch": 0.92,
3034
- "learning_rate": 4.529163367643178e-05,
3035
- "loss": 0.2541,
3036
- "step": 3890
3037
- },
3038
- {
3039
- "epoch": 0.93,
3040
- "learning_rate": 4.52652414885194e-05,
3041
- "loss": 0.3894,
3042
- "step": 3900
3043
- },
3044
- {
3045
- "epoch": 0.93,
3046
- "eval_accuracy": 0.8084862385321101,
3047
- "eval_loss": 0.5941323041915894,
3048
- "eval_runtime": 25.3823,
3049
- "eval_samples_per_second": 34.355,
3050
- "eval_steps_per_second": 2.167,
3051
- "step": 3900
3052
- },
3053
- {
3054
- "epoch": 0.93,
3055
- "learning_rate": 4.5238849300607025e-05,
3056
- "loss": 0.4195,
3057
- "step": 3910
3058
- },
3059
- {
3060
- "epoch": 0.93,
3061
- "learning_rate": 4.521245711269465e-05,
3062
- "loss": 0.2915,
3063
- "step": 3920
3064
- },
3065
- {
3066
- "epoch": 0.93,
3067
- "learning_rate": 4.518606492478227e-05,
3068
- "loss": 0.3745,
3069
- "step": 3930
3070
- },
3071
- {
3072
- "epoch": 0.94,
3073
- "learning_rate": 4.515967273686989e-05,
3074
- "loss": 0.3915,
3075
- "step": 3940
3076
- },
3077
- {
3078
- "epoch": 0.94,
3079
- "learning_rate": 4.5133280548957515e-05,
3080
- "loss": 0.4001,
3081
- "step": 3950
3082
- },
3083
- {
3084
- "epoch": 0.94,
3085
- "eval_accuracy": 0.7717889908256881,
3086
- "eval_loss": 0.4977372884750366,
3087
- "eval_runtime": 11.5193,
3088
- "eval_samples_per_second": 75.699,
3089
- "eval_steps_per_second": 4.775,
3090
- "step": 3950
3091
- },
3092
- {
3093
- "epoch": 0.94,
3094
- "learning_rate": 4.510688836104514e-05,
3095
- "loss": 0.2936,
3096
- "step": 3960
3097
- },
3098
- {
3099
- "epoch": 0.94,
3100
- "learning_rate": 4.5080496173132754e-05,
3101
- "loss": 0.3909,
3102
- "step": 3970
3103
- },
3104
- {
3105
- "epoch": 0.95,
3106
- "learning_rate": 4.505410398522038e-05,
3107
- "loss": 0.356,
3108
- "step": 3980
3109
- },
3110
- {
3111
- "epoch": 0.95,
3112
- "learning_rate": 4.5027711797308e-05,
3113
- "loss": 0.3305,
3114
- "step": 3990
3115
- },
3116
- {
3117
- "epoch": 0.95,
3118
- "learning_rate": 4.500131960939562e-05,
3119
- "loss": 0.3394,
3120
- "step": 4000
3121
- },
3122
- {
3123
- "epoch": 0.95,
3124
- "eval_accuracy": 0.7981651376146789,
3125
- "eval_loss": 0.5128748416900635,
3126
- "eval_runtime": 11.5097,
3127
- "eval_samples_per_second": 75.762,
3128
- "eval_steps_per_second": 4.779,
3129
- "step": 4000
3130
- }
3131
- ],
3132
- "logging_steps": 10,
3133
- "max_steps": 21050,
3134
- "num_input_tokens_seen": 0,
3135
- "num_train_epochs": 5,
3136
- "save_steps": 500,
3137
- "total_flos": 1.6839258734592e+16,
3138
- "train_batch_size": 16,
3139
- "trial_name": null,
3140
- "trial_params": null
3141
- }