nielsr HF staff commited on
Commit
5904b65
1 Parent(s): d95f787

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9863,
4
- "eval_loss": 0.09992218017578125,
5
- "eval_runtime": 87.3311,
6
- "eval_samples_per_second": 114.507,
7
- "eval_steps_per_second": 3.584,
8
  "total_flos": 1.161843208194687e+19,
9
- "train_loss": 0.5011348225112654,
10
- "train_runtime": 3321.3479,
11
- "train_samples_per_second": 45.162,
12
- "train_steps_per_second": 0.352
13
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9855,
4
+ "eval_loss": 0.10110656172037125,
5
+ "eval_runtime": 153.2781,
6
+ "eval_samples_per_second": 65.241,
7
+ "eval_steps_per_second": 2.042,
8
  "total_flos": 1.161843208194687e+19,
9
+ "train_loss": 0.5034837816515539,
10
+ "train_runtime": 6176.4988,
11
+ "train_samples_per_second": 24.286,
12
+ "train_steps_per_second": 0.189
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9863,
4
- "eval_loss": 0.09992218017578125,
5
- "eval_runtime": 87.3311,
6
- "eval_samples_per_second": 114.507,
7
- "eval_steps_per_second": 3.584
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9855,
4
+ "eval_loss": 0.10110656172037125,
5
+ "eval_runtime": 153.2781,
6
+ "eval_samples_per_second": 65.241,
7
+ "eval_steps_per_second": 2.042
8
  }
runs/Feb14_15-28-49_f9ff78a7b58d/events.out.tfevents.1644859402.f9ff78a7b58d.82.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfb36ac69f21bd4591d1a9404d1e7eae2769a696718bfb744c1ca7cf931622a7
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 1.161843208194687e+19,
4
- "train_loss": 0.5011348225112654,
5
- "train_runtime": 3321.3479,
6
- "train_samples_per_second": 45.162,
7
- "train_steps_per_second": 0.352
8
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 1.161843208194687e+19,
4
+ "train_loss": 0.5034837816515539,
5
+ "train_runtime": 6176.4988,
6
+ "train_samples_per_second": 24.286,
7
+ "train_steps_per_second": 0.189
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9863,
3
  "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-cifar10/checkpoint-1170",
4
  "epoch": 2.998080614203455,
5
  "global_step": 1170,
@@ -10,145 +10,145 @@
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 4.273504273504274e-06,
13
- "loss": 2.3003,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.05,
18
  "learning_rate": 8.547008547008548e-06,
19
- "loss": 2.2805,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.08,
24
  "learning_rate": 1.282051282051282e-05,
25
- "loss": 2.2424,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.1,
30
  "learning_rate": 1.7094017094017095e-05,
31
- "loss": 2.1851,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.13,
36
  "learning_rate": 2.1367521367521368e-05,
37
- "loss": 2.0954,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.15,
42
  "learning_rate": 2.564102564102564e-05,
43
- "loss": 1.9605,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.18,
48
  "learning_rate": 2.9914529914529915e-05,
49
- "loss": 1.8082,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.2,
54
  "learning_rate": 3.418803418803419e-05,
55
- "loss": 1.6045,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.23,
60
  "learning_rate": 3.846153846153846e-05,
61
- "loss": 1.416,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.26,
66
  "learning_rate": 4.2735042735042735e-05,
67
- "loss": 1.2521,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.28,
72
  "learning_rate": 4.700854700854701e-05,
73
- "loss": 1.1004,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.31,
78
  "learning_rate": 4.985754985754986e-05,
79
- "loss": 0.9802,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.33,
84
  "learning_rate": 4.938271604938271e-05,
85
- "loss": 0.8694,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.36,
90
  "learning_rate": 4.890788224121557e-05,
91
- "loss": 0.8239,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.38,
96
  "learning_rate": 4.8433048433048433e-05,
97
- "loss": 0.726,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.41,
102
  "learning_rate": 4.7958214624881294e-05,
103
- "loss": 0.6769,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.44,
108
  "learning_rate": 4.7483380816714154e-05,
109
- "loss": 0.653,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.46,
114
  "learning_rate": 4.700854700854701e-05,
115
- "loss": 0.6043,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.49,
120
  "learning_rate": 4.653371320037987e-05,
121
- "loss": 0.6188,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.51,
126
  "learning_rate": 4.605887939221273e-05,
127
- "loss": 0.5447,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.54,
132
  "learning_rate": 4.558404558404559e-05,
133
- "loss": 0.5582,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.56,
138
  "learning_rate": 4.510921177587845e-05,
139
- "loss": 0.525,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.59,
144
  "learning_rate": 4.463437796771131e-05,
145
- "loss": 0.5545,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.61,
150
  "learning_rate": 4.415954415954416e-05,
151
- "loss": 0.5203,
152
  "step": 240
153
  },
154
  {
@@ -160,590 +160,590 @@
160
  {
161
  "epoch": 0.67,
162
  "learning_rate": 4.3209876543209875e-05,
163
- "loss": 0.4704,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.69,
168
  "learning_rate": 4.2735042735042735e-05,
169
- "loss": 0.5016,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.72,
174
  "learning_rate": 4.2260208926875595e-05,
175
- "loss": 0.4651,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.74,
180
  "learning_rate": 4.1785375118708455e-05,
181
- "loss": 0.4805,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.77,
186
  "learning_rate": 4.131054131054131e-05,
187
- "loss": 0.4609,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.79,
192
  "learning_rate": 4.083570750237417e-05,
193
- "loss": 0.4392,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.82,
198
  "learning_rate": 4.036087369420703e-05,
199
- "loss": 0.4761,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.84,
204
  "learning_rate": 3.988603988603989e-05,
205
- "loss": 0.4517,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.87,
210
  "learning_rate": 3.941120607787275e-05,
211
- "loss": 0.4361,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.9,
216
  "learning_rate": 3.893637226970561e-05,
217
- "loss": 0.4287,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.92,
222
  "learning_rate": 3.846153846153846e-05,
223
- "loss": 0.4151,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.95,
228
  "learning_rate": 3.798670465337132e-05,
229
- "loss": 0.4098,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.97,
234
  "learning_rate": 3.7511870845204176e-05,
235
- "loss": 0.4236,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 1.0,
240
  "learning_rate": 3.7037037037037037e-05,
241
- "loss": 0.3897,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 1.0,
246
- "eval_accuracy": 0.9757,
247
- "eval_loss": 0.20828521251678467,
248
- "eval_runtime": 85.3525,
249
- "eval_samples_per_second": 117.161,
250
- "eval_steps_per_second": 3.667,
251
  "step": 390
252
  },
253
  {
254
  "epoch": 1.03,
255
  "learning_rate": 3.65622032288699e-05,
256
- "loss": 0.4293,
257
  "step": 400
258
  },
259
  {
260
  "epoch": 1.05,
261
  "learning_rate": 3.608736942070276e-05,
262
- "loss": 0.3485,
263
  "step": 410
264
  },
265
  {
266
  "epoch": 1.08,
267
  "learning_rate": 3.561253561253561e-05,
268
- "loss": 0.387,
269
  "step": 420
270
  },
271
  {
272
  "epoch": 1.1,
273
  "learning_rate": 3.513770180436847e-05,
274
- "loss": 0.3417,
275
  "step": 430
276
  },
277
  {
278
  "epoch": 1.13,
279
  "learning_rate": 3.466286799620133e-05,
280
- "loss": 0.3836,
281
  "step": 440
282
  },
283
  {
284
  "epoch": 1.15,
285
  "learning_rate": 3.418803418803419e-05,
286
- "loss": 0.3623,
287
  "step": 450
288
  },
289
  {
290
  "epoch": 1.18,
291
  "learning_rate": 3.371320037986705e-05,
292
- "loss": 0.3662,
293
  "step": 460
294
  },
295
  {
296
  "epoch": 1.2,
297
  "learning_rate": 3.323836657169991e-05,
298
- "loss": 0.3724,
299
  "step": 470
300
  },
301
  {
302
  "epoch": 1.23,
303
  "learning_rate": 3.2763532763532764e-05,
304
- "loss": 0.3278,
305
  "step": 480
306
  },
307
  {
308
  "epoch": 1.26,
309
  "learning_rate": 3.2288698955365625e-05,
310
- "loss": 0.3703,
311
  "step": 490
312
  },
313
  {
314
  "epoch": 1.28,
315
  "learning_rate": 3.181386514719848e-05,
316
- "loss": 0.3691,
317
  "step": 500
318
  },
319
  {
320
  "epoch": 1.31,
321
  "learning_rate": 3.133903133903134e-05,
322
- "loss": 0.3644,
323
  "step": 510
324
  },
325
  {
326
  "epoch": 1.33,
327
  "learning_rate": 3.08641975308642e-05,
328
- "loss": 0.3536,
329
  "step": 520
330
  },
331
  {
332
  "epoch": 1.36,
333
  "learning_rate": 3.0389363722697055e-05,
334
- "loss": 0.3383,
335
  "step": 530
336
  },
337
  {
338
  "epoch": 1.38,
339
  "learning_rate": 2.9914529914529915e-05,
340
- "loss": 0.3239,
341
  "step": 540
342
  },
343
  {
344
  "epoch": 1.41,
345
  "learning_rate": 2.9439696106362775e-05,
346
- "loss": 0.3155,
347
  "step": 550
348
  },
349
  {
350
  "epoch": 1.44,
351
  "learning_rate": 2.8964862298195632e-05,
352
- "loss": 0.3401,
353
  "step": 560
354
  },
355
  {
356
  "epoch": 1.46,
357
  "learning_rate": 2.8490028490028492e-05,
358
- "loss": 0.3221,
359
  "step": 570
360
  },
361
  {
362
  "epoch": 1.49,
363
  "learning_rate": 2.8015194681861352e-05,
364
- "loss": 0.3183,
365
  "step": 580
366
  },
367
  {
368
  "epoch": 1.51,
369
  "learning_rate": 2.754036087369421e-05,
370
- "loss": 0.3256,
371
  "step": 590
372
  },
373
  {
374
  "epoch": 1.54,
375
  "learning_rate": 2.706552706552707e-05,
376
- "loss": 0.3407,
377
  "step": 600
378
  },
379
  {
380
  "epoch": 1.56,
381
  "learning_rate": 2.6590693257359926e-05,
382
- "loss": 0.2972,
383
  "step": 610
384
  },
385
  {
386
  "epoch": 1.59,
387
  "learning_rate": 2.611585944919278e-05,
388
- "loss": 0.3366,
389
  "step": 620
390
  },
391
  {
392
  "epoch": 1.61,
393
  "learning_rate": 2.564102564102564e-05,
394
- "loss": 0.2942,
395
  "step": 630
396
  },
397
  {
398
  "epoch": 1.64,
399
  "learning_rate": 2.51661918328585e-05,
400
- "loss": 0.3066,
401
  "step": 640
402
  },
403
  {
404
  "epoch": 1.67,
405
  "learning_rate": 2.4691358024691357e-05,
406
- "loss": 0.3247,
407
  "step": 650
408
  },
409
  {
410
  "epoch": 1.69,
411
  "learning_rate": 2.4216524216524217e-05,
412
- "loss": 0.3191,
413
  "step": 660
414
  },
415
  {
416
  "epoch": 1.72,
417
  "learning_rate": 2.3741690408357077e-05,
418
- "loss": 0.3076,
419
  "step": 670
420
  },
421
  {
422
  "epoch": 1.74,
423
  "learning_rate": 2.3266856600189934e-05,
424
- "loss": 0.3037,
425
  "step": 680
426
  },
427
  {
428
  "epoch": 1.77,
429
  "learning_rate": 2.2792022792022794e-05,
430
- "loss": 0.2986,
431
  "step": 690
432
  },
433
  {
434
  "epoch": 1.79,
435
  "learning_rate": 2.2317188983855654e-05,
436
- "loss": 0.2659,
437
  "step": 700
438
  },
439
  {
440
  "epoch": 1.82,
441
  "learning_rate": 2.184235517568851e-05,
442
- "loss": 0.3203,
443
  "step": 710
444
  },
445
  {
446
  "epoch": 1.84,
447
  "learning_rate": 2.1367521367521368e-05,
448
- "loss": 0.2952,
449
  "step": 720
450
  },
451
  {
452
  "epoch": 1.87,
453
  "learning_rate": 2.0892687559354228e-05,
454
- "loss": 0.3053,
455
  "step": 730
456
  },
457
  {
458
  "epoch": 1.9,
459
  "learning_rate": 2.0417853751187084e-05,
460
- "loss": 0.2889,
461
  "step": 740
462
  },
463
  {
464
  "epoch": 1.92,
465
  "learning_rate": 1.9943019943019945e-05,
466
- "loss": 0.2518,
467
  "step": 750
468
  },
469
  {
470
  "epoch": 1.95,
471
  "learning_rate": 1.9468186134852805e-05,
472
- "loss": 0.2816,
473
  "step": 760
474
  },
475
  {
476
  "epoch": 1.97,
477
  "learning_rate": 1.899335232668566e-05,
478
- "loss": 0.2813,
479
  "step": 770
480
  },
481
  {
482
  "epoch": 2.0,
483
  "learning_rate": 1.8518518518518518e-05,
484
- "loss": 0.3045,
485
  "step": 780
486
  },
487
  {
488
  "epoch": 2.0,
489
- "eval_accuracy": 0.986,
490
- "eval_loss": 0.11745991557836533,
491
- "eval_runtime": 84.0012,
492
- "eval_samples_per_second": 119.046,
493
- "eval_steps_per_second": 3.726,
494
  "step": 780
495
  },
496
  {
497
  "epoch": 2.03,
498
  "learning_rate": 1.804368471035138e-05,
499
- "loss": 0.2973,
500
  "step": 790
501
  },
502
  {
503
  "epoch": 2.05,
504
  "learning_rate": 1.7568850902184235e-05,
505
- "loss": 0.27,
506
  "step": 800
507
  },
508
  {
509
  "epoch": 2.08,
510
  "learning_rate": 1.7094017094017095e-05,
511
- "loss": 0.2932,
512
  "step": 810
513
  },
514
  {
515
  "epoch": 2.1,
516
  "learning_rate": 1.6619183285849956e-05,
517
- "loss": 0.3179,
518
  "step": 820
519
  },
520
  {
521
  "epoch": 2.13,
522
  "learning_rate": 1.6144349477682812e-05,
523
- "loss": 0.2581,
524
  "step": 830
525
  },
526
  {
527
  "epoch": 2.15,
528
  "learning_rate": 1.566951566951567e-05,
529
- "loss": 0.275,
530
  "step": 840
531
  },
532
  {
533
  "epoch": 2.18,
534
  "learning_rate": 1.5194681861348528e-05,
535
- "loss": 0.2882,
536
  "step": 850
537
  },
538
  {
539
  "epoch": 2.2,
540
  "learning_rate": 1.4719848053181388e-05,
541
- "loss": 0.26,
542
  "step": 860
543
  },
544
  {
545
  "epoch": 2.23,
546
  "learning_rate": 1.4245014245014246e-05,
547
- "loss": 0.2748,
548
  "step": 870
549
  },
550
  {
551
  "epoch": 2.26,
552
  "learning_rate": 1.3770180436847105e-05,
553
- "loss": 0.2553,
554
  "step": 880
555
  },
556
  {
557
  "epoch": 2.28,
558
  "learning_rate": 1.3295346628679963e-05,
559
- "loss": 0.2578,
560
  "step": 890
561
  },
562
  {
563
  "epoch": 2.31,
564
  "learning_rate": 1.282051282051282e-05,
565
- "loss": 0.3364,
566
  "step": 900
567
  },
568
  {
569
  "epoch": 2.33,
570
  "learning_rate": 1.2345679012345678e-05,
571
- "loss": 0.2927,
572
  "step": 910
573
  },
574
  {
575
  "epoch": 2.36,
576
  "learning_rate": 1.1870845204178538e-05,
577
- "loss": 0.2481,
578
  "step": 920
579
  },
580
  {
581
  "epoch": 2.38,
582
  "learning_rate": 1.1396011396011397e-05,
583
- "loss": 0.2591,
584
  "step": 930
585
  },
586
  {
587
  "epoch": 2.41,
588
  "learning_rate": 1.0921177587844255e-05,
589
- "loss": 0.2648,
590
  "step": 940
591
  },
592
  {
593
  "epoch": 2.44,
594
  "learning_rate": 1.0446343779677114e-05,
595
- "loss": 0.2691,
596
  "step": 950
597
  },
598
  {
599
  "epoch": 2.46,
600
  "learning_rate": 9.971509971509972e-06,
601
- "loss": 0.2627,
602
  "step": 960
603
  },
604
  {
605
  "epoch": 2.49,
606
  "learning_rate": 9.49667616334283e-06,
607
- "loss": 0.2612,
608
  "step": 970
609
  },
610
  {
611
  "epoch": 2.51,
612
  "learning_rate": 9.02184235517569e-06,
613
- "loss": 0.2389,
614
  "step": 980
615
  },
616
  {
617
  "epoch": 2.54,
618
  "learning_rate": 8.547008547008548e-06,
619
- "loss": 0.2792,
620
  "step": 990
621
  },
622
  {
623
  "epoch": 2.56,
624
  "learning_rate": 8.072174738841406e-06,
625
- "loss": 0.2446,
626
  "step": 1000
627
  },
628
  {
629
  "epoch": 2.59,
630
  "learning_rate": 7.597340930674264e-06,
631
- "loss": 0.2305,
632
  "step": 1010
633
  },
634
  {
635
  "epoch": 2.61,
636
  "learning_rate": 7.122507122507123e-06,
637
- "loss": 0.2527,
638
  "step": 1020
639
  },
640
  {
641
  "epoch": 2.64,
642
  "learning_rate": 6.6476733143399815e-06,
643
- "loss": 0.2403,
644
  "step": 1030
645
  },
646
  {
647
  "epoch": 2.67,
648
  "learning_rate": 6.172839506172839e-06,
649
- "loss": 0.2254,
650
  "step": 1040
651
  },
652
  {
653
  "epoch": 2.69,
654
  "learning_rate": 5.6980056980056985e-06,
655
- "loss": 0.2879,
656
  "step": 1050
657
  },
658
  {
659
  "epoch": 2.72,
660
  "learning_rate": 5.223171889838557e-06,
661
- "loss": 0.2233,
662
  "step": 1060
663
  },
664
  {
665
  "epoch": 2.74,
666
  "learning_rate": 4.748338081671415e-06,
667
- "loss": 0.2258,
668
  "step": 1070
669
  },
670
  {
671
  "epoch": 2.77,
672
  "learning_rate": 4.273504273504274e-06,
673
- "loss": 0.2382,
674
  "step": 1080
675
  },
676
  {
677
  "epoch": 2.79,
678
  "learning_rate": 3.798670465337132e-06,
679
- "loss": 0.2564,
680
  "step": 1090
681
  },
682
  {
683
  "epoch": 2.82,
684
  "learning_rate": 3.3238366571699908e-06,
685
- "loss": 0.2513,
686
  "step": 1100
687
  },
688
  {
689
  "epoch": 2.84,
690
  "learning_rate": 2.8490028490028492e-06,
691
- "loss": 0.2152,
692
  "step": 1110
693
  },
694
  {
695
  "epoch": 2.87,
696
  "learning_rate": 2.3741690408357077e-06,
697
- "loss": 0.243,
698
  "step": 1120
699
  },
700
  {
701
  "epoch": 2.9,
702
  "learning_rate": 1.899335232668566e-06,
703
- "loss": 0.2701,
704
  "step": 1130
705
  },
706
  {
707
  "epoch": 2.92,
708
  "learning_rate": 1.4245014245014246e-06,
709
- "loss": 0.2531,
710
  "step": 1140
711
  },
712
  {
713
  "epoch": 2.95,
714
  "learning_rate": 9.49667616334283e-07,
715
- "loss": 0.2637,
716
  "step": 1150
717
  },
718
  {
719
  "epoch": 2.97,
720
  "learning_rate": 4.748338081671415e-07,
721
- "loss": 0.2424,
722
  "step": 1160
723
  },
724
  {
725
  "epoch": 3.0,
726
  "learning_rate": 0.0,
727
- "loss": 0.2524,
728
  "step": 1170
729
  },
730
  {
731
  "epoch": 3.0,
732
- "eval_accuracy": 0.9863,
733
- "eval_loss": 0.09992218017578125,
734
- "eval_runtime": 83.7402,
735
- "eval_samples_per_second": 119.417,
736
- "eval_steps_per_second": 3.738,
737
  "step": 1170
738
  },
739
  {
740
  "epoch": 3.0,
741
  "step": 1170,
742
  "total_flos": 1.161843208194687e+19,
743
- "train_loss": 0.5011348225112654,
744
- "train_runtime": 3321.3479,
745
- "train_samples_per_second": 45.162,
746
- "train_steps_per_second": 0.352
747
  }
748
  ],
749
  "max_steps": 1170,
 
1
  {
2
+ "best_metric": 0.9855,
3
  "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-cifar10/checkpoint-1170",
4
  "epoch": 2.998080614203455,
5
  "global_step": 1170,
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 4.273504273504274e-06,
13
+ "loss": 2.3074,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.05,
18
  "learning_rate": 8.547008547008548e-06,
19
+ "loss": 2.2947,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.08,
24
  "learning_rate": 1.282051282051282e-05,
25
+ "loss": 2.2588,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.1,
30
  "learning_rate": 1.7094017094017095e-05,
31
+ "loss": 2.2041,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.13,
36
  "learning_rate": 2.1367521367521368e-05,
37
+ "loss": 2.1221,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.15,
42
  "learning_rate": 2.564102564102564e-05,
43
+ "loss": 2.0014,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.18,
48
  "learning_rate": 2.9914529914529915e-05,
49
+ "loss": 1.8365,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.2,
54
  "learning_rate": 3.418803418803419e-05,
55
+ "loss": 1.6374,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.23,
60
  "learning_rate": 3.846153846153846e-05,
61
+ "loss": 1.438,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.26,
66
  "learning_rate": 4.2735042735042735e-05,
67
+ "loss": 1.2628,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.28,
72
  "learning_rate": 4.700854700854701e-05,
73
+ "loss": 1.0934,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.31,
78
  "learning_rate": 4.985754985754986e-05,
79
+ "loss": 0.9678,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.33,
84
  "learning_rate": 4.938271604938271e-05,
85
+ "loss": 0.8619,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.36,
90
  "learning_rate": 4.890788224121557e-05,
91
+ "loss": 0.837,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.38,
96
  "learning_rate": 4.8433048433048433e-05,
97
+ "loss": 0.7366,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.41,
102
  "learning_rate": 4.7958214624881294e-05,
103
+ "loss": 0.6799,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.44,
108
  "learning_rate": 4.7483380816714154e-05,
109
+ "loss": 0.6741,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.46,
114
  "learning_rate": 4.700854700854701e-05,
115
+ "loss": 0.6262,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.49,
120
  "learning_rate": 4.653371320037987e-05,
121
+ "loss": 0.6355,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.51,
126
  "learning_rate": 4.605887939221273e-05,
127
+ "loss": 0.5781,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.54,
132
  "learning_rate": 4.558404558404559e-05,
133
+ "loss": 0.5671,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.56,
138
  "learning_rate": 4.510921177587845e-05,
139
+ "loss": 0.5192,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.59,
144
  "learning_rate": 4.463437796771131e-05,
145
+ "loss": 0.5498,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.61,
150
  "learning_rate": 4.415954415954416e-05,
151
+ "loss": 0.507,
152
  "step": 240
153
  },
154
  {
 
160
  {
161
  "epoch": 0.67,
162
  "learning_rate": 4.3209876543209875e-05,
163
+ "loss": 0.4586,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.69,
168
  "learning_rate": 4.2735042735042735e-05,
169
+ "loss": 0.4815,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.72,
174
  "learning_rate": 4.2260208926875595e-05,
175
+ "loss": 0.4656,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.74,
180
  "learning_rate": 4.1785375118708455e-05,
181
+ "loss": 0.4937,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.77,
186
  "learning_rate": 4.131054131054131e-05,
187
+ "loss": 0.4649,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.79,
192
  "learning_rate": 4.083570750237417e-05,
193
+ "loss": 0.4279,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.82,
198
  "learning_rate": 4.036087369420703e-05,
199
+ "loss": 0.4736,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.84,
204
  "learning_rate": 3.988603988603989e-05,
205
+ "loss": 0.4434,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.87,
210
  "learning_rate": 3.941120607787275e-05,
211
+ "loss": 0.4459,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.9,
216
  "learning_rate": 3.893637226970561e-05,
217
+ "loss": 0.4307,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.92,
222
  "learning_rate": 3.846153846153846e-05,
223
+ "loss": 0.4029,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.95,
228
  "learning_rate": 3.798670465337132e-05,
229
+ "loss": 0.4007,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.97,
234
  "learning_rate": 3.7511870845204176e-05,
235
+ "loss": 0.4279,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 1.0,
240
  "learning_rate": 3.7037037037037037e-05,
241
+ "loss": 0.3831,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 1.0,
246
+ "eval_accuracy": 0.978,
247
+ "eval_loss": 0.20566777884960175,
248
+ "eval_runtime": 152.329,
249
+ "eval_samples_per_second": 65.647,
250
+ "eval_steps_per_second": 2.055,
251
  "step": 390
252
  },
253
  {
254
  "epoch": 1.03,
255
  "learning_rate": 3.65622032288699e-05,
256
+ "loss": 0.4258,
257
  "step": 400
258
  },
259
  {
260
  "epoch": 1.05,
261
  "learning_rate": 3.608736942070276e-05,
262
+ "loss": 0.3416,
263
  "step": 410
264
  },
265
  {
266
  "epoch": 1.08,
267
  "learning_rate": 3.561253561253561e-05,
268
+ "loss": 0.3881,
269
  "step": 420
270
  },
271
  {
272
  "epoch": 1.1,
273
  "learning_rate": 3.513770180436847e-05,
274
+ "loss": 0.3453,
275
  "step": 430
276
  },
277
  {
278
  "epoch": 1.13,
279
  "learning_rate": 3.466286799620133e-05,
280
+ "loss": 0.3806,
281
  "step": 440
282
  },
283
  {
284
  "epoch": 1.15,
285
  "learning_rate": 3.418803418803419e-05,
286
+ "loss": 0.361,
287
  "step": 450
288
  },
289
  {
290
  "epoch": 1.18,
291
  "learning_rate": 3.371320037986705e-05,
292
+ "loss": 0.3834,
293
  "step": 460
294
  },
295
  {
296
  "epoch": 1.2,
297
  "learning_rate": 3.323836657169991e-05,
298
+ "loss": 0.3696,
299
  "step": 470
300
  },
301
  {
302
  "epoch": 1.23,
303
  "learning_rate": 3.2763532763532764e-05,
304
+ "loss": 0.3389,
305
  "step": 480
306
  },
307
  {
308
  "epoch": 1.26,
309
  "learning_rate": 3.2288698955365625e-05,
310
+ "loss": 0.3671,
311
  "step": 490
312
  },
313
  {
314
  "epoch": 1.28,
315
  "learning_rate": 3.181386514719848e-05,
316
+ "loss": 0.3685,
317
  "step": 500
318
  },
319
  {
320
  "epoch": 1.31,
321
  "learning_rate": 3.133903133903134e-05,
322
+ "loss": 0.3533,
323
  "step": 510
324
  },
325
  {
326
  "epoch": 1.33,
327
  "learning_rate": 3.08641975308642e-05,
328
+ "loss": 0.3539,
329
  "step": 520
330
  },
331
  {
332
  "epoch": 1.36,
333
  "learning_rate": 3.0389363722697055e-05,
334
+ "loss": 0.3499,
335
  "step": 530
336
  },
337
  {
338
  "epoch": 1.38,
339
  "learning_rate": 2.9914529914529915e-05,
340
+ "loss": 0.3246,
341
  "step": 540
342
  },
343
  {
344
  "epoch": 1.41,
345
  "learning_rate": 2.9439696106362775e-05,
346
+ "loss": 0.3134,
347
  "step": 550
348
  },
349
  {
350
  "epoch": 1.44,
351
  "learning_rate": 2.8964862298195632e-05,
352
+ "loss": 0.3425,
353
  "step": 560
354
  },
355
  {
356
  "epoch": 1.46,
357
  "learning_rate": 2.8490028490028492e-05,
358
+ "loss": 0.3185,
359
  "step": 570
360
  },
361
  {
362
  "epoch": 1.49,
363
  "learning_rate": 2.8015194681861352e-05,
364
+ "loss": 0.3215,
365
  "step": 580
366
  },
367
  {
368
  "epoch": 1.51,
369
  "learning_rate": 2.754036087369421e-05,
370
+ "loss": 0.3268,
371
  "step": 590
372
  },
373
  {
374
  "epoch": 1.54,
375
  "learning_rate": 2.706552706552707e-05,
376
+ "loss": 0.3439,
377
  "step": 600
378
  },
379
  {
380
  "epoch": 1.56,
381
  "learning_rate": 2.6590693257359926e-05,
382
+ "loss": 0.3049,
383
  "step": 610
384
  },
385
  {
386
  "epoch": 1.59,
387
  "learning_rate": 2.611585944919278e-05,
388
+ "loss": 0.3306,
389
  "step": 620
390
  },
391
  {
392
  "epoch": 1.61,
393
  "learning_rate": 2.564102564102564e-05,
394
+ "loss": 0.3158,
395
  "step": 630
396
  },
397
  {
398
  "epoch": 1.64,
399
  "learning_rate": 2.51661918328585e-05,
400
+ "loss": 0.3118,
401
  "step": 640
402
  },
403
  {
404
  "epoch": 1.67,
405
  "learning_rate": 2.4691358024691357e-05,
406
+ "loss": 0.3312,
407
  "step": 650
408
  },
409
  {
410
  "epoch": 1.69,
411
  "learning_rate": 2.4216524216524217e-05,
412
+ "loss": 0.3094,
413
  "step": 660
414
  },
415
  {
416
  "epoch": 1.72,
417
  "learning_rate": 2.3741690408357077e-05,
418
+ "loss": 0.3064,
419
  "step": 670
420
  },
421
  {
422
  "epoch": 1.74,
423
  "learning_rate": 2.3266856600189934e-05,
424
+ "loss": 0.3144,
425
  "step": 680
426
  },
427
  {
428
  "epoch": 1.77,
429
  "learning_rate": 2.2792022792022794e-05,
430
+ "loss": 0.2858,
431
  "step": 690
432
  },
433
  {
434
  "epoch": 1.79,
435
  "learning_rate": 2.2317188983855654e-05,
436
+ "loss": 0.2634,
437
  "step": 700
438
  },
439
  {
440
  "epoch": 1.82,
441
  "learning_rate": 2.184235517568851e-05,
442
+ "loss": 0.31,
443
  "step": 710
444
  },
445
  {
446
  "epoch": 1.84,
447
  "learning_rate": 2.1367521367521368e-05,
448
+ "loss": 0.3035,
449
  "step": 720
450
  },
451
  {
452
  "epoch": 1.87,
453
  "learning_rate": 2.0892687559354228e-05,
454
+ "loss": 0.2942,
455
  "step": 730
456
  },
457
  {
458
  "epoch": 1.9,
459
  "learning_rate": 2.0417853751187084e-05,
460
+ "loss": 0.2951,
461
  "step": 740
462
  },
463
  {
464
  "epoch": 1.92,
465
  "learning_rate": 1.9943019943019945e-05,
466
+ "loss": 0.26,
467
  "step": 750
468
  },
469
  {
470
  "epoch": 1.95,
471
  "learning_rate": 1.9468186134852805e-05,
472
+ "loss": 0.2861,
473
  "step": 760
474
  },
475
  {
476
  "epoch": 1.97,
477
  "learning_rate": 1.899335232668566e-05,
478
+ "loss": 0.2839,
479
  "step": 770
480
  },
481
  {
482
  "epoch": 2.0,
483
  "learning_rate": 1.8518518518518518e-05,
484
+ "loss": 0.3007,
485
  "step": 780
486
  },
487
  {
488
  "epoch": 2.0,
489
+ "eval_accuracy": 0.9845,
490
+ "eval_loss": 0.11994641274213791,
491
+ "eval_runtime": 153.4534,
492
+ "eval_samples_per_second": 65.166,
493
+ "eval_steps_per_second": 2.04,
494
  "step": 780
495
  },
496
  {
497
  "epoch": 2.03,
498
  "learning_rate": 1.804368471035138e-05,
499
+ "loss": 0.3043,
500
  "step": 790
501
  },
502
  {
503
  "epoch": 2.05,
504
  "learning_rate": 1.7568850902184235e-05,
505
+ "loss": 0.2769,
506
  "step": 800
507
  },
508
  {
509
  "epoch": 2.08,
510
  "learning_rate": 1.7094017094017095e-05,
511
+ "loss": 0.2983,
512
  "step": 810
513
  },
514
  {
515
  "epoch": 2.1,
516
  "learning_rate": 1.6619183285849956e-05,
517
+ "loss": 0.3186,
518
  "step": 820
519
  },
520
  {
521
  "epoch": 2.13,
522
  "learning_rate": 1.6144349477682812e-05,
523
+ "loss": 0.2619,
524
  "step": 830
525
  },
526
  {
527
  "epoch": 2.15,
528
  "learning_rate": 1.566951566951567e-05,
529
+ "loss": 0.283,
530
  "step": 840
531
  },
532
  {
533
  "epoch": 2.18,
534
  "learning_rate": 1.5194681861348528e-05,
535
+ "loss": 0.287,
536
  "step": 850
537
  },
538
  {
539
  "epoch": 2.2,
540
  "learning_rate": 1.4719848053181388e-05,
541
+ "loss": 0.2631,
542
  "step": 860
543
  },
544
  {
545
  "epoch": 2.23,
546
  "learning_rate": 1.4245014245014246e-05,
547
+ "loss": 0.263,
548
  "step": 870
549
  },
550
  {
551
  "epoch": 2.26,
552
  "learning_rate": 1.3770180436847105e-05,
553
+ "loss": 0.2554,
554
  "step": 880
555
  },
556
  {
557
  "epoch": 2.28,
558
  "learning_rate": 1.3295346628679963e-05,
559
+ "loss": 0.2702,
560
  "step": 890
561
  },
562
  {
563
  "epoch": 2.31,
564
  "learning_rate": 1.282051282051282e-05,
565
+ "loss": 0.3224,
566
  "step": 900
567
  },
568
  {
569
  "epoch": 2.33,
570
  "learning_rate": 1.2345679012345678e-05,
571
+ "loss": 0.2801,
572
  "step": 910
573
  },
574
  {
575
  "epoch": 2.36,
576
  "learning_rate": 1.1870845204178538e-05,
577
+ "loss": 0.2521,
578
  "step": 920
579
  },
580
  {
581
  "epoch": 2.38,
582
  "learning_rate": 1.1396011396011397e-05,
583
+ "loss": 0.2613,
584
  "step": 930
585
  },
586
  {
587
  "epoch": 2.41,
588
  "learning_rate": 1.0921177587844255e-05,
589
+ "loss": 0.2485,
590
  "step": 940
591
  },
592
  {
593
  "epoch": 2.44,
594
  "learning_rate": 1.0446343779677114e-05,
595
+ "loss": 0.2672,
596
  "step": 950
597
  },
598
  {
599
  "epoch": 2.46,
600
  "learning_rate": 9.971509971509972e-06,
601
+ "loss": 0.2695,
602
  "step": 960
603
  },
604
  {
605
  "epoch": 2.49,
606
  "learning_rate": 9.49667616334283e-06,
607
+ "loss": 0.2593,
608
  "step": 970
609
  },
610
  {
611
  "epoch": 2.51,
612
  "learning_rate": 9.02184235517569e-06,
613
+ "loss": 0.2402,
614
  "step": 980
615
  },
616
  {
617
  "epoch": 2.54,
618
  "learning_rate": 8.547008547008548e-06,
619
+ "loss": 0.2759,
620
  "step": 990
621
  },
622
  {
623
  "epoch": 2.56,
624
  "learning_rate": 8.072174738841406e-06,
625
+ "loss": 0.2654,
626
  "step": 1000
627
  },
628
  {
629
  "epoch": 2.59,
630
  "learning_rate": 7.597340930674264e-06,
631
+ "loss": 0.2363,
632
  "step": 1010
633
  },
634
  {
635
  "epoch": 2.61,
636
  "learning_rate": 7.122507122507123e-06,
637
+ "loss": 0.2553,
638
  "step": 1020
639
  },
640
  {
641
  "epoch": 2.64,
642
  "learning_rate": 6.6476733143399815e-06,
643
+ "loss": 0.2384,
644
  "step": 1030
645
  },
646
  {
647
  "epoch": 2.67,
648
  "learning_rate": 6.172839506172839e-06,
649
+ "loss": 0.2173,
650
  "step": 1040
651
  },
652
  {
653
  "epoch": 2.69,
654
  "learning_rate": 5.6980056980056985e-06,
655
+ "loss": 0.2794,
656
  "step": 1050
657
  },
658
  {
659
  "epoch": 2.72,
660
  "learning_rate": 5.223171889838557e-06,
661
+ "loss": 0.2293,
662
  "step": 1060
663
  },
664
  {
665
  "epoch": 2.74,
666
  "learning_rate": 4.748338081671415e-06,
667
+ "loss": 0.2107,
668
  "step": 1070
669
  },
670
  {
671
  "epoch": 2.77,
672
  "learning_rate": 4.273504273504274e-06,
673
+ "loss": 0.2387,
674
  "step": 1080
675
  },
676
  {
677
  "epoch": 2.79,
678
  "learning_rate": 3.798670465337132e-06,
679
+ "loss": 0.2534,
680
  "step": 1090
681
  },
682
  {
683
  "epoch": 2.82,
684
  "learning_rate": 3.3238366571699908e-06,
685
+ "loss": 0.2449,
686
  "step": 1100
687
  },
688
  {
689
  "epoch": 2.84,
690
  "learning_rate": 2.8490028490028492e-06,
691
+ "loss": 0.2188,
692
  "step": 1110
693
  },
694
  {
695
  "epoch": 2.87,
696
  "learning_rate": 2.3741690408357077e-06,
697
+ "loss": 0.2444,
698
  "step": 1120
699
  },
700
  {
701
  "epoch": 2.9,
702
  "learning_rate": 1.899335232668566e-06,
703
+ "loss": 0.2611,
704
  "step": 1130
705
  },
706
  {
707
  "epoch": 2.92,
708
  "learning_rate": 1.4245014245014246e-06,
709
+ "loss": 0.2607,
710
  "step": 1140
711
  },
712
  {
713
  "epoch": 2.95,
714
  "learning_rate": 9.49667616334283e-07,
715
+ "loss": 0.2601,
716
  "step": 1150
717
  },
718
  {
719
  "epoch": 2.97,
720
  "learning_rate": 4.748338081671415e-07,
721
+ "loss": 0.2441,
722
  "step": 1160
723
  },
724
  {
725
  "epoch": 3.0,
726
  "learning_rate": 0.0,
727
+ "loss": 0.2442,
728
  "step": 1170
729
  },
730
  {
731
  "epoch": 3.0,
732
+ "eval_accuracy": 0.9855,
733
+ "eval_loss": 0.10110656172037125,
734
+ "eval_runtime": 153.3402,
735
+ "eval_samples_per_second": 65.214,
736
+ "eval_steps_per_second": 2.041,
737
  "step": 1170
738
  },
739
  {
740
  "epoch": 3.0,
741
  "step": 1170,
742
  "total_flos": 1.161843208194687e+19,
743
+ "train_loss": 0.5034837816515539,
744
+ "train_runtime": 6176.4988,
745
+ "train_samples_per_second": 24.286,
746
+ "train_steps_per_second": 0.189
747
  }
748
  ],
749
  "max_steps": 1170,