|
,loss,learning_rate,epoch,step,eval_loss,eval_accuracy,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss |
|
0,1.0375,3.912141264809884e-06,1.0,235,,,,,,,,,,, |
|
1,,,1.0,235,0.836928129196167,0.6248,0.612402481155663,1.266,1974.649,15.797,,,,, |
|
2,0.7758,7.807635119982278e-06,2.0,470,,,,,,,,,,, |
|
3,,,2.0,470,0.7121442556381226,0.6932,0.6899832814917121,1.2656,1975.323,15.803,,,,, |
|
4,0.6803,1.1719776384792164e-05,3.0,705,,,,,,,,,,, |
|
5,,,3.0,705,0.6878911852836609,0.7044,0.7069424431108294,1.2664,1974.166,15.793,,,,, |
|
6,0.6025,1.5631917649602047e-05,4.0,940,,,,,,,,,,, |
|
7,,,4.0,940,0.6585601568222046,0.7184,0.7168436594753593,1.266,1974.74,15.798,,,,, |
|
8,0.5176,1.9544058914411928e-05,5.0,1175,,,,,,,,,,, |
|
9,,,5.0,1175,0.6809464693069458,0.7144,0.7137994849164195,1.2678,1971.987,15.776,,,,, |
|
10,0.428,2.3456200179221815e-05,6.0,1410,,,,,,,,,,, |
|
11,,,6.0,1410,0.7105868458747864,0.7124,0.710171369539371,1.2665,1973.953,15.792,,,,, |
|
12,0.3298,2.7368341444031695e-05,7.0,1645,,,,,,,,,,, |
|
13,,,7.0,1645,0.8034451603889465,0.7064,0.7055785145607243,1.2652,1975.987,15.808,,,,, |
|
14,,,7.0,1645,,,,,,,350.5605,8557.724,67.036,1.0289189979256608e+16,0.6244962929954645 |
|
|