5roop's picture
Upload 9 files
da7c2b8
raw
history blame
47.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.997150997150996,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 9.971428571428571e-05,
"loss": 1.6461,
"step": 10
},
{
"epoch": 0.11,
"learning_rate": 9.942857142857144e-05,
"loss": 1.5964,
"step": 20
},
{
"epoch": 0.17,
"learning_rate": 9.914285714285715e-05,
"loss": 1.52,
"step": 30
},
{
"epoch": 0.23,
"learning_rate": 9.885714285714286e-05,
"loss": 1.5243,
"step": 40
},
{
"epoch": 0.28,
"learning_rate": 9.857142857142858e-05,
"loss": 1.4323,
"step": 50
},
{
"epoch": 0.34,
"learning_rate": 9.831428571428572e-05,
"loss": 1.6594,
"step": 60
},
{
"epoch": 0.4,
"learning_rate": 9.802857142857143e-05,
"loss": 1.472,
"step": 70
},
{
"epoch": 0.46,
"learning_rate": 9.774285714285715e-05,
"loss": 1.4544,
"step": 80
},
{
"epoch": 0.51,
"learning_rate": 9.745714285714286e-05,
"loss": 1.4888,
"step": 90
},
{
"epoch": 0.57,
"learning_rate": 9.72e-05,
"loss": 1.3526,
"step": 100
},
{
"epoch": 0.63,
"learning_rate": 9.691428571428573e-05,
"loss": 1.3166,
"step": 110
},
{
"epoch": 0.68,
"learning_rate": 9.662857142857144e-05,
"loss": 1.283,
"step": 120
},
{
"epoch": 0.74,
"learning_rate": 9.634285714285715e-05,
"loss": 1.3889,
"step": 130
},
{
"epoch": 0.8,
"learning_rate": 9.605714285714286e-05,
"loss": 1.2688,
"step": 140
},
{
"epoch": 0.85,
"learning_rate": 9.577142857142858e-05,
"loss": 1.047,
"step": 150
},
{
"epoch": 0.91,
"learning_rate": 9.548571428571429e-05,
"loss": 1.3696,
"step": 160
},
{
"epoch": 0.97,
"learning_rate": 9.522857142857143e-05,
"loss": 1.3446,
"step": 170
},
{
"epoch": 1.0,
"eval_accuracy": 0.4554455578327179,
"eval_loss": 1.536527156829834,
"eval_runtime": 11.3063,
"eval_samples_per_second": 17.866,
"eval_steps_per_second": 8.933,
"step": 175
},
{
"epoch": 1.03,
"learning_rate": 9.494285714285714e-05,
"loss": 0.8856,
"step": 180
},
{
"epoch": 1.09,
"learning_rate": 9.465714285714286e-05,
"loss": 1.2261,
"step": 190
},
{
"epoch": 1.14,
"learning_rate": 9.437142857142857e-05,
"loss": 1.1836,
"step": 200
},
{
"epoch": 1.2,
"learning_rate": 9.40857142857143e-05,
"loss": 1.3243,
"step": 210
},
{
"epoch": 1.26,
"learning_rate": 9.38e-05,
"loss": 0.747,
"step": 220
},
{
"epoch": 1.31,
"learning_rate": 9.351428571428573e-05,
"loss": 0.9695,
"step": 230
},
{
"epoch": 1.37,
"learning_rate": 9.322857142857144e-05,
"loss": 1.0319,
"step": 240
},
{
"epoch": 1.43,
"learning_rate": 9.294285714285714e-05,
"loss": 1.0031,
"step": 250
},
{
"epoch": 1.48,
"learning_rate": 9.265714285714287e-05,
"loss": 1.1495,
"step": 260
},
{
"epoch": 1.54,
"learning_rate": 9.237142857142858e-05,
"loss": 0.9622,
"step": 270
},
{
"epoch": 1.6,
"learning_rate": 9.208571428571429e-05,
"loss": 0.9744,
"step": 280
},
{
"epoch": 1.66,
"learning_rate": 9.180000000000001e-05,
"loss": 1.1524,
"step": 290
},
{
"epoch": 1.71,
"learning_rate": 9.151428571428572e-05,
"loss": 0.9051,
"step": 300
},
{
"epoch": 1.77,
"learning_rate": 9.122857142857143e-05,
"loss": 1.1604,
"step": 310
},
{
"epoch": 1.83,
"learning_rate": 9.094285714285715e-05,
"loss": 1.2363,
"step": 320
},
{
"epoch": 1.88,
"learning_rate": 9.065714285714286e-05,
"loss": 1.139,
"step": 330
},
{
"epoch": 1.94,
"learning_rate": 9.037142857142857e-05,
"loss": 1.1337,
"step": 340
},
{
"epoch": 2.0,
"learning_rate": 9.008571428571429e-05,
"loss": 0.9654,
"step": 350
},
{
"epoch": 2.0,
"eval_accuracy": 0.6237623691558838,
"eval_loss": 1.0576841831207275,
"eval_runtime": 11.4424,
"eval_samples_per_second": 17.654,
"eval_steps_per_second": 8.827,
"step": 350
},
{
"epoch": 2.06,
"learning_rate": 8.98e-05,
"loss": 1.0752,
"step": 360
},
{
"epoch": 2.11,
"learning_rate": 8.951428571428572e-05,
"loss": 0.8605,
"step": 370
},
{
"epoch": 2.17,
"learning_rate": 8.922857142857143e-05,
"loss": 0.8216,
"step": 380
},
{
"epoch": 2.23,
"learning_rate": 8.894285714285716e-05,
"loss": 1.2676,
"step": 390
},
{
"epoch": 2.28,
"learning_rate": 8.865714285714287e-05,
"loss": 0.9299,
"step": 400
},
{
"epoch": 2.34,
"learning_rate": 8.837142857142857e-05,
"loss": 0.9775,
"step": 410
},
{
"epoch": 2.4,
"learning_rate": 8.80857142857143e-05,
"loss": 0.7707,
"step": 420
},
{
"epoch": 2.46,
"learning_rate": 8.78e-05,
"loss": 0.8136,
"step": 430
},
{
"epoch": 2.51,
"learning_rate": 8.751428571428572e-05,
"loss": 1.0706,
"step": 440
},
{
"epoch": 2.57,
"learning_rate": 8.722857142857144e-05,
"loss": 0.8833,
"step": 450
},
{
"epoch": 2.63,
"learning_rate": 8.694285714285715e-05,
"loss": 0.9111,
"step": 460
},
{
"epoch": 2.68,
"learning_rate": 8.665714285714286e-05,
"loss": 0.6969,
"step": 470
},
{
"epoch": 2.74,
"learning_rate": 8.637142857142858e-05,
"loss": 1.1425,
"step": 480
},
{
"epoch": 2.8,
"learning_rate": 8.608571428571429e-05,
"loss": 1.0298,
"step": 490
},
{
"epoch": 2.85,
"learning_rate": 8.58e-05,
"loss": 0.91,
"step": 500
},
{
"epoch": 2.91,
"learning_rate": 8.551428571428571e-05,
"loss": 0.8342,
"step": 510
},
{
"epoch": 2.97,
"learning_rate": 8.522857142857143e-05,
"loss": 0.8069,
"step": 520
},
{
"epoch": 3.0,
"eval_accuracy": 0.6386138796806335,
"eval_loss": 0.9259141683578491,
"eval_runtime": 11.2435,
"eval_samples_per_second": 17.966,
"eval_steps_per_second": 8.983,
"step": 525
},
{
"epoch": 3.03,
"learning_rate": 8.494285714285714e-05,
"loss": 0.8804,
"step": 530
},
{
"epoch": 3.09,
"learning_rate": 8.465714285714286e-05,
"loss": 0.7882,
"step": 540
},
{
"epoch": 3.14,
"learning_rate": 8.437142857142859e-05,
"loss": 0.5915,
"step": 550
},
{
"epoch": 3.2,
"learning_rate": 8.40857142857143e-05,
"loss": 0.5102,
"step": 560
},
{
"epoch": 3.26,
"learning_rate": 8.38e-05,
"loss": 0.6473,
"step": 570
},
{
"epoch": 3.31,
"learning_rate": 8.351428571428573e-05,
"loss": 0.7545,
"step": 580
},
{
"epoch": 3.37,
"learning_rate": 8.322857142857144e-05,
"loss": 0.5438,
"step": 590
},
{
"epoch": 3.43,
"learning_rate": 8.294285714285715e-05,
"loss": 0.8545,
"step": 600
},
{
"epoch": 3.48,
"learning_rate": 8.265714285714287e-05,
"loss": 0.563,
"step": 610
},
{
"epoch": 3.54,
"learning_rate": 8.237142857142858e-05,
"loss": 0.7048,
"step": 620
},
{
"epoch": 3.6,
"learning_rate": 8.208571428571429e-05,
"loss": 1.019,
"step": 630
},
{
"epoch": 3.66,
"learning_rate": 8.18e-05,
"loss": 0.5084,
"step": 640
},
{
"epoch": 3.71,
"learning_rate": 8.151428571428572e-05,
"loss": 0.7297,
"step": 650
},
{
"epoch": 3.77,
"learning_rate": 8.122857142857143e-05,
"loss": 0.4933,
"step": 660
},
{
"epoch": 3.83,
"learning_rate": 8.094285714285714e-05,
"loss": 0.5224,
"step": 670
},
{
"epoch": 3.88,
"learning_rate": 8.065714285714286e-05,
"loss": 0.5695,
"step": 680
},
{
"epoch": 3.94,
"learning_rate": 8.037142857142857e-05,
"loss": 0.4603,
"step": 690
},
{
"epoch": 4.0,
"learning_rate": 8.008571428571429e-05,
"loss": 1.1203,
"step": 700
},
{
"epoch": 4.0,
"eval_accuracy": 0.6831682920455933,
"eval_loss": 1.0746746063232422,
"eval_runtime": 11.8499,
"eval_samples_per_second": 17.047,
"eval_steps_per_second": 8.523,
"step": 700
},
{
"epoch": 4.06,
"learning_rate": 7.98e-05,
"loss": 0.6816,
"step": 710
},
{
"epoch": 4.11,
"learning_rate": 7.951428571428572e-05,
"loss": 0.629,
"step": 720
},
{
"epoch": 4.17,
"learning_rate": 7.922857142857143e-05,
"loss": 0.6881,
"step": 730
},
{
"epoch": 4.23,
"learning_rate": 7.894285714285716e-05,
"loss": 0.8816,
"step": 740
},
{
"epoch": 4.28,
"learning_rate": 7.865714285714287e-05,
"loss": 0.4466,
"step": 750
},
{
"epoch": 4.34,
"learning_rate": 7.837142857142858e-05,
"loss": 0.721,
"step": 760
},
{
"epoch": 4.4,
"learning_rate": 7.808571428571428e-05,
"loss": 0.8953,
"step": 770
},
{
"epoch": 4.46,
"learning_rate": 7.780000000000001e-05,
"loss": 0.4612,
"step": 780
},
{
"epoch": 4.51,
"learning_rate": 7.751428571428572e-05,
"loss": 0.5196,
"step": 790
},
{
"epoch": 4.57,
"learning_rate": 7.722857142857143e-05,
"loss": 0.62,
"step": 800
},
{
"epoch": 4.63,
"learning_rate": 7.694285714285715e-05,
"loss": 0.3506,
"step": 810
},
{
"epoch": 4.68,
"learning_rate": 7.665714285714286e-05,
"loss": 0.2639,
"step": 820
},
{
"epoch": 4.74,
"learning_rate": 7.637142857142857e-05,
"loss": 0.9862,
"step": 830
},
{
"epoch": 4.8,
"learning_rate": 7.608571428571429e-05,
"loss": 0.6958,
"step": 840
},
{
"epoch": 4.85,
"learning_rate": 7.58e-05,
"loss": 0.5734,
"step": 850
},
{
"epoch": 4.91,
"learning_rate": 7.551428571428571e-05,
"loss": 0.6894,
"step": 860
},
{
"epoch": 4.97,
"learning_rate": 7.522857142857143e-05,
"loss": 0.3681,
"step": 870
},
{
"epoch": 5.0,
"eval_accuracy": 0.7029703259468079,
"eval_loss": 1.0062588453292847,
"eval_runtime": 11.6332,
"eval_samples_per_second": 17.364,
"eval_steps_per_second": 8.682,
"step": 875
},
{
"epoch": 5.03,
"learning_rate": 7.494285714285715e-05,
"loss": 0.4552,
"step": 880
},
{
"epoch": 5.09,
"learning_rate": 7.465714285714286e-05,
"loss": 0.3066,
"step": 890
},
{
"epoch": 5.14,
"learning_rate": 7.437142857142857e-05,
"loss": 0.691,
"step": 900
},
{
"epoch": 5.2,
"learning_rate": 7.40857142857143e-05,
"loss": 0.5948,
"step": 910
},
{
"epoch": 5.26,
"learning_rate": 7.38e-05,
"loss": 0.2739,
"step": 920
},
{
"epoch": 5.31,
"learning_rate": 7.351428571428571e-05,
"loss": 0.29,
"step": 930
},
{
"epoch": 5.37,
"learning_rate": 7.322857142857144e-05,
"loss": 0.4972,
"step": 940
},
{
"epoch": 5.43,
"learning_rate": 7.294285714285715e-05,
"loss": 0.4721,
"step": 950
},
{
"epoch": 5.48,
"learning_rate": 7.265714285714286e-05,
"loss": 0.346,
"step": 960
},
{
"epoch": 5.54,
"learning_rate": 7.237142857142858e-05,
"loss": 0.6904,
"step": 970
},
{
"epoch": 5.6,
"learning_rate": 7.211428571428572e-05,
"loss": 0.6566,
"step": 980
},
{
"epoch": 5.66,
"learning_rate": 7.182857142857143e-05,
"loss": 0.3559,
"step": 990
},
{
"epoch": 5.71,
"learning_rate": 7.154285714285714e-05,
"loss": 0.2403,
"step": 1000
},
{
"epoch": 5.77,
"learning_rate": 7.125714285714286e-05,
"loss": 0.3333,
"step": 1010
},
{
"epoch": 5.83,
"learning_rate": 7.097142857142857e-05,
"loss": 0.6078,
"step": 1020
},
{
"epoch": 5.88,
"learning_rate": 7.06857142857143e-05,
"loss": 0.5469,
"step": 1030
},
{
"epoch": 5.94,
"learning_rate": 7.04e-05,
"loss": 0.327,
"step": 1040
},
{
"epoch": 6.0,
"learning_rate": 7.011428571428573e-05,
"loss": 0.6719,
"step": 1050
},
{
"epoch": 6.0,
"eval_accuracy": 0.6930692791938782,
"eval_loss": 1.4494578838348389,
"eval_runtime": 12.9771,
"eval_samples_per_second": 15.566,
"eval_steps_per_second": 7.783,
"step": 1050
},
{
"epoch": 6.06,
"learning_rate": 6.982857142857144e-05,
"loss": 0.6276,
"step": 1060
},
{
"epoch": 6.11,
"learning_rate": 6.954285714285714e-05,
"loss": 0.2453,
"step": 1070
},
{
"epoch": 6.17,
"learning_rate": 6.925714285714287e-05,
"loss": 0.4183,
"step": 1080
},
{
"epoch": 6.23,
"learning_rate": 6.897142857142858e-05,
"loss": 0.5071,
"step": 1090
},
{
"epoch": 6.28,
"learning_rate": 6.868571428571429e-05,
"loss": 0.3372,
"step": 1100
},
{
"epoch": 6.34,
"learning_rate": 6.840000000000001e-05,
"loss": 0.3326,
"step": 1110
},
{
"epoch": 6.4,
"learning_rate": 6.811428571428572e-05,
"loss": 0.6091,
"step": 1120
},
{
"epoch": 6.46,
"learning_rate": 6.782857142857143e-05,
"loss": 0.387,
"step": 1130
},
{
"epoch": 6.51,
"learning_rate": 6.754285714285714e-05,
"loss": 0.3271,
"step": 1140
},
{
"epoch": 6.57,
"learning_rate": 6.725714285714286e-05,
"loss": 0.4024,
"step": 1150
},
{
"epoch": 6.63,
"learning_rate": 6.697142857142857e-05,
"loss": 0.5858,
"step": 1160
},
{
"epoch": 6.68,
"learning_rate": 6.668571428571428e-05,
"loss": 0.3328,
"step": 1170
},
{
"epoch": 6.74,
"learning_rate": 6.64e-05,
"loss": 0.5711,
"step": 1180
},
{
"epoch": 6.8,
"learning_rate": 6.611428571428572e-05,
"loss": 0.5658,
"step": 1190
},
{
"epoch": 6.85,
"learning_rate": 6.582857142857143e-05,
"loss": 0.3108,
"step": 1200
},
{
"epoch": 6.91,
"learning_rate": 6.554285714285716e-05,
"loss": 0.4348,
"step": 1210
},
{
"epoch": 6.97,
"learning_rate": 6.525714285714287e-05,
"loss": 0.646,
"step": 1220
},
{
"epoch": 7.0,
"eval_accuracy": 0.6930692791938782,
"eval_loss": 1.4014908075332642,
"eval_runtime": 11.3752,
"eval_samples_per_second": 17.758,
"eval_steps_per_second": 8.879,
"step": 1225
},
{
"epoch": 7.03,
"learning_rate": 6.497142857142857e-05,
"loss": 0.6164,
"step": 1230
},
{
"epoch": 7.09,
"learning_rate": 6.46857142857143e-05,
"loss": 0.1188,
"step": 1240
},
{
"epoch": 7.14,
"learning_rate": 6.440000000000001e-05,
"loss": 0.7997,
"step": 1250
},
{
"epoch": 7.2,
"learning_rate": 6.411428571428572e-05,
"loss": 0.3099,
"step": 1260
},
{
"epoch": 7.26,
"learning_rate": 6.382857142857143e-05,
"loss": 0.1419,
"step": 1270
},
{
"epoch": 7.31,
"learning_rate": 6.354285714285715e-05,
"loss": 0.3644,
"step": 1280
},
{
"epoch": 7.37,
"learning_rate": 6.325714285714286e-05,
"loss": 0.2829,
"step": 1290
},
{
"epoch": 7.43,
"learning_rate": 6.297142857142857e-05,
"loss": 0.8076,
"step": 1300
},
{
"epoch": 7.48,
"learning_rate": 6.268571428571429e-05,
"loss": 0.4266,
"step": 1310
},
{
"epoch": 7.54,
"learning_rate": 6.24e-05,
"loss": 0.3325,
"step": 1320
},
{
"epoch": 7.6,
"learning_rate": 6.211428571428571e-05,
"loss": 0.8035,
"step": 1330
},
{
"epoch": 7.66,
"learning_rate": 6.182857142857143e-05,
"loss": 0.1186,
"step": 1340
},
{
"epoch": 7.71,
"learning_rate": 6.154285714285714e-05,
"loss": 0.5125,
"step": 1350
},
{
"epoch": 7.77,
"learning_rate": 6.125714285714286e-05,
"loss": 0.2002,
"step": 1360
},
{
"epoch": 7.83,
"learning_rate": 6.097142857142858e-05,
"loss": 0.147,
"step": 1370
},
{
"epoch": 7.88,
"learning_rate": 6.068571428571429e-05,
"loss": 0.4292,
"step": 1380
},
{
"epoch": 7.94,
"learning_rate": 6.04e-05,
"loss": 0.7013,
"step": 1390
},
{
"epoch": 8.0,
"learning_rate": 6.0114285714285714e-05,
"loss": 0.3072,
"step": 1400
},
{
"epoch": 8.0,
"eval_accuracy": 0.6534653306007385,
"eval_loss": 1.5413367748260498,
"eval_runtime": 11.4039,
"eval_samples_per_second": 17.713,
"eval_steps_per_second": 8.857,
"step": 1400
},
{
"epoch": 8.06,
"learning_rate": 5.9828571428571437e-05,
"loss": 0.1924,
"step": 1410
},
{
"epoch": 8.11,
"learning_rate": 5.9542857142857146e-05,
"loss": 0.2838,
"step": 1420
},
{
"epoch": 8.17,
"learning_rate": 5.9257142857142855e-05,
"loss": 0.1783,
"step": 1430
},
{
"epoch": 8.23,
"learning_rate": 5.897142857142858e-05,
"loss": 0.2159,
"step": 1440
},
{
"epoch": 8.28,
"learning_rate": 5.868571428571429e-05,
"loss": 0.3815,
"step": 1450
},
{
"epoch": 8.34,
"learning_rate": 5.8399999999999997e-05,
"loss": 0.3401,
"step": 1460
},
{
"epoch": 8.4,
"learning_rate": 5.811428571428572e-05,
"loss": 0.2045,
"step": 1470
},
{
"epoch": 8.46,
"learning_rate": 5.782857142857143e-05,
"loss": 0.0864,
"step": 1480
},
{
"epoch": 8.51,
"learning_rate": 5.7542857142857145e-05,
"loss": 0.2836,
"step": 1490
},
{
"epoch": 8.57,
"learning_rate": 5.725714285714287e-05,
"loss": 0.1675,
"step": 1500
},
{
"epoch": 8.63,
"learning_rate": 5.697142857142858e-05,
"loss": 0.4174,
"step": 1510
},
{
"epoch": 8.68,
"learning_rate": 5.6685714285714286e-05,
"loss": 0.5875,
"step": 1520
},
{
"epoch": 8.74,
"learning_rate": 5.6399999999999995e-05,
"loss": 0.1532,
"step": 1530
},
{
"epoch": 8.8,
"learning_rate": 5.611428571428572e-05,
"loss": 0.2927,
"step": 1540
},
{
"epoch": 8.85,
"learning_rate": 5.582857142857143e-05,
"loss": 0.1527,
"step": 1550
},
{
"epoch": 8.91,
"learning_rate": 5.5542857142857143e-05,
"loss": 0.2842,
"step": 1560
},
{
"epoch": 8.97,
"learning_rate": 5.525714285714286e-05,
"loss": 0.3331,
"step": 1570
},
{
"epoch": 9.0,
"eval_accuracy": 0.6930692791938782,
"eval_loss": 1.759947657585144,
"eval_runtime": 12.7286,
"eval_samples_per_second": 15.87,
"eval_steps_per_second": 7.935,
"step": 1575
},
{
"epoch": 9.03,
"learning_rate": 5.4971428571428576e-05,
"loss": 0.3041,
"step": 1580
},
{
"epoch": 9.09,
"learning_rate": 5.4685714285714285e-05,
"loss": 0.2894,
"step": 1590
},
{
"epoch": 9.14,
"learning_rate": 5.440000000000001e-05,
"loss": 0.2129,
"step": 1600
},
{
"epoch": 9.2,
"learning_rate": 5.411428571428572e-05,
"loss": 0.3424,
"step": 1610
},
{
"epoch": 9.26,
"learning_rate": 5.3828571428571426e-05,
"loss": 0.0508,
"step": 1620
},
{
"epoch": 9.31,
"learning_rate": 5.354285714285715e-05,
"loss": 0.3036,
"step": 1630
},
{
"epoch": 9.37,
"learning_rate": 5.325714285714286e-05,
"loss": 0.4638,
"step": 1640
},
{
"epoch": 9.43,
"learning_rate": 5.2971428571428574e-05,
"loss": 0.3329,
"step": 1650
},
{
"epoch": 9.48,
"learning_rate": 5.2685714285714284e-05,
"loss": 0.0781,
"step": 1660
},
{
"epoch": 9.54,
"learning_rate": 5.2400000000000007e-05,
"loss": 0.1371,
"step": 1670
},
{
"epoch": 9.6,
"learning_rate": 5.2114285714285716e-05,
"loss": 0.244,
"step": 1680
},
{
"epoch": 9.66,
"learning_rate": 5.1828571428571425e-05,
"loss": 0.4502,
"step": 1690
},
{
"epoch": 9.71,
"learning_rate": 5.154285714285715e-05,
"loss": 0.4222,
"step": 1700
},
{
"epoch": 9.77,
"learning_rate": 5.125714285714286e-05,
"loss": 0.4389,
"step": 1710
},
{
"epoch": 9.83,
"learning_rate": 5.097142857142857e-05,
"loss": 0.3595,
"step": 1720
},
{
"epoch": 9.88,
"learning_rate": 5.068571428571429e-05,
"loss": 0.2946,
"step": 1730
},
{
"epoch": 9.94,
"learning_rate": 5.0400000000000005e-05,
"loss": 0.3272,
"step": 1740
},
{
"epoch": 10.0,
"learning_rate": 5.0114285714285715e-05,
"loss": 0.3357,
"step": 1750
},
{
"epoch": 10.0,
"eval_accuracy": 0.7475247383117676,
"eval_loss": 1.4022135734558105,
"eval_runtime": 12.0818,
"eval_samples_per_second": 16.719,
"eval_steps_per_second": 8.36,
"step": 1750
},
{
"epoch": 10.06,
"learning_rate": 4.982857142857143e-05,
"loss": 0.1572,
"step": 1760
},
{
"epoch": 10.11,
"learning_rate": 4.954285714285715e-05,
"loss": 0.129,
"step": 1770
},
{
"epoch": 10.17,
"learning_rate": 4.9257142857142856e-05,
"loss": 0.0891,
"step": 1780
},
{
"epoch": 10.23,
"learning_rate": 4.897142857142857e-05,
"loss": 0.27,
"step": 1790
},
{
"epoch": 10.28,
"learning_rate": 4.868571428571429e-05,
"loss": 0.1743,
"step": 1800
},
{
"epoch": 10.34,
"learning_rate": 4.8400000000000004e-05,
"loss": 0.2713,
"step": 1810
},
{
"epoch": 10.4,
"learning_rate": 4.811428571428572e-05,
"loss": 0.0383,
"step": 1820
},
{
"epoch": 10.46,
"learning_rate": 4.782857142857143e-05,
"loss": 0.0301,
"step": 1830
},
{
"epoch": 10.51,
"learning_rate": 4.7542857142857146e-05,
"loss": 0.0353,
"step": 1840
},
{
"epoch": 10.57,
"learning_rate": 4.725714285714286e-05,
"loss": 0.238,
"step": 1850
},
{
"epoch": 10.63,
"learning_rate": 4.697142857142857e-05,
"loss": 0.2494,
"step": 1860
},
{
"epoch": 10.68,
"learning_rate": 4.668571428571429e-05,
"loss": 0.2638,
"step": 1870
},
{
"epoch": 10.74,
"learning_rate": 4.64e-05,
"loss": 0.2013,
"step": 1880
},
{
"epoch": 10.8,
"learning_rate": 4.611428571428571e-05,
"loss": 0.0893,
"step": 1890
},
{
"epoch": 10.85,
"learning_rate": 4.5828571428571435e-05,
"loss": 0.4689,
"step": 1900
},
{
"epoch": 10.91,
"learning_rate": 4.5542857142857144e-05,
"loss": 0.2516,
"step": 1910
},
{
"epoch": 10.97,
"learning_rate": 4.525714285714286e-05,
"loss": 0.2441,
"step": 1920
},
{
"epoch": 11.0,
"eval_accuracy": 0.7425742745399475,
"eval_loss": 1.6350345611572266,
"eval_runtime": 11.7948,
"eval_samples_per_second": 17.126,
"eval_steps_per_second": 8.563,
"step": 1925
},
{
"epoch": 11.03,
"learning_rate": 4.4971428571428576e-05,
"loss": 0.0519,
"step": 1930
},
{
"epoch": 11.09,
"learning_rate": 4.4685714285714286e-05,
"loss": 0.1633,
"step": 1940
},
{
"epoch": 11.14,
"learning_rate": 4.44e-05,
"loss": 0.1561,
"step": 1950
},
{
"epoch": 11.2,
"learning_rate": 4.411428571428572e-05,
"loss": 0.0328,
"step": 1960
},
{
"epoch": 11.26,
"learning_rate": 4.382857142857143e-05,
"loss": 0.1487,
"step": 1970
},
{
"epoch": 11.31,
"learning_rate": 4.354285714285714e-05,
"loss": 0.05,
"step": 1980
},
{
"epoch": 11.37,
"learning_rate": 4.325714285714286e-05,
"loss": 0.2281,
"step": 1990
},
{
"epoch": 11.43,
"learning_rate": 4.2971428571428575e-05,
"loss": 0.1016,
"step": 2000
},
{
"epoch": 11.48,
"learning_rate": 4.268571428571429e-05,
"loss": 0.3914,
"step": 2010
},
{
"epoch": 11.54,
"learning_rate": 4.24e-05,
"loss": 0.5323,
"step": 2020
},
{
"epoch": 11.6,
"learning_rate": 4.211428571428572e-05,
"loss": 0.0534,
"step": 2030
},
{
"epoch": 11.66,
"learning_rate": 4.1828571428571426e-05,
"loss": 0.1185,
"step": 2040
},
{
"epoch": 11.71,
"learning_rate": 4.154285714285714e-05,
"loss": 0.104,
"step": 2050
},
{
"epoch": 11.77,
"learning_rate": 4.125714285714286e-05,
"loss": 0.2268,
"step": 2060
},
{
"epoch": 11.83,
"learning_rate": 4.0971428571428574e-05,
"loss": 0.1499,
"step": 2070
},
{
"epoch": 11.88,
"learning_rate": 4.068571428571429e-05,
"loss": 0.0944,
"step": 2080
},
{
"epoch": 11.94,
"learning_rate": 4.0400000000000006e-05,
"loss": 0.0604,
"step": 2090
},
{
"epoch": 12.0,
"learning_rate": 4.0114285714285715e-05,
"loss": 0.1318,
"step": 2100
},
{
"epoch": 12.0,
"eval_accuracy": 0.6881188154220581,
"eval_loss": 1.895858645439148,
"eval_runtime": 13.4328,
"eval_samples_per_second": 15.038,
"eval_steps_per_second": 7.519,
"step": 2100
},
{
"epoch": 12.06,
"learning_rate": 3.982857142857143e-05,
"loss": 0.035,
"step": 2110
},
{
"epoch": 12.11,
"learning_rate": 3.954285714285714e-05,
"loss": 0.1331,
"step": 2120
},
{
"epoch": 12.17,
"learning_rate": 3.925714285714286e-05,
"loss": 0.3371,
"step": 2130
},
{
"epoch": 12.23,
"learning_rate": 3.897142857142857e-05,
"loss": 0.0372,
"step": 2140
},
{
"epoch": 12.28,
"learning_rate": 3.868571428571429e-05,
"loss": 0.1479,
"step": 2150
},
{
"epoch": 12.34,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.0245,
"step": 2160
},
{
"epoch": 12.4,
"learning_rate": 3.8114285714285714e-05,
"loss": 0.1451,
"step": 2170
},
{
"epoch": 12.46,
"learning_rate": 3.782857142857143e-05,
"loss": 0.3234,
"step": 2180
},
{
"epoch": 12.51,
"learning_rate": 3.7542857142857146e-05,
"loss": 0.196,
"step": 2190
},
{
"epoch": 12.57,
"learning_rate": 3.7257142857142856e-05,
"loss": 0.1208,
"step": 2200
},
{
"epoch": 12.63,
"learning_rate": 3.697142857142857e-05,
"loss": 0.1025,
"step": 2210
},
{
"epoch": 12.68,
"learning_rate": 3.668571428571429e-05,
"loss": 0.1806,
"step": 2220
},
{
"epoch": 12.74,
"learning_rate": 3.6400000000000004e-05,
"loss": 0.1551,
"step": 2230
},
{
"epoch": 12.8,
"learning_rate": 3.611428571428572e-05,
"loss": 0.1155,
"step": 2240
},
{
"epoch": 12.85,
"learning_rate": 3.582857142857143e-05,
"loss": 0.0046,
"step": 2250
},
{
"epoch": 12.91,
"learning_rate": 3.5542857142857145e-05,
"loss": 0.0258,
"step": 2260
},
{
"epoch": 12.97,
"learning_rate": 3.525714285714286e-05,
"loss": 0.1937,
"step": 2270
},
{
"epoch": 13.0,
"eval_accuracy": 0.7029703259468079,
"eval_loss": 2.013838291168213,
"eval_runtime": 12.0463,
"eval_samples_per_second": 16.769,
"eval_steps_per_second": 8.384,
"step": 2275
},
{
"epoch": 13.03,
"learning_rate": 3.497142857142857e-05,
"loss": 0.0615,
"step": 2280
},
{
"epoch": 13.09,
"learning_rate": 3.468571428571429e-05,
"loss": 0.1725,
"step": 2290
},
{
"epoch": 13.14,
"learning_rate": 3.4399999999999996e-05,
"loss": 0.0289,
"step": 2300
},
{
"epoch": 13.2,
"learning_rate": 3.411428571428571e-05,
"loss": 0.173,
"step": 2310
},
{
"epoch": 13.26,
"learning_rate": 3.3828571428571435e-05,
"loss": 0.0992,
"step": 2320
},
{
"epoch": 13.31,
"learning_rate": 3.3542857142857144e-05,
"loss": 0.1459,
"step": 2330
},
{
"epoch": 13.37,
"learning_rate": 3.325714285714286e-05,
"loss": 0.0768,
"step": 2340
},
{
"epoch": 13.43,
"learning_rate": 3.2971428571428576e-05,
"loss": 0.0721,
"step": 2350
},
{
"epoch": 13.48,
"learning_rate": 3.2685714285714285e-05,
"loss": 0.002,
"step": 2360
},
{
"epoch": 13.54,
"learning_rate": 3.24e-05,
"loss": 0.0738,
"step": 2370
},
{
"epoch": 13.6,
"learning_rate": 3.211428571428571e-05,
"loss": 0.007,
"step": 2380
},
{
"epoch": 13.66,
"learning_rate": 3.182857142857143e-05,
"loss": 0.1269,
"step": 2390
},
{
"epoch": 13.71,
"learning_rate": 3.154285714285714e-05,
"loss": 0.1667,
"step": 2400
},
{
"epoch": 13.77,
"learning_rate": 3.125714285714286e-05,
"loss": 0.003,
"step": 2410
},
{
"epoch": 13.83,
"learning_rate": 3.0971428571428575e-05,
"loss": 0.2794,
"step": 2420
},
{
"epoch": 13.88,
"learning_rate": 3.068571428571429e-05,
"loss": 0.0959,
"step": 2430
},
{
"epoch": 13.94,
"learning_rate": 3.04e-05,
"loss": 0.1878,
"step": 2440
},
{
"epoch": 14.0,
"learning_rate": 3.0114285714285716e-05,
"loss": 0.0164,
"step": 2450
},
{
"epoch": 14.0,
"eval_accuracy": 0.7079207897186279,
"eval_loss": 2.0977747440338135,
"eval_runtime": 11.5146,
"eval_samples_per_second": 17.543,
"eval_steps_per_second": 8.771,
"step": 2450
},
{
"epoch": 14.06,
"learning_rate": 2.982857142857143e-05,
"loss": 0.0451,
"step": 2460
},
{
"epoch": 14.11,
"learning_rate": 2.9542857142857145e-05,
"loss": 0.1998,
"step": 2470
},
{
"epoch": 14.17,
"learning_rate": 2.925714285714286e-05,
"loss": 0.0231,
"step": 2480
},
{
"epoch": 14.23,
"learning_rate": 2.897142857142857e-05,
"loss": 0.0211,
"step": 2490
},
{
"epoch": 14.28,
"learning_rate": 2.8685714285714286e-05,
"loss": 0.2257,
"step": 2500
},
{
"epoch": 14.34,
"learning_rate": 2.84e-05,
"loss": 0.0013,
"step": 2510
},
{
"epoch": 14.4,
"learning_rate": 2.8114285714285715e-05,
"loss": 0.2982,
"step": 2520
},
{
"epoch": 14.46,
"learning_rate": 2.782857142857143e-05,
"loss": 0.2192,
"step": 2530
},
{
"epoch": 14.51,
"learning_rate": 2.7542857142857144e-05,
"loss": 0.18,
"step": 2540
},
{
"epoch": 14.57,
"learning_rate": 2.725714285714286e-05,
"loss": 0.0076,
"step": 2550
},
{
"epoch": 14.63,
"learning_rate": 2.6971428571428576e-05,
"loss": 0.0029,
"step": 2560
},
{
"epoch": 14.68,
"learning_rate": 2.6685714285714285e-05,
"loss": 0.0309,
"step": 2570
},
{
"epoch": 14.74,
"learning_rate": 2.64e-05,
"loss": 0.079,
"step": 2580
},
{
"epoch": 14.8,
"learning_rate": 2.6114285714285714e-05,
"loss": 0.0142,
"step": 2590
},
{
"epoch": 14.85,
"learning_rate": 2.582857142857143e-05,
"loss": 0.0451,
"step": 2600
},
{
"epoch": 14.91,
"learning_rate": 2.5542857142857146e-05,
"loss": 0.0036,
"step": 2610
},
{
"epoch": 14.97,
"learning_rate": 2.5257142857142855e-05,
"loss": 0.1794,
"step": 2620
},
{
"epoch": 15.0,
"eval_accuracy": 0.7178217768669128,
"eval_loss": 1.9837726354599,
"eval_runtime": 11.491,
"eval_samples_per_second": 17.579,
"eval_steps_per_second": 8.789,
"step": 2625
},
{
"epoch": 15.03,
"learning_rate": 2.4971428571428575e-05,
"loss": 0.1493,
"step": 2630
},
{
"epoch": 15.09,
"learning_rate": 2.4685714285714288e-05,
"loss": 0.115,
"step": 2640
},
{
"epoch": 15.14,
"learning_rate": 2.44e-05,
"loss": 0.005,
"step": 2650
},
{
"epoch": 15.2,
"learning_rate": 2.4114285714285713e-05,
"loss": 0.1421,
"step": 2660
},
{
"epoch": 15.26,
"learning_rate": 2.3828571428571432e-05,
"loss": 0.0637,
"step": 2670
},
{
"epoch": 15.31,
"learning_rate": 2.3542857142857145e-05,
"loss": 0.0165,
"step": 2680
},
{
"epoch": 15.37,
"learning_rate": 2.3257142857142858e-05,
"loss": 0.0551,
"step": 2690
},
{
"epoch": 15.43,
"learning_rate": 2.297142857142857e-05,
"loss": 0.0804,
"step": 2700
},
{
"epoch": 15.48,
"learning_rate": 2.2685714285714286e-05,
"loss": 0.1237,
"step": 2710
},
{
"epoch": 15.54,
"learning_rate": 2.2400000000000002e-05,
"loss": 0.154,
"step": 2720
},
{
"epoch": 15.6,
"learning_rate": 2.2114285714285715e-05,
"loss": 0.0109,
"step": 2730
},
{
"epoch": 15.66,
"learning_rate": 2.1828571428571428e-05,
"loss": 0.0115,
"step": 2740
},
{
"epoch": 15.71,
"learning_rate": 2.1542857142857144e-05,
"loss": 0.1456,
"step": 2750
},
{
"epoch": 15.77,
"learning_rate": 2.125714285714286e-05,
"loss": 0.0106,
"step": 2760
},
{
"epoch": 15.83,
"learning_rate": 2.0971428571428572e-05,
"loss": 0.0021,
"step": 2770
},
{
"epoch": 15.88,
"learning_rate": 2.0685714285714285e-05,
"loss": 0.0102,
"step": 2780
},
{
"epoch": 15.94,
"learning_rate": 2.04e-05,
"loss": 0.2036,
"step": 2790
},
{
"epoch": 16.0,
"learning_rate": 2.0114285714285717e-05,
"loss": 0.0257,
"step": 2800
},
{
"epoch": 16.0,
"eval_accuracy": 0.7178217768669128,
"eval_loss": 1.9555984735488892,
"eval_runtime": 13.1046,
"eval_samples_per_second": 15.414,
"eval_steps_per_second": 7.707,
"step": 2800
},
{
"epoch": 16.06,
"learning_rate": 1.982857142857143e-05,
"loss": 0.0406,
"step": 2810
},
{
"epoch": 16.11,
"learning_rate": 1.9542857142857143e-05,
"loss": 0.2677,
"step": 2820
},
{
"epoch": 16.17,
"learning_rate": 1.9257142857142855e-05,
"loss": 0.3214,
"step": 2830
},
{
"epoch": 16.23,
"learning_rate": 1.8971428571428575e-05,
"loss": 0.035,
"step": 2840
},
{
"epoch": 16.28,
"learning_rate": 1.8685714285714287e-05,
"loss": 0.0105,
"step": 2850
},
{
"epoch": 16.34,
"learning_rate": 1.84e-05,
"loss": 0.0205,
"step": 2860
},
{
"epoch": 16.4,
"learning_rate": 1.8114285714285713e-05,
"loss": 0.0006,
"step": 2870
},
{
"epoch": 16.46,
"learning_rate": 1.7828571428571432e-05,
"loss": 0.2564,
"step": 2880
},
{
"epoch": 16.51,
"learning_rate": 1.7542857142857145e-05,
"loss": 0.0396,
"step": 2890
},
{
"epoch": 16.57,
"learning_rate": 1.7257142857142857e-05,
"loss": 0.0237,
"step": 2900
},
{
"epoch": 16.63,
"learning_rate": 1.697142857142857e-05,
"loss": 0.0028,
"step": 2910
},
{
"epoch": 16.68,
"learning_rate": 1.6685714285714286e-05,
"loss": 0.2431,
"step": 2920
},
{
"epoch": 16.74,
"learning_rate": 1.6400000000000002e-05,
"loss": 0.0013,
"step": 2930
},
{
"epoch": 16.8,
"learning_rate": 1.6114285714285715e-05,
"loss": 0.362,
"step": 2940
},
{
"epoch": 16.85,
"learning_rate": 1.5828571428571428e-05,
"loss": 0.031,
"step": 2950
},
{
"epoch": 16.91,
"learning_rate": 1.5542857142857144e-05,
"loss": 0.1515,
"step": 2960
},
{
"epoch": 16.97,
"learning_rate": 1.5257142857142858e-05,
"loss": 0.1409,
"step": 2970
},
{
"epoch": 17.0,
"eval_accuracy": 0.6930692791938782,
"eval_loss": 2.0634045600891113,
"eval_runtime": 11.0027,
"eval_samples_per_second": 18.359,
"eval_steps_per_second": 9.18,
"step": 2975
},
{
"epoch": 17.03,
"learning_rate": 1.4971428571428572e-05,
"loss": 0.0622,
"step": 2980
},
{
"epoch": 17.09,
"learning_rate": 1.4685714285714287e-05,
"loss": 0.0295,
"step": 2990
},
{
"epoch": 17.14,
"learning_rate": 1.44e-05,
"loss": 0.045,
"step": 3000
},
{
"epoch": 17.2,
"learning_rate": 1.4114285714285715e-05,
"loss": 0.0384,
"step": 3010
},
{
"epoch": 17.26,
"learning_rate": 1.382857142857143e-05,
"loss": 0.0035,
"step": 3020
},
{
"epoch": 17.31,
"learning_rate": 1.3542857142857142e-05,
"loss": 0.0014,
"step": 3030
},
{
"epoch": 17.37,
"learning_rate": 1.3257142857142857e-05,
"loss": 0.1624,
"step": 3040
},
{
"epoch": 17.43,
"learning_rate": 1.2971428571428573e-05,
"loss": 0.0309,
"step": 3050
},
{
"epoch": 17.48,
"learning_rate": 1.2685714285714287e-05,
"loss": 0.3965,
"step": 3060
},
{
"epoch": 17.54,
"learning_rate": 1.24e-05,
"loss": 0.2438,
"step": 3070
},
{
"epoch": 17.6,
"learning_rate": 1.2114285714285716e-05,
"loss": 0.0551,
"step": 3080
},
{
"epoch": 17.66,
"learning_rate": 1.1828571428571429e-05,
"loss": 0.0676,
"step": 3090
},
{
"epoch": 17.71,
"learning_rate": 1.1542857142857143e-05,
"loss": 0.0025,
"step": 3100
},
{
"epoch": 17.77,
"learning_rate": 1.1257142857142857e-05,
"loss": 0.0031,
"step": 3110
},
{
"epoch": 17.83,
"learning_rate": 1.0971428571428572e-05,
"loss": 0.0166,
"step": 3120
},
{
"epoch": 17.88,
"learning_rate": 1.0685714285714286e-05,
"loss": 0.2534,
"step": 3130
},
{
"epoch": 17.94,
"learning_rate": 1.04e-05,
"loss": 0.002,
"step": 3140
},
{
"epoch": 18.0,
"learning_rate": 1.0114285714285715e-05,
"loss": 0.0123,
"step": 3150
},
{
"epoch": 18.0,
"eval_accuracy": 0.698019802570343,
"eval_loss": 2.1222872734069824,
"eval_runtime": 11.8575,
"eval_samples_per_second": 17.036,
"eval_steps_per_second": 8.518,
"step": 3150
},
{
"epoch": 18.06,
"learning_rate": 9.828571428571429e-06,
"loss": 0.1595,
"step": 3160
},
{
"epoch": 18.11,
"learning_rate": 9.542857142857143e-06,
"loss": 0.0442,
"step": 3170
},
{
"epoch": 18.17,
"learning_rate": 9.257142857142858e-06,
"loss": 0.2398,
"step": 3180
},
{
"epoch": 18.23,
"learning_rate": 8.971428571428572e-06,
"loss": 0.0063,
"step": 3190
},
{
"epoch": 18.28,
"learning_rate": 8.685714285714287e-06,
"loss": 0.1119,
"step": 3200
},
{
"epoch": 18.34,
"learning_rate": 8.400000000000001e-06,
"loss": 0.0093,
"step": 3210
},
{
"epoch": 18.4,
"learning_rate": 8.114285714285715e-06,
"loss": 0.0219,
"step": 3220
},
{
"epoch": 18.46,
"learning_rate": 7.82857142857143e-06,
"loss": 0.0055,
"step": 3230
},
{
"epoch": 18.51,
"learning_rate": 7.542857142857143e-06,
"loss": 0.0329,
"step": 3240
},
{
"epoch": 18.57,
"learning_rate": 7.257142857142857e-06,
"loss": 0.0038,
"step": 3250
},
{
"epoch": 18.63,
"learning_rate": 6.971428571428572e-06,
"loss": 0.0437,
"step": 3260
},
{
"epoch": 18.68,
"learning_rate": 6.685714285714285e-06,
"loss": 0.0081,
"step": 3270
},
{
"epoch": 18.74,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0346,
"step": 3280
},
{
"epoch": 18.8,
"learning_rate": 6.114285714285715e-06,
"loss": 0.0025,
"step": 3290
},
{
"epoch": 18.85,
"learning_rate": 5.828571428571429e-06,
"loss": 0.2022,
"step": 3300
},
{
"epoch": 18.91,
"learning_rate": 5.542857142857144e-06,
"loss": 0.0005,
"step": 3310
},
{
"epoch": 18.97,
"learning_rate": 5.257142857142858e-06,
"loss": 0.0476,
"step": 3320
},
{
"epoch": 19.0,
"eval_accuracy": 0.7277227640151978,
"eval_loss": 1.9925730228424072,
"eval_runtime": 11.4361,
"eval_samples_per_second": 17.663,
"eval_steps_per_second": 8.832,
"step": 3325
},
{
"epoch": 19.03,
"learning_rate": 4.9714285714285715e-06,
"loss": 0.0063,
"step": 3330
},
{
"epoch": 19.09,
"learning_rate": 4.685714285714286e-06,
"loss": 0.0022,
"step": 3340
},
{
"epoch": 19.14,
"learning_rate": 4.4e-06,
"loss": 0.0207,
"step": 3350
},
{
"epoch": 19.2,
"learning_rate": 4.114285714285715e-06,
"loss": 0.1322,
"step": 3360
},
{
"epoch": 19.26,
"learning_rate": 3.828571428571429e-06,
"loss": 0.3377,
"step": 3370
},
{
"epoch": 19.31,
"learning_rate": 3.542857142857143e-06,
"loss": 0.0005,
"step": 3380
},
{
"epoch": 19.37,
"learning_rate": 3.2571428571428572e-06,
"loss": 0.001,
"step": 3390
},
{
"epoch": 19.43,
"learning_rate": 2.9714285714285716e-06,
"loss": 0.0004,
"step": 3400
},
{
"epoch": 19.48,
"learning_rate": 2.685714285714286e-06,
"loss": 0.1235,
"step": 3410
},
{
"epoch": 19.54,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.006,
"step": 3420
},
{
"epoch": 19.6,
"learning_rate": 2.1142857142857147e-06,
"loss": 0.0027,
"step": 3430
},
{
"epoch": 19.66,
"learning_rate": 1.8285714285714288e-06,
"loss": 0.001,
"step": 3440
},
{
"epoch": 19.71,
"learning_rate": 1.542857142857143e-06,
"loss": 0.0032,
"step": 3450
},
{
"epoch": 19.77,
"learning_rate": 1.2571428571428573e-06,
"loss": 0.0011,
"step": 3460
},
{
"epoch": 19.83,
"learning_rate": 9.714285714285715e-07,
"loss": 0.0029,
"step": 3470
},
{
"epoch": 19.88,
"learning_rate": 6.857142857142857e-07,
"loss": 0.0141,
"step": 3480
},
{
"epoch": 19.94,
"learning_rate": 4.0000000000000003e-07,
"loss": 0.0007,
"step": 3490
},
{
"epoch": 20.0,
"learning_rate": 1.142857142857143e-07,
"loss": 0.0006,
"step": 3500
},
{
"epoch": 20.0,
"eval_accuracy": 0.7277227640151978,
"eval_loss": 1.9778043031692505,
"eval_runtime": 12.1861,
"eval_samples_per_second": 16.576,
"eval_steps_per_second": 8.288,
"step": 3500
}
],
"max_steps": 3500,
"num_train_epochs": 20,
"total_flos": 2.418962508100631e+18,
"trial_name": null,
"trial_params": null
}