|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.997150997150996, |
|
"global_step": 3500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.971428571428571e-05, |
|
"loss": 1.6461, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.942857142857144e-05, |
|
"loss": 1.5964, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.914285714285715e-05, |
|
"loss": 1.52, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.885714285714286e-05, |
|
"loss": 1.5243, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.857142857142858e-05, |
|
"loss": 1.4323, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.831428571428572e-05, |
|
"loss": 1.6594, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.802857142857143e-05, |
|
"loss": 1.472, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.774285714285715e-05, |
|
"loss": 1.4544, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.745714285714286e-05, |
|
"loss": 1.4888, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.72e-05, |
|
"loss": 1.3526, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.691428571428573e-05, |
|
"loss": 1.3166, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.662857142857144e-05, |
|
"loss": 1.283, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.634285714285715e-05, |
|
"loss": 1.3889, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.605714285714286e-05, |
|
"loss": 1.2688, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.577142857142858e-05, |
|
"loss": 1.047, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.548571428571429e-05, |
|
"loss": 1.3696, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.522857142857143e-05, |
|
"loss": 1.3446, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4554455578327179, |
|
"eval_loss": 1.536527156829834, |
|
"eval_runtime": 11.3063, |
|
"eval_samples_per_second": 17.866, |
|
"eval_steps_per_second": 8.933, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.494285714285714e-05, |
|
"loss": 0.8856, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.465714285714286e-05, |
|
"loss": 1.2261, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.437142857142857e-05, |
|
"loss": 1.1836, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.40857142857143e-05, |
|
"loss": 1.3243, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 9.38e-05, |
|
"loss": 0.747, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 9.351428571428573e-05, |
|
"loss": 0.9695, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.322857142857144e-05, |
|
"loss": 1.0319, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.294285714285714e-05, |
|
"loss": 1.0031, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.265714285714287e-05, |
|
"loss": 1.1495, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.237142857142858e-05, |
|
"loss": 0.9622, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.208571428571429e-05, |
|
"loss": 0.9744, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.180000000000001e-05, |
|
"loss": 1.1524, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.151428571428572e-05, |
|
"loss": 0.9051, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 9.122857142857143e-05, |
|
"loss": 1.1604, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.094285714285715e-05, |
|
"loss": 1.2363, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.065714285714286e-05, |
|
"loss": 1.139, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.037142857142857e-05, |
|
"loss": 1.1337, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.008571428571429e-05, |
|
"loss": 0.9654, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6237623691558838, |
|
"eval_loss": 1.0576841831207275, |
|
"eval_runtime": 11.4424, |
|
"eval_samples_per_second": 17.654, |
|
"eval_steps_per_second": 8.827, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 8.98e-05, |
|
"loss": 1.0752, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.951428571428572e-05, |
|
"loss": 0.8605, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.922857142857143e-05, |
|
"loss": 0.8216, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.894285714285716e-05, |
|
"loss": 1.2676, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.865714285714287e-05, |
|
"loss": 0.9299, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 8.837142857142857e-05, |
|
"loss": 0.9775, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 8.80857142857143e-05, |
|
"loss": 0.7707, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.78e-05, |
|
"loss": 0.8136, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.751428571428572e-05, |
|
"loss": 1.0706, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.722857142857144e-05, |
|
"loss": 0.8833, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.694285714285715e-05, |
|
"loss": 0.9111, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 8.665714285714286e-05, |
|
"loss": 0.6969, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.637142857142858e-05, |
|
"loss": 1.1425, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.608571428571429e-05, |
|
"loss": 1.0298, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.58e-05, |
|
"loss": 0.91, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.551428571428571e-05, |
|
"loss": 0.8342, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.522857142857143e-05, |
|
"loss": 0.8069, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6386138796806335, |
|
"eval_loss": 0.9259141683578491, |
|
"eval_runtime": 11.2435, |
|
"eval_samples_per_second": 17.966, |
|
"eval_steps_per_second": 8.983, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 8.494285714285714e-05, |
|
"loss": 0.8804, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 8.465714285714286e-05, |
|
"loss": 0.7882, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 8.437142857142859e-05, |
|
"loss": 0.5915, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 8.40857142857143e-05, |
|
"loss": 0.5102, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.38e-05, |
|
"loss": 0.6473, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 8.351428571428573e-05, |
|
"loss": 0.7545, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 8.322857142857144e-05, |
|
"loss": 0.5438, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 8.294285714285715e-05, |
|
"loss": 0.8545, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.265714285714287e-05, |
|
"loss": 0.563, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 8.237142857142858e-05, |
|
"loss": 0.7048, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 8.208571428571429e-05, |
|
"loss": 1.019, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.18e-05, |
|
"loss": 0.5084, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 8.151428571428572e-05, |
|
"loss": 0.7297, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 8.122857142857143e-05, |
|
"loss": 0.4933, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 8.094285714285714e-05, |
|
"loss": 0.5224, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 8.065714285714286e-05, |
|
"loss": 0.5695, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 8.037142857142857e-05, |
|
"loss": 0.4603, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.008571428571429e-05, |
|
"loss": 1.1203, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6831682920455933, |
|
"eval_loss": 1.0746746063232422, |
|
"eval_runtime": 11.8499, |
|
"eval_samples_per_second": 17.047, |
|
"eval_steps_per_second": 8.523, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 7.98e-05, |
|
"loss": 0.6816, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 7.951428571428572e-05, |
|
"loss": 0.629, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.922857142857143e-05, |
|
"loss": 0.6881, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 7.894285714285716e-05, |
|
"loss": 0.8816, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.865714285714287e-05, |
|
"loss": 0.4466, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 7.837142857142858e-05, |
|
"loss": 0.721, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 7.808571428571428e-05, |
|
"loss": 0.8953, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 7.780000000000001e-05, |
|
"loss": 0.4612, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 7.751428571428572e-05, |
|
"loss": 0.5196, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.722857142857143e-05, |
|
"loss": 0.62, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.694285714285715e-05, |
|
"loss": 0.3506, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 7.665714285714286e-05, |
|
"loss": 0.2639, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 7.637142857142857e-05, |
|
"loss": 0.9862, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 7.608571428571429e-05, |
|
"loss": 0.6958, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 7.58e-05, |
|
"loss": 0.5734, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 7.551428571428571e-05, |
|
"loss": 0.6894, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 7.522857142857143e-05, |
|
"loss": 0.3681, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7029703259468079, |
|
"eval_loss": 1.0062588453292847, |
|
"eval_runtime": 11.6332, |
|
"eval_samples_per_second": 17.364, |
|
"eval_steps_per_second": 8.682, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 7.494285714285715e-05, |
|
"loss": 0.4552, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 7.465714285714286e-05, |
|
"loss": 0.3066, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 7.437142857142857e-05, |
|
"loss": 0.691, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 7.40857142857143e-05, |
|
"loss": 0.5948, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.38e-05, |
|
"loss": 0.2739, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 7.351428571428571e-05, |
|
"loss": 0.29, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 7.322857142857144e-05, |
|
"loss": 0.4972, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 7.294285714285715e-05, |
|
"loss": 0.4721, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.265714285714286e-05, |
|
"loss": 0.346, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 7.237142857142858e-05, |
|
"loss": 0.6904, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.211428571428572e-05, |
|
"loss": 0.6566, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 7.182857142857143e-05, |
|
"loss": 0.3559, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 7.154285714285714e-05, |
|
"loss": 0.2403, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 7.125714285714286e-05, |
|
"loss": 0.3333, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 7.097142857142857e-05, |
|
"loss": 0.6078, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 7.06857142857143e-05, |
|
"loss": 0.5469, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 7.04e-05, |
|
"loss": 0.327, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.011428571428573e-05, |
|
"loss": 0.6719, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6930692791938782, |
|
"eval_loss": 1.4494578838348389, |
|
"eval_runtime": 12.9771, |
|
"eval_samples_per_second": 15.566, |
|
"eval_steps_per_second": 7.783, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 6.982857142857144e-05, |
|
"loss": 0.6276, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 6.954285714285714e-05, |
|
"loss": 0.2453, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 6.925714285714287e-05, |
|
"loss": 0.4183, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 6.897142857142858e-05, |
|
"loss": 0.5071, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 6.868571428571429e-05, |
|
"loss": 0.3372, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 6.840000000000001e-05, |
|
"loss": 0.3326, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 6.811428571428572e-05, |
|
"loss": 0.6091, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 6.782857142857143e-05, |
|
"loss": 0.387, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 6.754285714285714e-05, |
|
"loss": 0.3271, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 6.725714285714286e-05, |
|
"loss": 0.4024, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 6.697142857142857e-05, |
|
"loss": 0.5858, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 6.668571428571428e-05, |
|
"loss": 0.3328, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 6.64e-05, |
|
"loss": 0.5711, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 6.611428571428572e-05, |
|
"loss": 0.5658, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.582857142857143e-05, |
|
"loss": 0.3108, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 6.554285714285716e-05, |
|
"loss": 0.4348, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 6.525714285714287e-05, |
|
"loss": 0.646, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6930692791938782, |
|
"eval_loss": 1.4014908075332642, |
|
"eval_runtime": 11.3752, |
|
"eval_samples_per_second": 17.758, |
|
"eval_steps_per_second": 8.879, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 6.497142857142857e-05, |
|
"loss": 0.6164, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 6.46857142857143e-05, |
|
"loss": 0.1188, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 6.440000000000001e-05, |
|
"loss": 0.7997, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 6.411428571428572e-05, |
|
"loss": 0.3099, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 6.382857142857143e-05, |
|
"loss": 0.1419, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 6.354285714285715e-05, |
|
"loss": 0.3644, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 6.325714285714286e-05, |
|
"loss": 0.2829, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 6.297142857142857e-05, |
|
"loss": 0.8076, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 6.268571428571429e-05, |
|
"loss": 0.4266, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 6.24e-05, |
|
"loss": 0.3325, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.211428571428571e-05, |
|
"loss": 0.8035, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 6.182857142857143e-05, |
|
"loss": 0.1186, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 6.154285714285714e-05, |
|
"loss": 0.5125, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 6.125714285714286e-05, |
|
"loss": 0.2002, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.097142857142858e-05, |
|
"loss": 0.147, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 6.068571428571429e-05, |
|
"loss": 0.4292, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 6.04e-05, |
|
"loss": 0.7013, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.0114285714285714e-05, |
|
"loss": 0.3072, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6534653306007385, |
|
"eval_loss": 1.5413367748260498, |
|
"eval_runtime": 11.4039, |
|
"eval_samples_per_second": 17.713, |
|
"eval_steps_per_second": 8.857, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 5.9828571428571437e-05, |
|
"loss": 0.1924, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.9542857142857146e-05, |
|
"loss": 0.2838, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 5.9257142857142855e-05, |
|
"loss": 0.1783, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 5.897142857142858e-05, |
|
"loss": 0.2159, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 5.868571428571429e-05, |
|
"loss": 0.3815, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 5.8399999999999997e-05, |
|
"loss": 0.3401, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 5.811428571428572e-05, |
|
"loss": 0.2045, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 5.782857142857143e-05, |
|
"loss": 0.0864, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 5.7542857142857145e-05, |
|
"loss": 0.2836, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 5.725714285714287e-05, |
|
"loss": 0.1675, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 5.697142857142858e-05, |
|
"loss": 0.4174, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 5.6685714285714286e-05, |
|
"loss": 0.5875, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 5.6399999999999995e-05, |
|
"loss": 0.1532, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 5.611428571428572e-05, |
|
"loss": 0.2927, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 5.582857142857143e-05, |
|
"loss": 0.1527, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 5.5542857142857143e-05, |
|
"loss": 0.2842, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 5.525714285714286e-05, |
|
"loss": 0.3331, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6930692791938782, |
|
"eval_loss": 1.759947657585144, |
|
"eval_runtime": 12.7286, |
|
"eval_samples_per_second": 15.87, |
|
"eval_steps_per_second": 7.935, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 5.4971428571428576e-05, |
|
"loss": 0.3041, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 5.4685714285714285e-05, |
|
"loss": 0.2894, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 5.440000000000001e-05, |
|
"loss": 0.2129, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5.411428571428572e-05, |
|
"loss": 0.3424, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 5.3828571428571426e-05, |
|
"loss": 0.0508, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 5.354285714285715e-05, |
|
"loss": 0.3036, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 5.325714285714286e-05, |
|
"loss": 0.4638, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 5.2971428571428574e-05, |
|
"loss": 0.3329, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 5.2685714285714284e-05, |
|
"loss": 0.0781, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 5.2400000000000007e-05, |
|
"loss": 0.1371, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.2114285714285716e-05, |
|
"loss": 0.244, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 5.1828571428571425e-05, |
|
"loss": 0.4502, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 5.154285714285715e-05, |
|
"loss": 0.4222, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 5.125714285714286e-05, |
|
"loss": 0.4389, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 5.097142857142857e-05, |
|
"loss": 0.3595, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 5.068571428571429e-05, |
|
"loss": 0.2946, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 5.0400000000000005e-05, |
|
"loss": 0.3272, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.0114285714285715e-05, |
|
"loss": 0.3357, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7475247383117676, |
|
"eval_loss": 1.4022135734558105, |
|
"eval_runtime": 12.0818, |
|
"eval_samples_per_second": 16.719, |
|
"eval_steps_per_second": 8.36, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 4.982857142857143e-05, |
|
"loss": 0.1572, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 4.954285714285715e-05, |
|
"loss": 0.129, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 4.9257142857142856e-05, |
|
"loss": 0.0891, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 4.897142857142857e-05, |
|
"loss": 0.27, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 4.868571428571429e-05, |
|
"loss": 0.1743, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 4.8400000000000004e-05, |
|
"loss": 0.2713, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.811428571428572e-05, |
|
"loss": 0.0383, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 4.782857142857143e-05, |
|
"loss": 0.0301, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 4.7542857142857146e-05, |
|
"loss": 0.0353, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 4.725714285714286e-05, |
|
"loss": 0.238, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 4.697142857142857e-05, |
|
"loss": 0.2494, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 4.668571428571429e-05, |
|
"loss": 0.2638, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 4.64e-05, |
|
"loss": 0.2013, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 4.611428571428571e-05, |
|
"loss": 0.0893, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 4.5828571428571435e-05, |
|
"loss": 0.4689, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 4.5542857142857144e-05, |
|
"loss": 0.2516, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 4.525714285714286e-05, |
|
"loss": 0.2441, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7425742745399475, |
|
"eval_loss": 1.6350345611572266, |
|
"eval_runtime": 11.7948, |
|
"eval_samples_per_second": 17.126, |
|
"eval_steps_per_second": 8.563, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 4.4971428571428576e-05, |
|
"loss": 0.0519, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 4.4685714285714286e-05, |
|
"loss": 0.1633, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 4.44e-05, |
|
"loss": 0.1561, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.411428571428572e-05, |
|
"loss": 0.0328, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 4.382857142857143e-05, |
|
"loss": 0.1487, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 4.354285714285714e-05, |
|
"loss": 0.05, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 4.325714285714286e-05, |
|
"loss": 0.2281, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 4.2971428571428575e-05, |
|
"loss": 0.1016, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.268571428571429e-05, |
|
"loss": 0.3914, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 4.24e-05, |
|
"loss": 0.5323, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.211428571428572e-05, |
|
"loss": 0.0534, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 4.1828571428571426e-05, |
|
"loss": 0.1185, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 4.154285714285714e-05, |
|
"loss": 0.104, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 4.125714285714286e-05, |
|
"loss": 0.2268, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 4.0971428571428574e-05, |
|
"loss": 0.1499, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 4.068571428571429e-05, |
|
"loss": 0.0944, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 0.0604, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.0114285714285715e-05, |
|
"loss": 0.1318, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6881188154220581, |
|
"eval_loss": 1.895858645439148, |
|
"eval_runtime": 13.4328, |
|
"eval_samples_per_second": 15.038, |
|
"eval_steps_per_second": 7.519, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 3.982857142857143e-05, |
|
"loss": 0.035, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 3.954285714285714e-05, |
|
"loss": 0.1331, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 3.925714285714286e-05, |
|
"loss": 0.3371, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 3.897142857142857e-05, |
|
"loss": 0.0372, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 3.868571428571429e-05, |
|
"loss": 0.1479, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.0245, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 3.8114285714285714e-05, |
|
"loss": 0.1451, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.782857142857143e-05, |
|
"loss": 0.3234, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 3.7542857142857146e-05, |
|
"loss": 0.196, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 3.7257142857142856e-05, |
|
"loss": 0.1208, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 3.697142857142857e-05, |
|
"loss": 0.1025, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 3.668571428571429e-05, |
|
"loss": 0.1806, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 3.6400000000000004e-05, |
|
"loss": 0.1551, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.611428571428572e-05, |
|
"loss": 0.1155, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 3.582857142857143e-05, |
|
"loss": 0.0046, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 3.5542857142857145e-05, |
|
"loss": 0.0258, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 3.525714285714286e-05, |
|
"loss": 0.1937, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7029703259468079, |
|
"eval_loss": 2.013838291168213, |
|
"eval_runtime": 12.0463, |
|
"eval_samples_per_second": 16.769, |
|
"eval_steps_per_second": 8.384, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 3.497142857142857e-05, |
|
"loss": 0.0615, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 3.468571428571429e-05, |
|
"loss": 0.1725, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 3.4399999999999996e-05, |
|
"loss": 0.0289, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.411428571428571e-05, |
|
"loss": 0.173, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 3.3828571428571435e-05, |
|
"loss": 0.0992, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 3.3542857142857144e-05, |
|
"loss": 0.1459, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 3.325714285714286e-05, |
|
"loss": 0.0768, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 3.2971428571428576e-05, |
|
"loss": 0.0721, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 3.2685714285714285e-05, |
|
"loss": 0.002, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 3.24e-05, |
|
"loss": 0.0738, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 3.211428571428571e-05, |
|
"loss": 0.007, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 3.182857142857143e-05, |
|
"loss": 0.1269, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 3.154285714285714e-05, |
|
"loss": 0.1667, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 3.125714285714286e-05, |
|
"loss": 0.003, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 3.0971428571428575e-05, |
|
"loss": 0.2794, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 3.068571428571429e-05, |
|
"loss": 0.0959, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 3.04e-05, |
|
"loss": 0.1878, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.0114285714285716e-05, |
|
"loss": 0.0164, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7079207897186279, |
|
"eval_loss": 2.0977747440338135, |
|
"eval_runtime": 11.5146, |
|
"eval_samples_per_second": 17.543, |
|
"eval_steps_per_second": 8.771, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 2.982857142857143e-05, |
|
"loss": 0.0451, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 2.9542857142857145e-05, |
|
"loss": 0.1998, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 2.925714285714286e-05, |
|
"loss": 0.0231, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 2.897142857142857e-05, |
|
"loss": 0.0211, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 2.8685714285714286e-05, |
|
"loss": 0.2257, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 2.84e-05, |
|
"loss": 0.0013, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 2.8114285714285715e-05, |
|
"loss": 0.2982, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 2.782857142857143e-05, |
|
"loss": 0.2192, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 2.7542857142857144e-05, |
|
"loss": 0.18, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 2.725714285714286e-05, |
|
"loss": 0.0076, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 2.6971428571428576e-05, |
|
"loss": 0.0029, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 2.6685714285714285e-05, |
|
"loss": 0.0309, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 2.64e-05, |
|
"loss": 0.079, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 2.6114285714285714e-05, |
|
"loss": 0.0142, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 2.582857142857143e-05, |
|
"loss": 0.0451, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 2.5542857142857146e-05, |
|
"loss": 0.0036, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.5257142857142855e-05, |
|
"loss": 0.1794, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7178217768669128, |
|
"eval_loss": 1.9837726354599, |
|
"eval_runtime": 11.491, |
|
"eval_samples_per_second": 17.579, |
|
"eval_steps_per_second": 8.789, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 2.4971428571428575e-05, |
|
"loss": 0.1493, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 2.4685714285714288e-05, |
|
"loss": 0.115, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 2.44e-05, |
|
"loss": 0.005, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 2.4114285714285713e-05, |
|
"loss": 0.1421, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 2.3828571428571432e-05, |
|
"loss": 0.0637, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 2.3542857142857145e-05, |
|
"loss": 0.0165, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 2.3257142857142858e-05, |
|
"loss": 0.0551, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 2.297142857142857e-05, |
|
"loss": 0.0804, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 2.2685714285714286e-05, |
|
"loss": 0.1237, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 2.2400000000000002e-05, |
|
"loss": 0.154, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 2.2114285714285715e-05, |
|
"loss": 0.0109, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 2.1828571428571428e-05, |
|
"loss": 0.0115, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 2.1542857142857144e-05, |
|
"loss": 0.1456, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 2.125714285714286e-05, |
|
"loss": 0.0106, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 2.0971428571428572e-05, |
|
"loss": 0.0021, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 2.0685714285714285e-05, |
|
"loss": 0.0102, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 2.04e-05, |
|
"loss": 0.2036, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.0114285714285717e-05, |
|
"loss": 0.0257, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7178217768669128, |
|
"eval_loss": 1.9555984735488892, |
|
"eval_runtime": 13.1046, |
|
"eval_samples_per_second": 15.414, |
|
"eval_steps_per_second": 7.707, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 1.982857142857143e-05, |
|
"loss": 0.0406, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 1.9542857142857143e-05, |
|
"loss": 0.2677, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 1.9257142857142855e-05, |
|
"loss": 0.3214, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"learning_rate": 1.8971428571428575e-05, |
|
"loss": 0.035, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 1.8685714285714287e-05, |
|
"loss": 0.0105, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 1.84e-05, |
|
"loss": 0.0205, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 1.8114285714285713e-05, |
|
"loss": 0.0006, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 16.46, |
|
"learning_rate": 1.7828571428571432e-05, |
|
"loss": 0.2564, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 1.7542857142857145e-05, |
|
"loss": 0.0396, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 1.7257142857142857e-05, |
|
"loss": 0.0237, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 1.697142857142857e-05, |
|
"loss": 0.0028, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 16.68, |
|
"learning_rate": 1.6685714285714286e-05, |
|
"loss": 0.2431, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 1.6400000000000002e-05, |
|
"loss": 0.0013, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1.6114285714285715e-05, |
|
"loss": 0.362, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 1.5828571428571428e-05, |
|
"loss": 0.031, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 1.5542857142857144e-05, |
|
"loss": 0.1515, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 1.5257142857142858e-05, |
|
"loss": 0.1409, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6930692791938782, |
|
"eval_loss": 2.0634045600891113, |
|
"eval_runtime": 11.0027, |
|
"eval_samples_per_second": 18.359, |
|
"eval_steps_per_second": 9.18, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 1.4971428571428572e-05, |
|
"loss": 0.0622, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 1.4685714285714287e-05, |
|
"loss": 0.0295, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 1.44e-05, |
|
"loss": 0.045, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 1.4114285714285715e-05, |
|
"loss": 0.0384, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 1.382857142857143e-05, |
|
"loss": 0.0035, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.3542857142857142e-05, |
|
"loss": 0.0014, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 1.3257142857142857e-05, |
|
"loss": 0.1624, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 17.43, |
|
"learning_rate": 1.2971428571428573e-05, |
|
"loss": 0.0309, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 1.2685714285714287e-05, |
|
"loss": 0.3965, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 1.24e-05, |
|
"loss": 0.2438, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 1.2114285714285716e-05, |
|
"loss": 0.0551, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 1.1828571428571429e-05, |
|
"loss": 0.0676, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 1.1542857142857143e-05, |
|
"loss": 0.0025, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 1.1257142857142857e-05, |
|
"loss": 0.0031, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 1.0971428571428572e-05, |
|
"loss": 0.0166, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"learning_rate": 1.0685714285714286e-05, |
|
"loss": 0.2534, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.002, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.0114285714285715e-05, |
|
"loss": 0.0123, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.698019802570343, |
|
"eval_loss": 2.1222872734069824, |
|
"eval_runtime": 11.8575, |
|
"eval_samples_per_second": 17.036, |
|
"eval_steps_per_second": 8.518, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 9.828571428571429e-06, |
|
"loss": 0.1595, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 9.542857142857143e-06, |
|
"loss": 0.0442, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 9.257142857142858e-06, |
|
"loss": 0.2398, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 8.971428571428572e-06, |
|
"loss": 0.0063, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 8.685714285714287e-06, |
|
"loss": 0.1119, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.0093, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 8.114285714285715e-06, |
|
"loss": 0.0219, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 7.82857142857143e-06, |
|
"loss": 0.0055, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 7.542857142857143e-06, |
|
"loss": 0.0329, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"learning_rate": 7.257142857142857e-06, |
|
"loss": 0.0038, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 6.971428571428572e-06, |
|
"loss": 0.0437, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 6.685714285714285e-06, |
|
"loss": 0.0081, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.0346, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 6.114285714285715e-06, |
|
"loss": 0.0025, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 5.828571428571429e-06, |
|
"loss": 0.2022, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 5.542857142857144e-06, |
|
"loss": 0.0005, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 5.257142857142858e-06, |
|
"loss": 0.0476, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7277227640151978, |
|
"eval_loss": 1.9925730228424072, |
|
"eval_runtime": 11.4361, |
|
"eval_samples_per_second": 17.663, |
|
"eval_steps_per_second": 8.832, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 4.9714285714285715e-06, |
|
"loss": 0.0063, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"learning_rate": 4.685714285714286e-06, |
|
"loss": 0.0022, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.0207, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 4.114285714285715e-06, |
|
"loss": 0.1322, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 3.828571428571429e-06, |
|
"loss": 0.3377, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 3.542857142857143e-06, |
|
"loss": 0.0005, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"learning_rate": 3.2571428571428572e-06, |
|
"loss": 0.001, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 2.9714285714285716e-06, |
|
"loss": 0.0004, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 19.48, |
|
"learning_rate": 2.685714285714286e-06, |
|
"loss": 0.1235, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.006, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 2.1142857142857147e-06, |
|
"loss": 0.0027, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 1.8285714285714288e-06, |
|
"loss": 0.001, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 1.542857142857143e-06, |
|
"loss": 0.0032, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 19.77, |
|
"learning_rate": 1.2571428571428573e-06, |
|
"loss": 0.0011, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 9.714285714285715e-07, |
|
"loss": 0.0029, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 6.857142857142857e-07, |
|
"loss": 0.0141, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.0007, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1.142857142857143e-07, |
|
"loss": 0.0006, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7277227640151978, |
|
"eval_loss": 1.9778043031692505, |
|
"eval_runtime": 12.1861, |
|
"eval_samples_per_second": 16.576, |
|
"eval_steps_per_second": 8.288, |
|
"step": 3500 |
|
} |
|
], |
|
"max_steps": 3500, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.418962508100631e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|