{
  "best_metric": 0.9288338932978429,
  "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2/checkpoint-3200",
  "epoch": 34.78260869565217,
  "eval_steps": 100,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 1.351387858390808,
      "learning_rate": 2.5e-06,
      "loss": 1.8639,
      "step": 100
    },
    {
      "epoch": 0.8695652173913043,
      "eval_accuracy": 0.40038053818972547,
      "eval_f1_score": 0.08351588662990973,
      "eval_loss": 1.7088394165039062,
      "eval_precision": 0.17952180872348938,
      "eval_recall": 0.1438423645320197,
      "eval_runtime": 6.7556,
      "eval_samples_per_second": 544.589,
      "eval_steps_per_second": 8.586,
      "step": 100
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 5.332070827484131,
      "learning_rate": 5e-06,
      "loss": 1.5668,
      "step": 200
    },
    {
      "epoch": 1.7391304347826086,
      "eval_accuracy": 0.580048926338679,
      "eval_f1_score": 0.21719867493792772,
      "eval_loss": 1.3287572860717773,
      "eval_precision": 0.26741462611831024,
      "eval_recall": 0.2574687919678381,
      "eval_runtime": 6.8289,
      "eval_samples_per_second": 538.739,
      "eval_steps_per_second": 8.493,
      "step": 200
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 4.961886882781982,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.2197,
      "step": 300
    },
    {
      "epoch": 2.608695652173913,
      "eval_accuracy": 0.7667844522968198,
      "eval_f1_score": 0.5365670142781532,
      "eval_loss": 0.974587619304657,
      "eval_precision": 0.581964201034555,
      "eval_recall": 0.5148284343114082,
      "eval_runtime": 6.7013,
      "eval_samples_per_second": 548.998,
      "eval_steps_per_second": 8.655,
      "step": 300
    },
    {
      "epoch": 3.4782608695652173,
      "grad_norm": 4.706761837005615,
      "learning_rate": 1e-05,
      "loss": 0.9384,
      "step": 400
    },
    {
      "epoch": 3.4782608695652173,
      "eval_accuracy": 0.8390867083446589,
      "eval_f1_score": 0.6137538550736412,
      "eval_loss": 0.7674435973167419,
      "eval_precision": 0.6052609240082055,
      "eval_recall": 0.6267002705049249,
      "eval_runtime": 6.7131,
      "eval_samples_per_second": 548.036,
      "eval_steps_per_second": 8.64,
      "step": 400
    },
    {
      "epoch": 4.3478260869565215,
      "grad_norm": 4.063348293304443,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.7551,
      "step": 500
    },
    {
      "epoch": 4.3478260869565215,
      "eval_accuracy": 0.8526773579777114,
      "eval_f1_score": 0.6283738392523406,
      "eval_loss": 0.6780158877372742,
      "eval_precision": 0.6146556832242253,
      "eval_recall": 0.6453587005137035,
      "eval_runtime": 6.7179,
      "eval_samples_per_second": 547.644,
      "eval_steps_per_second": 8.634,
      "step": 500
    },
    {
      "epoch": 5.217391304347826,
      "grad_norm": 2.9883711338043213,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.6636,
      "step": 600
    },
    {
      "epoch": 5.217391304347826,
      "eval_accuracy": 0.8684425115520522,
      "eval_f1_score": 0.6832828988792853,
      "eval_loss": 0.6152337193489075,
      "eval_precision": 0.7626363529752824,
      "eval_recall": 0.678494572728881,
      "eval_runtime": 6.6923,
      "eval_samples_per_second": 549.738,
      "eval_steps_per_second": 8.667,
      "step": 600
    },
    {
      "epoch": 6.086956521739131,
      "grad_norm": 4.749546051025391,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.5767,
      "step": 700
    },
    {
      "epoch": 6.086956521739131,
      "eval_accuracy": 0.8929056808915466,
      "eval_f1_score": 0.788416346435481,
      "eval_loss": 0.5486906170845032,
      "eval_precision": 0.8967939035531012,
      "eval_recall": 0.7698370330953708,
      "eval_runtime": 6.716,
      "eval_samples_per_second": 547.799,
      "eval_steps_per_second": 8.636,
      "step": 700
    },
    {
      "epoch": 6.956521739130435,
      "grad_norm": 3.5176820755004883,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.5059,
      "step": 800
    },
    {
      "epoch": 6.956521739130435,
      "eval_accuracy": 0.8986137537374287,
      "eval_f1_score": 0.866530151951532,
      "eval_loss": 0.526166558265686,
      "eval_precision": 0.8880275046999001,
      "eval_recall": 0.8533816092377208,
      "eval_runtime": 6.7032,
      "eval_samples_per_second": 548.839,
      "eval_steps_per_second": 8.653,
      "step": 800
    },
    {
      "epoch": 7.826086956521739,
      "grad_norm": 5.969176292419434,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.4512,
      "step": 900
    },
    {
      "epoch": 7.826086956521739,
      "eval_accuracy": 0.9195433541723295,
      "eval_f1_score": 0.9002421953779064,
      "eval_loss": 0.48821330070495605,
      "eval_precision": 0.8927762999261393,
      "eval_recall": 0.9082191267746272,
      "eval_runtime": 6.6813,
      "eval_samples_per_second": 550.644,
      "eval_steps_per_second": 8.681,
      "step": 900
    },
    {
      "epoch": 8.695652173913043,
      "grad_norm": 4.83783483505249,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.4098,
      "step": 1000
    },
    {
      "epoch": 8.695652173913043,
      "eval_accuracy": 0.9211742321282957,
      "eval_f1_score": 0.9111062555604621,
      "eval_loss": 0.4828358292579651,
      "eval_precision": 0.9182989102010178,
      "eval_recall": 0.9060960014608563,
      "eval_runtime": 6.6852,
      "eval_samples_per_second": 550.323,
      "eval_steps_per_second": 8.676,
      "step": 1000
    },
    {
      "epoch": 9.565217391304348,
      "grad_norm": 4.399734973907471,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.3916,
      "step": 1100
    },
    {
      "epoch": 9.565217391304348,
      "eval_accuracy": 0.927969556944822,
      "eval_f1_score": 0.9192607581713574,
      "eval_loss": 0.46853822469711304,
      "eval_precision": 0.9254336746301863,
      "eval_recall": 0.9140372115171418,
      "eval_runtime": 6.7479,
      "eval_samples_per_second": 545.206,
      "eval_steps_per_second": 8.595,
      "step": 1100
    },
    {
      "epoch": 10.434782608695652,
      "grad_norm": 5.233398914337158,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.373,
      "step": 1200
    },
    {
      "epoch": 10.434782608695652,
      "eval_accuracy": 0.9238923620549062,
      "eval_f1_score": 0.9145336176845754,
      "eval_loss": 0.4755556881427765,
      "eval_precision": 0.9100056761034006,
      "eval_recall": 0.9210482679945721,
      "eval_runtime": 6.7342,
      "eval_samples_per_second": 546.317,
      "eval_steps_per_second": 8.613,
      "step": 1200
    },
    {
      "epoch": 11.304347826086957,
      "grad_norm": 5.472758769989014,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3592,
      "step": 1300
    },
    {
      "epoch": 11.304347826086957,
      "eval_accuracy": 0.9317749388420766,
      "eval_f1_score": 0.9229649364321404,
      "eval_loss": 0.45966240763664246,
      "eval_precision": 0.9263012712773211,
      "eval_recall": 0.9203258950418504,
      "eval_runtime": 6.7847,
      "eval_samples_per_second": 542.251,
      "eval_steps_per_second": 8.549,
      "step": 1300
    },
    {
      "epoch": 12.173913043478262,
      "grad_norm": 5.35235595703125,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.3377,
      "step": 1400
    },
    {
      "epoch": 12.173913043478262,
      "eval_accuracy": 0.9304158738787714,
      "eval_f1_score": 0.9181369027515391,
      "eval_loss": 0.4691704213619232,
      "eval_precision": 0.9174925655697546,
      "eval_recall": 0.9197889600383383,
      "eval_runtime": 6.7123,
      "eval_samples_per_second": 548.096,
      "eval_steps_per_second": 8.641,
      "step": 1400
    },
    {
      "epoch": 13.043478260869565,
      "grad_norm": 4.6181230545043945,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.3299,
      "step": 1500
    },
    {
      "epoch": 13.043478260869565,
      "eval_accuracy": 0.9328621908127208,
      "eval_f1_score": 0.9244416019330937,
      "eval_loss": 0.46716630458831787,
      "eval_precision": 0.9291821693223221,
      "eval_recall": 0.9215636094215414,
      "eval_runtime": 6.6884,
      "eval_samples_per_second": 550.057,
      "eval_steps_per_second": 8.672,
      "step": 1500
    },
    {
      "epoch": 13.91304347826087,
      "grad_norm": 8.43385124206543,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.3198,
      "step": 1600
    },
    {
      "epoch": 13.91304347826087,
      "eval_accuracy": 0.9331340038053819,
      "eval_f1_score": 0.9241282766973115,
      "eval_loss": 0.4618851840496063,
      "eval_precision": 0.926426704429738,
      "eval_recall": 0.9225031003334492,
      "eval_runtime": 6.6952,
      "eval_samples_per_second": 549.5,
      "eval_steps_per_second": 8.663,
      "step": 1600
    },
    {
      "epoch": 14.782608695652174,
      "grad_norm": 8.825912475585938,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.3121,
      "step": 1700
    },
    {
      "epoch": 14.782608695652174,
      "eval_accuracy": 0.9331340038053819,
      "eval_f1_score": 0.9243317785632609,
      "eval_loss": 0.46724241971969604,
      "eval_precision": 0.9249039681497474,
      "eval_recall": 0.9245288905066229,
      "eval_runtime": 6.8264,
      "eval_samples_per_second": 538.934,
      "eval_steps_per_second": 8.496,
      "step": 1700
    },
    {
      "epoch": 15.652173913043478,
      "grad_norm": 4.476284027099609,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.3053,
      "step": 1800
    },
    {
      "epoch": 15.652173913043478,
      "eval_accuracy": 0.9344930687686871,
      "eval_f1_score": 0.9216180971737599,
      "eval_loss": 0.46642911434173584,
      "eval_precision": 0.9166826151640262,
      "eval_recall": 0.9271868922165902,
      "eval_runtime": 6.7491,
      "eval_samples_per_second": 545.113,
      "eval_steps_per_second": 8.594,
      "step": 1800
    },
    {
      "epoch": 16.52173913043478,
      "grad_norm": 1.7301744222640991,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.3058,
      "step": 1900
    },
    {
      "epoch": 16.52173913043478,
      "eval_accuracy": 0.9331340038053819,
      "eval_f1_score": 0.9228827765631413,
      "eval_loss": 0.46549805998802185,
      "eval_precision": 0.9240164131101741,
      "eval_recall": 0.9221056672944972,
      "eval_runtime": 6.7346,
      "eval_samples_per_second": 546.286,
      "eval_steps_per_second": 8.612,
      "step": 1900
    },
    {
      "epoch": 17.391304347826086,
      "grad_norm": 7.452052116394043,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.2976,
      "step": 2000
    },
    {
      "epoch": 17.391304347826086,
      "eval_accuracy": 0.9355803207393314,
      "eval_f1_score": 0.9258585265558317,
      "eval_loss": 0.4619200825691223,
      "eval_precision": 0.9298877666967595,
      "eval_recall": 0.9220516121183885,
      "eval_runtime": 6.6864,
      "eval_samples_per_second": 550.217,
      "eval_steps_per_second": 8.674,
      "step": 2000
    },
    {
      "epoch": 18.26086956521739,
      "grad_norm": 2.030193567276001,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.2975,
      "step": 2100
    },
    {
      "epoch": 18.26086956521739,
      "eval_accuracy": 0.9342212557760261,
      "eval_f1_score": 0.9254667743916674,
      "eval_loss": 0.4662647545337677,
      "eval_precision": 0.9267335070457062,
      "eval_recall": 0.9247905559616514,
      "eval_runtime": 6.6885,
      "eval_samples_per_second": 550.05,
      "eval_steps_per_second": 8.672,
      "step": 2100
    },
    {
      "epoch": 19.130434782608695,
      "grad_norm": 5.05163049697876,
      "learning_rate": 5e-06,
      "loss": 0.2872,
      "step": 2200
    },
    {
      "epoch": 19.130434782608695,
      "eval_accuracy": 0.9344930687686871,
      "eval_f1_score": 0.923695179464076,
      "eval_loss": 0.47371503710746765,
      "eval_precision": 0.928532382805131,
      "eval_recall": 0.9194008334880648,
      "eval_runtime": 6.6854,
      "eval_samples_per_second": 550.301,
      "eval_steps_per_second": 8.676,
      "step": 2200
    },
    {
      "epoch": 20.0,
      "grad_norm": 6.272797584533691,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.2879,
      "step": 2300
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9317749388420766,
      "eval_f1_score": 0.9201329128675652,
      "eval_loss": 0.47988325357437134,
      "eval_precision": 0.9115622601790818,
      "eval_recall": 0.9295115231425782,
      "eval_runtime": 6.7353,
      "eval_samples_per_second": 546.224,
      "eval_steps_per_second": 8.611,
      "step": 2300
    },
    {
      "epoch": 20.869565217391305,
      "grad_norm": 5.919195175170898,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.2848,
      "step": 2400
    },
    {
      "epoch": 20.869565217391305,
      "eval_accuracy": 0.9325903778200598,
      "eval_f1_score": 0.9194393410487441,
      "eval_loss": 0.48427507281303406,
      "eval_precision": 0.9091962837604621,
      "eval_recall": 0.9309372715371734,
      "eval_runtime": 6.6852,
      "eval_samples_per_second": 550.322,
      "eval_steps_per_second": 8.676,
      "step": 2400
    },
    {
      "epoch": 21.73913043478261,
      "grad_norm": 3.268333673477173,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.2808,
      "step": 2500
    },
    {
      "epoch": 21.73913043478261,
      "eval_accuracy": 0.9325903778200598,
      "eval_f1_score": 0.9242720840575954,
      "eval_loss": 0.48389649391174316,
      "eval_precision": 0.925898105451882,
      "eval_recall": 0.9236990043362497,
      "eval_runtime": 6.6806,
      "eval_samples_per_second": 550.7,
      "eval_steps_per_second": 8.682,
      "step": 2500
    },
    {
      "epoch": 22.608695652173914,
      "grad_norm": 3.7583141326904297,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.2798,
      "step": 2600
    },
    {
      "epoch": 22.608695652173914,
      "eval_accuracy": 0.9342212557760261,
      "eval_f1_score": 0.9240298173904554,
      "eval_loss": 0.4839774966239929,
      "eval_precision": 0.9196521435699385,
      "eval_recall": 0.9288845586975052,
      "eval_runtime": 6.7723,
      "eval_samples_per_second": 543.245,
      "eval_steps_per_second": 8.564,
      "step": 2600
    },
    {
      "epoch": 23.47826086956522,
      "grad_norm": 5.6299052238464355,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.2797,
      "step": 2700
    },
    {
      "epoch": 23.47826086956522,
      "eval_accuracy": 0.9334058167980429,
      "eval_f1_score": 0.922297295666424,
      "eval_loss": 0.4770027697086334,
      "eval_precision": 0.9202906698861152,
      "eval_recall": 0.9245720434908796,
      "eval_runtime": 6.7079,
      "eval_samples_per_second": 548.456,
      "eval_steps_per_second": 8.646,
      "step": 2700
    },
    {
      "epoch": 24.347826086956523,
      "grad_norm": 11.368356704711914,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.2754,
      "step": 2800
    },
    {
      "epoch": 24.347826086956523,
      "eval_accuracy": 0.9317749388420766,
      "eval_f1_score": 0.9225259593995536,
      "eval_loss": 0.4862979054450989,
      "eval_precision": 0.9212011606593585,
      "eval_recall": 0.9252089494748691,
      "eval_runtime": 6.8104,
      "eval_samples_per_second": 540.2,
      "eval_steps_per_second": 8.516,
      "step": 2800
    },
    {
      "epoch": 25.217391304347824,
      "grad_norm": 0.14479239284992218,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.2752,
      "step": 2900
    },
    {
      "epoch": 25.217391304347824,
      "eval_accuracy": 0.9325903778200598,
      "eval_f1_score": 0.9242984205440742,
      "eval_loss": 0.48786690831184387,
      "eval_precision": 0.9237836539478133,
      "eval_recall": 0.9258615724988001,
      "eval_runtime": 6.7704,
      "eval_samples_per_second": 543.395,
      "eval_steps_per_second": 8.567,
      "step": 2900
    },
    {
      "epoch": 26.08695652173913,
      "grad_norm": 7.544506072998047,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.2718,
      "step": 3000
    },
    {
      "epoch": 26.08695652173913,
      "eval_accuracy": 0.9361239467246535,
      "eval_f1_score": 0.9270177056615392,
      "eval_loss": 0.47883340716362,
      "eval_precision": 0.9300585908606723,
      "eval_recall": 0.924359200098848,
      "eval_runtime": 6.7784,
      "eval_samples_per_second": 542.751,
      "eval_steps_per_second": 8.557,
      "step": 3000
    },
    {
      "epoch": 26.956521739130434,
      "grad_norm": 1.55753493309021,
      "learning_rate": 2.5e-06,
      "loss": 0.2712,
      "step": 3100
    },
    {
      "epoch": 26.956521739130434,
      "eval_accuracy": 0.9355803207393314,
      "eval_f1_score": 0.9252996225270083,
      "eval_loss": 0.47663480043411255,
      "eval_precision": 0.9273088499395005,
      "eval_recall": 0.9236556967474857,
      "eval_runtime": 6.7007,
      "eval_samples_per_second": 549.049,
      "eval_steps_per_second": 8.656,
      "step": 3100
    },
    {
      "epoch": 27.82608695652174,
      "grad_norm": 1.633718729019165,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.2714,
      "step": 3200
    },
    {
      "epoch": 27.82608695652174,
      "eval_accuracy": 0.9382984506659419,
      "eval_f1_score": 0.9288338932978429,
      "eval_loss": 0.47798144817352295,
      "eval_precision": 0.9284697321489548,
      "eval_recall": 0.929401062619975,
      "eval_runtime": 6.6895,
      "eval_samples_per_second": 549.965,
      "eval_steps_per_second": 8.67,
      "step": 3200
    },
    {
      "epoch": 28.695652173913043,
      "grad_norm": 0.5183067917823792,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.2697,
      "step": 3300
    },
    {
      "epoch": 28.695652173913043,
      "eval_accuracy": 0.9366675727099756,
      "eval_f1_score": 0.9263019689053353,
      "eval_loss": 0.485741525888443,
      "eval_precision": 0.9242977411952608,
      "eval_recall": 0.9286175955534596,
      "eval_runtime": 6.7616,
      "eval_samples_per_second": 544.104,
      "eval_steps_per_second": 8.578,
      "step": 3300
    },
    {
      "epoch": 29.565217391304348,
      "grad_norm": 4.411154747009277,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.2674,
      "step": 3400
    },
    {
      "epoch": 29.565217391304348,
      "eval_accuracy": 0.9347648817613482,
      "eval_f1_score": 0.9235274952764979,
      "eval_loss": 0.48756158351898193,
      "eval_precision": 0.9173551034787693,
      "eval_recall": 0.9303550035125531,
      "eval_runtime": 6.831,
      "eval_samples_per_second": 538.573,
      "eval_steps_per_second": 8.491,
      "step": 3400
    },
    {
      "epoch": 30.434782608695652,
      "grad_norm": 9.261029243469238,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.2681,
      "step": 3500
    },
    {
      "epoch": 30.434782608695652,
      "eval_accuracy": 0.9361239467246535,
      "eval_f1_score": 0.9262356484286315,
      "eval_loss": 0.486868292093277,
      "eval_precision": 0.9184420290722909,
      "eval_recall": 0.934775981488375,
      "eval_runtime": 6.6731,
      "eval_samples_per_second": 551.315,
      "eval_steps_per_second": 8.692,
      "step": 3500
    },
    {
      "epoch": 31.304347826086957,
      "grad_norm": 0.14830726385116577,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.2685,
      "step": 3600
    },
    {
      "epoch": 31.304347826086957,
      "eval_accuracy": 0.933949442783365,
      "eval_f1_score": 0.9240517281456259,
      "eval_loss": 0.4930637776851654,
      "eval_precision": 0.9212029440977482,
      "eval_recall": 0.9279123078001273,
      "eval_runtime": 6.6837,
      "eval_samples_per_second": 550.442,
      "eval_steps_per_second": 8.678,
      "step": 3600
    },
    {
      "epoch": 32.17391304347826,
      "grad_norm": 2.589641571044922,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.2665,
      "step": 3700
    },
    {
      "epoch": 32.17391304347826,
      "eval_accuracy": 0.933949442783365,
      "eval_f1_score": 0.9234423513703757,
      "eval_loss": 0.48508113622665405,
      "eval_precision": 0.9211049540814888,
      "eval_recall": 0.9261949377668691,
      "eval_runtime": 6.7146,
      "eval_samples_per_second": 547.914,
      "eval_steps_per_second": 8.638,
      "step": 3700
    },
    {
      "epoch": 33.04347826086956,
      "grad_norm": 0.28960728645324707,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.2703,
      "step": 3800
    },
    {
      "epoch": 33.04347826086956,
      "eval_accuracy": 0.9366675727099756,
      "eval_f1_score": 0.9263225046285886,
      "eval_loss": 0.4864026606082916,
      "eval_precision": 0.9226325281435318,
      "eval_recall": 0.9303985110853679,
      "eval_runtime": 6.7275,
      "eval_samples_per_second": 546.86,
      "eval_steps_per_second": 8.621,
      "step": 3800
    },
    {
      "epoch": 33.91304347826087,
      "grad_norm": 0.5719828009605408,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.2661,
      "step": 3900
    },
    {
      "epoch": 33.91304347826087,
      "eval_accuracy": 0.9363957597173145,
      "eval_f1_score": 0.9271367551931604,
      "eval_loss": 0.484861820936203,
      "eval_precision": 0.9227424858610155,
      "eval_recall": 0.9318596341609905,
      "eval_runtime": 6.7272,
      "eval_samples_per_second": 546.887,
      "eval_steps_per_second": 8.622,
      "step": 3900
    },
    {
      "epoch": 34.78260869565217,
      "grad_norm": 4.2173051834106445,
      "learning_rate": 0.0,
      "loss": 0.2695,
      "step": 4000
    },
    {
      "epoch": 34.78260869565217,
      "eval_accuracy": 0.9361239467246535,
      "eval_f1_score": 0.9269315744830021,
      "eval_loss": 0.48626089096069336,
      "eval_precision": 0.9223039042139164,
      "eval_recall": 0.9319526344370851,
      "eval_runtime": 6.7602,
      "eval_samples_per_second": 544.211,
      "eval_steps_per_second": 8.58,
      "step": 4000
    },
    {
      "epoch": 34.78260869565217,
      "step": 4000,
      "total_flos": 8772403331841000.0,
      "train_loss": 0.4447998676300049,
      "train_runtime": 3679.271,
      "train_samples_per_second": 139.158,
      "train_steps_per_second": 1.087
    }
  ],
  "logging_steps": 100,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 35,
  "save_steps": 100,
  "total_flos": 8772403331841000.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}