cls-comment-phobert-base-v2-v3.2 / trainer_state.json
tiennguyenbnbk's picture
End of training
604c396 verified
{
"best_metric": 0.9288338932978429,
"best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2/checkpoint-3200",
"epoch": 34.78260869565217,
"eval_steps": 100,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8695652173913043,
"grad_norm": 1.351387858390808,
"learning_rate": 2.5e-06,
"loss": 1.8639,
"step": 100
},
{
"epoch": 0.8695652173913043,
"eval_accuracy": 0.40038053818972547,
"eval_f1_score": 0.08351588662990973,
"eval_loss": 1.7088394165039062,
"eval_precision": 0.17952180872348938,
"eval_recall": 0.1438423645320197,
"eval_runtime": 6.7556,
"eval_samples_per_second": 544.589,
"eval_steps_per_second": 8.586,
"step": 100
},
{
"epoch": 1.7391304347826086,
"grad_norm": 5.332070827484131,
"learning_rate": 5e-06,
"loss": 1.5668,
"step": 200
},
{
"epoch": 1.7391304347826086,
"eval_accuracy": 0.580048926338679,
"eval_f1_score": 0.21719867493792772,
"eval_loss": 1.3287572860717773,
"eval_precision": 0.26741462611831024,
"eval_recall": 0.2574687919678381,
"eval_runtime": 6.8289,
"eval_samples_per_second": 538.739,
"eval_steps_per_second": 8.493,
"step": 200
},
{
"epoch": 2.608695652173913,
"grad_norm": 4.961886882781982,
"learning_rate": 7.500000000000001e-06,
"loss": 1.2197,
"step": 300
},
{
"epoch": 2.608695652173913,
"eval_accuracy": 0.7667844522968198,
"eval_f1_score": 0.5365670142781532,
"eval_loss": 0.974587619304657,
"eval_precision": 0.581964201034555,
"eval_recall": 0.5148284343114082,
"eval_runtime": 6.7013,
"eval_samples_per_second": 548.998,
"eval_steps_per_second": 8.655,
"step": 300
},
{
"epoch": 3.4782608695652173,
"grad_norm": 4.706761837005615,
"learning_rate": 1e-05,
"loss": 0.9384,
"step": 400
},
{
"epoch": 3.4782608695652173,
"eval_accuracy": 0.8390867083446589,
"eval_f1_score": 0.6137538550736412,
"eval_loss": 0.7674435973167419,
"eval_precision": 0.6052609240082055,
"eval_recall": 0.6267002705049249,
"eval_runtime": 6.7131,
"eval_samples_per_second": 548.036,
"eval_steps_per_second": 8.64,
"step": 400
},
{
"epoch": 4.3478260869565215,
"grad_norm": 4.063348293304443,
"learning_rate": 9.722222222222223e-06,
"loss": 0.7551,
"step": 500
},
{
"epoch": 4.3478260869565215,
"eval_accuracy": 0.8526773579777114,
"eval_f1_score": 0.6283738392523406,
"eval_loss": 0.6780158877372742,
"eval_precision": 0.6146556832242253,
"eval_recall": 0.6453587005137035,
"eval_runtime": 6.7179,
"eval_samples_per_second": 547.644,
"eval_steps_per_second": 8.634,
"step": 500
},
{
"epoch": 5.217391304347826,
"grad_norm": 2.9883711338043213,
"learning_rate": 9.444444444444445e-06,
"loss": 0.6636,
"step": 600
},
{
"epoch": 5.217391304347826,
"eval_accuracy": 0.8684425115520522,
"eval_f1_score": 0.6832828988792853,
"eval_loss": 0.6152337193489075,
"eval_precision": 0.7626363529752824,
"eval_recall": 0.678494572728881,
"eval_runtime": 6.6923,
"eval_samples_per_second": 549.738,
"eval_steps_per_second": 8.667,
"step": 600
},
{
"epoch": 6.086956521739131,
"grad_norm": 4.749546051025391,
"learning_rate": 9.166666666666666e-06,
"loss": 0.5767,
"step": 700
},
{
"epoch": 6.086956521739131,
"eval_accuracy": 0.8929056808915466,
"eval_f1_score": 0.788416346435481,
"eval_loss": 0.5486906170845032,
"eval_precision": 0.8967939035531012,
"eval_recall": 0.7698370330953708,
"eval_runtime": 6.716,
"eval_samples_per_second": 547.799,
"eval_steps_per_second": 8.636,
"step": 700
},
{
"epoch": 6.956521739130435,
"grad_norm": 3.5176820755004883,
"learning_rate": 8.888888888888888e-06,
"loss": 0.5059,
"step": 800
},
{
"epoch": 6.956521739130435,
"eval_accuracy": 0.8986137537374287,
"eval_f1_score": 0.866530151951532,
"eval_loss": 0.526166558265686,
"eval_precision": 0.8880275046999001,
"eval_recall": 0.8533816092377208,
"eval_runtime": 6.7032,
"eval_samples_per_second": 548.839,
"eval_steps_per_second": 8.653,
"step": 800
},
{
"epoch": 7.826086956521739,
"grad_norm": 5.969176292419434,
"learning_rate": 8.611111111111112e-06,
"loss": 0.4512,
"step": 900
},
{
"epoch": 7.826086956521739,
"eval_accuracy": 0.9195433541723295,
"eval_f1_score": 0.9002421953779064,
"eval_loss": 0.48821330070495605,
"eval_precision": 0.8927762999261393,
"eval_recall": 0.9082191267746272,
"eval_runtime": 6.6813,
"eval_samples_per_second": 550.644,
"eval_steps_per_second": 8.681,
"step": 900
},
{
"epoch": 8.695652173913043,
"grad_norm": 4.83783483505249,
"learning_rate": 8.333333333333334e-06,
"loss": 0.4098,
"step": 1000
},
{
"epoch": 8.695652173913043,
"eval_accuracy": 0.9211742321282957,
"eval_f1_score": 0.9111062555604621,
"eval_loss": 0.4828358292579651,
"eval_precision": 0.9182989102010178,
"eval_recall": 0.9060960014608563,
"eval_runtime": 6.6852,
"eval_samples_per_second": 550.323,
"eval_steps_per_second": 8.676,
"step": 1000
},
{
"epoch": 9.565217391304348,
"grad_norm": 4.399734973907471,
"learning_rate": 8.055555555555557e-06,
"loss": 0.3916,
"step": 1100
},
{
"epoch": 9.565217391304348,
"eval_accuracy": 0.927969556944822,
"eval_f1_score": 0.9192607581713574,
"eval_loss": 0.46853822469711304,
"eval_precision": 0.9254336746301863,
"eval_recall": 0.9140372115171418,
"eval_runtime": 6.7479,
"eval_samples_per_second": 545.206,
"eval_steps_per_second": 8.595,
"step": 1100
},
{
"epoch": 10.434782608695652,
"grad_norm": 5.233398914337158,
"learning_rate": 7.77777777777778e-06,
"loss": 0.373,
"step": 1200
},
{
"epoch": 10.434782608695652,
"eval_accuracy": 0.9238923620549062,
"eval_f1_score": 0.9145336176845754,
"eval_loss": 0.4755556881427765,
"eval_precision": 0.9100056761034006,
"eval_recall": 0.9210482679945721,
"eval_runtime": 6.7342,
"eval_samples_per_second": 546.317,
"eval_steps_per_second": 8.613,
"step": 1200
},
{
"epoch": 11.304347826086957,
"grad_norm": 5.472758769989014,
"learning_rate": 7.500000000000001e-06,
"loss": 0.3592,
"step": 1300
},
{
"epoch": 11.304347826086957,
"eval_accuracy": 0.9317749388420766,
"eval_f1_score": 0.9229649364321404,
"eval_loss": 0.45966240763664246,
"eval_precision": 0.9263012712773211,
"eval_recall": 0.9203258950418504,
"eval_runtime": 6.7847,
"eval_samples_per_second": 542.251,
"eval_steps_per_second": 8.549,
"step": 1300
},
{
"epoch": 12.173913043478262,
"grad_norm": 5.35235595703125,
"learning_rate": 7.222222222222223e-06,
"loss": 0.3377,
"step": 1400
},
{
"epoch": 12.173913043478262,
"eval_accuracy": 0.9304158738787714,
"eval_f1_score": 0.9181369027515391,
"eval_loss": 0.4691704213619232,
"eval_precision": 0.9174925655697546,
"eval_recall": 0.9197889600383383,
"eval_runtime": 6.7123,
"eval_samples_per_second": 548.096,
"eval_steps_per_second": 8.641,
"step": 1400
},
{
"epoch": 13.043478260869565,
"grad_norm": 4.6181230545043945,
"learning_rate": 6.944444444444445e-06,
"loss": 0.3299,
"step": 1500
},
{
"epoch": 13.043478260869565,
"eval_accuracy": 0.9328621908127208,
"eval_f1_score": 0.9244416019330937,
"eval_loss": 0.46716630458831787,
"eval_precision": 0.9291821693223221,
"eval_recall": 0.9215636094215414,
"eval_runtime": 6.6884,
"eval_samples_per_second": 550.057,
"eval_steps_per_second": 8.672,
"step": 1500
},
{
"epoch": 13.91304347826087,
"grad_norm": 8.43385124206543,
"learning_rate": 6.666666666666667e-06,
"loss": 0.3198,
"step": 1600
},
{
"epoch": 13.91304347826087,
"eval_accuracy": 0.9331340038053819,
"eval_f1_score": 0.9241282766973115,
"eval_loss": 0.4618851840496063,
"eval_precision": 0.926426704429738,
"eval_recall": 0.9225031003334492,
"eval_runtime": 6.6952,
"eval_samples_per_second": 549.5,
"eval_steps_per_second": 8.663,
"step": 1600
},
{
"epoch": 14.782608695652174,
"grad_norm": 8.825912475585938,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.3121,
"step": 1700
},
{
"epoch": 14.782608695652174,
"eval_accuracy": 0.9331340038053819,
"eval_f1_score": 0.9243317785632609,
"eval_loss": 0.46724241971969604,
"eval_precision": 0.9249039681497474,
"eval_recall": 0.9245288905066229,
"eval_runtime": 6.8264,
"eval_samples_per_second": 538.934,
"eval_steps_per_second": 8.496,
"step": 1700
},
{
"epoch": 15.652173913043478,
"grad_norm": 4.476284027099609,
"learning_rate": 6.111111111111112e-06,
"loss": 0.3053,
"step": 1800
},
{
"epoch": 15.652173913043478,
"eval_accuracy": 0.9344930687686871,
"eval_f1_score": 0.9216180971737599,
"eval_loss": 0.46642911434173584,
"eval_precision": 0.9166826151640262,
"eval_recall": 0.9271868922165902,
"eval_runtime": 6.7491,
"eval_samples_per_second": 545.113,
"eval_steps_per_second": 8.594,
"step": 1800
},
{
"epoch": 16.52173913043478,
"grad_norm": 1.7301744222640991,
"learning_rate": 5.833333333333334e-06,
"loss": 0.3058,
"step": 1900
},
{
"epoch": 16.52173913043478,
"eval_accuracy": 0.9331340038053819,
"eval_f1_score": 0.9228827765631413,
"eval_loss": 0.46549805998802185,
"eval_precision": 0.9240164131101741,
"eval_recall": 0.9221056672944972,
"eval_runtime": 6.7346,
"eval_samples_per_second": 546.286,
"eval_steps_per_second": 8.612,
"step": 1900
},
{
"epoch": 17.391304347826086,
"grad_norm": 7.452052116394043,
"learning_rate": 5.555555555555557e-06,
"loss": 0.2976,
"step": 2000
},
{
"epoch": 17.391304347826086,
"eval_accuracy": 0.9355803207393314,
"eval_f1_score": 0.9258585265558317,
"eval_loss": 0.4619200825691223,
"eval_precision": 0.9298877666967595,
"eval_recall": 0.9220516121183885,
"eval_runtime": 6.6864,
"eval_samples_per_second": 550.217,
"eval_steps_per_second": 8.674,
"step": 2000
},
{
"epoch": 18.26086956521739,
"grad_norm": 2.030193567276001,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.2975,
"step": 2100
},
{
"epoch": 18.26086956521739,
"eval_accuracy": 0.9342212557760261,
"eval_f1_score": 0.9254667743916674,
"eval_loss": 0.4662647545337677,
"eval_precision": 0.9267335070457062,
"eval_recall": 0.9247905559616514,
"eval_runtime": 6.6885,
"eval_samples_per_second": 550.05,
"eval_steps_per_second": 8.672,
"step": 2100
},
{
"epoch": 19.130434782608695,
"grad_norm": 5.05163049697876,
"learning_rate": 5e-06,
"loss": 0.2872,
"step": 2200
},
{
"epoch": 19.130434782608695,
"eval_accuracy": 0.9344930687686871,
"eval_f1_score": 0.923695179464076,
"eval_loss": 0.47371503710746765,
"eval_precision": 0.928532382805131,
"eval_recall": 0.9194008334880648,
"eval_runtime": 6.6854,
"eval_samples_per_second": 550.301,
"eval_steps_per_second": 8.676,
"step": 2200
},
{
"epoch": 20.0,
"grad_norm": 6.272797584533691,
"learning_rate": 4.722222222222222e-06,
"loss": 0.2879,
"step": 2300
},
{
"epoch": 20.0,
"eval_accuracy": 0.9317749388420766,
"eval_f1_score": 0.9201329128675652,
"eval_loss": 0.47988325357437134,
"eval_precision": 0.9115622601790818,
"eval_recall": 0.9295115231425782,
"eval_runtime": 6.7353,
"eval_samples_per_second": 546.224,
"eval_steps_per_second": 8.611,
"step": 2300
},
{
"epoch": 20.869565217391305,
"grad_norm": 5.919195175170898,
"learning_rate": 4.444444444444444e-06,
"loss": 0.2848,
"step": 2400
},
{
"epoch": 20.869565217391305,
"eval_accuracy": 0.9325903778200598,
"eval_f1_score": 0.9194393410487441,
"eval_loss": 0.48427507281303406,
"eval_precision": 0.9091962837604621,
"eval_recall": 0.9309372715371734,
"eval_runtime": 6.6852,
"eval_samples_per_second": 550.322,
"eval_steps_per_second": 8.676,
"step": 2400
},
{
"epoch": 21.73913043478261,
"grad_norm": 3.268333673477173,
"learning_rate": 4.166666666666667e-06,
"loss": 0.2808,
"step": 2500
},
{
"epoch": 21.73913043478261,
"eval_accuracy": 0.9325903778200598,
"eval_f1_score": 0.9242720840575954,
"eval_loss": 0.48389649391174316,
"eval_precision": 0.925898105451882,
"eval_recall": 0.9236990043362497,
"eval_runtime": 6.6806,
"eval_samples_per_second": 550.7,
"eval_steps_per_second": 8.682,
"step": 2500
},
{
"epoch": 22.608695652173914,
"grad_norm": 3.7583141326904297,
"learning_rate": 3.88888888888889e-06,
"loss": 0.2798,
"step": 2600
},
{
"epoch": 22.608695652173914,
"eval_accuracy": 0.9342212557760261,
"eval_f1_score": 0.9240298173904554,
"eval_loss": 0.4839774966239929,
"eval_precision": 0.9196521435699385,
"eval_recall": 0.9288845586975052,
"eval_runtime": 6.7723,
"eval_samples_per_second": 543.245,
"eval_steps_per_second": 8.564,
"step": 2600
},
{
"epoch": 23.47826086956522,
"grad_norm": 5.6299052238464355,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.2797,
"step": 2700
},
{
"epoch": 23.47826086956522,
"eval_accuracy": 0.9334058167980429,
"eval_f1_score": 0.922297295666424,
"eval_loss": 0.4770027697086334,
"eval_precision": 0.9202906698861152,
"eval_recall": 0.9245720434908796,
"eval_runtime": 6.7079,
"eval_samples_per_second": 548.456,
"eval_steps_per_second": 8.646,
"step": 2700
},
{
"epoch": 24.347826086956523,
"grad_norm": 11.368356704711914,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.2754,
"step": 2800
},
{
"epoch": 24.347826086956523,
"eval_accuracy": 0.9317749388420766,
"eval_f1_score": 0.9225259593995536,
"eval_loss": 0.4862979054450989,
"eval_precision": 0.9212011606593585,
"eval_recall": 0.9252089494748691,
"eval_runtime": 6.8104,
"eval_samples_per_second": 540.2,
"eval_steps_per_second": 8.516,
"step": 2800
},
{
"epoch": 25.217391304347824,
"grad_norm": 0.14479239284992218,
"learning_rate": 3.055555555555556e-06,
"loss": 0.2752,
"step": 2900
},
{
"epoch": 25.217391304347824,
"eval_accuracy": 0.9325903778200598,
"eval_f1_score": 0.9242984205440742,
"eval_loss": 0.48786690831184387,
"eval_precision": 0.9237836539478133,
"eval_recall": 0.9258615724988001,
"eval_runtime": 6.7704,
"eval_samples_per_second": 543.395,
"eval_steps_per_second": 8.567,
"step": 2900
},
{
"epoch": 26.08695652173913,
"grad_norm": 7.544506072998047,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.2718,
"step": 3000
},
{
"epoch": 26.08695652173913,
"eval_accuracy": 0.9361239467246535,
"eval_f1_score": 0.9270177056615392,
"eval_loss": 0.47883340716362,
"eval_precision": 0.9300585908606723,
"eval_recall": 0.924359200098848,
"eval_runtime": 6.7784,
"eval_samples_per_second": 542.751,
"eval_steps_per_second": 8.557,
"step": 3000
},
{
"epoch": 26.956521739130434,
"grad_norm": 1.55753493309021,
"learning_rate": 2.5e-06,
"loss": 0.2712,
"step": 3100
},
{
"epoch": 26.956521739130434,
"eval_accuracy": 0.9355803207393314,
"eval_f1_score": 0.9252996225270083,
"eval_loss": 0.47663480043411255,
"eval_precision": 0.9273088499395005,
"eval_recall": 0.9236556967474857,
"eval_runtime": 6.7007,
"eval_samples_per_second": 549.049,
"eval_steps_per_second": 8.656,
"step": 3100
},
{
"epoch": 27.82608695652174,
"grad_norm": 1.633718729019165,
"learning_rate": 2.222222222222222e-06,
"loss": 0.2714,
"step": 3200
},
{
"epoch": 27.82608695652174,
"eval_accuracy": 0.9382984506659419,
"eval_f1_score": 0.9288338932978429,
"eval_loss": 0.47798144817352295,
"eval_precision": 0.9284697321489548,
"eval_recall": 0.929401062619975,
"eval_runtime": 6.6895,
"eval_samples_per_second": 549.965,
"eval_steps_per_second": 8.67,
"step": 3200
},
{
"epoch": 28.695652173913043,
"grad_norm": 0.5183067917823792,
"learning_rate": 1.944444444444445e-06,
"loss": 0.2697,
"step": 3300
},
{
"epoch": 28.695652173913043,
"eval_accuracy": 0.9366675727099756,
"eval_f1_score": 0.9263019689053353,
"eval_loss": 0.485741525888443,
"eval_precision": 0.9242977411952608,
"eval_recall": 0.9286175955534596,
"eval_runtime": 6.7616,
"eval_samples_per_second": 544.104,
"eval_steps_per_second": 8.578,
"step": 3300
},
{
"epoch": 29.565217391304348,
"grad_norm": 4.411154747009277,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.2674,
"step": 3400
},
{
"epoch": 29.565217391304348,
"eval_accuracy": 0.9347648817613482,
"eval_f1_score": 0.9235274952764979,
"eval_loss": 0.48756158351898193,
"eval_precision": 0.9173551034787693,
"eval_recall": 0.9303550035125531,
"eval_runtime": 6.831,
"eval_samples_per_second": 538.573,
"eval_steps_per_second": 8.491,
"step": 3400
},
{
"epoch": 30.434782608695652,
"grad_norm": 9.261029243469238,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.2681,
"step": 3500
},
{
"epoch": 30.434782608695652,
"eval_accuracy": 0.9361239467246535,
"eval_f1_score": 0.9262356484286315,
"eval_loss": 0.486868292093277,
"eval_precision": 0.9184420290722909,
"eval_recall": 0.934775981488375,
"eval_runtime": 6.6731,
"eval_samples_per_second": 551.315,
"eval_steps_per_second": 8.692,
"step": 3500
},
{
"epoch": 31.304347826086957,
"grad_norm": 0.14830726385116577,
"learning_rate": 1.111111111111111e-06,
"loss": 0.2685,
"step": 3600
},
{
"epoch": 31.304347826086957,
"eval_accuracy": 0.933949442783365,
"eval_f1_score": 0.9240517281456259,
"eval_loss": 0.4930637776851654,
"eval_precision": 0.9212029440977482,
"eval_recall": 0.9279123078001273,
"eval_runtime": 6.6837,
"eval_samples_per_second": 550.442,
"eval_steps_per_second": 8.678,
"step": 3600
},
{
"epoch": 32.17391304347826,
"grad_norm": 2.589641571044922,
"learning_rate": 8.333333333333333e-07,
"loss": 0.2665,
"step": 3700
},
{
"epoch": 32.17391304347826,
"eval_accuracy": 0.933949442783365,
"eval_f1_score": 0.9234423513703757,
"eval_loss": 0.48508113622665405,
"eval_precision": 0.9211049540814888,
"eval_recall": 0.9261949377668691,
"eval_runtime": 6.7146,
"eval_samples_per_second": 547.914,
"eval_steps_per_second": 8.638,
"step": 3700
},
{
"epoch": 33.04347826086956,
"grad_norm": 0.28960728645324707,
"learning_rate": 5.555555555555555e-07,
"loss": 0.2703,
"step": 3800
},
{
"epoch": 33.04347826086956,
"eval_accuracy": 0.9366675727099756,
"eval_f1_score": 0.9263225046285886,
"eval_loss": 0.4864026606082916,
"eval_precision": 0.9226325281435318,
"eval_recall": 0.9303985110853679,
"eval_runtime": 6.7275,
"eval_samples_per_second": 546.86,
"eval_steps_per_second": 8.621,
"step": 3800
},
{
"epoch": 33.91304347826087,
"grad_norm": 0.5719828009605408,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.2661,
"step": 3900
},
{
"epoch": 33.91304347826087,
"eval_accuracy": 0.9363957597173145,
"eval_f1_score": 0.9271367551931604,
"eval_loss": 0.484861820936203,
"eval_precision": 0.9227424858610155,
"eval_recall": 0.9318596341609905,
"eval_runtime": 6.7272,
"eval_samples_per_second": 546.887,
"eval_steps_per_second": 8.622,
"step": 3900
},
{
"epoch": 34.78260869565217,
"grad_norm": 4.2173051834106445,
"learning_rate": 0.0,
"loss": 0.2695,
"step": 4000
},
{
"epoch": 34.78260869565217,
"eval_accuracy": 0.9361239467246535,
"eval_f1_score": 0.9269315744830021,
"eval_loss": 0.48626089096069336,
"eval_precision": 0.9223039042139164,
"eval_recall": 0.9319526344370851,
"eval_runtime": 6.7602,
"eval_samples_per_second": 544.211,
"eval_steps_per_second": 8.58,
"step": 4000
},
{
"epoch": 34.78260869565217,
"step": 4000,
"total_flos": 8772403331841000.0,
"train_loss": 0.4447998676300049,
"train_runtime": 3679.271,
"train_samples_per_second": 139.158,
"train_steps_per_second": 1.087
}
],
"logging_steps": 100,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 100,
"total_flos": 8772403331841000.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}