tiennguyenbnbk's picture
End of training
84402d3 verified
raw
history blame
17.5 kB
{
"best_metric": 0.9261695691084951,
"best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-2000",
"epoch": 26.08695652173913,
"eval_steps": 100,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8695652173913043,
"grad_norm": 2.4986989498138428,
"learning_rate": 2.5e-06,
"loss": 1.8947,
"step": 100
},
{
"epoch": 0.8695652173913043,
"eval_accuracy": 0.4001087251970644,
"eval_f1_score": 0.08320411950694513,
"eval_loss": 1.68748140335083,
"eval_precision": 0.14637409036074248,
"eval_recall": 0.14367816091954025,
"eval_runtime": 6.6876,
"eval_samples_per_second": 550.125,
"eval_steps_per_second": 8.673,
"step": 100
},
{
"epoch": 1.7391304347826086,
"grad_norm": 1.596021294593811,
"learning_rate": 5e-06,
"loss": 1.5395,
"step": 200
},
{
"epoch": 1.7391304347826086,
"eval_accuracy": 0.5849415602065778,
"eval_f1_score": 0.2355807809182458,
"eval_loss": 1.2897096872329712,
"eval_precision": 0.27516139357553443,
"eval_recall": 0.26320585050663253,
"eval_runtime": 6.7112,
"eval_samples_per_second": 548.189,
"eval_steps_per_second": 8.642,
"step": 200
},
{
"epoch": 2.608695652173913,
"grad_norm": 5.161496162414551,
"learning_rate": 7.500000000000001e-06,
"loss": 1.1205,
"step": 300
},
{
"epoch": 2.608695652173913,
"eval_accuracy": 0.7999456374014677,
"eval_f1_score": 0.5833178086765388,
"eval_loss": 0.8468331098556519,
"eval_precision": 0.5889764394952819,
"eval_recall": 0.5810488238671468,
"eval_runtime": 6.6686,
"eval_samples_per_second": 551.694,
"eval_steps_per_second": 8.698,
"step": 300
},
{
"epoch": 3.4782608695652173,
"grad_norm": 5.227330207824707,
"learning_rate": 1e-05,
"loss": 0.82,
"step": 400
},
{
"epoch": 3.4782608695652173,
"eval_accuracy": 0.8369122044033704,
"eval_f1_score": 0.6179371343772609,
"eval_loss": 0.6537477374076843,
"eval_precision": 0.6062100200393906,
"eval_recall": 0.6355302315827523,
"eval_runtime": 6.728,
"eval_samples_per_second": 546.823,
"eval_steps_per_second": 8.621,
"step": 400
},
{
"epoch": 4.3478260869565215,
"grad_norm": 5.6816534996032715,
"learning_rate": 9.722222222222223e-06,
"loss": 0.6232,
"step": 500
},
{
"epoch": 4.3478260869565215,
"eval_accuracy": 0.8537646099483556,
"eval_f1_score": 0.633743239294036,
"eval_loss": 0.537100613117218,
"eval_precision": 0.7525070200257705,
"eval_recall": 0.6518017678843925,
"eval_runtime": 6.7932,
"eval_samples_per_second": 541.573,
"eval_steps_per_second": 8.538,
"step": 500
},
{
"epoch": 5.217391304347826,
"grad_norm": 5.096814155578613,
"learning_rate": 9.444444444444445e-06,
"loss": 0.5148,
"step": 600
},
{
"epoch": 5.217391304347826,
"eval_accuracy": 0.872791519434629,
"eval_f1_score": 0.7299293979398146,
"eval_loss": 0.46505650877952576,
"eval_precision": 0.7548552896750885,
"eval_recall": 0.7210618976649555,
"eval_runtime": 6.7028,
"eval_samples_per_second": 548.875,
"eval_steps_per_second": 8.653,
"step": 600
},
{
"epoch": 6.086956521739131,
"grad_norm": 5.458530902862549,
"learning_rate": 9.166666666666666e-06,
"loss": 0.4204,
"step": 700
},
{
"epoch": 6.086956521739131,
"eval_accuracy": 0.8869257950530035,
"eval_f1_score": 0.7654329783869755,
"eval_loss": 0.40097591280937195,
"eval_precision": 0.8914471413846636,
"eval_recall": 0.7712133932759179,
"eval_runtime": 6.7443,
"eval_samples_per_second": 545.497,
"eval_steps_per_second": 8.6,
"step": 700
},
{
"epoch": 6.956521739130435,
"grad_norm": 6.144416809082031,
"learning_rate": 8.888888888888888e-06,
"loss": 0.3421,
"step": 800
},
{
"epoch": 6.956521739130435,
"eval_accuracy": 0.9051372655612938,
"eval_f1_score": 0.8713582894968701,
"eval_loss": 0.3648029565811157,
"eval_precision": 0.8940734807154502,
"eval_recall": 0.8588405388993653,
"eval_runtime": 6.7352,
"eval_samples_per_second": 546.232,
"eval_steps_per_second": 8.611,
"step": 800
},
{
"epoch": 7.826086956521739,
"grad_norm": 9.907292366027832,
"learning_rate": 8.611111111111112e-06,
"loss": 0.2841,
"step": 900
},
{
"epoch": 7.826086956521739,
"eval_accuracy": 0.9181842892090242,
"eval_f1_score": 0.9006880118200489,
"eval_loss": 0.3239505887031555,
"eval_precision": 0.8978148514278343,
"eval_recall": 0.9038252102525616,
"eval_runtime": 6.7315,
"eval_samples_per_second": 546.532,
"eval_steps_per_second": 8.616,
"step": 900
},
{
"epoch": 8.695652173913043,
"grad_norm": 6.941843032836914,
"learning_rate": 8.333333333333334e-06,
"loss": 0.2319,
"step": 1000
},
{
"epoch": 8.695652173913043,
"eval_accuracy": 0.9203587931503125,
"eval_f1_score": 0.9060754755748909,
"eval_loss": 0.3025033473968506,
"eval_precision": 0.9175362378163865,
"eval_recall": 0.8975903509513042,
"eval_runtime": 6.6908,
"eval_samples_per_second": 549.861,
"eval_steps_per_second": 8.669,
"step": 1000
},
{
"epoch": 9.565217391304348,
"grad_norm": 4.255012035369873,
"learning_rate": 8.055555555555557e-06,
"loss": 0.205,
"step": 1100
},
{
"epoch": 9.565217391304348,
"eval_accuracy": 0.9209024191356346,
"eval_f1_score": 0.9098640550303895,
"eval_loss": 0.29862046241760254,
"eval_precision": 0.9123097696068861,
"eval_recall": 0.9086287269577242,
"eval_runtime": 6.7134,
"eval_samples_per_second": 548.01,
"eval_steps_per_second": 8.639,
"step": 1100
},
{
"epoch": 10.434782608695652,
"grad_norm": 5.848569393157959,
"learning_rate": 7.77777777777778e-06,
"loss": 0.1783,
"step": 1200
},
{
"epoch": 10.434782608695652,
"eval_accuracy": 0.9206306061429737,
"eval_f1_score": 0.9104384776037051,
"eval_loss": 0.3047122657299042,
"eval_precision": 0.9024848857165658,
"eval_recall": 0.9207396220750284,
"eval_runtime": 6.6561,
"eval_samples_per_second": 552.726,
"eval_steps_per_second": 8.714,
"step": 1200
},
{
"epoch": 11.304347826086957,
"grad_norm": 7.340043544769287,
"learning_rate": 7.500000000000001e-06,
"loss": 0.1587,
"step": 1300
},
{
"epoch": 11.304347826086957,
"eval_accuracy": 0.9296004349007883,
"eval_f1_score": 0.9202832724978299,
"eval_loss": 0.2757803201675415,
"eval_precision": 0.9233347498988893,
"eval_recall": 0.917658614989255,
"eval_runtime": 6.6787,
"eval_samples_per_second": 550.859,
"eval_steps_per_second": 8.684,
"step": 1300
},
{
"epoch": 12.173913043478262,
"grad_norm": 5.315700054168701,
"learning_rate": 7.222222222222223e-06,
"loss": 0.1286,
"step": 1400
},
{
"epoch": 12.173913043478262,
"eval_accuracy": 0.9266104919815167,
"eval_f1_score": 0.9144278995332229,
"eval_loss": 0.29267847537994385,
"eval_precision": 0.9100638576136009,
"eval_recall": 0.9198715425139269,
"eval_runtime": 6.7676,
"eval_samples_per_second": 543.619,
"eval_steps_per_second": 8.57,
"step": 1400
},
{
"epoch": 13.043478260869565,
"grad_norm": 5.173799514770508,
"learning_rate": 6.944444444444445e-06,
"loss": 0.1221,
"step": 1500
},
{
"epoch": 13.043478260869565,
"eval_accuracy": 0.9317749388420766,
"eval_f1_score": 0.9245023460604546,
"eval_loss": 0.28211963176727295,
"eval_precision": 0.9309417300478454,
"eval_recall": 0.9193579289135766,
"eval_runtime": 6.7091,
"eval_samples_per_second": 548.359,
"eval_steps_per_second": 8.645,
"step": 1500
},
{
"epoch": 13.91304347826087,
"grad_norm": 8.639619827270508,
"learning_rate": 6.666666666666667e-06,
"loss": 0.1087,
"step": 1600
},
{
"epoch": 13.91304347826087,
"eval_accuracy": 0.9293286219081273,
"eval_f1_score": 0.9159607873769989,
"eval_loss": 0.27890825271606445,
"eval_precision": 0.9090390134661626,
"eval_recall": 0.9236924050215896,
"eval_runtime": 6.7017,
"eval_samples_per_second": 548.966,
"eval_steps_per_second": 8.655,
"step": 1600
},
{
"epoch": 14.782608695652174,
"grad_norm": 6.424872398376465,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.0982,
"step": 1700
},
{
"epoch": 14.782608695652174,
"eval_accuracy": 0.9290568089154662,
"eval_f1_score": 0.9196461825352048,
"eval_loss": 0.2833573520183563,
"eval_precision": 0.9187836187318232,
"eval_recall": 0.9213402050339831,
"eval_runtime": 6.7096,
"eval_samples_per_second": 548.32,
"eval_steps_per_second": 8.644,
"step": 1700
},
{
"epoch": 15.652173913043478,
"grad_norm": 4.618613243103027,
"learning_rate": 6.111111111111112e-06,
"loss": 0.089,
"step": 1800
},
{
"epoch": 15.652173913043478,
"eval_accuracy": 0.9298722478934494,
"eval_f1_score": 0.9202166850732406,
"eval_loss": 0.28280356526374817,
"eval_precision": 0.9151663252588741,
"eval_recall": 0.9260674008256092,
"eval_runtime": 6.7345,
"eval_samples_per_second": 546.292,
"eval_steps_per_second": 8.612,
"step": 1800
},
{
"epoch": 16.52173913043478,
"grad_norm": 1.9568698406219482,
"learning_rate": 5.833333333333334e-06,
"loss": 0.0795,
"step": 1900
},
{
"epoch": 16.52173913043478,
"eval_accuracy": 0.9331340038053819,
"eval_f1_score": 0.9244095368032713,
"eval_loss": 0.273701936006546,
"eval_precision": 0.925343846727414,
"eval_recall": 0.9238732382441093,
"eval_runtime": 6.7425,
"eval_samples_per_second": 545.641,
"eval_steps_per_second": 8.602,
"step": 1900
},
{
"epoch": 17.391304347826086,
"grad_norm": 2.161759614944458,
"learning_rate": 5.555555555555557e-06,
"loss": 0.0684,
"step": 2000
},
{
"epoch": 17.391304347826086,
"eval_accuracy": 0.9323185648273987,
"eval_f1_score": 0.9261695691084951,
"eval_loss": 0.2873239815235138,
"eval_precision": 0.9319834922740233,
"eval_recall": 0.9216726996777184,
"eval_runtime": 6.7577,
"eval_samples_per_second": 544.415,
"eval_steps_per_second": 8.583,
"step": 2000
},
{
"epoch": 18.26086956521739,
"grad_norm": 4.607916355133057,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.0673,
"step": 2100
},
{
"epoch": 18.26086956521739,
"eval_accuracy": 0.9320467518347377,
"eval_f1_score": 0.925184613434992,
"eval_loss": 0.2904324531555176,
"eval_precision": 0.9332741752610002,
"eval_recall": 0.9184445089519725,
"eval_runtime": 6.7294,
"eval_samples_per_second": 546.702,
"eval_steps_per_second": 8.619,
"step": 2100
},
{
"epoch": 19.130434782608695,
"grad_norm": 6.327251434326172,
"learning_rate": 5e-06,
"loss": 0.0571,
"step": 2200
},
{
"epoch": 19.130434782608695,
"eval_accuracy": 0.9293286219081273,
"eval_f1_score": 0.9221668516434853,
"eval_loss": 0.3166205883026123,
"eval_precision": 0.925137476734381,
"eval_recall": 0.920952609526737,
"eval_runtime": 6.7037,
"eval_samples_per_second": 548.799,
"eval_steps_per_second": 8.652,
"step": 2200
},
{
"epoch": 20.0,
"grad_norm": 9.082805633544922,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0561,
"step": 2300
},
{
"epoch": 20.0,
"eval_accuracy": 0.9317749388420766,
"eval_f1_score": 0.9221280725480369,
"eval_loss": 0.2922111749649048,
"eval_precision": 0.9150274852978553,
"eval_recall": 0.9297539237688469,
"eval_runtime": 6.7503,
"eval_samples_per_second": 545.017,
"eval_steps_per_second": 8.592,
"step": 2300
},
{
"epoch": 20.869565217391305,
"grad_norm": 5.283856391906738,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0511,
"step": 2400
},
{
"epoch": 20.869565217391305,
"eval_accuracy": 0.9315031258494156,
"eval_f1_score": 0.9190655007648246,
"eval_loss": 0.29927295446395874,
"eval_precision": 0.9088064828335735,
"eval_recall": 0.9303236730969998,
"eval_runtime": 6.7281,
"eval_samples_per_second": 546.809,
"eval_steps_per_second": 8.621,
"step": 2400
},
{
"epoch": 21.73913043478261,
"grad_norm": 6.0074896812438965,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0442,
"step": 2500
},
{
"epoch": 21.73913043478261,
"eval_accuracy": 0.9266104919815167,
"eval_f1_score": 0.9161795338292905,
"eval_loss": 0.32011494040489197,
"eval_precision": 0.9060451440252857,
"eval_recall": 0.9280493422296427,
"eval_runtime": 6.7127,
"eval_samples_per_second": 548.067,
"eval_steps_per_second": 8.64,
"step": 2500
},
{
"epoch": 22.608695652173914,
"grad_norm": 3.1078407764434814,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0447,
"step": 2600
},
{
"epoch": 22.608695652173914,
"eval_accuracy": 0.928241369937483,
"eval_f1_score": 0.9137497551284842,
"eval_loss": 0.3155056834220886,
"eval_precision": 0.9009580466238951,
"eval_recall": 0.9281730038314259,
"eval_runtime": 6.7337,
"eval_samples_per_second": 546.354,
"eval_steps_per_second": 8.613,
"step": 2600
},
{
"epoch": 23.47826086956522,
"grad_norm": 2.9584195613861084,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0415,
"step": 2700
},
{
"epoch": 23.47826086956522,
"eval_accuracy": 0.9334058167980429,
"eval_f1_score": 0.9226018260362496,
"eval_loss": 0.30177852511405945,
"eval_precision": 0.9185179495480513,
"eval_recall": 0.9269833265460256,
"eval_runtime": 6.7411,
"eval_samples_per_second": 545.757,
"eval_steps_per_second": 8.604,
"step": 2700
},
{
"epoch": 24.347826086956523,
"grad_norm": 12.190321922302246,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0359,
"step": 2800
},
{
"epoch": 24.347826086956523,
"eval_accuracy": 0.9298722478934494,
"eval_f1_score": 0.9177278989948806,
"eval_loss": 0.31918126344680786,
"eval_precision": 0.9062664068560837,
"eval_recall": 0.9308234663752396,
"eval_runtime": 6.7802,
"eval_samples_per_second": 542.606,
"eval_steps_per_second": 8.554,
"step": 2800
},
{
"epoch": 25.217391304347824,
"grad_norm": 0.2598835527896881,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0369,
"step": 2900
},
{
"epoch": 25.217391304347824,
"eval_accuracy": 0.933677629790704,
"eval_f1_score": 0.9210521238209074,
"eval_loss": 0.3063570559024811,
"eval_precision": 0.9140578271273506,
"eval_recall": 0.9285610502121662,
"eval_runtime": 6.7729,
"eval_samples_per_second": 543.197,
"eval_steps_per_second": 8.564,
"step": 2900
},
{
"epoch": 26.08695652173913,
"grad_norm": 0.24433408677577972,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0296,
"step": 3000
},
{
"epoch": 26.08695652173913,
"eval_accuracy": 0.9328621908127208,
"eval_f1_score": 0.9237047805131925,
"eval_loss": 0.311038613319397,
"eval_precision": 0.9198460229141495,
"eval_recall": 0.9279424126946928,
"eval_runtime": 6.8161,
"eval_samples_per_second": 539.754,
"eval_steps_per_second": 8.509,
"step": 3000
},
{
"epoch": 26.08695652173913,
"step": 3000,
"total_flos": 6579999363349350.0,
"train_loss": 0.31640464369455973,
"train_runtime": 3238.6183,
"train_samples_per_second": 158.092,
"train_steps_per_second": 1.235
}
],
"logging_steps": 100,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 100,
"total_flos": 6579999363349350.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}