{
  "best_metric": 0.9261695691084951,
  "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-2000",
  "epoch": 26.08695652173913,
  "eval_steps": 100,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 2.4986989498138428,
      "learning_rate": 2.5e-06,
      "loss": 1.8947,
      "step": 100
    },
    {
      "epoch": 0.8695652173913043,
      "eval_accuracy": 0.4001087251970644,
      "eval_f1_score": 0.08320411950694513,
      "eval_loss": 1.68748140335083,
      "eval_precision": 0.14637409036074248,
      "eval_recall": 0.14367816091954025,
      "eval_runtime": 6.6876,
      "eval_samples_per_second": 550.125,
      "eval_steps_per_second": 8.673,
      "step": 100
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 1.596021294593811,
      "learning_rate": 5e-06,
      "loss": 1.5395,
      "step": 200
    },
    {
      "epoch": 1.7391304347826086,
      "eval_accuracy": 0.5849415602065778,
      "eval_f1_score": 0.2355807809182458,
      "eval_loss": 1.2897096872329712,
      "eval_precision": 0.27516139357553443,
      "eval_recall": 0.26320585050663253,
      "eval_runtime": 6.7112,
      "eval_samples_per_second": 548.189,
      "eval_steps_per_second": 8.642,
      "step": 200
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 5.161496162414551,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.1205,
      "step": 300
    },
    {
      "epoch": 2.608695652173913,
      "eval_accuracy": 0.7999456374014677,
      "eval_f1_score": 0.5833178086765388,
      "eval_loss": 0.8468331098556519,
      "eval_precision": 0.5889764394952819,
      "eval_recall": 0.5810488238671468,
      "eval_runtime": 6.6686,
      "eval_samples_per_second": 551.694,
      "eval_steps_per_second": 8.698,
      "step": 300
    },
    {
      "epoch": 3.4782608695652173,
      "grad_norm": 5.227330207824707,
      "learning_rate": 1e-05,
      "loss": 0.82,
      "step": 400
    },
    {
      "epoch": 3.4782608695652173,
      "eval_accuracy": 0.8369122044033704,
      "eval_f1_score": 0.6179371343772609,
      "eval_loss": 0.6537477374076843,
      "eval_precision": 0.6062100200393906,
      "eval_recall": 0.6355302315827523,
      "eval_runtime": 6.728,
      "eval_samples_per_second": 546.823,
      "eval_steps_per_second": 8.621,
      "step": 400
    },
    {
      "epoch": 4.3478260869565215,
      "grad_norm": 5.6816534996032715,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.6232,
      "step": 500
    },
    {
      "epoch": 4.3478260869565215,
      "eval_accuracy": 0.8537646099483556,
      "eval_f1_score": 0.633743239294036,
      "eval_loss": 0.537100613117218,
      "eval_precision": 0.7525070200257705,
      "eval_recall": 0.6518017678843925,
      "eval_runtime": 6.7932,
      "eval_samples_per_second": 541.573,
      "eval_steps_per_second": 8.538,
      "step": 500
    },
    {
      "epoch": 5.217391304347826,
      "grad_norm": 5.096814155578613,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.5148,
      "step": 600
    },
    {
      "epoch": 5.217391304347826,
      "eval_accuracy": 0.872791519434629,
      "eval_f1_score": 0.7299293979398146,
      "eval_loss": 0.46505650877952576,
      "eval_precision": 0.7548552896750885,
      "eval_recall": 0.7210618976649555,
      "eval_runtime": 6.7028,
      "eval_samples_per_second": 548.875,
      "eval_steps_per_second": 8.653,
      "step": 600
    },
    {
      "epoch": 6.086956521739131,
      "grad_norm": 5.458530902862549,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.4204,
      "step": 700
    },
    {
      "epoch": 6.086956521739131,
      "eval_accuracy": 0.8869257950530035,
      "eval_f1_score": 0.7654329783869755,
      "eval_loss": 0.40097591280937195,
      "eval_precision": 0.8914471413846636,
      "eval_recall": 0.7712133932759179,
      "eval_runtime": 6.7443,
      "eval_samples_per_second": 545.497,
      "eval_steps_per_second": 8.6,
      "step": 700
    },
    {
      "epoch": 6.956521739130435,
      "grad_norm": 6.144416809082031,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.3421,
      "step": 800
    },
    {
      "epoch": 6.956521739130435,
      "eval_accuracy": 0.9051372655612938,
      "eval_f1_score": 0.8713582894968701,
      "eval_loss": 0.3648029565811157,
      "eval_precision": 0.8940734807154502,
      "eval_recall": 0.8588405388993653,
      "eval_runtime": 6.7352,
      "eval_samples_per_second": 546.232,
      "eval_steps_per_second": 8.611,
      "step": 800
    },
    {
      "epoch": 7.826086956521739,
      "grad_norm": 9.907292366027832,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.2841,
      "step": 900
    },
    {
      "epoch": 7.826086956521739,
      "eval_accuracy": 0.9181842892090242,
      "eval_f1_score": 0.9006880118200489,
      "eval_loss": 0.3239505887031555,
      "eval_precision": 0.8978148514278343,
      "eval_recall": 0.9038252102525616,
      "eval_runtime": 6.7315,
      "eval_samples_per_second": 546.532,
      "eval_steps_per_second": 8.616,
      "step": 900
    },
    {
      "epoch": 8.695652173913043,
      "grad_norm": 6.941843032836914,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.2319,
      "step": 1000
    },
    {
      "epoch": 8.695652173913043,
      "eval_accuracy": 0.9203587931503125,
      "eval_f1_score": 0.9060754755748909,
      "eval_loss": 0.3025033473968506,
      "eval_precision": 0.9175362378163865,
      "eval_recall": 0.8975903509513042,
      "eval_runtime": 6.6908,
      "eval_samples_per_second": 549.861,
      "eval_steps_per_second": 8.669,
      "step": 1000
    },
    {
      "epoch": 9.565217391304348,
      "grad_norm": 4.255012035369873,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.205,
      "step": 1100
    },
    {
      "epoch": 9.565217391304348,
      "eval_accuracy": 0.9209024191356346,
      "eval_f1_score": 0.9098640550303895,
      "eval_loss": 0.29862046241760254,
      "eval_precision": 0.9123097696068861,
      "eval_recall": 0.9086287269577242,
      "eval_runtime": 6.7134,
      "eval_samples_per_second": 548.01,
      "eval_steps_per_second": 8.639,
      "step": 1100
    },
    {
      "epoch": 10.434782608695652,
      "grad_norm": 5.848569393157959,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.1783,
      "step": 1200
    },
    {
      "epoch": 10.434782608695652,
      "eval_accuracy": 0.9206306061429737,
      "eval_f1_score": 0.9104384776037051,
      "eval_loss": 0.3047122657299042,
      "eval_precision": 0.9024848857165658,
      "eval_recall": 0.9207396220750284,
      "eval_runtime": 6.6561,
      "eval_samples_per_second": 552.726,
      "eval_steps_per_second": 8.714,
      "step": 1200
    },
    {
      "epoch": 11.304347826086957,
      "grad_norm": 7.340043544769287,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.1587,
      "step": 1300
    },
    {
      "epoch": 11.304347826086957,
      "eval_accuracy": 0.9296004349007883,
      "eval_f1_score": 0.9202832724978299,
      "eval_loss": 0.2757803201675415,
      "eval_precision": 0.9233347498988893,
      "eval_recall": 0.917658614989255,
      "eval_runtime": 6.6787,
      "eval_samples_per_second": 550.859,
      "eval_steps_per_second": 8.684,
      "step": 1300
    },
    {
      "epoch": 12.173913043478262,
      "grad_norm": 5.315700054168701,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.1286,
      "step": 1400
    },
    {
      "epoch": 12.173913043478262,
      "eval_accuracy": 0.9266104919815167,
      "eval_f1_score": 0.9144278995332229,
      "eval_loss": 0.29267847537994385,
      "eval_precision": 0.9100638576136009,
      "eval_recall": 0.9198715425139269,
      "eval_runtime": 6.7676,
      "eval_samples_per_second": 543.619,
      "eval_steps_per_second": 8.57,
      "step": 1400
    },
    {
      "epoch": 13.043478260869565,
      "grad_norm": 5.173799514770508,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.1221,
      "step": 1500
    },
    {
      "epoch": 13.043478260869565,
      "eval_accuracy": 0.9317749388420766,
      "eval_f1_score": 0.9245023460604546,
      "eval_loss": 0.28211963176727295,
      "eval_precision": 0.9309417300478454,
      "eval_recall": 0.9193579289135766,
      "eval_runtime": 6.7091,
      "eval_samples_per_second": 548.359,
      "eval_steps_per_second": 8.645,
      "step": 1500
    },
    {
      "epoch": 13.91304347826087,
      "grad_norm": 8.639619827270508,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.1087,
      "step": 1600
    },
    {
      "epoch": 13.91304347826087,
      "eval_accuracy": 0.9293286219081273,
      "eval_f1_score": 0.9159607873769989,
      "eval_loss": 0.27890825271606445,
      "eval_precision": 0.9090390134661626,
      "eval_recall": 0.9236924050215896,
      "eval_runtime": 6.7017,
      "eval_samples_per_second": 548.966,
      "eval_steps_per_second": 8.655,
      "step": 1600
    },
    {
      "epoch": 14.782608695652174,
      "grad_norm": 6.424872398376465,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.0982,
      "step": 1700
    },
    {
      "epoch": 14.782608695652174,
      "eval_accuracy": 0.9290568089154662,
      "eval_f1_score": 0.9196461825352048,
      "eval_loss": 0.2833573520183563,
      "eval_precision": 0.9187836187318232,
      "eval_recall": 0.9213402050339831,
      "eval_runtime": 6.7096,
      "eval_samples_per_second": 548.32,
      "eval_steps_per_second": 8.644,
      "step": 1700
    },
    {
      "epoch": 15.652173913043478,
      "grad_norm": 4.618613243103027,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.089,
      "step": 1800
    },
    {
      "epoch": 15.652173913043478,
      "eval_accuracy": 0.9298722478934494,
      "eval_f1_score": 0.9202166850732406,
      "eval_loss": 0.28280356526374817,
      "eval_precision": 0.9151663252588741,
      "eval_recall": 0.9260674008256092,
      "eval_runtime": 6.7345,
      "eval_samples_per_second": 546.292,
      "eval_steps_per_second": 8.612,
      "step": 1800
    },
    {
      "epoch": 16.52173913043478,
      "grad_norm": 1.9568698406219482,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.0795,
      "step": 1900
    },
    {
      "epoch": 16.52173913043478,
      "eval_accuracy": 0.9331340038053819,
      "eval_f1_score": 0.9244095368032713,
      "eval_loss": 0.273701936006546,
      "eval_precision": 0.925343846727414,
      "eval_recall": 0.9238732382441093,
      "eval_runtime": 6.7425,
      "eval_samples_per_second": 545.641,
      "eval_steps_per_second": 8.602,
      "step": 1900
    },
    {
      "epoch": 17.391304347826086,
      "grad_norm": 2.161759614944458,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.0684,
      "step": 2000
    },
    {
      "epoch": 17.391304347826086,
      "eval_accuracy": 0.9323185648273987,
      "eval_f1_score": 0.9261695691084951,
      "eval_loss": 0.2873239815235138,
      "eval_precision": 0.9319834922740233,
      "eval_recall": 0.9216726996777184,
      "eval_runtime": 6.7577,
      "eval_samples_per_second": 544.415,
      "eval_steps_per_second": 8.583,
      "step": 2000
    },
    {
      "epoch": 18.26086956521739,
      "grad_norm": 4.607916355133057,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.0673,
      "step": 2100
    },
    {
      "epoch": 18.26086956521739,
      "eval_accuracy": 0.9320467518347377,
      "eval_f1_score": 0.925184613434992,
      "eval_loss": 0.2904324531555176,
      "eval_precision": 0.9332741752610002,
      "eval_recall": 0.9184445089519725,
      "eval_runtime": 6.7294,
      "eval_samples_per_second": 546.702,
      "eval_steps_per_second": 8.619,
      "step": 2100
    },
    {
      "epoch": 19.130434782608695,
      "grad_norm": 6.327251434326172,
      "learning_rate": 5e-06,
      "loss": 0.0571,
      "step": 2200
    },
    {
      "epoch": 19.130434782608695,
      "eval_accuracy": 0.9293286219081273,
      "eval_f1_score": 0.9221668516434853,
      "eval_loss": 0.3166205883026123,
      "eval_precision": 0.925137476734381,
      "eval_recall": 0.920952609526737,
      "eval_runtime": 6.7037,
      "eval_samples_per_second": 548.799,
      "eval_steps_per_second": 8.652,
      "step": 2200
    },
    {
      "epoch": 20.0,
      "grad_norm": 9.082805633544922,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.0561,
      "step": 2300
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9317749388420766,
      "eval_f1_score": 0.9221280725480369,
      "eval_loss": 0.2922111749649048,
      "eval_precision": 0.9150274852978553,
      "eval_recall": 0.9297539237688469,
      "eval_runtime": 6.7503,
      "eval_samples_per_second": 545.017,
      "eval_steps_per_second": 8.592,
      "step": 2300
    },
    {
      "epoch": 20.869565217391305,
      "grad_norm": 5.283856391906738,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.0511,
      "step": 2400
    },
    {
      "epoch": 20.869565217391305,
      "eval_accuracy": 0.9315031258494156,
      "eval_f1_score": 0.9190655007648246,
      "eval_loss": 0.29927295446395874,
      "eval_precision": 0.9088064828335735,
      "eval_recall": 0.9303236730969998,
      "eval_runtime": 6.7281,
      "eval_samples_per_second": 546.809,
      "eval_steps_per_second": 8.621,
      "step": 2400
    },
    {
      "epoch": 21.73913043478261,
      "grad_norm": 6.0074896812438965,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.0442,
      "step": 2500
    },
    {
      "epoch": 21.73913043478261,
      "eval_accuracy": 0.9266104919815167,
      "eval_f1_score": 0.9161795338292905,
      "eval_loss": 0.32011494040489197,
      "eval_precision": 0.9060451440252857,
      "eval_recall": 0.9280493422296427,
      "eval_runtime": 6.7127,
      "eval_samples_per_second": 548.067,
      "eval_steps_per_second": 8.64,
      "step": 2500
    },
    {
      "epoch": 22.608695652173914,
      "grad_norm": 3.1078407764434814,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.0447,
      "step": 2600
    },
    {
      "epoch": 22.608695652173914,
      "eval_accuracy": 0.928241369937483,
      "eval_f1_score": 0.9137497551284842,
      "eval_loss": 0.3155056834220886,
      "eval_precision": 0.9009580466238951,
      "eval_recall": 0.9281730038314259,
      "eval_runtime": 6.7337,
      "eval_samples_per_second": 546.354,
      "eval_steps_per_second": 8.613,
      "step": 2600
    },
    {
      "epoch": 23.47826086956522,
      "grad_norm": 2.9584195613861084,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.0415,
      "step": 2700
    },
    {
      "epoch": 23.47826086956522,
      "eval_accuracy": 0.9334058167980429,
      "eval_f1_score": 0.9226018260362496,
      "eval_loss": 0.30177852511405945,
      "eval_precision": 0.9185179495480513,
      "eval_recall": 0.9269833265460256,
      "eval_runtime": 6.7411,
      "eval_samples_per_second": 545.757,
      "eval_steps_per_second": 8.604,
      "step": 2700
    },
    {
      "epoch": 24.347826086956523,
      "grad_norm": 12.190321922302246,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.0359,
      "step": 2800
    },
    {
      "epoch": 24.347826086956523,
      "eval_accuracy": 0.9298722478934494,
      "eval_f1_score": 0.9177278989948806,
      "eval_loss": 0.31918126344680786,
      "eval_precision": 0.9062664068560837,
      "eval_recall": 0.9308234663752396,
      "eval_runtime": 6.7802,
      "eval_samples_per_second": 542.606,
      "eval_steps_per_second": 8.554,
      "step": 2800
    },
    {
      "epoch": 25.217391304347824,
      "grad_norm": 0.2598835527896881,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.0369,
      "step": 2900
    },
    {
      "epoch": 25.217391304347824,
      "eval_accuracy": 0.933677629790704,
      "eval_f1_score": 0.9210521238209074,
      "eval_loss": 0.3063570559024811,
      "eval_precision": 0.9140578271273506,
      "eval_recall": 0.9285610502121662,
      "eval_runtime": 6.7729,
      "eval_samples_per_second": 543.197,
      "eval_steps_per_second": 8.564,
      "step": 2900
    },
    {
      "epoch": 26.08695652173913,
      "grad_norm": 0.24433408677577972,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.0296,
      "step": 3000
    },
    {
      "epoch": 26.08695652173913,
      "eval_accuracy": 0.9328621908127208,
      "eval_f1_score": 0.9237047805131925,
      "eval_loss": 0.311038613319397,
      "eval_precision": 0.9198460229141495,
      "eval_recall": 0.9279424126946928,
      "eval_runtime": 6.8161,
      "eval_samples_per_second": 539.754,
      "eval_steps_per_second": 8.509,
      "step": 3000
    },
    {
      "epoch": 26.08695652173913,
      "step": 3000,
      "total_flos": 6579999363349350.0,
      "train_loss": 0.31640464369455973,
      "train_runtime": 3238.6183,
      "train_samples_per_second": 158.092,
      "train_steps_per_second": 1.235
    }
  ],
  "logging_steps": 100,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 35,
  "save_steps": 100,
  "total_flos": 6579999363349350.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}