|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 17.856, |
|
"global_step": 558, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.928571428571428e-07, |
|
"loss": 2.9786, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 3.0172, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.9016, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 2.7266, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_Macro F1": 0.18274435256625565, |
|
"eval_Macro Precision": 0.22464899203701116, |
|
"eval_Macro Recall": 0.21014375581515982, |
|
"eval_Micro F1": 0.208, |
|
"eval_Micro Precision": 0.208, |
|
"eval_Micro Recall": 0.208, |
|
"eval_Weighted F1": 0.18112534226726865, |
|
"eval_Weighted Precision": 0.21432408746391318, |
|
"eval_Weighted Recall": 0.208, |
|
"eval_accuracy": 0.208, |
|
"eval_loss": 2.473806381225586, |
|
"eval_runtime": 340.1389, |
|
"eval_samples_per_second": 2.94, |
|
"eval_steps_per_second": 0.094, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.5835, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 2.4181, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.2732, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.171, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_Macro F1": 0.39250727765144267, |
|
"eval_Macro Precision": 0.4446397468859132, |
|
"eval_Macro Recall": 0.42427064875651993, |
|
"eval_Micro F1": 0.42299999999999993, |
|
"eval_Micro Precision": 0.423, |
|
"eval_Micro Recall": 0.423, |
|
"eval_Weighted F1": 0.39356324128346765, |
|
"eval_Weighted Precision": 0.45034474867370766, |
|
"eval_Weighted Recall": 0.423, |
|
"eval_accuracy": 0.423, |
|
"eval_loss": 1.8510247468948364, |
|
"eval_runtime": 23.3293, |
|
"eval_samples_per_second": 42.864, |
|
"eval_steps_per_second": 1.372, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.9203187250996016e-05, |
|
"loss": 2.0057, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.840637450199204e-05, |
|
"loss": 1.8883, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.760956175298805e-05, |
|
"loss": 1.7538, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.6812749003984064e-05, |
|
"loss": 1.6525, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_Macro F1": 0.5854998981328875, |
|
"eval_Macro Precision": 0.6283041062692261, |
|
"eval_Macro Recall": 0.6124154907646651, |
|
"eval_Micro F1": 0.61, |
|
"eval_Micro Precision": 0.61, |
|
"eval_Micro Recall": 0.61, |
|
"eval_Weighted F1": 0.5883778569595041, |
|
"eval_Weighted Precision": 0.6376755054054996, |
|
"eval_Weighted Recall": 0.61, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 1.2633185386657715, |
|
"eval_runtime": 23.4866, |
|
"eval_samples_per_second": 42.577, |
|
"eval_steps_per_second": 1.362, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 4.601593625498008e-05, |
|
"loss": 1.4741, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 4.52191235059761e-05, |
|
"loss": 1.4995, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.442231075697211e-05, |
|
"loss": 1.3476, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.362549800796813e-05, |
|
"loss": 1.346, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_Macro F1": 0.6991540885965197, |
|
"eval_Macro Precision": 0.7033970309397422, |
|
"eval_Macro Recall": 0.705813786350264, |
|
"eval_Micro F1": 0.706, |
|
"eval_Micro Precision": 0.706, |
|
"eval_Micro Recall": 0.706, |
|
"eval_Weighted F1": 0.7023066954415087, |
|
"eval_Weighted Precision": 0.7095418750091215, |
|
"eval_Weighted Recall": 0.706, |
|
"eval_accuracy": 0.706, |
|
"eval_loss": 1.025865077972412, |
|
"eval_runtime": 25.2258, |
|
"eval_samples_per_second": 39.642, |
|
"eval_steps_per_second": 1.269, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.2828685258964146e-05, |
|
"loss": 1.361, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 4.203187250996016e-05, |
|
"loss": 1.2724, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 4.123505976095618e-05, |
|
"loss": 1.2634, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 4.043824701195219e-05, |
|
"loss": 1.253, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_Macro F1": 0.7239315752163233, |
|
"eval_Macro Precision": 0.7261038012066161, |
|
"eval_Macro Recall": 0.7290773391581917, |
|
"eval_Micro F1": 0.729, |
|
"eval_Micro Precision": 0.729, |
|
"eval_Micro Recall": 0.729, |
|
"eval_Weighted F1": 0.7276898829930567, |
|
"eval_Weighted Precision": 0.7340103739336716, |
|
"eval_Weighted Recall": 0.729, |
|
"eval_accuracy": 0.729, |
|
"eval_loss": 0.9180329442024231, |
|
"eval_runtime": 24.244, |
|
"eval_samples_per_second": 41.247, |
|
"eval_steps_per_second": 1.32, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 3.964143426294821e-05, |
|
"loss": 1.1127, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.884462151394422e-05, |
|
"loss": 1.2072, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 3.804780876494024e-05, |
|
"loss": 1.1051, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 3.7250996015936256e-05, |
|
"loss": 1.0975, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_Macro F1": 0.7437283226344332, |
|
"eval_Macro Precision": 0.7525965488125351, |
|
"eval_Macro Recall": 0.7471731443855207, |
|
"eval_Micro F1": 0.747, |
|
"eval_Micro Precision": 0.747, |
|
"eval_Micro Recall": 0.747, |
|
"eval_Weighted F1": 0.7479974803294792, |
|
"eval_Weighted Precision": 0.7608663358741853, |
|
"eval_Weighted Recall": 0.747, |
|
"eval_accuracy": 0.747, |
|
"eval_loss": 0.8858795166015625, |
|
"eval_runtime": 24.5565, |
|
"eval_samples_per_second": 40.722, |
|
"eval_steps_per_second": 1.303, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 3.6454183266932277e-05, |
|
"loss": 1.0922, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.565737051792829e-05, |
|
"loss": 1.0797, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 3.4860557768924304e-05, |
|
"loss": 1.0066, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 3.406374501992032e-05, |
|
"loss": 1.1122, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_Macro F1": 0.7577762006111012, |
|
"eval_Macro Precision": 0.7726630864911376, |
|
"eval_Macro Recall": 0.7594437988791525, |
|
"eval_Micro F1": 0.76, |
|
"eval_Micro Precision": 0.76, |
|
"eval_Micro Recall": 0.76, |
|
"eval_Weighted F1": 0.7606134776349917, |
|
"eval_Weighted Precision": 0.7772442524935343, |
|
"eval_Weighted Recall": 0.76, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.8269779086112976, |
|
"eval_runtime": 24.3399, |
|
"eval_samples_per_second": 41.085, |
|
"eval_steps_per_second": 1.315, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 3.326693227091633e-05, |
|
"loss": 0.988, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 3.247011952191235e-05, |
|
"loss": 1.0301, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 3.1673306772908366e-05, |
|
"loss": 0.9724, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 3.0876494023904386e-05, |
|
"loss": 1.0365, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_Macro F1": 0.7730173451661722, |
|
"eval_Macro Precision": 0.7919871344293006, |
|
"eval_Macro Recall": 0.7734992733449892, |
|
"eval_Micro F1": 0.775, |
|
"eval_Micro Precision": 0.775, |
|
"eval_Micro Recall": 0.775, |
|
"eval_Weighted F1": 0.7759067917731501, |
|
"eval_Weighted Precision": 0.7957122736768526, |
|
"eval_Weighted Recall": 0.775, |
|
"eval_accuracy": 0.775, |
|
"eval_loss": 0.7806075215339661, |
|
"eval_runtime": 23.5653, |
|
"eval_samples_per_second": 42.435, |
|
"eval_steps_per_second": 1.358, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 3.00796812749004e-05, |
|
"loss": 0.9482, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 2.9282868525896417e-05, |
|
"loss": 0.9364, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.8486055776892434e-05, |
|
"loss": 0.9417, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 2.7689243027888445e-05, |
|
"loss": 1.004, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_Macro F1": 0.7956663948657638, |
|
"eval_Macro Precision": 0.8150981558104353, |
|
"eval_Macro Recall": 0.7955573765342054, |
|
"eval_Micro F1": 0.796, |
|
"eval_Micro Precision": 0.796, |
|
"eval_Micro Recall": 0.796, |
|
"eval_Weighted F1": 0.7977477337842563, |
|
"eval_Weighted Precision": 0.8193174046095921, |
|
"eval_Weighted Recall": 0.796, |
|
"eval_accuracy": 0.796, |
|
"eval_loss": 0.7471871972084045, |
|
"eval_runtime": 24.2779, |
|
"eval_samples_per_second": 41.19, |
|
"eval_steps_per_second": 1.318, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 2.6892430278884462e-05, |
|
"loss": 0.8964, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.609561752988048e-05, |
|
"loss": 0.9098, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 2.5298804780876496e-05, |
|
"loss": 0.9576, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 2.4501992031872513e-05, |
|
"loss": 0.9278, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_Macro F1": 0.7956928280629123, |
|
"eval_Macro Precision": 0.8114507473411248, |
|
"eval_Macro Recall": 0.7953090431389529, |
|
"eval_Micro F1": 0.795, |
|
"eval_Micro Precision": 0.795, |
|
"eval_Micro Recall": 0.795, |
|
"eval_Weighted F1": 0.7974177696936208, |
|
"eval_Weighted Precision": 0.8157219938612603, |
|
"eval_Weighted Recall": 0.795, |
|
"eval_accuracy": 0.795, |
|
"eval_loss": 0.7296148538589478, |
|
"eval_runtime": 24.012, |
|
"eval_samples_per_second": 41.646, |
|
"eval_steps_per_second": 1.333, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 2.3705179282868527e-05, |
|
"loss": 0.8623, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 2.290836653386454e-05, |
|
"loss": 0.9, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 2.2111553784860558e-05, |
|
"loss": 0.8767, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_Macro F1": 0.8077597575155505, |
|
"eval_Macro Precision": 0.8135983545887402, |
|
"eval_Macro Recall": 0.8090640618814349, |
|
"eval_Micro F1": 0.809, |
|
"eval_Micro Precision": 0.809, |
|
"eval_Micro Recall": 0.809, |
|
"eval_Weighted F1": 0.8101031910520137, |
|
"eval_Weighted Precision": 0.8181958157506283, |
|
"eval_Weighted Recall": 0.809, |
|
"eval_accuracy": 0.809, |
|
"eval_loss": 0.7256603240966797, |
|
"eval_runtime": 23.8159, |
|
"eval_samples_per_second": 41.989, |
|
"eval_steps_per_second": 1.344, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 2.1314741035856575e-05, |
|
"loss": 0.9239, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 2.0517928286852592e-05, |
|
"loss": 0.8971, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 1.9721115537848606e-05, |
|
"loss": 0.9387, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 1.8924302788844623e-05, |
|
"loss": 0.8656, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_Macro F1": 0.8106317572025369, |
|
"eval_Macro Precision": 0.8164411111931356, |
|
"eval_Macro Recall": 0.812159053433045, |
|
"eval_Micro F1": 0.8140000000000001, |
|
"eval_Micro Precision": 0.814, |
|
"eval_Micro Recall": 0.814, |
|
"eval_Weighted F1": 0.813675273019787, |
|
"eval_Weighted Precision": 0.8206734023674667, |
|
"eval_Weighted Recall": 0.814, |
|
"eval_accuracy": 0.814, |
|
"eval_loss": 0.687544584274292, |
|
"eval_runtime": 24.187, |
|
"eval_samples_per_second": 41.344, |
|
"eval_steps_per_second": 1.323, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 1.812749003984064e-05, |
|
"loss": 0.8568, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 1.7330677290836657e-05, |
|
"loss": 0.9105, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 1.653386454183267e-05, |
|
"loss": 0.7687, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1.5737051792828685e-05, |
|
"loss": 0.7905, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_Macro F1": 0.807144557531445, |
|
"eval_Macro Precision": 0.8145226206650988, |
|
"eval_Macro Recall": 0.8067641953560594, |
|
"eval_Micro F1": 0.808, |
|
"eval_Micro Precision": 0.808, |
|
"eval_Micro Recall": 0.808, |
|
"eval_Weighted F1": 0.8093388838073848, |
|
"eval_Weighted Precision": 0.8181875581570741, |
|
"eval_Weighted Recall": 0.808, |
|
"eval_accuracy": 0.808, |
|
"eval_loss": 0.7060463428497314, |
|
"eval_runtime": 23.4383, |
|
"eval_samples_per_second": 42.665, |
|
"eval_steps_per_second": 1.365, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 1.4940239043824702e-05, |
|
"loss": 0.8453, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 1.4143426294820719e-05, |
|
"loss": 0.7926, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 1.3346613545816733e-05, |
|
"loss": 0.8721, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 1.254980079681275e-05, |
|
"loss": 0.8804, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_Macro F1": 0.8182926573190057, |
|
"eval_Macro Precision": 0.8215050680984892, |
|
"eval_Macro Recall": 0.8183483781309105, |
|
"eval_Micro F1": 0.82, |
|
"eval_Micro Precision": 0.82, |
|
"eval_Micro Recall": 0.82, |
|
"eval_Weighted F1": 0.8213528638428045, |
|
"eval_Weighted Precision": 0.8260224129157536, |
|
"eval_Weighted Recall": 0.82, |
|
"eval_accuracy": 0.82, |
|
"eval_loss": 0.6848881244659424, |
|
"eval_runtime": 24.2686, |
|
"eval_samples_per_second": 41.206, |
|
"eval_steps_per_second": 1.319, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 1.1752988047808767e-05, |
|
"loss": 0.8041, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 1.095617529880478e-05, |
|
"loss": 0.8291, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 1.0159362549800798e-05, |
|
"loss": 0.8109, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 9.362549800796813e-06, |
|
"loss": 0.8265, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_Macro F1": 0.814299562240979, |
|
"eval_Macro Precision": 0.8205669101031257, |
|
"eval_Macro Recall": 0.8141933591904861, |
|
"eval_Micro F1": 0.816, |
|
"eval_Micro Precision": 0.816, |
|
"eval_Micro Recall": 0.816, |
|
"eval_Weighted F1": 0.8171172793618807, |
|
"eval_Weighted Precision": 0.8242012281620977, |
|
"eval_Weighted Recall": 0.816, |
|
"eval_accuracy": 0.816, |
|
"eval_loss": 0.6820688247680664, |
|
"eval_runtime": 24.1434, |
|
"eval_samples_per_second": 41.419, |
|
"eval_steps_per_second": 1.325, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 8.565737051792829e-06, |
|
"loss": 0.7845, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 7.768924302788846e-06, |
|
"loss": 0.8545, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 6.97211155378486e-06, |
|
"loss": 0.7685, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 6.175298804780877e-06, |
|
"loss": 0.7929, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_Macro F1": 0.8151613063095204, |
|
"eval_Macro Precision": 0.818634892770315, |
|
"eval_Macro Recall": 0.8167488060127654, |
|
"eval_Micro F1": 0.818, |
|
"eval_Micro Precision": 0.818, |
|
"eval_Micro Recall": 0.818, |
|
"eval_Weighted F1": 0.8184268497331145, |
|
"eval_Weighted Precision": 0.8239745058078853, |
|
"eval_Weighted Recall": 0.818, |
|
"eval_accuracy": 0.818, |
|
"eval_loss": 0.6877326369285583, |
|
"eval_runtime": 23.4065, |
|
"eval_samples_per_second": 42.723, |
|
"eval_steps_per_second": 1.367, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 5.378486055776893e-06, |
|
"loss": 0.8338, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 4.581673306772908e-06, |
|
"loss": 0.8154, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 3.7848605577689246e-06, |
|
"loss": 0.8054, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 2.9880478087649404e-06, |
|
"loss": 0.7993, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_Macro F1": 0.8233893661888394, |
|
"eval_Macro Precision": 0.8281793690152945, |
|
"eval_Macro Recall": 0.822655931109436, |
|
"eval_Micro F1": 0.825, |
|
"eval_Micro Precision": 0.825, |
|
"eval_Micro Recall": 0.825, |
|
"eval_Weighted F1": 0.8258766455781327, |
|
"eval_Weighted Precision": 0.8305915821251276, |
|
"eval_Weighted Recall": 0.825, |
|
"eval_accuracy": 0.825, |
|
"eval_loss": 0.6717957854270935, |
|
"eval_runtime": 24.415, |
|
"eval_samples_per_second": 40.958, |
|
"eval_steps_per_second": 1.311, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 17.15, |
|
"learning_rate": 2.1912350597609563e-06, |
|
"loss": 0.7572, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.3944223107569721e-06, |
|
"loss": 0.8022, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 5.976095617529881e-07, |
|
"loss": 0.7954, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"eval_Macro F1": 0.8242217019056312, |
|
"eval_Macro Precision": 0.8293094252297472, |
|
"eval_Macro Recall": 0.8236754312981294, |
|
"eval_Micro F1": 0.826, |
|
"eval_Micro Precision": 0.826, |
|
"eval_Micro Recall": 0.826, |
|
"eval_Weighted F1": 0.827162008102242, |
|
"eval_Weighted Precision": 0.8326500761444383, |
|
"eval_Weighted Recall": 0.826, |
|
"eval_accuracy": 0.826, |
|
"eval_loss": 0.6715443134307861, |
|
"eval_runtime": 24.1692, |
|
"eval_samples_per_second": 41.375, |
|
"eval_steps_per_second": 1.324, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"step": 558, |
|
"total_flos": 5.536126404560683e+18, |
|
"train_loss": 1.1784635154149865, |
|
"train_runtime": 5483.2083, |
|
"train_samples_per_second": 13.131, |
|
"train_steps_per_second": 0.102 |
|
} |
|
], |
|
"max_steps": 558, |
|
"num_train_epochs": 18, |
|
"total_flos": 5.536126404560683e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|