|
{ |
|
"best_metric": 0.6441834028040925, |
|
"best_model_checkpoint": "hubert-base-ls960/checkpoint-6000", |
|
"epoch": 32.0, |
|
"global_step": 6336, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.3659305993690853e-05, |
|
"loss": 4.523, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.02046229632436529, |
|
"eval_f1": 0.003726162630911285, |
|
"eval_loss": 5.154743194580078, |
|
"eval_precision": 0.0047326994458260765, |
|
"eval_runtime": 111.372, |
|
"eval_samples_per_second": 23.695, |
|
"eval_steps_per_second": 0.79, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.8074359873728517e-05, |
|
"loss": 3.4187, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_accuracy": 0.03372489579386131, |
|
"eval_f1": 0.01633580104617213, |
|
"eval_loss": 4.628676891326904, |
|
"eval_precision": 0.025641939245838297, |
|
"eval_runtime": 106.2725, |
|
"eval_samples_per_second": 24.832, |
|
"eval_steps_per_second": 0.828, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 2.5443703963521573e-05, |
|
"loss": 2.3533, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_accuracy": 0.09435392194012884, |
|
"eval_f1": 0.06405678590079675, |
|
"eval_loss": 4.254951477050781, |
|
"eval_precision": 0.10330351479105922, |
|
"eval_runtime": 108.1414, |
|
"eval_samples_per_second": 24.403, |
|
"eval_steps_per_second": 0.814, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 2.2813048053314627e-05, |
|
"loss": 1.7145, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"eval_accuracy": 0.10951117847669571, |
|
"eval_f1": 0.09636838131822689, |
|
"eval_loss": 3.953977108001709, |
|
"eval_precision": 0.20908275360175174, |
|
"eval_runtime": 101.3178, |
|
"eval_samples_per_second": 26.047, |
|
"eval_steps_per_second": 0.869, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 2.0182392143107683e-05, |
|
"loss": 1.3245, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"eval_accuracy": 0.17582417582417584, |
|
"eval_f1": 0.1859427403647579, |
|
"eval_loss": 3.855651617050171, |
|
"eval_precision": 0.3608930277105155, |
|
"eval_runtime": 107.9879, |
|
"eval_samples_per_second": 24.438, |
|
"eval_steps_per_second": 0.815, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 1.7551736232900737e-05, |
|
"loss": 1.0729, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"eval_accuracy": 0.224706328154604, |
|
"eval_f1": 0.2537469884726095, |
|
"eval_loss": 3.74106764793396, |
|
"eval_precision": 0.4918090735633196, |
|
"eval_runtime": 107.1954, |
|
"eval_samples_per_second": 24.619, |
|
"eval_steps_per_second": 0.821, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 1.4921080322693792e-05, |
|
"loss": 0.8955, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_accuracy": 0.378931413414172, |
|
"eval_f1": 0.42563545406792835, |
|
"eval_loss": 3.2683181762695312, |
|
"eval_precision": 0.6161701808557754, |
|
"eval_runtime": 108.9972, |
|
"eval_samples_per_second": 24.212, |
|
"eval_steps_per_second": 0.807, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 1.2290424412486847e-05, |
|
"loss": 0.7697, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"eval_accuracy": 0.46115953012504735, |
|
"eval_f1": 0.5171156262633545, |
|
"eval_loss": 2.874896764755249, |
|
"eval_precision": 0.7105984753027313, |
|
"eval_runtime": 104.5833, |
|
"eval_samples_per_second": 25.233, |
|
"eval_steps_per_second": 0.841, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 9.659768502279902e-06, |
|
"loss": 0.6864, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"eval_accuracy": 0.5168624478969307, |
|
"eval_f1": 0.5779425289838436, |
|
"eval_loss": 2.7250964641571045, |
|
"eval_precision": 0.7436539303592055, |
|
"eval_runtime": 102.232, |
|
"eval_samples_per_second": 25.814, |
|
"eval_steps_per_second": 0.861, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"learning_rate": 7.029112592072957e-06, |
|
"loss": 0.6061, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"eval_accuracy": 0.5630920803334597, |
|
"eval_f1": 0.6334793930128474, |
|
"eval_loss": 2.506129264831543, |
|
"eval_precision": 0.8042779660767879, |
|
"eval_runtime": 108.8606, |
|
"eval_samples_per_second": 24.242, |
|
"eval_steps_per_second": 0.808, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"learning_rate": 4.398456681866012e-06, |
|
"loss": 0.5777, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"eval_accuracy": 0.6176582038651004, |
|
"eval_f1": 0.6836726868522811, |
|
"eval_loss": 2.28301739692688, |
|
"eval_precision": 0.8183499709851196, |
|
"eval_runtime": 109.1511, |
|
"eval_samples_per_second": 24.177, |
|
"eval_steps_per_second": 0.806, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"learning_rate": 1.767800771659067e-06, |
|
"loss": 0.5304, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"eval_accuracy": 0.6441834028040925, |
|
"eval_f1": 0.7121017502331707, |
|
"eval_loss": 2.1857309341430664, |
|
"eval_precision": 0.8368800792283315, |
|
"eval_runtime": 110.4001, |
|
"eval_samples_per_second": 23.904, |
|
"eval_steps_per_second": 0.797, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"step": 6336, |
|
"total_flos": 3.804433776973221e+19, |
|
"train_loss": 1.4852149317962955, |
|
"train_runtime": 31381.271, |
|
"train_samples_per_second": 24.216, |
|
"train_steps_per_second": 0.202 |
|
} |
|
], |
|
"max_steps": 6336, |
|
"num_train_epochs": 32, |
|
"total_flos": 3.804433776973221e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|