|
{ |
|
"best_metric": 0.7412639349881154, |
|
"best_model_checkpoint": "trained/hebban-reviews/robbert-v2-dutch-base/checkpoint-3500", |
|
"epoch": 4.382997370727432, |
|
"global_step": 5001, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.501099780043991e-05, |
|
"loss": 0.6723, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.8079388560157791, |
|
"eval_f1": 0.8117501994363496, |
|
"eval_loss": 0.5760409235954285, |
|
"eval_precision": 0.8174343819162313, |
|
"eval_qwk": 0.7302784941209646, |
|
"eval_recall": 0.8079388560157791, |
|
"eval_runtime": 23.4439, |
|
"eval_samples_per_second": 692.036, |
|
"eval_steps_per_second": 5.417, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.0011997600479906e-05, |
|
"loss": 0.5754, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.7649778106508875, |
|
"eval_f1": 0.7798244436633573, |
|
"eval_loss": 0.5568957328796387, |
|
"eval_precision": 0.8129894746393622, |
|
"eval_qwk": 0.6942695698574534, |
|
"eval_recall": 0.7649778106508875, |
|
"eval_runtime": 23.1665, |
|
"eval_samples_per_second": 700.32, |
|
"eval_steps_per_second": 5.482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.50129974005199e-05, |
|
"loss": 0.5052, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.7829758382642998, |
|
"eval_f1": 0.7967934169486903, |
|
"eval_loss": 0.5705748200416565, |
|
"eval_precision": 0.8279826615797163, |
|
"eval_qwk": 0.7174911347556729, |
|
"eval_recall": 0.7829758382642998, |
|
"eval_runtime": 23.1285, |
|
"eval_samples_per_second": 701.473, |
|
"eval_steps_per_second": 5.491, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.001399720055989e-05, |
|
"loss": 0.4723, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.7924063116370809, |
|
"eval_f1": 0.8033559165914831, |
|
"eval_loss": 0.5601416230201721, |
|
"eval_precision": 0.8252798007612112, |
|
"eval_qwk": 0.7270656679689509, |
|
"eval_recall": 0.7924063116370809, |
|
"eval_runtime": 23.2157, |
|
"eval_samples_per_second": 698.837, |
|
"eval_steps_per_second": 5.47, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.5024995000999802e-05, |
|
"loss": 0.4285, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.8101577909270217, |
|
"eval_f1": 0.8186486636369545, |
|
"eval_loss": 0.691852867603302, |
|
"eval_precision": 0.8361687645268726, |
|
"eval_qwk": 0.7349979059232339, |
|
"eval_recall": 0.8101577909270217, |
|
"eval_runtime": 23.1035, |
|
"eval_samples_per_second": 702.232, |
|
"eval_steps_per_second": 5.497, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.0025994801039795e-05, |
|
"loss": 0.3553, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.7843318540433925, |
|
"eval_f1": 0.7982445447583074, |
|
"eval_loss": 0.674355685710907, |
|
"eval_precision": 0.8311821725851122, |
|
"eval_qwk": 0.7133869717906458, |
|
"eval_recall": 0.7843318540433925, |
|
"eval_runtime": 23.1222, |
|
"eval_samples_per_second": 701.663, |
|
"eval_steps_per_second": 5.493, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.5026994601079786e-05, |
|
"loss": 0.3433, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_accuracy": 0.813732741617357, |
|
"eval_f1": 0.821008951036937, |
|
"eval_loss": 0.8146640658378601, |
|
"eval_precision": 0.8342737114916078, |
|
"eval_qwk": 0.7412639349881154, |
|
"eval_recall": 0.813732741617357, |
|
"eval_runtime": 23.1151, |
|
"eval_samples_per_second": 701.878, |
|
"eval_steps_per_second": 5.494, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0037992401519696e-05, |
|
"loss": 0.2751, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_accuracy": 0.8128698224852071, |
|
"eval_f1": 0.8199992286734568, |
|
"eval_loss": 0.8419223427772522, |
|
"eval_precision": 0.8332550382998175, |
|
"eval_qwk": 0.7383621154665407, |
|
"eval_recall": 0.8128698224852071, |
|
"eval_runtime": 23.1425, |
|
"eval_samples_per_second": 701.047, |
|
"eval_steps_per_second": 5.488, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.038992201559688e-06, |
|
"loss": 0.2659, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.8022682445759369, |
|
"eval_f1": 0.8125481330626305, |
|
"eval_loss": 0.8309345841407776, |
|
"eval_precision": 0.8344366995643693, |
|
"eval_qwk": 0.7292405615679007, |
|
"eval_recall": 0.8022682445759369, |
|
"eval_runtime": 23.1428, |
|
"eval_samples_per_second": 701.039, |
|
"eval_steps_per_second": 5.488, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.9992001599680065e-08, |
|
"loss": 0.2346, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_accuracy": 0.8102810650887574, |
|
"eval_f1": 0.8179537922797215, |
|
"eval_loss": 0.8737895488739014, |
|
"eval_precision": 0.8320255779887702, |
|
"eval_qwk": 0.7367921505621005, |
|
"eval_recall": 0.8102810650887574, |
|
"eval_runtime": 23.1382, |
|
"eval_samples_per_second": 701.178, |
|
"eval_steps_per_second": 5.489, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"step": 5001, |
|
"total_flos": 1.6838837781764506e+17, |
|
"train_loss": 0.4127759954567505, |
|
"train_runtime": 2598.1165, |
|
"train_samples_per_second": 246.382, |
|
"train_steps_per_second": 1.925 |
|
} |
|
], |
|
"max_steps": 5001, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.6838837781764506e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|