|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 2208, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 276, |
|
"train_eval_accuracy": 0.7003401360544218, |
|
"train_eval_f1": 0.6599009769808177, |
|
"train_eval_loss": 1.0742188692092896, |
|
"train_eval_precision": 0.7685367987097715, |
|
"train_eval_recall": 0.7009459009381873, |
|
"train_loss": 1.074218988418579, |
|
"train_runtime": 292.4292, |
|
"train_samples_per_second": 30.161, |
|
"train_steps_per_second": 0.944 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6865079365079365, |
|
"eval_f1": 0.6448401353068663, |
|
"eval_loss": 1.1159266233444214, |
|
"eval_precision": 0.7389370546469851, |
|
"eval_recall": 0.685088186510769, |
|
"eval_runtime": 125.2302, |
|
"eval_samples_per_second": 30.184, |
|
"eval_steps_per_second": 0.95, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 552, |
|
"train_eval_accuracy": 0.9049886621315193, |
|
"train_eval_f1": 0.9047389370162652, |
|
"train_eval_loss": 0.3874468207359314, |
|
"train_eval_precision": 0.9101320070777226, |
|
"train_eval_recall": 0.9053131019265812, |
|
"train_loss": 0.3874468505382538, |
|
"train_runtime": 292.6477, |
|
"train_samples_per_second": 30.139, |
|
"train_steps_per_second": 0.943 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.873015873015873, |
|
"eval_f1": 0.8716046163578155, |
|
"eval_loss": 0.4920203685760498, |
|
"eval_precision": 0.8781278884267814, |
|
"eval_recall": 0.8725450168508944, |
|
"eval_runtime": 125.7044, |
|
"eval_samples_per_second": 30.071, |
|
"eval_steps_per_second": 0.947, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 828, |
|
"train_eval_accuracy": 0.9286848072562358, |
|
"train_eval_f1": 0.9285171455529603, |
|
"train_eval_loss": 0.27065399289131165, |
|
"train_eval_precision": 0.9322649415650033, |
|
"train_eval_recall": 0.9289123109383434, |
|
"train_loss": 0.27065402269363403, |
|
"train_runtime": 292.5422, |
|
"train_samples_per_second": 30.149, |
|
"train_steps_per_second": 0.943 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8973544973544973, |
|
"eval_f1": 0.8961220197070991, |
|
"eval_loss": 0.42477917671203613, |
|
"eval_precision": 0.9011567773926408, |
|
"eval_recall": 0.8971253090647534, |
|
"eval_runtime": 125.1591, |
|
"eval_samples_per_second": 30.202, |
|
"eval_steps_per_second": 0.951, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1104, |
|
"train_eval_accuracy": 0.9471655328798186, |
|
"train_eval_f1": 0.947517595979444, |
|
"train_eval_loss": 0.2088230848312378, |
|
"train_eval_precision": 0.9488916851742717, |
|
"train_eval_recall": 0.9477037054061244, |
|
"train_loss": 0.2088230848312378, |
|
"train_runtime": 292.1312, |
|
"train_samples_per_second": 30.192, |
|
"train_steps_per_second": 0.945 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9010582010582011, |
|
"eval_f1": 0.8999224906052751, |
|
"eval_loss": 0.41053256392478943, |
|
"eval_precision": 0.9025635496532717, |
|
"eval_recall": 0.9002552887231217, |
|
"eval_runtime": 125.1369, |
|
"eval_samples_per_second": 30.207, |
|
"eval_steps_per_second": 0.951, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1380, |
|
"train_eval_accuracy": 0.9515873015873015, |
|
"train_eval_f1": 0.9516673939530801, |
|
"train_eval_loss": 0.17656771838665009, |
|
"train_eval_precision": 0.9539169437133798, |
|
"train_eval_recall": 0.9520537526614696, |
|
"train_loss": 0.1765676885843277, |
|
"train_runtime": 292.8194, |
|
"train_samples_per_second": 30.121, |
|
"train_steps_per_second": 0.943 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9063492063492063, |
|
"eval_f1": 0.9046138481463505, |
|
"eval_loss": 0.41682690382003784, |
|
"eval_precision": 0.9088264652953383, |
|
"eval_recall": 0.9051041885149284, |
|
"eval_runtime": 125.6464, |
|
"eval_samples_per_second": 30.084, |
|
"eval_steps_per_second": 0.947, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 1656, |
|
"train_eval_accuracy": 0.9654195011337868, |
|
"train_eval_f1": 0.9656476663607069, |
|
"train_eval_loss": 0.12671761214733124, |
|
"train_eval_precision": 0.9666620904833755, |
|
"train_eval_recall": 0.9657485060432522, |
|
"train_loss": 0.12671762704849243, |
|
"train_runtime": 292.8539, |
|
"train_samples_per_second": 30.117, |
|
"train_steps_per_second": 0.942 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9084656084656084, |
|
"eval_f1": 0.9069212907450369, |
|
"eval_loss": 0.41428086161613464, |
|
"eval_precision": 0.909310676401177, |
|
"eval_recall": 0.9074800568136571, |
|
"eval_runtime": 125.6663, |
|
"eval_samples_per_second": 30.08, |
|
"eval_steps_per_second": 0.947, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 1932, |
|
"train_eval_accuracy": 0.9712018140589569, |
|
"train_eval_f1": 0.9713117879463059, |
|
"train_eval_loss": 0.10472333431243896, |
|
"train_eval_precision": 0.9726857205787973, |
|
"train_eval_recall": 0.9716513762432712, |
|
"train_loss": 0.10472334921360016, |
|
"train_runtime": 293.0912, |
|
"train_samples_per_second": 30.093, |
|
"train_steps_per_second": 0.942 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9058201058201059, |
|
"eval_f1": 0.9040971215004513, |
|
"eval_loss": 0.4402031898498535, |
|
"eval_precision": 0.9063334381728124, |
|
"eval_recall": 0.9047346717153943, |
|
"eval_runtime": 125.6961, |
|
"eval_samples_per_second": 30.073, |
|
"eval_steps_per_second": 0.947, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2208, |
|
"train_eval_accuracy": 0.977437641723356, |
|
"train_eval_f1": 0.9777628423711627, |
|
"train_eval_loss": 0.08053447306156158, |
|
"train_eval_precision": 0.9778969889316966, |
|
"train_eval_recall": 0.9778987153746018, |
|
"train_loss": 0.08053448051214218, |
|
"train_runtime": 293.5192, |
|
"train_samples_per_second": 30.049, |
|
"train_steps_per_second": 0.94 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9031746031746032, |
|
"eval_f1": 0.9017850721426728, |
|
"eval_loss": 0.45825621485710144, |
|
"eval_precision": 0.9036277073612298, |
|
"eval_recall": 0.902066631184587, |
|
"eval_runtime": 125.7628, |
|
"eval_samples_per_second": 30.057, |
|
"eval_steps_per_second": 0.946, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2208, |
|
"total_flos": 1.85717836136448e+16, |
|
"train_loss": 0.5860137939453125, |
|
"train_runtime": 10564.8969, |
|
"train_samples_per_second": 12.523, |
|
"train_steps_per_second": 0.392 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9031746031746032, |
|
"eval_f1": 0.9017850721426728, |
|
"eval_loss": 0.45825621485710144, |
|
"eval_precision": 0.9036277073612298, |
|
"eval_recall": 0.902066631184587, |
|
"eval_runtime": 125.8523, |
|
"eval_samples_per_second": 30.035, |
|
"eval_steps_per_second": 0.946, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2208, |
|
"train_en_eval_accuracy": 0.977437641723356, |
|
"train_en_eval_f1": 0.9777628423711627, |
|
"train_en_eval_loss": 0.08053447306156158, |
|
"train_en_eval_precision": 0.9778969889316966, |
|
"train_en_eval_recall": 0.9778987153746018, |
|
"train_en_loss": 0.08053448051214218, |
|
"train_en_runtime": 292.6514, |
|
"train_en_samples_per_second": 30.138, |
|
"train_en_steps_per_second": 0.943 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2208, |
|
"test_en_eval_accuracy": 0.9031746031746032, |
|
"test_en_eval_f1": 0.9017850721426728, |
|
"test_en_eval_loss": 0.45825621485710144, |
|
"test_en_eval_precision": 0.9036277073612298, |
|
"test_en_eval_recall": 0.902066631184587, |
|
"test_en_loss": 0.45825621485710144, |
|
"test_en_runtime": 125.7293, |
|
"test_en_samples_per_second": 30.065, |
|
"test_en_steps_per_second": 0.946 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.85717836136448e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|