|
{ |
|
"best_metric": 0.6626126766204834, |
|
"best_model_checkpoint": "./results/roberta-base/checkpoint-8097", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 16194, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.618255853652954, |
|
"learning_rate": 4.672437244961049e-05, |
|
"loss": 0.3464, |
|
"step": 2699 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7697120458314488, |
|
"eval_conf_mat": [ |
|
[ |
|
6318, |
|
522 |
|
], |
|
[ |
|
2533, |
|
3893 |
|
] |
|
], |
|
"eval_f1": 0.7181994280970391, |
|
"eval_loss": 0.7109997272491455, |
|
"eval_precision": 0.8817667044167611, |
|
"eval_recall": 0.6058201058201058, |
|
"eval_runtime": 31.5248, |
|
"eval_samples_per_second": 420.812, |
|
"eval_steps_per_second": 13.164, |
|
"step": 2699 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 30.145366668701172, |
|
"learning_rate": 4.338691727463831e-05, |
|
"loss": 0.1889, |
|
"step": 5398 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8014473089099955, |
|
"eval_conf_mat": [ |
|
[ |
|
6207, |
|
633 |
|
], |
|
[ |
|
2001, |
|
4425 |
|
] |
|
], |
|
"eval_f1": 0.7706374085684431, |
|
"eval_loss": 0.8169035911560059, |
|
"eval_precision": 0.8748517200474496, |
|
"eval_recall": 0.688608776844071, |
|
"eval_runtime": 31.2094, |
|
"eval_samples_per_second": 425.065, |
|
"eval_steps_per_second": 13.297, |
|
"step": 5398 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 11.833324432373047, |
|
"learning_rate": 4.004946209966614e-05, |
|
"loss": 0.1456, |
|
"step": 8097 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.818407960199005, |
|
"eval_conf_mat": [ |
|
[ |
|
6207, |
|
633 |
|
], |
|
[ |
|
1776, |
|
4650 |
|
] |
|
], |
|
"eval_f1": 0.7942608250064054, |
|
"eval_loss": 0.6626126766204834, |
|
"eval_precision": 0.8801817149346962, |
|
"eval_recall": 0.7236227824463118, |
|
"eval_runtime": 31.2622, |
|
"eval_samples_per_second": 424.346, |
|
"eval_steps_per_second": 13.275, |
|
"step": 8097 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 16.268821716308594, |
|
"learning_rate": 3.6712006924693956e-05, |
|
"loss": 0.1181, |
|
"step": 10796 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7795115332428765, |
|
"eval_conf_mat": [ |
|
[ |
|
6320, |
|
520 |
|
], |
|
[ |
|
2405, |
|
4021 |
|
] |
|
], |
|
"eval_f1": 0.7332907814352148, |
|
"eval_loss": 1.1288779973983765, |
|
"eval_precision": 0.885487778022462, |
|
"eval_recall": 0.6257391845627139, |
|
"eval_runtime": 31.3467, |
|
"eval_samples_per_second": 423.202, |
|
"eval_steps_per_second": 13.239, |
|
"step": 10796 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 152.2488555908203, |
|
"learning_rate": 3.337455174972178e-05, |
|
"loss": 0.0985, |
|
"step": 13495 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7281772953414745, |
|
"eval_conf_mat": [ |
|
[ |
|
6390, |
|
450 |
|
], |
|
[ |
|
3156, |
|
3270 |
|
] |
|
], |
|
"eval_f1": 0.644589000591366, |
|
"eval_loss": 1.4766658544540405, |
|
"eval_precision": 0.8790322580645161, |
|
"eval_recall": 0.5088702147525677, |
|
"eval_runtime": 31.2848, |
|
"eval_samples_per_second": 424.039, |
|
"eval_steps_per_second": 13.265, |
|
"step": 13495 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.3119850754737854, |
|
"learning_rate": 3.0037096574749602e-05, |
|
"loss": 0.0771, |
|
"step": 16194 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7473239861299563, |
|
"eval_conf_mat": [ |
|
[ |
|
6387, |
|
453 |
|
], |
|
[ |
|
2899, |
|
3527 |
|
] |
|
], |
|
"eval_f1": 0.6778781472227561, |
|
"eval_loss": 1.5410542488098145, |
|
"eval_precision": 0.8861809045226131, |
|
"eval_recall": 0.5488639900404606, |
|
"eval_runtime": 31.3637, |
|
"eval_samples_per_second": 422.973, |
|
"eval_steps_per_second": 13.232, |
|
"step": 16194 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40485, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.229063080284522e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|