|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 2020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0028514851485148514, |
|
"loss": 8.3611, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.002702970297029703, |
|
"loss": 8.0607, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0025544554455445546, |
|
"loss": 7.7784, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0024059405940594063, |
|
"loss": 7.5413, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 8.850217819213867, |
|
"eval_runtime": 584.1219, |
|
"eval_samples_per_second": 15.183, |
|
"eval_steps_per_second": 7.593, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0022574257425742577, |
|
"loss": 7.3312, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0021089108910891086, |
|
"loss": 7.1317, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0019603960396039604, |
|
"loss": 6.9568, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0018118811881188118, |
|
"loss": 6.7965, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 8.118351936340332, |
|
"eval_runtime": 586.5561, |
|
"eval_samples_per_second": 15.12, |
|
"eval_steps_per_second": 7.561, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0016633663366336635, |
|
"loss": 6.6621, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.001514851485148515, |
|
"loss": 6.5571, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0013663366336633665, |
|
"loss": 6.4736, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.001217821782178218, |
|
"loss": 6.3963, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 7.695041179656982, |
|
"eval_runtime": 586.9195, |
|
"eval_samples_per_second": 15.111, |
|
"eval_steps_per_second": 7.556, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0010693069306930692, |
|
"loss": 6.3235, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.0009207920792079207, |
|
"loss": 6.283, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0007722772277227723, |
|
"loss": 6.2224, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0006237623762376238, |
|
"loss": 6.1664, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 7.485514163970947, |
|
"eval_runtime": 587.006, |
|
"eval_samples_per_second": 15.109, |
|
"eval_steps_per_second": 7.555, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0004752475247524753, |
|
"loss": 6.1536, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.00032673267326732675, |
|
"loss": 6.1402, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0001782178217821782, |
|
"loss": 6.0973, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.9702970297029706e-05, |
|
"loss": 6.1028, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 7.420807361602783, |
|
"eval_runtime": 587.2049, |
|
"eval_samples_per_second": 15.104, |
|
"eval_steps_per_second": 7.553, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2020, |
|
"total_flos": 4.4078196477394944e+17, |
|
"train_loss": 6.77048750395822, |
|
"train_runtime": 16676.4775, |
|
"train_samples_per_second": 15.502, |
|
"train_steps_per_second": 0.121 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2020, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 4.4078196477394944e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|