|
{ |
|
"best_metric": 0.6320356726646423, |
|
"best_model_checkpoint": "./results/checkpoint-200", |
|
"epoch": 8.181818181818182, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9348, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.2767767906188965, |
|
"eval_runtime": 6.5658, |
|
"eval_samples_per_second": 3.808, |
|
"eval_steps_per_second": 0.609, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8572, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.7143042087554932, |
|
"eval_runtime": 6.9652, |
|
"eval_samples_per_second": 3.589, |
|
"eval_steps_per_second": 0.574, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5587, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 0.6453649401664734, |
|
"eval_runtime": 6.6927, |
|
"eval_samples_per_second": 3.735, |
|
"eval_steps_per_second": 0.598, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4965, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 0.6320356726646423, |
|
"eval_runtime": 7.193, |
|
"eval_samples_per_second": 3.476, |
|
"eval_steps_per_second": 0.556, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4535, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 0.6632673144340515, |
|
"eval_runtime": 6.7989, |
|
"eval_samples_per_second": 3.677, |
|
"eval_steps_per_second": 0.588, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.405, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_loss": 0.6746855974197388, |
|
"eval_runtime": 6.7569, |
|
"eval_samples_per_second": 3.7, |
|
"eval_steps_per_second": 0.592, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4152, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 0.7429562211036682, |
|
"eval_runtime": 6.9904, |
|
"eval_samples_per_second": 3.576, |
|
"eval_steps_per_second": 0.572, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3915, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_loss": 0.6850793957710266, |
|
"eval_runtime": 7.0689, |
|
"eval_samples_per_second": 3.537, |
|
"eval_steps_per_second": 0.566, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3716, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"eval_loss": 0.6914016604423523, |
|
"eval_runtime": 6.7862, |
|
"eval_samples_per_second": 3.684, |
|
"eval_steps_per_second": 0.589, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 50, |
|
"total_flos": 83840921026560.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|