|
{ |
|
"best_metric": 1.3857518434524536, |
|
"best_model_checkpoint": "./outputs/checkpoint-1200", |
|
"epoch": 0.8743169398907104, |
|
"eval_steps": 100, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 1.765, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.631608486175537, |
|
"eval_runtime": 430.1354, |
|
"eval_samples_per_second": 14.586, |
|
"eval_steps_per_second": 1.825, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6077, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.5870035886764526, |
|
"eval_runtime": 417.8578, |
|
"eval_samples_per_second": 15.015, |
|
"eval_steps_per_second": 1.879, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5755, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.5563029050827026, |
|
"eval_runtime": 417.8787, |
|
"eval_samples_per_second": 15.014, |
|
"eval_steps_per_second": 1.879, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5445, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.5300686359405518, |
|
"eval_runtime": 417.9846, |
|
"eval_samples_per_second": 15.01, |
|
"eval_steps_per_second": 1.878, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5114, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5074832439422607, |
|
"eval_runtime": 417.962, |
|
"eval_samples_per_second": 15.011, |
|
"eval_steps_per_second": 1.878, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4946, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4870833158493042, |
|
"eval_runtime": 417.6568, |
|
"eval_samples_per_second": 15.022, |
|
"eval_steps_per_second": 1.88, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.472, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.4683642387390137, |
|
"eval_runtime": 417.7114, |
|
"eval_samples_per_second": 15.02, |
|
"eval_steps_per_second": 1.879, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4622, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.4501450061798096, |
|
"eval_runtime": 417.7046, |
|
"eval_samples_per_second": 15.02, |
|
"eval_steps_per_second": 1.879, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.433, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.4323909282684326, |
|
"eval_runtime": 417.8696, |
|
"eval_samples_per_second": 15.014, |
|
"eval_steps_per_second": 1.879, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4268, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.4162870645523071, |
|
"eval_runtime": 417.7667, |
|
"eval_samples_per_second": 15.018, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4125, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.4018020629882812, |
|
"eval_runtime": 417.5926, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3846, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.3857518434524536, |
|
"eval_runtime": 417.6267, |
|
"eval_samples_per_second": 15.023, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 3.890658552378409e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|