|
{ |
|
"best_metric": 1.17391836643219, |
|
"best_model_checkpoint": "./outputs/checkpoint-2200", |
|
"epoch": 2.9333333333333336, |
|
"eval_steps": 100, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2656, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.350274920463562, |
|
"eval_runtime": 546.2738, |
|
"eval_samples_per_second": 3.533, |
|
"eval_steps_per_second": 0.443, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.103, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.3182251453399658, |
|
"eval_runtime": 459.6303, |
|
"eval_samples_per_second": 4.199, |
|
"eval_steps_per_second": 0.527, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0834, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.299042820930481, |
|
"eval_runtime": 457.0511, |
|
"eval_samples_per_second": 4.223, |
|
"eval_steps_per_second": 0.529, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0678, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.287185788154602, |
|
"eval_runtime": 463.8545, |
|
"eval_samples_per_second": 4.161, |
|
"eval_steps_per_second": 0.522, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0544, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.273476243019104, |
|
"eval_runtime": 461.7034, |
|
"eval_samples_per_second": 4.18, |
|
"eval_steps_per_second": 0.524, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0495, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.2602167129516602, |
|
"eval_runtime": 455.1997, |
|
"eval_samples_per_second": 4.24, |
|
"eval_steps_per_second": 0.532, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0406, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.2561663389205933, |
|
"eval_runtime": 449.9152, |
|
"eval_samples_per_second": 4.29, |
|
"eval_steps_per_second": 0.538, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0282, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.2437876462936401, |
|
"eval_runtime": 451.2737, |
|
"eval_samples_per_second": 4.277, |
|
"eval_steps_per_second": 0.536, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0168, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.2371761798858643, |
|
"eval_runtime": 452.8697, |
|
"eval_samples_per_second": 4.262, |
|
"eval_steps_per_second": 0.534, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0056, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.2295143604278564, |
|
"eval_runtime": 453.1971, |
|
"eval_samples_per_second": 4.259, |
|
"eval_steps_per_second": 0.534, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0137, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 1.224471092224121, |
|
"eval_runtime": 452.058, |
|
"eval_samples_per_second": 4.269, |
|
"eval_steps_per_second": 0.535, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.001, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.2201058864593506, |
|
"eval_runtime": 451.8545, |
|
"eval_samples_per_second": 4.271, |
|
"eval_steps_per_second": 0.536, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.991, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 1.2112103700637817, |
|
"eval_runtime": 449.46, |
|
"eval_samples_per_second": 4.294, |
|
"eval_steps_per_second": 0.538, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9919, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 1.2065283060073853, |
|
"eval_runtime": 451.912, |
|
"eval_samples_per_second": 4.271, |
|
"eval_steps_per_second": 0.536, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9797, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.2011693716049194, |
|
"eval_runtime": 452.3637, |
|
"eval_samples_per_second": 4.266, |
|
"eval_steps_per_second": 0.535, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9734, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 1.1984320878982544, |
|
"eval_runtime": 455.5044, |
|
"eval_samples_per_second": 4.237, |
|
"eval_steps_per_second": 0.531, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9694, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 1.1960954666137695, |
|
"eval_runtime": 454.7892, |
|
"eval_samples_per_second": 4.244, |
|
"eval_steps_per_second": 0.532, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9693, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.1902049779891968, |
|
"eval_runtime": 453.32, |
|
"eval_samples_per_second": 4.257, |
|
"eval_steps_per_second": 0.534, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9606, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.183451771736145, |
|
"eval_runtime": 452.8791, |
|
"eval_samples_per_second": 4.262, |
|
"eval_steps_per_second": 0.534, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9541, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.1814736127853394, |
|
"eval_runtime": 456.1407, |
|
"eval_samples_per_second": 4.231, |
|
"eval_steps_per_second": 0.531, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9638, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 1.1796127557754517, |
|
"eval_runtime": 454.0476, |
|
"eval_samples_per_second": 4.251, |
|
"eval_steps_per_second": 0.533, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9646, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 1.17391836643219, |
|
"eval_runtime": 453.8066, |
|
"eval_samples_per_second": 4.253, |
|
"eval_steps_per_second": 0.533, |
|
"step": 2200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 7.321308131141222e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|