|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 12525, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.800399201596807e-05, |
|
"loss": 0.3028, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.600798403193613e-05, |
|
"loss": 0.2468, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.40119760479042e-05, |
|
"loss": 0.2372, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.201596806387226e-05, |
|
"loss": 0.2205, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0019960079840326e-05, |
|
"loss": 0.2164, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.802395209580839e-05, |
|
"loss": 0.2057, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6027944111776455e-05, |
|
"loss": 0.2063, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4031936127744515e-05, |
|
"loss": 0.2132, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.2035928143712576e-05, |
|
"loss": 0.1854, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.003992015968064e-05, |
|
"loss": 0.1965, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8043912175648708e-05, |
|
"loss": 0.2027, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.604790419161677e-05, |
|
"loss": 0.2023, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4051896207584833e-05, |
|
"loss": 0.1888, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2055888223552897e-05, |
|
"loss": 0.1855, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0059880239520957e-05, |
|
"loss": 0.1887, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.806387225548902e-05, |
|
"loss": 0.1855, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6067864271457086e-05, |
|
"loss": 0.1828, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.407185628742515e-05, |
|
"loss": 0.1744, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.2075848303393214e-05, |
|
"loss": 0.1745, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0079840319361278e-05, |
|
"loss": 0.1922, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.083832335329342e-06, |
|
"loss": 0.1827, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.0878243512974054e-06, |
|
"loss": 0.1682, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.091816367265469e-06, |
|
"loss": 0.1826, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.095808383233533e-06, |
|
"loss": 0.1874, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.98003992015968e-08, |
|
"loss": 0.1663, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 12525, |
|
"total_flos": 581706992910336.0, |
|
"train_loss": 0.1998214961525923, |
|
"train_runtime": 7855.9485, |
|
"train_samples_per_second": 6.377, |
|
"train_steps_per_second": 1.594 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12525, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 581706992910336.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|