{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 732,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 1.9963183634476757e-05,
      "loss": 1.9326,
      "step": 20
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.985300562686109e-05,
      "loss": 1.8276,
      "step": 40
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9670277247913205e-05,
      "loss": 1.8392,
      "step": 60
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.941634397659126e-05,
      "loss": 1.3092,
      "step": 80
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.909307559292236e-05,
      "loss": 1.1181,
      "step": 100
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.8702852410301556e-05,
      "loss": 1.2467,
      "step": 120
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8248547748594246e-05,
      "loss": 1.0989,
      "step": 140
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.77335067770973e-05,
      "loss": 1.1427,
      "step": 160
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.7161521883143936e-05,
      "loss": 1.1778,
      "step": 180
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.653680474772006e-05,
      "loss": 1.0765,
      "step": 200
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.586395533370696e-05,
      "loss": 1.0176,
      "step": 220
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.5147928015098309e-05,
      "loss": 1.1858,
      "step": 240
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.4393995096591415e-05,
      "loss": 1.1799,
      "step": 260
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.3607707992167836e-05,
      "loss": 1.1227,
      "step": 280
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.2794856348516095e-05,
      "loss": 1.0963,
      "step": 300
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.196142541428197e-05,
      "loss": 1.0744,
      "step": 320
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.1113551969048088e-05,
      "loss": 1.1303,
      "step": 340
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.0257479136549889e-05,
      "loss": 1.1179,
      "step": 360
    },
    {
      "epoch": 1.04,
      "learning_rate": 9.399510414850518e-06,
      "loss": 1.0918,
      "step": 380
    },
    {
      "epoch": 1.09,
      "learning_rate": 8.545963261963102e-06,
      "loss": 1.0483,
      "step": 400
    },
    {
      "epoch": 1.15,
      "learning_rate": 7.703122578682047e-06,
      "loss": 1.2234,
      "step": 420
    },
    {
      "epoch": 1.2,
      "learning_rate": 6.877194431142055e-06,
      "loss": 1.1311,
      "step": 440
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.074260353858283e-06,
      "loss": 1.1261,
      "step": 460
    },
    {
      "epoch": 1.31,
      "learning_rate": 5.300232569726805e-06,
      "loss": 1.1854,
      "step": 480
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.560810456712754e-06,
      "loss": 1.1388,
      "step": 500
    },
    {
      "epoch": 1.42,
      "learning_rate": 3.86143858177388e-06,
      "loss": 1.0478,
      "step": 520
    },
    {
      "epoch": 1.48,
      "learning_rate": 3.207266611027069e-06,
      "loss": 1.1229,
      "step": 540
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.6031113913503337e-06,
      "loss": 0.9668,
      "step": 560
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.0534214826237486e-06,
      "loss": 1.1179,
      "step": 580
    },
    {
      "epoch": 1.64,
      "learning_rate": 1.5622444017681438e-06,
      "loss": 1.0696,
      "step": 600
    },
    {
      "epoch": 1.69,
      "learning_rate": 1.1331968197725985e-06,
      "loss": 1.055,
      "step": 620
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.694379311582401e-07,
      "loss": 1.058,
      "step": 640
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.73646191966175e-07,
      "loss": 1.0865,
      "step": 660
    },
    {
      "epoch": 1.86,
      "learning_rate": 2.479995975541749e-07,
      "loss": 0.9857,
      "step": 680
    },
    {
      "epoch": 1.91,
      "learning_rate": 9.415964542203059e-08,
      "loss": 1.1078,
      "step": 700
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.325910115169471e-08,
      "loss": 1.1117,
      "step": 720
    },
    {
      "epoch": 2.0,
      "step": 732,
      "total_flos": 2.256724490932224e+16,
      "train_loss": 1.1766422537506604,
      "train_runtime": 7172.2389,
      "train_samples_per_second": 0.204,
      "train_steps_per_second": 0.102
    }
  ],
  "logging_steps": 20,
  "max_steps": 732,
  "num_train_epochs": 2,
  "save_steps": 20,
  "total_flos": 2.256724490932224e+16,
  "trial_name": null,
  "trial_params": null
}