{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7127355796687607,
  "eval_steps": 13,
  "global_step": 39,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018275271273557967,
      "grad_norm": 4.217789649963379,
      "learning_rate": 5e-05,
      "loss": 8.8561,
      "step": 1
    },
    {
      "epoch": 0.018275271273557967,
      "eval_loss": 8.832687377929688,
      "eval_runtime": 66.6285,
      "eval_samples_per_second": 5.538,
      "eval_steps_per_second": 1.396,
      "step": 1
    },
    {
      "epoch": 0.036550542547115934,
      "grad_norm": 4.4302568435668945,
      "learning_rate": 0.0001,
      "loss": 8.7806,
      "step": 2
    },
    {
      "epoch": 0.0548258138206739,
      "grad_norm": 4.538593292236328,
      "learning_rate": 9.989294616193017e-05,
      "loss": 8.662,
      "step": 3
    },
    {
      "epoch": 0.07310108509423187,
      "grad_norm": 5.8020195960998535,
      "learning_rate": 9.957224306869053e-05,
      "loss": 7.9472,
      "step": 4
    },
    {
      "epoch": 0.09137635636778983,
      "grad_norm": 6.238458633422852,
      "learning_rate": 9.903926402016153e-05,
      "loss": 6.4558,
      "step": 5
    },
    {
      "epoch": 0.1096516276413478,
      "grad_norm": 5.397599220275879,
      "learning_rate": 9.829629131445342e-05,
      "loss": 4.8986,
      "step": 6
    },
    {
      "epoch": 0.12792689891490577,
      "grad_norm": 4.881111145019531,
      "learning_rate": 9.73465064747553e-05,
      "loss": 4.1723,
      "step": 7
    },
    {
      "epoch": 0.14620217018846374,
      "grad_norm": 3.493053436279297,
      "learning_rate": 9.619397662556435e-05,
      "loss": 3.2577,
      "step": 8
    },
    {
      "epoch": 0.1644774414620217,
      "grad_norm": 2.9015204906463623,
      "learning_rate": 9.484363707663442e-05,
      "loss": 2.7507,
      "step": 9
    },
    {
      "epoch": 0.18275271273557966,
      "grad_norm": 2.8159022331237793,
      "learning_rate": 9.330127018922194e-05,
      "loss": 2.5523,
      "step": 10
    },
    {
      "epoch": 0.20102798400913763,
      "grad_norm": 3.2096753120422363,
      "learning_rate": 9.157348061512727e-05,
      "loss": 2.0235,
      "step": 11
    },
    {
      "epoch": 0.2193032552826956,
      "grad_norm": 3.640693426132202,
      "learning_rate": 8.966766701456177e-05,
      "loss": 1.4851,
      "step": 12
    },
    {
      "epoch": 0.23757852655625358,
      "grad_norm": 3.8002378940582275,
      "learning_rate": 8.759199037394887e-05,
      "loss": 1.0231,
      "step": 13
    },
    {
      "epoch": 0.23757852655625358,
      "eval_loss": 0.6289411783218384,
      "eval_runtime": 66.1722,
      "eval_samples_per_second": 5.576,
      "eval_steps_per_second": 1.405,
      "step": 13
    },
    {
      "epoch": 0.25585379782981155,
      "grad_norm": 2.663853168487549,
      "learning_rate": 8.535533905932738e-05,
      "loss": 0.6116,
      "step": 14
    },
    {
      "epoch": 0.2741290691033695,
      "grad_norm": 2.392077922821045,
      "learning_rate": 8.296729075500344e-05,
      "loss": 0.3944,
      "step": 15
    },
    {
      "epoch": 0.2924043403769275,
      "grad_norm": 2.333585262298584,
      "learning_rate": 8.043807145043604e-05,
      "loss": 0.2627,
      "step": 16
    },
    {
      "epoch": 0.3106796116504854,
      "grad_norm": 0.5413778424263,
      "learning_rate": 7.777851165098012e-05,
      "loss": 0.1026,
      "step": 17
    },
    {
      "epoch": 0.3289548829240434,
      "grad_norm": 1.2651410102844238,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.2252,
      "step": 18
    },
    {
      "epoch": 0.3472301541976014,
      "grad_norm": 0.9693997502326965,
      "learning_rate": 7.211443451095007e-05,
      "loss": 0.0994,
      "step": 19
    },
    {
      "epoch": 0.36550542547115933,
      "grad_norm": 1.002900242805481,
      "learning_rate": 6.91341716182545e-05,
      "loss": 0.1058,
      "step": 20
    },
    {
      "epoch": 0.3837806967447173,
      "grad_norm": 0.8303418755531311,
      "learning_rate": 6.607197326515808e-05,
      "loss": 0.0462,
      "step": 21
    },
    {
      "epoch": 0.40205596801827526,
      "grad_norm": 0.6390141248703003,
      "learning_rate": 6.294095225512603e-05,
      "loss": 0.0405,
      "step": 22
    },
    {
      "epoch": 0.42033123929183325,
      "grad_norm": 0.6512762308120728,
      "learning_rate": 5.9754516100806423e-05,
      "loss": 0.0238,
      "step": 23
    },
    {
      "epoch": 0.4386065105653912,
      "grad_norm": 0.7656252980232239,
      "learning_rate": 5.6526309611002594e-05,
      "loss": 0.0375,
      "step": 24
    },
    {
      "epoch": 0.4568817818389492,
      "grad_norm": 0.46867817640304565,
      "learning_rate": 5.327015646150716e-05,
      "loss": 0.016,
      "step": 25
    },
    {
      "epoch": 0.47515705311250717,
      "grad_norm": 0.22703154385089874,
      "learning_rate": 5e-05,
      "loss": 0.0108,
      "step": 26
    },
    {
      "epoch": 0.47515705311250717,
      "eval_loss": 0.011774315498769283,
      "eval_runtime": 66.1555,
      "eval_samples_per_second": 5.578,
      "eval_steps_per_second": 1.406,
      "step": 26
    },
    {
      "epoch": 0.4934323243860651,
      "grad_norm": 0.2927595376968384,
      "learning_rate": 4.6729843538492847e-05,
      "loss": 0.0162,
      "step": 27
    },
    {
      "epoch": 0.5117075956596231,
      "grad_norm": 0.1291339248418808,
      "learning_rate": 4.347369038899744e-05,
      "loss": 0.0014,
      "step": 28
    },
    {
      "epoch": 0.5299828669331811,
      "grad_norm": 0.24390743672847748,
      "learning_rate": 4.0245483899193595e-05,
      "loss": 0.01,
      "step": 29
    },
    {
      "epoch": 0.548258138206739,
      "grad_norm": 0.07123029232025146,
      "learning_rate": 3.705904774487396e-05,
      "loss": 0.0007,
      "step": 30
    },
    {
      "epoch": 0.566533409480297,
      "grad_norm": 0.03508315980434418,
      "learning_rate": 3.392802673484193e-05,
      "loss": 0.0004,
      "step": 31
    },
    {
      "epoch": 0.584808680753855,
      "grad_norm": 0.33584150671958923,
      "learning_rate": 3.086582838174551e-05,
      "loss": 0.0116,
      "step": 32
    },
    {
      "epoch": 0.6030839520274129,
      "grad_norm": 0.18450838327407837,
      "learning_rate": 2.7885565489049946e-05,
      "loss": 0.0017,
      "step": 33
    },
    {
      "epoch": 0.6213592233009708,
      "grad_norm": 0.30941474437713623,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.0089,
      "step": 34
    },
    {
      "epoch": 0.6396344945745288,
      "grad_norm": 0.3536520004272461,
      "learning_rate": 2.2221488349019903e-05,
      "loss": 0.0024,
      "step": 35
    },
    {
      "epoch": 0.6579097658480868,
      "grad_norm": 0.3076607882976532,
      "learning_rate": 1.9561928549563968e-05,
      "loss": 0.0032,
      "step": 36
    },
    {
      "epoch": 0.6761850371216448,
      "grad_norm": 0.09125807881355286,
      "learning_rate": 1.703270924499656e-05,
      "loss": 0.005,
      "step": 37
    },
    {
      "epoch": 0.6944603083952028,
      "grad_norm": 0.09541966021060944,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.0006,
      "step": 38
    },
    {
      "epoch": 0.7127355796687607,
      "grad_norm": 0.042761899530887604,
      "learning_rate": 1.2408009626051137e-05,
      "loss": 0.0003,
      "step": 39
    },
    {
      "epoch": 0.7127355796687607,
      "eval_loss": 0.005322535987943411,
      "eval_runtime": 66.1258,
      "eval_samples_per_second": 5.58,
      "eval_steps_per_second": 1.406,
      "step": 39
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5844083502531215e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}