|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.004256949470009791, |
|
"eval_steps": 9, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.513898940019582e-05, |
|
"grad_norm": 0.06632853299379349, |
|
"learning_rate": 1e-05, |
|
"loss": 10.3844, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 8.513898940019582e-05, |
|
"eval_loss": 10.380781173706055, |
|
"eval_runtime": 37.6703, |
|
"eval_samples_per_second": 262.567, |
|
"eval_steps_per_second": 32.838, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00017027797880039163, |
|
"grad_norm": 0.06305789947509766, |
|
"learning_rate": 2e-05, |
|
"loss": 10.3866, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002554169682005875, |
|
"grad_norm": 0.09040896594524384, |
|
"learning_rate": 3e-05, |
|
"loss": 10.3699, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00034055595760078326, |
|
"grad_norm": 0.08260149508714676, |
|
"learning_rate": 4e-05, |
|
"loss": 10.3876, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0004256949470009791, |
|
"grad_norm": 0.09195930510759354, |
|
"learning_rate": 5e-05, |
|
"loss": 10.39, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.000510833936401175, |
|
"grad_norm": 0.08319511264562607, |
|
"learning_rate": 6e-05, |
|
"loss": 10.3756, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005959729258013707, |
|
"grad_norm": 0.07701607048511505, |
|
"learning_rate": 7e-05, |
|
"loss": 10.3714, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0006811119152015665, |
|
"grad_norm": 0.08848275989294052, |
|
"learning_rate": 8e-05, |
|
"loss": 10.3649, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0007662509046017624, |
|
"grad_norm": 0.07215868681669235, |
|
"learning_rate": 9e-05, |
|
"loss": 10.3825, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0007662509046017624, |
|
"eval_loss": 10.380110740661621, |
|
"eval_runtime": 37.0373, |
|
"eval_samples_per_second": 267.055, |
|
"eval_steps_per_second": 33.399, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008513898940019582, |
|
"grad_norm": 0.0638326108455658, |
|
"learning_rate": 0.0001, |
|
"loss": 10.376, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.000936528883402154, |
|
"grad_norm": 0.0767514705657959, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 10.3839, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00102166787280235, |
|
"grad_norm": 0.06683874130249023, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 10.3901, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0011068068622025456, |
|
"grad_norm": 0.06641669571399689, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 10.3774, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0011919458516027415, |
|
"grad_norm": 0.05661563575267792, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 10.3847, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0012770848410029374, |
|
"grad_norm": 0.08233150839805603, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 10.3761, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.001362223830403133, |
|
"grad_norm": 0.06432665884494781, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 10.3694, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.001447362819803329, |
|
"grad_norm": 0.08495861291885376, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 10.3777, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0015325018092035248, |
|
"grad_norm": 0.06713090091943741, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 10.3927, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0015325018092035248, |
|
"eval_loss": 10.378459930419922, |
|
"eval_runtime": 36.9921, |
|
"eval_samples_per_second": 267.381, |
|
"eval_steps_per_second": 33.44, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0016176407986037205, |
|
"grad_norm": 0.07406238466501236, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 10.3736, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0017027797880039164, |
|
"grad_norm": 0.06992701441049576, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 10.3714, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.001787918777404112, |
|
"grad_norm": 0.0749615877866745, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 10.372, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.001873057766804308, |
|
"grad_norm": 0.06695009022951126, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 10.3943, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0019581967562045037, |
|
"grad_norm": 0.058970991522073746, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 10.3757, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0020433357456047, |
|
"grad_norm": 0.060394152998924255, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 10.3769, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0021284747350048955, |
|
"grad_norm": 0.07403624802827835, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 10.3898, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.002213613724405091, |
|
"grad_norm": 0.06255259364843369, |
|
"learning_rate": 9.24024048078213e-05, |
|
"loss": 10.3848, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0022987527138052873, |
|
"grad_norm": 0.07093428820371628, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 10.3834, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0022987527138052873, |
|
"eval_loss": 10.376791000366211, |
|
"eval_runtime": 37.1822, |
|
"eval_samples_per_second": 266.014, |
|
"eval_steps_per_second": 33.269, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.002383891703205483, |
|
"grad_norm": 0.06431137025356293, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 10.3878, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0024690306926056786, |
|
"grad_norm": 0.06749837845563889, |
|
"learning_rate": 8.940053768033609e-05, |
|
"loss": 10.3917, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0025541696820058747, |
|
"grad_norm": 0.06856685876846313, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 10.3812, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0026393086714060704, |
|
"grad_norm": 0.07790417969226837, |
|
"learning_rate": 8.715724127386972e-05, |
|
"loss": 10.3734, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.002724447660806266, |
|
"grad_norm": 0.06906121224164963, |
|
"learning_rate": 8.596699001693255e-05, |
|
"loss": 10.3667, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0028095866502064622, |
|
"grad_norm": 0.07379811257123947, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 10.3681, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.002894725639606658, |
|
"grad_norm": 0.0777546837925911, |
|
"learning_rate": 8.345653031794292e-05, |
|
"loss": 10.3742, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0029798646290068536, |
|
"grad_norm": 0.09084642678499222, |
|
"learning_rate": 8.213938048432697e-05, |
|
"loss": 10.3725, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0030650036184070497, |
|
"grad_norm": 0.07533272355794907, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 10.3616, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0030650036184070497, |
|
"eval_loss": 10.3751859664917, |
|
"eval_runtime": 37.0132, |
|
"eval_samples_per_second": 267.229, |
|
"eval_steps_per_second": 33.42, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0031501426078072454, |
|
"grad_norm": 0.09557131677865982, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 10.3627, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.003235281597207441, |
|
"grad_norm": 0.086721271276474, |
|
"learning_rate": 7.795964517353735e-05, |
|
"loss": 10.3797, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.003320420586607637, |
|
"grad_norm": 0.0951017513871193, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 10.3642, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.003405559576007833, |
|
"grad_norm": 0.08322468400001526, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 10.3718, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0034906985654080285, |
|
"grad_norm": 0.08715411275625229, |
|
"learning_rate": 7.347357813929454e-05, |
|
"loss": 10.3912, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.003575837554808224, |
|
"grad_norm": 0.07306662946939468, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 10.3831, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0036609765442084203, |
|
"grad_norm": 0.07265879958868027, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 10.3775, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.003746115533608616, |
|
"grad_norm": 0.07239469140768051, |
|
"learning_rate": 6.873032967079561e-05, |
|
"loss": 10.3708, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0038312545230088117, |
|
"grad_norm": 0.06718676537275314, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 10.3772, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0038312545230088117, |
|
"eval_loss": 10.373581886291504, |
|
"eval_runtime": 36.9465, |
|
"eval_samples_per_second": 267.712, |
|
"eval_steps_per_second": 33.481, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.003916393512409007, |
|
"grad_norm": 0.1044333428144455, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 10.3736, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.004001532501809204, |
|
"grad_norm": 0.09110475331544876, |
|
"learning_rate": 6.378186779084995e-05, |
|
"loss": 10.3743, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0040866714912094, |
|
"grad_norm": 0.08908325433731079, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 10.3715, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.004171810480609595, |
|
"grad_norm": 0.09201818704605103, |
|
"learning_rate": 6.0395584540887963e-05, |
|
"loss": 10.3745, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.004256949470009791, |
|
"grad_norm": 0.08138486742973328, |
|
"learning_rate": 5.868240888334653e-05, |
|
"loss": 10.3831, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4864127533056.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|