|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.002944741917910411, |
|
"eval_steps": 25, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.9263225572138815e-05, |
|
"grad_norm": 16.53192710876465, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 3.6605, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 3.9263225572138815e-05, |
|
"eval_loss": 3.731626272201538, |
|
"eval_runtime": 2304.5028, |
|
"eval_samples_per_second": 9.307, |
|
"eval_steps_per_second": 4.653, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 7.852645114427763e-05, |
|
"grad_norm": 19.084394454956055, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 3.7609, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00011778967671641644, |
|
"grad_norm": 13.099997520446777, |
|
"learning_rate": 0.0002, |
|
"loss": 2.7956, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00015705290228855526, |
|
"grad_norm": 9.987568855285645, |
|
"learning_rate": 0.0001999048221581858, |
|
"loss": 1.528, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00019631612786069407, |
|
"grad_norm": 6.246072292327881, |
|
"learning_rate": 0.00019961946980917456, |
|
"loss": 0.4466, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0002355793534328329, |
|
"grad_norm": 0.9562373757362366, |
|
"learning_rate": 0.00019914448613738106, |
|
"loss": 0.0189, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0002748425790049717, |
|
"grad_norm": 0.07823207974433899, |
|
"learning_rate": 0.00019848077530122083, |
|
"loss": 0.0009, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0003141058045771105, |
|
"grad_norm": 9.684601783752441, |
|
"learning_rate": 0.00019762960071199333, |
|
"loss": 0.0253, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00035336903014924933, |
|
"grad_norm": 0.009641322307288647, |
|
"learning_rate": 0.00019659258262890683, |
|
"loss": 0.0001, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00039263225572138815, |
|
"grad_norm": 0.03295723348855972, |
|
"learning_rate": 0.0001953716950748227, |
|
"loss": 0.0002, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00043189548129352696, |
|
"grad_norm": 1.8471201658248901, |
|
"learning_rate": 0.00019396926207859084, |
|
"loss": 0.0208, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0004711587068656658, |
|
"grad_norm": 2.772557020187378, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 0.0095, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0005104219324378046, |
|
"grad_norm": 0.0034740748815238476, |
|
"learning_rate": 0.000190630778703665, |
|
"loss": 0.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0005496851580099434, |
|
"grad_norm": 0.0018053193343803287, |
|
"learning_rate": 0.00018870108331782217, |
|
"loss": 0.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0005889483835820823, |
|
"grad_norm": 0.001508252345956862, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.000628211609154221, |
|
"grad_norm": 0.001372707192786038, |
|
"learning_rate": 0.0001843391445812886, |
|
"loss": 0.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0006674748347263599, |
|
"grad_norm": 0.0011158657725900412, |
|
"learning_rate": 0.0001819152044288992, |
|
"loss": 0.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0007067380602984987, |
|
"grad_norm": 84.22279357910156, |
|
"learning_rate": 0.00017933533402912354, |
|
"loss": 0.0303, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0007460012858706375, |
|
"grad_norm": 0.0011357753537595272, |
|
"learning_rate": 0.0001766044443118978, |
|
"loss": 0.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0007852645114427763, |
|
"grad_norm": 0.0009733820916153491, |
|
"learning_rate": 0.0001737277336810124, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0008245277370149152, |
|
"grad_norm": 0.0008526451420038939, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0008637909625870539, |
|
"grad_norm": 0.0012607588432729244, |
|
"learning_rate": 0.00016755902076156604, |
|
"loss": 0.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0009030541881591928, |
|
"grad_norm": 10.768465042114258, |
|
"learning_rate": 0.00016427876096865394, |
|
"loss": 0.0027, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0009423174137313316, |
|
"grad_norm": 0.0007196764345280826, |
|
"learning_rate": 0.00016087614290087208, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0009815806393034703, |
|
"grad_norm": 0.0006661998922936618, |
|
"learning_rate": 0.0001573576436351046, |
|
"loss": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0009815806393034703, |
|
"eval_loss": 0.0005611914093606174, |
|
"eval_runtime": 2318.7094, |
|
"eval_samples_per_second": 9.25, |
|
"eval_steps_per_second": 4.625, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0010208438648756093, |
|
"grad_norm": 0.0007277107215486467, |
|
"learning_rate": 0.0001537299608346824, |
|
"loss": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.001060107090447748, |
|
"grad_norm": 0.0006697736098431051, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0010993703160198868, |
|
"grad_norm": 0.0029933627229183912, |
|
"learning_rate": 0.00014617486132350343, |
|
"loss": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0011386335415920256, |
|
"grad_norm": 0.035499222576618195, |
|
"learning_rate": 0.00014226182617406996, |
|
"loss": 0.0001, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0011778967671641646, |
|
"grad_norm": 0.10762330144643784, |
|
"learning_rate": 0.000138268343236509, |
|
"loss": 0.0002, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0012171599927363033, |
|
"grad_norm": 0.0013505893293768167, |
|
"learning_rate": 0.00013420201433256689, |
|
"loss": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.001256423218308442, |
|
"grad_norm": 0.001411484437994659, |
|
"learning_rate": 0.00013007057995042732, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0012956864438805808, |
|
"grad_norm": 0.0007225474109873176, |
|
"learning_rate": 0.00012588190451025207, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0013349496694527198, |
|
"grad_norm": 0.0009576158481650054, |
|
"learning_rate": 0.00012164396139381029, |
|
"loss": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0013742128950248586, |
|
"grad_norm": 0.0016589387087151408, |
|
"learning_rate": 0.00011736481776669306, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0014134761205969973, |
|
"grad_norm": 0.001446246518753469, |
|
"learning_rate": 0.00011305261922200519, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.001452739346169136, |
|
"grad_norm": 0.0017250552773475647, |
|
"learning_rate": 0.00010871557427476583, |
|
"loss": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.001492002571741275, |
|
"grad_norm": 0.0012471925001591444, |
|
"learning_rate": 0.00010436193873653361, |
|
"loss": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0015312657973134138, |
|
"grad_norm": 0.0012117830337956548, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0015705290228855526, |
|
"grad_norm": 0.0012083291076123714, |
|
"learning_rate": 9.563806126346642e-05, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0016097922484576914, |
|
"grad_norm": 0.000552519632037729, |
|
"learning_rate": 9.128442572523417e-05, |
|
"loss": 0.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0016490554740298303, |
|
"grad_norm": 0.0005032281042076647, |
|
"learning_rate": 8.694738077799488e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.001688318699601969, |
|
"grad_norm": 0.0005083618452772498, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0017275819251741078, |
|
"grad_norm": 0.000337311445036903, |
|
"learning_rate": 7.835603860618972e-05, |
|
"loss": 0.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0017668451507462466, |
|
"grad_norm": 0.0011383501114323735, |
|
"learning_rate": 7.411809548974792e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0018061083763183856, |
|
"grad_norm": 0.0006025996990501881, |
|
"learning_rate": 6.992942004957271e-05, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0018453716018905243, |
|
"grad_norm": 0.0002779973146971315, |
|
"learning_rate": 6.579798566743314e-05, |
|
"loss": 0.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.001884634827462663, |
|
"grad_norm": 0.0003003528981935233, |
|
"learning_rate": 6.173165676349103e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0019238980530348019, |
|
"grad_norm": 0.00026264195912517607, |
|
"learning_rate": 5.773817382593008e-05, |
|
"loss": 0.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0019631612786069406, |
|
"grad_norm": 0.00046483968617394567, |
|
"learning_rate": 5.382513867649663e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0019631612786069406, |
|
"eval_loss": 0.0007199626415967941, |
|
"eval_runtime": 2318.0998, |
|
"eval_samples_per_second": 9.252, |
|
"eval_steps_per_second": 4.626, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0020024245041790796, |
|
"grad_norm": 0.00016967965348158032, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0020416877297512186, |
|
"grad_norm": 0.00023787225654814392, |
|
"learning_rate": 4.6270039165317605e-05, |
|
"loss": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.002080950955323357, |
|
"grad_norm": 0.00023337510356213897, |
|
"learning_rate": 4.264235636489542e-05, |
|
"loss": 0.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.002120214180895496, |
|
"grad_norm": 0.0002012766053667292, |
|
"learning_rate": 3.9123857099127936e-05, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0021594774064676346, |
|
"grad_norm": 0.00024241099890787154, |
|
"learning_rate": 3.5721239031346066e-05, |
|
"loss": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0021987406320397736, |
|
"grad_norm": 0.000289903546217829, |
|
"learning_rate": 3.244097923843398e-05, |
|
"loss": 0.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0022380038576119126, |
|
"grad_norm": 0.00017998011026065797, |
|
"learning_rate": 2.9289321881345254e-05, |
|
"loss": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.002277267083184051, |
|
"grad_norm": 0.00022262134007178247, |
|
"learning_rate": 2.6272266318987603e-05, |
|
"loss": 0.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.00231653030875619, |
|
"grad_norm": 0.000276467326330021, |
|
"learning_rate": 2.339555568810221e-05, |
|
"loss": 0.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.002355793534328329, |
|
"grad_norm": 0.00016941240755841136, |
|
"learning_rate": 2.0664665970876496e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0023950567599004676, |
|
"grad_norm": 0.0001846144295996055, |
|
"learning_rate": 1.808479557110081e-05, |
|
"loss": 0.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0024343199854726066, |
|
"grad_norm": 0.000201681352336891, |
|
"learning_rate": 1.566085541871145e-05, |
|
"loss": 0.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.002473583211044745, |
|
"grad_norm": 0.0002414260379737243, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.002512846436616884, |
|
"grad_norm": 0.00019271476776339114, |
|
"learning_rate": 1.129891668217783e-05, |
|
"loss": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.002552109662189023, |
|
"grad_norm": 0.00026961477124132216, |
|
"learning_rate": 9.369221296335006e-06, |
|
"loss": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0025913728877611617, |
|
"grad_norm": 0.0002070392220048234, |
|
"learning_rate": 7.612046748871327e-06, |
|
"loss": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0026306361133333006, |
|
"grad_norm": 0.00019015038560610265, |
|
"learning_rate": 6.030737921409169e-06, |
|
"loss": 0.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0026698993389054396, |
|
"grad_norm": 0.00013412888802122325, |
|
"learning_rate": 4.628304925177318e-06, |
|
"loss": 0.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.002709162564477578, |
|
"grad_norm": 0.0001483526430092752, |
|
"learning_rate": 3.40741737109318e-06, |
|
"loss": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.002748425790049717, |
|
"grad_norm": 0.00013009592657908797, |
|
"learning_rate": 2.3703992880066638e-06, |
|
"loss": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0027876890156218557, |
|
"grad_norm": 0.00021120438759680837, |
|
"learning_rate": 1.5192246987791981e-06, |
|
"loss": 0.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0028269522411939947, |
|
"grad_norm": 0.0002538190165068954, |
|
"learning_rate": 8.555138626189618e-07, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0028662154667661336, |
|
"grad_norm": 0.00025384288164786994, |
|
"learning_rate": 3.805301908254455e-07, |
|
"loss": 0.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.002905478692338272, |
|
"grad_norm": 0.00015469672507606447, |
|
"learning_rate": 9.517784181422019e-08, |
|
"loss": 0.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.002944741917910411, |
|
"grad_norm": 0.0003116559819318354, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.002944741917910411, |
|
"eval_loss": 0.0007300978759303689, |
|
"eval_runtime": 2317.0561, |
|
"eval_samples_per_second": 9.257, |
|
"eval_steps_per_second": 4.628, |
|
"step": 75 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.95132603613184e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|