{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997505612372163,
  "eval_steps": 500,
  "global_step": 1002,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009977550511349464,
      "grad_norm": 3.95422100343339e+17,
      "learning_rate": 1.9801980198019803e-07,
      "loss": 1.1312,
      "step": 1
    },
    {
      "epoch": 0.004988775255674732,
      "grad_norm": 2182.5525466022195,
      "learning_rate": 9.900990099009902e-07,
      "loss": 1.1202,
      "step": 5
    },
    {
      "epoch": 0.009977550511349464,
      "grad_norm": 10.696306402986602,
      "learning_rate": 1.9801980198019803e-06,
      "loss": 1.0843,
      "step": 10
    },
    {
      "epoch": 0.014966325767024195,
      "grad_norm": 4.8421359026925295,
      "learning_rate": 2.9702970297029703e-06,
      "loss": 1.024,
      "step": 15
    },
    {
      "epoch": 0.01995510102269893,
      "grad_norm": 1.4664994361442427,
      "learning_rate": 3.960396039603961e-06,
      "loss": 0.9479,
      "step": 20
    },
    {
      "epoch": 0.024943876278373658,
      "grad_norm": 1.1625196109582225,
      "learning_rate": 4.950495049504951e-06,
      "loss": 0.8935,
      "step": 25
    },
    {
      "epoch": 0.02993265153404839,
      "grad_norm": 1.2801604519342376,
      "learning_rate": 5.940594059405941e-06,
      "loss": 0.871,
      "step": 30
    },
    {
      "epoch": 0.034921426789723126,
      "grad_norm": 2.033559906503939,
      "learning_rate": 6.930693069306931e-06,
      "loss": 0.8425,
      "step": 35
    },
    {
      "epoch": 0.03991020204539786,
      "grad_norm": 1.2072354289878107,
      "learning_rate": 7.920792079207921e-06,
      "loss": 0.8193,
      "step": 40
    },
    {
      "epoch": 0.04489897730107259,
      "grad_norm": 1.3137258088382902,
      "learning_rate": 8.910891089108911e-06,
      "loss": 0.8013,
      "step": 45
    },
    {
      "epoch": 0.049887752556747315,
      "grad_norm": 1.1213863664000594,
      "learning_rate": 9.900990099009901e-06,
      "loss": 0.7906,
      "step": 50
    },
    {
      "epoch": 0.05487652781242205,
      "grad_norm": 0.9999895583902438,
      "learning_rate": 1.0891089108910893e-05,
      "loss": 0.7642,
      "step": 55
    },
    {
      "epoch": 0.05986530306809678,
      "grad_norm": 1.3377086334649673,
      "learning_rate": 1.1881188118811881e-05,
      "loss": 0.7495,
      "step": 60
    },
    {
      "epoch": 0.06485407832377152,
      "grad_norm": 1.0583607289478394,
      "learning_rate": 1.2871287128712873e-05,
      "loss": 0.7328,
      "step": 65
    },
    {
      "epoch": 0.06984285357944625,
      "grad_norm": 1.3493827534349543,
      "learning_rate": 1.3861386138613861e-05,
      "loss": 0.7383,
      "step": 70
    },
    {
      "epoch": 0.07483162883512098,
      "grad_norm": 1.2261995345556986,
      "learning_rate": 1.4851485148514853e-05,
      "loss": 0.7281,
      "step": 75
    },
    {
      "epoch": 0.07982040409079572,
      "grad_norm": 1.3328058553211537,
      "learning_rate": 1.5841584158415843e-05,
      "loss": 0.7236,
      "step": 80
    },
    {
      "epoch": 0.08480917934647045,
      "grad_norm": 1.1145579124084846,
      "learning_rate": 1.683168316831683e-05,
      "loss": 0.7255,
      "step": 85
    },
    {
      "epoch": 0.08979795460214518,
      "grad_norm": 0.9193198348331784,
      "learning_rate": 1.7821782178217823e-05,
      "loss": 0.7181,
      "step": 90
    },
    {
      "epoch": 0.0947867298578199,
      "grad_norm": 0.9946484577080871,
      "learning_rate": 1.881188118811881e-05,
      "loss": 0.7103,
      "step": 95
    },
    {
      "epoch": 0.09977550511349463,
      "grad_norm": 1.041681712316516,
      "learning_rate": 1.9801980198019803e-05,
      "loss": 0.7007,
      "step": 100
    },
    {
      "epoch": 0.10476428036916936,
      "grad_norm": 2.164939078852322,
      "learning_rate": 1.9999027402586235e-05,
      "loss": 0.7064,
      "step": 105
    },
    {
      "epoch": 0.1097530556248441,
      "grad_norm": 2.7326170629817335,
      "learning_rate": 1.9995076549835638e-05,
      "loss": 0.7129,
      "step": 110
    },
    {
      "epoch": 0.11474183088051883,
      "grad_norm": 0.958937879949106,
      "learning_rate": 1.9988087854284224e-05,
      "loss": 0.6984,
      "step": 115
    },
    {
      "epoch": 0.11973060613619356,
      "grad_norm": 1.1161507946755316,
      "learning_rate": 1.997806344003363e-05,
      "loss": 0.7025,
      "step": 120
    },
    {
      "epoch": 0.12471938139186829,
      "grad_norm": 0.9783646600814512,
      "learning_rate": 1.996500635384337e-05,
      "loss": 0.6918,
      "step": 125
    },
    {
      "epoch": 0.12970815664754304,
      "grad_norm": 0.8060832238745128,
      "learning_rate": 1.994892056420485e-05,
      "loss": 0.6842,
      "step": 130
    },
    {
      "epoch": 0.13469693190321777,
      "grad_norm": 0.8669120420326126,
      "learning_rate": 1.992981096013517e-05,
      "loss": 0.6806,
      "step": 135
    },
    {
      "epoch": 0.1396857071588925,
      "grad_norm": 0.8551160872182698,
      "learning_rate": 1.990768334969122e-05,
      "loss": 0.6908,
      "step": 140
    },
    {
      "epoch": 0.14467448241456723,
      "grad_norm": 0.6818189908779505,
      "learning_rate": 1.9882544458204386e-05,
      "loss": 0.6888,
      "step": 145
    },
    {
      "epoch": 0.14966325767024197,
      "grad_norm": 0.7649549705168317,
      "learning_rate": 1.9854401926236518e-05,
      "loss": 0.6867,
      "step": 150
    },
    {
      "epoch": 0.1546520329259167,
      "grad_norm": 0.8499401555713652,
      "learning_rate": 1.9823264307257683e-05,
      "loss": 0.6707,
      "step": 155
    },
    {
      "epoch": 0.15964080818159143,
      "grad_norm": 0.7166408203324516,
      "learning_rate": 1.9789141065046495e-05,
      "loss": 0.676,
      "step": 160
    },
    {
      "epoch": 0.16462958343726616,
      "grad_norm": 0.6182479498389858,
      "learning_rate": 1.9752042570813733e-05,
      "loss": 0.6738,
      "step": 165
    },
    {
      "epoch": 0.1696183586929409,
      "grad_norm": 0.6690357471493384,
      "learning_rate": 1.9711980100050196e-05,
      "loss": 0.6672,
      "step": 170
    },
    {
      "epoch": 0.17460713394861563,
      "grad_norm": 0.5822226665191199,
      "learning_rate": 1.966896582909968e-05,
      "loss": 0.6736,
      "step": 175
    },
    {
      "epoch": 0.17959590920429036,
      "grad_norm": 0.7406095161368872,
      "learning_rate": 1.962301283145819e-05,
      "loss": 0.6761,
      "step": 180
    },
    {
      "epoch": 0.18458468445996506,
      "grad_norm": 0.7296907738291923,
      "learning_rate": 1.957413507380046e-05,
      "loss": 0.6678,
      "step": 185
    },
    {
      "epoch": 0.1895734597156398,
      "grad_norm": 0.8518753276369776,
      "learning_rate": 1.952234741173499e-05,
      "loss": 0.6733,
      "step": 190
    },
    {
      "epoch": 0.19456223497131453,
      "grad_norm": 0.5554147873181055,
      "learning_rate": 1.946766558528895e-05,
      "loss": 0.6621,
      "step": 195
    },
    {
      "epoch": 0.19955101022698926,
      "grad_norm": 0.6525658967181038,
      "learning_rate": 1.941010621412422e-05,
      "loss": 0.6649,
      "step": 200
    },
    {
      "epoch": 0.204539785482664,
      "grad_norm": 0.5780920597815026,
      "learning_rate": 1.9349686792486143e-05,
      "loss": 0.657,
      "step": 205
    },
    {
      "epoch": 0.20952856073833873,
      "grad_norm": 0.5578703671497164,
      "learning_rate": 1.9286425683886403e-05,
      "loss": 0.6687,
      "step": 210
    },
    {
      "epoch": 0.21451733599401346,
      "grad_norm": 0.72305301104437,
      "learning_rate": 1.9220342115521746e-05,
      "loss": 0.6624,
      "step": 215
    },
    {
      "epoch": 0.2195061112496882,
      "grad_norm": 0.6165020865206677,
      "learning_rate": 1.9151456172430186e-05,
      "loss": 0.6532,
      "step": 220
    },
    {
      "epoch": 0.22449488650536292,
      "grad_norm": 0.5606109731701377,
      "learning_rate": 1.9079788791386468e-05,
      "loss": 0.6562,
      "step": 225
    },
    {
      "epoch": 0.22948366176103766,
      "grad_norm": 0.6353820290325317,
      "learning_rate": 1.9005361754538677e-05,
      "loss": 0.6671,
      "step": 230
    },
    {
      "epoch": 0.2344724370167124,
      "grad_norm": 0.5815877946035953,
      "learning_rate": 1.8928197682787914e-05,
      "loss": 0.6593,
      "step": 235
    },
    {
      "epoch": 0.23946121227238712,
      "grad_norm": 0.6348680697801418,
      "learning_rate": 1.8848320028913017e-05,
      "loss": 0.6507,
      "step": 240
    },
    {
      "epoch": 0.24444998752806185,
      "grad_norm": 0.5695451117286802,
      "learning_rate": 1.8765753070442486e-05,
      "loss": 0.6523,
      "step": 245
    },
    {
      "epoch": 0.24943876278373658,
      "grad_norm": 0.5168318177147343,
      "learning_rate": 1.868052190227571e-05,
      "loss": 0.6591,
      "step": 250
    },
    {
      "epoch": 0.2544275380394113,
      "grad_norm": 0.7348069026040828,
      "learning_rate": 1.859265242905577e-05,
      "loss": 0.6541,
      "step": 255
    },
    {
      "epoch": 0.2594163132950861,
      "grad_norm": 0.5329392930102724,
      "learning_rate": 1.8502171357296144e-05,
      "loss": 0.6589,
      "step": 260
    },
    {
      "epoch": 0.2644050885507608,
      "grad_norm": 0.6628783134030882,
      "learning_rate": 1.84091061872637e-05,
      "loss": 0.6443,
      "step": 265
    },
    {
      "epoch": 0.26939386380643554,
      "grad_norm": 0.6959729166297904,
      "learning_rate": 1.8313485204620428e-05,
      "loss": 0.6459,
      "step": 270
    },
    {
      "epoch": 0.27438263906211025,
      "grad_norm": 0.5848246972834032,
      "learning_rate": 1.821533747182645e-05,
      "loss": 0.6606,
      "step": 275
    },
    {
      "epoch": 0.279371414317785,
      "grad_norm": 0.6475331258568309,
      "learning_rate": 1.811469281930698e-05,
      "loss": 0.656,
      "step": 280
    },
    {
      "epoch": 0.2843601895734597,
      "grad_norm": 0.5357406950163816,
      "learning_rate": 1.8011581836385828e-05,
      "loss": 0.6474,
      "step": 285
    },
    {
      "epoch": 0.28934896482913447,
      "grad_norm": 0.5973939212545811,
      "learning_rate": 1.790603586198827e-05,
      "loss": 0.6376,
      "step": 290
    },
    {
      "epoch": 0.2943377400848092,
      "grad_norm": 0.5096252662581786,
      "learning_rate": 1.7798086975116096e-05,
      "loss": 0.6487,
      "step": 295
    },
    {
      "epoch": 0.29932651534048393,
      "grad_norm": 0.545088543038122,
      "learning_rate": 1.7687767985097695e-05,
      "loss": 0.6526,
      "step": 300
    },
    {
      "epoch": 0.30431529059615864,
      "grad_norm": 0.707465419807657,
      "learning_rate": 1.7575112421616203e-05,
      "loss": 0.6465,
      "step": 305
    },
    {
      "epoch": 0.3093040658518334,
      "grad_norm": 0.520929461518716,
      "learning_rate": 1.7460154524518688e-05,
      "loss": 0.6346,
      "step": 310
    },
    {
      "epoch": 0.3142928411075081,
      "grad_norm": 0.5724374980096262,
      "learning_rate": 1.73429292334095e-05,
      "loss": 0.6533,
      "step": 315
    },
    {
      "epoch": 0.31928161636318286,
      "grad_norm": 0.545092535159253,
      "learning_rate": 1.722347217703094e-05,
      "loss": 0.6437,
      "step": 320
    },
    {
      "epoch": 0.32427039161885757,
      "grad_norm": 0.49712776699676936,
      "learning_rate": 1.710181966243447e-05,
      "loss": 0.6373,
      "step": 325
    },
    {
      "epoch": 0.3292591668745323,
      "grad_norm": 0.5463641929477563,
      "learning_rate": 1.6978008663945794e-05,
      "loss": 0.6496,
      "step": 330
    },
    {
      "epoch": 0.33424794213020703,
      "grad_norm": 0.5263242364290428,
      "learning_rate": 1.6852076811927066e-05,
      "loss": 0.6369,
      "step": 335
    },
    {
      "epoch": 0.3392367173858818,
      "grad_norm": 0.5461638677630304,
      "learning_rate": 1.672406238133978e-05,
      "loss": 0.639,
      "step": 340
    },
    {
      "epoch": 0.3442254926415565,
      "grad_norm": 0.5328798983853884,
      "learning_rate": 1.6594004280111697e-05,
      "loss": 0.6497,
      "step": 345
    },
    {
      "epoch": 0.34921426789723126,
      "grad_norm": 0.5449564175014876,
      "learning_rate": 1.6461942037311406e-05,
      "loss": 0.64,
      "step": 350
    },
    {
      "epoch": 0.35420304315290596,
      "grad_norm": 0.4924182140259062,
      "learning_rate": 1.6327915791134107e-05,
      "loss": 0.6396,
      "step": 355
    },
    {
      "epoch": 0.3591918184085807,
      "grad_norm": 0.6190239635709287,
      "learning_rate": 1.6191966276702235e-05,
      "loss": 0.6377,
      "step": 360
    },
    {
      "epoch": 0.3641805936642554,
      "grad_norm": 0.6147520812137072,
      "learning_rate": 1.6054134813684697e-05,
      "loss": 0.6375,
      "step": 365
    },
    {
      "epoch": 0.36916936891993013,
      "grad_norm": 0.5215763519986214,
      "learning_rate": 1.5914463293738402e-05,
      "loss": 0.6368,
      "step": 370
    },
    {
      "epoch": 0.3741581441756049,
      "grad_norm": 0.6214647949635035,
      "learning_rate": 1.5772994167775986e-05,
      "loss": 0.6303,
      "step": 375
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 0.6365612833821749,
      "learning_rate": 1.5629770433063523e-05,
      "loss": 0.6244,
      "step": 380
    },
    {
      "epoch": 0.38413569468695435,
      "grad_norm": 0.594264835461608,
      "learning_rate": 1.5484835620152198e-05,
      "loss": 0.6323,
      "step": 385
    },
    {
      "epoch": 0.38912446994262906,
      "grad_norm": 0.6107791643380025,
      "learning_rate": 1.533823377964791e-05,
      "loss": 0.6298,
      "step": 390
    },
    {
      "epoch": 0.3941132451983038,
      "grad_norm": 0.6001005800935538,
      "learning_rate": 1.5190009468822782e-05,
      "loss": 0.63,
      "step": 395
    },
    {
      "epoch": 0.3991020204539785,
      "grad_norm": 0.5176320349977592,
      "learning_rate": 1.5040207738072714e-05,
      "loss": 0.6296,
      "step": 400
    },
    {
      "epoch": 0.4040907957096533,
      "grad_norm": 0.5530769815557066,
      "learning_rate": 1.4888874117225013e-05,
      "loss": 0.6202,
      "step": 405
    },
    {
      "epoch": 0.409079570965328,
      "grad_norm": 0.4750983813132654,
      "learning_rate": 1.4736054601700361e-05,
      "loss": 0.6339,
      "step": 410
    },
    {
      "epoch": 0.41406834622100275,
      "grad_norm": 0.5280260411948464,
      "learning_rate": 1.4581795638533227e-05,
      "loss": 0.6244,
      "step": 415
    },
    {
      "epoch": 0.41905712147667745,
      "grad_norm": 0.49444720544930004,
      "learning_rate": 1.4426144112255057e-05,
      "loss": 0.6226,
      "step": 420
    },
    {
      "epoch": 0.4240458967323522,
      "grad_norm": 0.5416758041687645,
      "learning_rate": 1.426914733064444e-05,
      "loss": 0.6281,
      "step": 425
    },
    {
      "epoch": 0.4290346719880269,
      "grad_norm": 0.4913059879733367,
      "learning_rate": 1.4110853010348717e-05,
      "loss": 0.6327,
      "step": 430
    },
    {
      "epoch": 0.4340234472437017,
      "grad_norm": 0.5475336882998988,
      "learning_rate": 1.3951309262381231e-05,
      "loss": 0.6319,
      "step": 435
    },
    {
      "epoch": 0.4390122224993764,
      "grad_norm": 0.5887136176843448,
      "learning_rate": 1.3790564577498791e-05,
      "loss": 0.6323,
      "step": 440
    },
    {
      "epoch": 0.44400099775505114,
      "grad_norm": 0.53325274747872,
      "learning_rate": 1.3628667811463654e-05,
      "loss": 0.6165,
      "step": 445
    },
    {
      "epoch": 0.44898977301072585,
      "grad_norm": 0.5538441977213863,
      "learning_rate": 1.3465668170194633e-05,
      "loss": 0.6259,
      "step": 450
    },
    {
      "epoch": 0.4539785482664006,
      "grad_norm": 0.516406002882252,
      "learning_rate": 1.330161519481172e-05,
      "loss": 0.6251,
      "step": 455
    },
    {
      "epoch": 0.4589673235220753,
      "grad_norm": 0.5294430423934866,
      "learning_rate": 1.3136558746578888e-05,
      "loss": 0.6269,
      "step": 460
    },
    {
      "epoch": 0.46395609877775007,
      "grad_norm": 0.5548195323966518,
      "learning_rate": 1.2970548991749538e-05,
      "loss": 0.6239,
      "step": 465
    },
    {
      "epoch": 0.4689448740334248,
      "grad_norm": 0.48735736648704486,
      "learning_rate": 1.2803636386319288e-05,
      "loss": 0.62,
      "step": 470
    },
    {
      "epoch": 0.47393364928909953,
      "grad_norm": 0.5093962136183301,
      "learning_rate": 1.2635871660690677e-05,
      "loss": 0.6259,
      "step": 475
    },
    {
      "epoch": 0.47892242454477424,
      "grad_norm": 0.5220605413877938,
      "learning_rate": 1.2467305804254472e-05,
      "loss": 0.6233,
      "step": 480
    },
    {
      "epoch": 0.483911199800449,
      "grad_norm": 0.46987756163402217,
      "learning_rate": 1.2297990049892274e-05,
      "loss": 0.6224,
      "step": 485
    },
    {
      "epoch": 0.4888999750561237,
      "grad_norm": 0.4851271226507692,
      "learning_rate": 1.2127975858405096e-05,
      "loss": 0.6248,
      "step": 490
    },
    {
      "epoch": 0.49388875031179846,
      "grad_norm": 0.49812035008570954,
      "learning_rate": 1.1957314902872686e-05,
      "loss": 0.6162,
      "step": 495
    },
    {
      "epoch": 0.49887752556747317,
      "grad_norm": 0.4543789239256326,
      "learning_rate": 1.178605905294832e-05,
      "loss": 0.6191,
      "step": 500
    },
    {
      "epoch": 0.5038663008231479,
      "grad_norm": 0.49450863626445246,
      "learning_rate": 1.1614260359093869e-05,
      "loss": 0.6298,
      "step": 505
    },
    {
      "epoch": 0.5088550760788226,
      "grad_norm": 0.46204592956818197,
      "learning_rate": 1.144197103675988e-05,
      "loss": 0.6108,
      "step": 510
    },
    {
      "epoch": 0.5138438513344974,
      "grad_norm": 0.46446807942496315,
      "learning_rate": 1.1269243450515537e-05,
      "loss": 0.6255,
      "step": 515
    },
    {
      "epoch": 0.5188326265901722,
      "grad_norm": 0.4523344385336006,
      "learning_rate": 1.1096130098133296e-05,
      "loss": 0.621,
      "step": 520
    },
    {
      "epoch": 0.5238214018458468,
      "grad_norm": 0.5281185337565003,
      "learning_rate": 1.092268359463302e-05,
      "loss": 0.6181,
      "step": 525
    },
    {
      "epoch": 0.5288101771015216,
      "grad_norm": 0.5592357254092617,
      "learning_rate": 1.0748956656290512e-05,
      "loss": 0.625,
      "step": 530
    },
    {
      "epoch": 0.5337989523571963,
      "grad_norm": 0.49985683194766767,
      "learning_rate": 1.057500208461522e-05,
      "loss": 0.6088,
      "step": 535
    },
    {
      "epoch": 0.5387877276128711,
      "grad_norm": 0.5348402703139689,
      "learning_rate": 1.0400872750302095e-05,
      "loss": 0.6215,
      "step": 540
    },
    {
      "epoch": 0.5437765028685457,
      "grad_norm": 0.5016453093230498,
      "learning_rate": 1.0226621577162377e-05,
      "loss": 0.6067,
      "step": 545
    },
    {
      "epoch": 0.5487652781242205,
      "grad_norm": 0.5033968932851582,
      "learning_rate": 1.005230152603826e-05,
      "loss": 0.6056,
      "step": 550
    },
    {
      "epoch": 0.5537540533798953,
      "grad_norm": 0.4779633495557308,
      "learning_rate": 9.877965578706286e-06,
      "loss": 0.6158,
      "step": 555
    },
    {
      "epoch": 0.55874282863557,
      "grad_norm": 0.507768651916945,
      "learning_rate": 9.703666721774403e-06,
      "loss": 0.6168,
      "step": 560
    },
    {
      "epoch": 0.5637316038912447,
      "grad_norm": 0.4975220405187006,
      "learning_rate": 9.52945793057753e-06,
      "loss": 0.6133,
      "step": 565
    },
    {
      "epoch": 0.5687203791469194,
      "grad_norm": 0.567581970238524,
      "learning_rate": 9.355392153076541e-06,
      "loss": 0.6153,
      "step": 570
    },
    {
      "epoch": 0.5737091544025942,
      "grad_norm": 0.45994820207954157,
      "learning_rate": 9.18152229376561e-06,
      "loss": 0.6075,
      "step": 575
    },
    {
      "epoch": 0.5786979296582689,
      "grad_norm": 0.4776147923803636,
      "learning_rate": 9.007901197592722e-06,
      "loss": 0.6083,
      "step": 580
    },
    {
      "epoch": 0.5836867049139436,
      "grad_norm": 0.4458298111691603,
      "learning_rate": 8.834581633898307e-06,
      "loss": 0.6151,
      "step": 585
    },
    {
      "epoch": 0.5886754801696183,
      "grad_norm": 0.5015975267018272,
      "learning_rate": 8.661616280376846e-06,
      "loss": 0.6083,
      "step": 590
    },
    {
      "epoch": 0.5936642554252931,
      "grad_norm": 0.44736317209890847,
      "learning_rate": 8.489057707066335e-06,
      "loss": 0.6077,
      "step": 595
    },
    {
      "epoch": 0.5986530306809679,
      "grad_norm": 0.4542312795928054,
      "learning_rate": 8.316958360370462e-06,
      "loss": 0.6089,
      "step": 600
    },
    {
      "epoch": 0.6036418059366425,
      "grad_norm": 0.4669024055915673,
      "learning_rate": 8.145370547118374e-06,
      "loss": 0.614,
      "step": 605
    },
    {
      "epoch": 0.6086305811923173,
      "grad_norm": 0.460625150490194,
      "learning_rate": 7.974346418666854e-06,
      "loss": 0.6097,
      "step": 610
    },
    {
      "epoch": 0.613619356447992,
      "grad_norm": 0.45169077334743357,
      "learning_rate": 7.803937955049743e-06,
      "loss": 0.6134,
      "step": 615
    },
    {
      "epoch": 0.6186081317036668,
      "grad_norm": 0.5045442729073779,
      "learning_rate": 7.634196949179472e-06,
      "loss": 0.6056,
      "step": 620
    },
    {
      "epoch": 0.6235969069593414,
      "grad_norm": 0.4976507270581154,
      "learning_rate": 7.465174991105405e-06,
      "loss": 0.6087,
      "step": 625
    },
    {
      "epoch": 0.6285856822150162,
      "grad_norm": 0.47413589578954074,
      "learning_rate": 7.296923452333908e-06,
      "loss": 0.6073,
      "step": 630
    },
    {
      "epoch": 0.633574457470691,
      "grad_norm": 0.4468358705510705,
      "learning_rate": 7.129493470214775e-06,
      "loss": 0.6065,
      "step": 635
    },
    {
      "epoch": 0.6385632327263657,
      "grad_norm": 0.44813379558060285,
      "learning_rate": 6.962935932398862e-06,
      "loss": 0.5989,
      "step": 640
    },
    {
      "epoch": 0.6435520079820404,
      "grad_norm": 0.4267618712107622,
      "learning_rate": 6.797301461371626e-06,
      "loss": 0.5981,
      "step": 645
    },
    {
      "epoch": 0.6485407832377151,
      "grad_norm": 0.44491038507852154,
      "learning_rate": 6.632640399067197e-06,
      "loss": 0.602,
      "step": 650
    },
    {
      "epoch": 0.6535295584933899,
      "grad_norm": 0.455548462410925,
      "learning_rate": 6.469002791567792e-06,
      "loss": 0.6077,
      "step": 655
    },
    {
      "epoch": 0.6585183337490647,
      "grad_norm": 0.4655649758885719,
      "learning_rate": 6.306438373892985e-06,
      "loss": 0.6027,
      "step": 660
    },
    {
      "epoch": 0.6635071090047393,
      "grad_norm": 0.4603705069002276,
      "learning_rate": 6.144996554883556e-06,
      "loss": 0.6072,
      "step": 665
    },
    {
      "epoch": 0.6684958842604141,
      "grad_norm": 0.42809636369496656,
      "learning_rate": 5.98472640218449e-06,
      "loss": 0.5984,
      "step": 670
    },
    {
      "epoch": 0.6734846595160888,
      "grad_norm": 0.4311787554066561,
      "learning_rate": 5.825676627331614e-06,
      "loss": 0.5997,
      "step": 675
    },
    {
      "epoch": 0.6784734347717636,
      "grad_norm": 0.45825795981326356,
      "learning_rate": 5.667895570946554e-06,
      "loss": 0.6034,
      "step": 680
    },
    {
      "epoch": 0.6834622100274382,
      "grad_norm": 0.4167893564717864,
      "learning_rate": 5.5114311880443374e-06,
      "loss": 0.5975,
      "step": 685
    },
    {
      "epoch": 0.688450985283113,
      "grad_norm": 0.46394354368013807,
      "learning_rate": 5.356331033458276e-06,
      "loss": 0.6065,
      "step": 690
    },
    {
      "epoch": 0.6934397605387878,
      "grad_norm": 0.45003494825662715,
      "learning_rate": 5.202642247386409e-06,
      "loss": 0.6052,
      "step": 695
    },
    {
      "epoch": 0.6984285357944625,
      "grad_norm": 0.40702863232292363,
      "learning_rate": 5.0504115410640105e-06,
      "loss": 0.5985,
      "step": 700
    },
    {
      "epoch": 0.7034173110501372,
      "grad_norm": 0.42165174105900677,
      "learning_rate": 4.899685182566472e-06,
      "loss": 0.5917,
      "step": 705
    },
    {
      "epoch": 0.7084060863058119,
      "grad_norm": 0.5050037064515478,
      "learning_rate": 4.7505089827468335e-06,
      "loss": 0.5959,
      "step": 710
    },
    {
      "epoch": 0.7133948615614867,
      "grad_norm": 0.4558754163109762,
      "learning_rate": 4.602928281312351e-06,
      "loss": 0.5933,
      "step": 715
    },
    {
      "epoch": 0.7183836368171614,
      "grad_norm": 0.4071864016802162,
      "learning_rate": 4.456987933044185e-06,
      "loss": 0.5992,
      "step": 720
    },
    {
      "epoch": 0.7233724120728361,
      "grad_norm": 0.4248472014437075,
      "learning_rate": 4.3127322941645385e-06,
      "loss": 0.5937,
      "step": 725
    },
    {
      "epoch": 0.7283611873285109,
      "grad_norm": 0.40980252115955373,
      "learning_rate": 4.170205208855281e-06,
      "loss": 0.5968,
      "step": 730
    },
    {
      "epoch": 0.7333499625841856,
      "grad_norm": 0.4052569143394039,
      "learning_rate": 4.029449995932213e-06,
      "loss": 0.5926,
      "step": 735
    },
    {
      "epoch": 0.7383387378398603,
      "grad_norm": 0.41360853365570666,
      "learning_rate": 3.890509435679026e-06,
      "loss": 0.6021,
      "step": 740
    },
    {
      "epoch": 0.743327513095535,
      "grad_norm": 0.41835345554744635,
      "learning_rate": 3.7534257568448995e-06,
      "loss": 0.5952,
      "step": 745
    },
    {
      "epoch": 0.7483162883512098,
      "grad_norm": 0.401256370852048,
      "learning_rate": 3.6182406238097745e-06,
      "loss": 0.5972,
      "step": 750
    },
    {
      "epoch": 0.7533050636068845,
      "grad_norm": 0.4174244244132666,
      "learning_rate": 3.484995123921112e-06,
      "loss": 0.5945,
      "step": 755
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 0.4081805968439954,
      "learning_rate": 3.353729755006081e-06,
      "loss": 0.5952,
      "step": 760
    },
    {
      "epoch": 0.763282614118234,
      "grad_norm": 0.40126195224404465,
      "learning_rate": 3.2244844130628684e-06,
      "loss": 0.5869,
      "step": 765
    },
    {
      "epoch": 0.7682713893739087,
      "grad_norm": 0.4252743360297237,
      "learning_rate": 3.0972983801349464e-06,
      "loss": 0.6057,
      "step": 770
    },
    {
      "epoch": 0.7732601646295835,
      "grad_norm": 0.40849879013758245,
      "learning_rate": 2.9722103123719324e-06,
      "loss": 0.5987,
      "step": 775
    },
    {
      "epoch": 0.7782489398852581,
      "grad_norm": 0.4124888296503082,
      "learning_rate": 2.849258228280656e-06,
      "loss": 0.6048,
      "step": 780
    },
    {
      "epoch": 0.7832377151409329,
      "grad_norm": 0.408929858016356,
      "learning_rate": 2.728479497170066e-06,
      "loss": 0.591,
      "step": 785
    },
    {
      "epoch": 0.7882264903966076,
      "grad_norm": 0.40717905524554926,
      "learning_rate": 2.6099108277934105e-06,
      "loss": 0.5957,
      "step": 790
    },
    {
      "epoch": 0.7932152656522824,
      "grad_norm": 0.40906715914325326,
      "learning_rate": 2.4935882571912107e-06,
      "loss": 0.585,
      "step": 795
    },
    {
      "epoch": 0.798204040907957,
      "grad_norm": 0.39973439105002123,
      "learning_rate": 2.379547139738392e-06,
      "loss": 0.5881,
      "step": 800
    },
    {
      "epoch": 0.8031928161636318,
      "grad_norm": 0.39566194107090014,
      "learning_rate": 2.267822136398864e-06,
      "loss": 0.5948,
      "step": 805
    },
    {
      "epoch": 0.8081815914193066,
      "grad_norm": 0.3943529028329431,
      "learning_rate": 2.15844720419091e-06,
      "loss": 0.5928,
      "step": 810
    },
    {
      "epoch": 0.8131703666749813,
      "grad_norm": 0.396743086451483,
      "learning_rate": 2.0514555858664663e-06,
      "loss": 0.5955,
      "step": 815
    },
    {
      "epoch": 0.818159141930656,
      "grad_norm": 0.4158031464312053,
      "learning_rate": 1.9468797998075494e-06,
      "loss": 0.5938,
      "step": 820
    },
    {
      "epoch": 0.8231479171863307,
      "grad_norm": 0.4159354667890639,
      "learning_rate": 1.844751630142797e-06,
      "loss": 0.5811,
      "step": 825
    },
    {
      "epoch": 0.8281366924420055,
      "grad_norm": 0.38601272105412726,
      "learning_rate": 1.7451021170871974e-06,
      "loss": 0.5933,
      "step": 830
    },
    {
      "epoch": 0.8331254676976803,
      "grad_norm": 0.3986644463931031,
      "learning_rate": 1.6479615475079291e-06,
      "loss": 0.5892,
      "step": 835
    },
    {
      "epoch": 0.8381142429533549,
      "grad_norm": 0.40080909281458676,
      "learning_rate": 1.5533594457191326e-06,
      "loss": 0.5898,
      "step": 840
    },
    {
      "epoch": 0.8431030182090297,
      "grad_norm": 0.3942697489480343,
      "learning_rate": 1.4613245645084894e-06,
      "loss": 0.5863,
      "step": 845
    },
    {
      "epoch": 0.8480917934647044,
      "grad_norm": 0.39162896724772583,
      "learning_rate": 1.3718848763982596e-06,
      "loss": 0.5963,
      "step": 850
    },
    {
      "epoch": 0.8530805687203792,
      "grad_norm": 0.38744891578031804,
      "learning_rate": 1.2850675651434962e-06,
      "loss": 0.5931,
      "step": 855
    },
    {
      "epoch": 0.8580693439760538,
      "grad_norm": 0.3757895616733075,
      "learning_rate": 1.2008990174699685e-06,
      "loss": 0.5958,
      "step": 860
    },
    {
      "epoch": 0.8630581192317286,
      "grad_norm": 0.38105622554372953,
      "learning_rate": 1.1194048150543457e-06,
      "loss": 0.5928,
      "step": 865
    },
    {
      "epoch": 0.8680468944874034,
      "grad_norm": 0.38949843229532266,
      "learning_rate": 1.0406097267490644e-06,
      "loss": 0.5894,
      "step": 870
    },
    {
      "epoch": 0.8730356697430781,
      "grad_norm": 0.39133896634308507,
      "learning_rate": 9.645377010542212e-07,
      "loss": 0.5893,
      "step": 875
    },
    {
      "epoch": 0.8780244449987528,
      "grad_norm": 0.4015625907458179,
      "learning_rate": 8.91211858838823e-07,
      "loss": 0.5982,
      "step": 880
    },
    {
      "epoch": 0.8830132202544275,
      "grad_norm": 0.4047912248494648,
      "learning_rate": 8.206544863135612e-07,
      "loss": 0.5865,
      "step": 885
    },
    {
      "epoch": 0.8880019955101023,
      "grad_norm": 0.38503511137561774,
      "learning_rate": 7.528870282572864e-07,
      "loss": 0.5831,
      "step": 890
    },
    {
      "epoch": 0.892990770765777,
      "grad_norm": 0.387759644652555,
      "learning_rate": 6.879300814992007e-07,
      "loss": 0.5985,
      "step": 895
    },
    {
      "epoch": 0.8979795460214517,
      "grad_norm": 0.39050325202063846,
      "learning_rate": 6.258033886587911e-07,
      "loss": 0.5881,
      "step": 900
    },
    {
      "epoch": 0.9029683212771265,
      "grad_norm": 0.3870890888051975,
      "learning_rate": 5.66525832145377e-07,
      "loss": 0.5945,
      "step": 905
    },
    {
      "epoch": 0.9079570965328012,
      "grad_norm": 0.3856245616892951,
      "learning_rate": 5.101154284191035e-07,
      "loss": 0.5929,
      "step": 910
    },
    {
      "epoch": 0.912945871788476,
      "grad_norm": 0.3910109970295147,
      "learning_rate": 4.5658932251512856e-07,
      "loss": 0.6021,
      "step": 915
    },
    {
      "epoch": 0.9179346470441506,
      "grad_norm": 0.3676846272782438,
      "learning_rate": 4.059637828326657e-07,
      "loss": 0.5878,
      "step": 920
    },
    {
      "epoch": 0.9229234222998254,
      "grad_norm": 0.3819934536854667,
      "learning_rate": 3.5825419619046176e-07,
      "loss": 0.5936,
      "step": 925
    },
    {
      "epoch": 0.9279121975555001,
      "grad_norm": 0.3875995828243716,
      "learning_rate": 3.1347506315023036e-07,
      "loss": 0.6038,
      "step": 930
    },
    {
      "epoch": 0.9329009728111749,
      "grad_norm": 0.38002441832217876,
      "learning_rate": 2.716399936094294e-07,
      "loss": 0.5931,
      "step": 935
    },
    {
      "epoch": 0.9378897480668495,
      "grad_norm": 0.3848197669462124,
      "learning_rate": 2.327617026647533e-07,
      "loss": 0.595,
      "step": 940
    },
    {
      "epoch": 0.9428785233225243,
      "grad_norm": 0.38930339170989675,
      "learning_rate": 1.968520067475921e-07,
      "loss": 0.5938,
      "step": 945
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 0.3797874082101493,
      "learning_rate": 1.6392182003260427e-07,
      "loss": 0.5942,
      "step": 950
    },
    {
      "epoch": 0.9528560738338738,
      "grad_norm": 0.3803276424025686,
      "learning_rate": 1.3398115112054243e-07,
      "loss": 0.5876,
      "step": 955
    },
    {
      "epoch": 0.9578448490895485,
      "grad_norm": 0.3765573493263119,
      "learning_rate": 1.070390999962867e-07,
      "loss": 0.5922,
      "step": 960
    },
    {
      "epoch": 0.9628336243452232,
      "grad_norm": 0.3849948624629748,
      "learning_rate": 8.31038552630603e-08,
      "loss": 0.5945,
      "step": 965
    },
    {
      "epoch": 0.967822399600898,
      "grad_norm": 0.38879243314742995,
      "learning_rate": 6.218269165363166e-08,
      "loss": 0.5954,
      "step": 970
    },
    {
      "epoch": 0.9728111748565728,
      "grad_norm": 0.38003968946404176,
      "learning_rate": 4.42819678192774e-08,
      "loss": 0.5913,
      "step": 975
    },
    {
      "epoch": 0.9777999501122474,
      "grad_norm": 0.3869545513042188,
      "learning_rate": 2.9407124397169418e-08,
      "loss": 0.594,
      "step": 980
    },
    {
      "epoch": 0.9827887253679222,
      "grad_norm": 0.3851047915663657,
      "learning_rate": 1.7562682356786488e-08,
      "loss": 0.5925,
      "step": 985
    },
    {
      "epoch": 0.9877775006235969,
      "grad_norm": 0.38621262692876246,
      "learning_rate": 8.752241625831215e-09,
      "loss": 0.6004,
      "step": 990
    },
    {
      "epoch": 0.9927662758792717,
      "grad_norm": 0.3798867531020366,
      "learning_rate": 2.978479996098571e-09,
      "loss": 0.5909,
      "step": 995
    },
    {
      "epoch": 0.9977550511349463,
      "grad_norm": 0.38274768291794226,
      "learning_rate": 2.4315230959359726e-10,
      "loss": 0.5939,
      "step": 1000
    },
    {
      "epoch": 0.9997505612372163,
      "eval_loss": 0.5914663672447205,
      "eval_runtime": 139.1149,
      "eval_samples_per_second": 48.528,
      "eval_steps_per_second": 1.517,
      "step": 1002
    },
    {
      "epoch": 0.9997505612372163,
      "step": 1002,
      "total_flos": 838984280309760.0,
      "train_loss": 0.6422406448099666,
      "train_runtime": 10627.6191,
      "train_samples_per_second": 12.071,
      "train_steps_per_second": 0.094
    }
  ],
  "logging_steps": 5,
  "max_steps": 1002,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 838984280309760.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}