{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 360,
  "global_step": 1085,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "grad_norm": 16.129365409595223, "learning_rate": 3.0303030303030305e-07, "loss": 0.9764, "step": 1 },
    { "epoch": 0.0, "grad_norm": 23.837321667388085, "learning_rate": 6.060606060606061e-07, "loss": 1.1089, "step": 2 },
    { "epoch": 0.0, "grad_norm": 21.22641145653194, "learning_rate": 9.090909090909091e-07, "loss": 1.146, "step": 3 },
    { "epoch": 0.0, "grad_norm": 19.43838955090012, "learning_rate": 1.2121212121212122e-06, "loss": 0.9844, "step": 4 },
    { "epoch": 0.0, "grad_norm": 21.119259211109828, "learning_rate": 1.5151515151515152e-06, "loss": 1.111, "step": 5 },
    { "epoch": 0.01, "grad_norm": 13.239645949682986, "learning_rate": 1.8181818181818183e-06, "loss": 1.0758, "step": 6 },
    { "epoch": 0.01, "grad_norm": 16.634917711449923, "learning_rate": 2.1212121212121216e-06, "loss": 1.0476, "step": 7 },
    { "epoch": 0.01, "grad_norm": 11.236126323459082, "learning_rate": 2.4242424242424244e-06, "loss": 0.9484, "step": 8 },
    { "epoch": 0.01, "grad_norm": 16.700714724312387, "learning_rate": 2.7272727272727272e-06, "loss": 0.946, "step": 9 },
    { "epoch": 0.01, "grad_norm": 8.92070999051616, "learning_rate": 3.0303030303030305e-06, "loss": 0.9667, "step": 10 },
    { "epoch": 0.01, "grad_norm": 8.284153090916707, "learning_rate": 3.3333333333333333e-06, "loss": 0.8781, "step": 11 },
    { "epoch": 0.01, "grad_norm": 6.132327487551557, "learning_rate": 3.6363636363636366e-06, "loss": 0.8894, "step": 12 },
    { "epoch": 0.01, "grad_norm": 6.078573420798111, "learning_rate": 3.93939393939394e-06, "loss": 0.9164, "step": 13 },
    { "epoch": 0.01, "grad_norm": 3.8212192573632233, "learning_rate": 4.242424242424243e-06, "loss": 0.8519, "step": 14 },
    { "epoch": 0.01, "grad_norm": 6.167377075103833, "learning_rate": 4.5454545454545455e-06, "loss": 0.9036, "step": 15 },
    { "epoch": 0.01, "grad_norm": 3.6918572965840766, "learning_rate": 4.848484848484849e-06, "loss": 0.8936, "step": 16 },
    { "epoch": 0.02, "grad_norm": 3.3749857940241776, "learning_rate": 5.151515151515152e-06, "loss": 0.8125, "step": 17 },
    { "epoch": 0.02, "grad_norm": 8.242810694053402, "learning_rate": 5.4545454545454545e-06, "loss": 0.8036, "step": 18 },
    { "epoch": 0.02, "grad_norm": 4.410736818533579, "learning_rate": 5.7575757575757586e-06, "loss": 0.8736, "step": 19 },
    { "epoch": 0.02, "grad_norm": 3.4969703990640535, "learning_rate": 6.060606060606061e-06, "loss": 0.8694, "step": 20 },
    { "epoch": 0.02, "grad_norm": 3.0162345460801148, "learning_rate": 6.363636363636364e-06, "loss": 0.8277, "step": 21 },
    { "epoch": 0.02, "grad_norm": 2.7821235760367586, "learning_rate": 6.666666666666667e-06, "loss": 0.7463, "step": 22 },
    { "epoch": 0.02, "grad_norm": 2.984372802199261, "learning_rate": 6.969696969696971e-06, "loss": 0.7771, "step": 23 },
    { "epoch": 0.02, "grad_norm": 3.1781858812979578, "learning_rate": 7.272727272727273e-06, "loss": 0.8202, "step": 24 },
    { "epoch": 0.02, "grad_norm": 3.4244788380973845, "learning_rate": 7.5757575757575764e-06, "loss": 0.7329, "step": 25 },
    { "epoch": 0.02, "grad_norm": 3.323382123238373, "learning_rate": 7.87878787878788e-06, "loss": 0.8099, "step": 26 },
    { "epoch": 0.02, "grad_norm": 2.438565494153211, "learning_rate": 8.181818181818183e-06, "loss": 0.6891, "step": 27 },
    { "epoch": 0.03, "grad_norm": 6.928726222746485, "learning_rate": 8.484848484848486e-06, "loss": 0.7683, "step": 28 },
    { "epoch": 0.03, "grad_norm": 3.696562865037612, "learning_rate": 8.787878787878788e-06, "loss": 0.8103, "step": 29 },
    { "epoch": 0.03, "grad_norm": 3.0766589777704834, "learning_rate": 9.090909090909091e-06, "loss": 0.7722, "step": 30 },
    { "epoch": 0.03, "grad_norm": 3.168856586909847, "learning_rate": 9.393939393939396e-06, "loss": 0.8968, "step": 31 },
    { "epoch": 0.03, "grad_norm": 3.2461676550878007, "learning_rate": 9.696969696969698e-06, "loss": 0.808, "step": 32 },
    { "epoch": 0.03, "grad_norm": 3.453728879072183, "learning_rate": 1e-05, "loss": 0.8598, "step": 33 },
    { "epoch": 0.03, "grad_norm": 3.9113959644302687, "learning_rate": 9.999977704975616e-06, "loss": 0.7454, "step": 34 },
    { "epoch": 0.03, "grad_norm": 3.4099367075523803, "learning_rate": 9.999910820101292e-06, "loss": 0.7574, "step": 35 },
    { "epoch": 0.03, "grad_norm": 3.270300156314859, "learning_rate": 9.999799345973507e-06, "loss": 0.7398, "step": 36 },
    { "epoch": 0.03, "grad_norm": 3.358981799608668, "learning_rate": 9.99964328358639e-06, "loss": 0.7587, "step": 37 },
    { "epoch": 0.04, "grad_norm": 3.1653542146382745, "learning_rate": 9.999442634331703e-06, "loss": 0.6518, "step": 38 },
    { "epoch": 0.04, "grad_norm": 3.738717692770909, "learning_rate": 9.999197399998842e-06, "loss": 0.7824, "step": 39 },
    { "epoch": 0.04, "grad_norm": 4.000494320016642, "learning_rate": 9.998907582774808e-06, "loss": 0.8417, "step": 40 },
    { "epoch": 0.04, "grad_norm": 4.452509426088607, "learning_rate": 9.998573185244192e-06, "loss": 0.8029, "step": 41 },
    { "epoch": 0.04, "grad_norm": 3.7926851998623228, "learning_rate": 9.998194210389158e-06, "loss": 0.7651, "step": 42 },
    { "epoch": 0.04, "grad_norm": 4.026512006063561, "learning_rate": 9.997770661589404e-06, "loss": 0.7327, "step": 43 },
    { "epoch": 0.04, "grad_norm": 4.4209663160371155, "learning_rate": 9.997302542622144e-06, "loss": 0.6456, "step": 44 },
    { "epoch": 0.04, "grad_norm": 4.641518350587522, "learning_rate": 9.996789857662068e-06, "loss": 0.6919, "step": 45 },
    { "epoch": 0.04, "grad_norm": 5.599607788159595, "learning_rate": 9.996232611281305e-06, "loss": 0.6614, "step": 46 },
    { "epoch": 0.04, "grad_norm": 5.715328472466144, "learning_rate": 9.995630808449384e-06, "loss": 0.6826, "step": 47 },
    { "epoch": 0.04, "grad_norm": 4.203030177637588, "learning_rate": 9.994984454533185e-06, "loss": 0.6869, "step": 48 },
    { "epoch": 0.05, "grad_norm": 5.760101944558736, "learning_rate": 9.994293555296905e-06, "loss": 0.605, "step": 49 },
    { "epoch": 0.05, "grad_norm": 4.853347031830067, "learning_rate": 9.993558116901984e-06, "loss": 0.6368, "step": 50 },
    { "epoch": 0.05, "grad_norm": 5.1151345822300724, "learning_rate": 9.992778145907073e-06, "loss": 0.6057, "step": 51 },
    { "epoch": 0.05, "grad_norm": 5.406684758008133, "learning_rate": 9.99195364926796e-06, "loss": 0.6529, "step": 52 },
    { "epoch": 0.05, "grad_norm": 4.619474763840996, "learning_rate": 9.991084634337512e-06, "loss": 0.6274, "step": 53 },
    { "epoch": 0.05, "grad_norm": 6.581535039528409, "learning_rate": 9.990171108865615e-06, "loss": 0.5609, "step": 54 },
    { "epoch": 0.05, "grad_norm": 4.37519555779229, "learning_rate": 9.989213080999097e-06, "loss": 0.5885, "step": 55 },
    { "epoch": 0.05, "grad_norm": 4.831452306017058, "learning_rate": 9.98821055928166e-06, "loss": 0.6207, "step": 56 },
    { "epoch": 0.05, "grad_norm": 4.856668685797109, "learning_rate": 9.987163552653802e-06, "loss": 0.5988, "step": 57 },
    { "epoch": 0.05, "grad_norm": 3.795835211461076, "learning_rate": 9.986072070452739e-06, "loss": 0.5497, "step": 58 },
    { "epoch": 0.05, "grad_norm": 3.5271814227815894, "learning_rate": 9.984936122412319e-06, "loss": 0.5599, "step": 59 },
    { "epoch": 0.06, "grad_norm": 3.0934898475040646, "learning_rate": 9.98375571866294e-06, "loss": 0.4616, "step": 60 },
    { "epoch": 0.06, "grad_norm": 2.7916913155015934, "learning_rate": 9.982530869731452e-06, "loss": 0.5413, "step": 61 },
    { "epoch": 0.06, "grad_norm": 2.56088982742967, "learning_rate": 9.981261586541068e-06, "loss": 0.4671, "step": 62 },
    { "epoch": 0.06, "grad_norm": 3.168195444967521, "learning_rate": 9.979947880411274e-06, "loss": 0.5476, "step": 63 },
    { "epoch": 0.06, "grad_norm": 2.6048313787874067, "learning_rate": 9.978589763057707e-06, "loss": 0.5249, "step": 64 },
    { "epoch": 0.06, "grad_norm": 2.3963291553988126, "learning_rate": 9.977187246592077e-06, "loss": 0.4986, "step": 65 },
    { "epoch": 0.06, "grad_norm": 2.3557715502121024, "learning_rate": 9.975740343522033e-06, "loss": 0.5556, "step": 66 },
    { "epoch": 0.06, "grad_norm": 2.257861259561017, "learning_rate": 9.974249066751078e-06, "loss": 0.4784, "step": 67 },
    { "epoch": 0.06, "grad_norm": 2.3375531169232153, "learning_rate": 9.972713429578427e-06, "loss": 0.3906, "step": 68 },
    { "epoch": 0.06, "grad_norm": 3.559723628495009, "learning_rate": 9.971133445698908e-06, "loss": 0.4879, "step": 69 },
    { "epoch": 0.06, "grad_norm": 2.194903952303971, "learning_rate": 9.969509129202836e-06, "loss": 0.4214, "step": 70 },
    { "epoch": 0.07, "grad_norm": 3.3275730613175782, "learning_rate": 9.96784049457588e-06, "loss": 0.4623, "step": 71 },
    { "epoch": 0.07, "grad_norm": 3.5635212966654914, "learning_rate": 9.966127556698937e-06, "loss": 0.5575, "step": 72 },
    { "epoch": 0.07, "grad_norm": 3.0097194021841425, "learning_rate": 9.964370330848005e-06, "loss": 0.6115, "step": 73 },
    { "epoch": 0.07, "grad_norm": 2.816462672408414, "learning_rate": 9.962568832694046e-06, "loss": 0.534, "step": 74 },
    { "epoch": 0.07, "grad_norm": 2.3189782455504777, "learning_rate": 9.960723078302831e-06, "loss": 0.4456, "step": 75 },
    { "epoch": 0.07, "grad_norm": 3.0074509816127737, "learning_rate": 9.958833084134821e-06, "loss": 0.4709, "step": 76 },
    { "epoch": 0.07, "grad_norm": 2.5611868066341175, "learning_rate": 9.956898867045e-06, "loss": 0.5881, "step": 77 },
    { "epoch": 0.07, "grad_norm": 2.700986720821575, "learning_rate": 9.954920444282733e-06, "loss": 0.573, "step": 78 },
    { "epoch": 0.07, "grad_norm": 2.4159123698956884, "learning_rate": 9.952897833491617e-06, "loss": 0.5419, "step": 79 },
    { "epoch": 0.07, "grad_norm": 2.1081028350711035, "learning_rate": 9.950831052709315e-06, "loss": 0.4993, "step": 80 },
    { "epoch": 0.07, "grad_norm": 2.6391399727281084, "learning_rate": 9.948720120367395e-06, "loss": 0.4646, "step": 81 },
    { "epoch": 0.08, "grad_norm": 2.2824993832357268, "learning_rate": 9.946565055291175e-06, "loss": 0.4608, "step": 82 },
    { "epoch": 0.08, "grad_norm": 3.0836295676643863, "learning_rate": 9.944365876699545e-06, "loss": 0.5675, "step": 83 },
    { "epoch": 0.08, "grad_norm": 2.6895916676660425, "learning_rate": 9.942122604204801e-06, "loss": 0.5574, "step": 84 },
    { "epoch": 0.08, "grad_norm": 2.674274085195954, "learning_rate": 9.939835257812468e-06, "loss": 0.516, "step": 85 },
    { "epoch": 0.08, "grad_norm": 2.5758744418706985, "learning_rate": 9.937503857921126e-06, "loss": 0.4949, "step": 86 },
    { "epoch": 0.08, "grad_norm": 3.11345842522007, "learning_rate": 9.93512842532222e-06, "loss": 0.4908, "step": 87 },
    { "epoch": 0.08, "grad_norm": 2.070822431047042, "learning_rate": 9.932708981199884e-06, "loss": 0.411, "step": 88 },
    { "epoch": 0.08, "grad_norm": 2.4626185376360046, "learning_rate": 9.93024554713074e-06, "loss": 0.4968, "step": 89 },
    { "epoch": 0.08, "grad_norm": 2.3899527875013096, "learning_rate": 9.92773814508372e-06, "loss": 0.6052, "step": 90 },
    { "epoch": 0.08, "grad_norm": 2.0242500276011803, "learning_rate": 9.925186797419859e-06, "loss": 0.4088, "step": 91 },
    { "epoch": 0.08, "grad_norm": 3.254960261668724, "learning_rate": 9.9225915268921e-06, "loss": 0.5032, "step": 92 },
    { "epoch": 0.09, "grad_norm": 2.121594536508012, "learning_rate": 9.919952356645093e-06, "loss": 0.4736, "step": 93 },
    { "epoch": 0.09, "grad_norm": 2.591853805964485, "learning_rate": 9.917269310214981e-06, "loss": 0.5118, "step": 94 },
    { "epoch": 0.09, "grad_norm": 2.975273286527196, "learning_rate": 9.9145424115292e-06, "loss": 0.4194, "step": 95 },
    { "epoch": 0.09, "grad_norm": 3.0207999126195797, "learning_rate": 9.911771684906257e-06, "loss": 0.481, "step": 96 },
    { "epoch": 0.09, "grad_norm": 2.223652296880912, "learning_rate": 9.908957155055522e-06, "loss": 0.4881, "step": 97 },
    { "epoch": 0.09, "grad_norm": 2.145057143621607, "learning_rate": 9.906098847077e-06, "loss": 0.4256, "step": 98 },
    { "epoch": 0.09, "grad_norm": 2.1968182998546792, "learning_rate": 9.903196786461106e-06, "loss": 0.4447, "step": 99 },
    { "epoch": 0.09, "grad_norm": 2.488996647778079, "learning_rate": 9.900250999088447e-06, "loss": 0.4832, "step": 100 },
    { "epoch": 0.09, "grad_norm": 2.7075122583499844, "learning_rate": 9.897261511229584e-06, "loss": 0.5647, "step": 101 },
    { "epoch": 0.09, "grad_norm": 2.249140261199728, "learning_rate": 9.894228349544796e-06, "loss": 0.6393, "step": 102 },
    { "epoch": 0.09, "grad_norm": 2.138039351395474, "learning_rate": 9.891151541083853e-06, "loss": 0.4877, "step": 103 },
    { "epoch": 0.1, "grad_norm": 2.2281784162372515, "learning_rate": 9.888031113285758e-06, "loss": 0.471, "step": 104 },
    { "epoch": 0.1, "grad_norm": 2.320283473080627, "learning_rate": 9.88486709397852e-06, "loss": 0.533, "step": 105 },
    { "epoch": 0.1, "grad_norm": 2.3375483348489654, "learning_rate": 9.881659511378893e-06, "loss": 0.4609, "step": 106 },
    { "epoch": 0.1, "grad_norm": 1.9407782549862607, "learning_rate": 9.87840839409213e-06, "loss": 0.4836, "step": 107 },
    { "epoch": 0.1, "grad_norm": 2.233860400341602, "learning_rate": 9.875113771111726e-06, "loss": 0.4909, "step": 108 },
    { "epoch": 0.1, "grad_norm": 1.843385330799683, "learning_rate": 9.871775671819162e-06, "loss": 0.4387, "step": 109 },
    { "epoch": 0.1, "grad_norm": 2.1589676091186156, "learning_rate": 9.868394125983639e-06, "loss": 0.486, "step": 110 },
    { "epoch": 0.1, "grad_norm": 2.040342418056587, "learning_rate": 9.864969163761818e-06, "loss": 0.4788, "step": 111 },
    { "epoch": 0.1, "grad_norm": 2.428882692637533, "learning_rate": 9.861500815697541e-06, "loss": 0.5592, "step": 112 },
    { "epoch": 0.1, "grad_norm": 2.2835414884007244, "learning_rate": 9.857989112721574e-06, "loss": 0.4444, "step": 113 },
    { "epoch": 0.11, "grad_norm": 2.112223960038215, "learning_rate": 9.854434086151318e-06, "loss": 0.4274, "step": 114 },
    { "epoch": 0.11, "grad_norm": 2.0412310191062737, "learning_rate": 9.850835767690532e-06, "loss": 0.4685, "step": 115 },
    { "epoch": 0.11, "grad_norm": 2.4647322646697654, "learning_rate": 9.847194189429057e-06, "loss": 0.5397, "step": 116 },
    { "epoch": 0.11, "grad_norm": 1.7943751391033103, "learning_rate": 9.843509383842524e-06, "loss": 0.4868, "step": 117 },
    { "epoch": 0.11, "grad_norm": 1.8448536703371594, "learning_rate": 9.839781383792065e-06, "loss": 0.47, "step": 118 },
    { "epoch": 0.11, "grad_norm": 1.912968908916307, "learning_rate": 9.836010222524018e-06, "loss": 0.456, "step": 119 },
    { "epoch": 0.11, "grad_norm": 2.1780775293684287, "learning_rate": 9.832195933669639e-06, "loss": 0.4071, "step": 120 },
    { "epoch": 0.11, "grad_norm": 1.8028208875094607, "learning_rate": 9.828338551244794e-06, "loss": 0.4125, "step": 121 },
    { "epoch": 0.11, "grad_norm": 2.130779457730023, "learning_rate": 9.824438109649654e-06, "loss": 0.5437, "step": 122 },
    { "epoch": 0.11, "grad_norm": 2.0007324967598823, "learning_rate": 9.820494643668397e-06, "loss": 0.4136, "step": 123 },
    { "epoch": 0.11, "grad_norm": 2.179925551173916, "learning_rate": 9.81650818846889e-06, "loss": 0.3808, "step": 124 },
    { "epoch": 0.12, "grad_norm": 2.1852746498130355, "learning_rate": 9.812478779602382e-06, "loss": 0.5346, "step": 125 },
    { "epoch": 0.12, "grad_norm": 2.138463505709442, "learning_rate": 9.808406453003177e-06, "loss": 0.4781, "step": 126 },
    { "epoch": 0.12, "grad_norm": 1.6209448601330558, "learning_rate": 9.804291244988324e-06, "loss": 0.3635, "step": 127 },
    { "epoch": 0.12, "grad_norm": 2.130274052940016, "learning_rate": 9.80013319225729e-06, "loss": 0.477, "step": 128 },
    { "epoch": 0.12, "grad_norm": 2.386102342816235, "learning_rate": 9.79593233189163e-06, "loss": 0.475, "step": 129 },
    { "epoch": 0.12, "grad_norm": 2.245518577211273, "learning_rate": 9.791688701354656e-06, "loss": 0.4893, "step": 130 },
    { "epoch": 0.12, "grad_norm": 2.0216906989251298, "learning_rate": 9.787402338491107e-06, "loss": 0.5008, "step": 131 },
    { "epoch": 0.12, "grad_norm": 2.0992634451137686, "learning_rate": 9.78307328152681e-06, "loss": 0.491, "step": 132 },
    { "epoch": 0.12, "grad_norm": 2.182492396879651, "learning_rate": 9.778701569068336e-06, "loss": 0.4291, "step": 133 },
    { "epoch": 0.12, "grad_norm": 2.322495194500256, "learning_rate": 9.77428724010266e-06, "loss": 0.4719, "step": 134 },
    { "epoch": 0.12, "grad_norm": 2.0990527106566095, "learning_rate": 9.76983033399681e-06, "loss": 0.545, "step": 135 },
    { "epoch": 0.13, "grad_norm": 2.197449587105137, "learning_rate": 9.765330890497518e-06, "loss": 0.4402, "step": 136 },
    { "epoch": 0.13, "grad_norm": 2.1444974057383197, "learning_rate": 9.760788949730866e-06, "loss": 0.4596, "step": 137 },
    { "epoch": 0.13, "grad_norm": 2.0082778523434954, "learning_rate": 9.756204552201926e-06, "loss": 0.5243, "step": 138 },
    { "epoch": 0.13, "grad_norm": 2.3012157115740797, "learning_rate": 9.751577738794397e-06, "loss": 0.466, "step": 139 },
    { "epoch": 0.13, "grad_norm": 2.027306320186225, "learning_rate": 9.746908550770252e-06, "loss": 0.4119, "step": 140 },
    { "epoch": 0.13, "grad_norm": 2.2945854233184275, "learning_rate": 9.74219702976935e-06, "loss": 0.5636, "step": 141 },
    { "epoch": 0.13, "grad_norm": 1.8562471392647044, "learning_rate": 9.737443217809083e-06, "loss": 0.4643, "step": 142 },
    { "epoch": 0.13, "grad_norm": 2.1780745804692625, "learning_rate": 9.732647157283994e-06, "loss": 0.4851, "step": 143 },
    { "epoch": 0.13, "grad_norm": 1.987075025431446, "learning_rate": 9.727808890965396e-06, "loss": 0.528, "step": 144 },
    { "epoch": 0.13, "grad_norm": 1.9653726410307981, "learning_rate": 9.722928462000995e-06, "loss": 0.4628, "step": 145 },
    { "epoch": 0.13, "grad_norm": 2.0367912532832206, "learning_rate": 9.718005913914504e-06, "loss": 0.4967, "step": 146 },
    { "epoch": 0.14, "grad_norm": 2.071421018578479, "learning_rate": 9.713041290605255e-06, "loss": 0.4603, "step": 147 },
    { "epoch": 0.14, "grad_norm": 1.8900063373062326, "learning_rate": 9.708034636347807e-06, "loss": 0.4436, "step": 148 },
    { "epoch": 0.14, "grad_norm": 3.0076364938112627, "learning_rate": 9.702985995791554e-06, "loss": 0.4846, "step": 149 },
    { "epoch": 0.14, "grad_norm": 2.6019702541050833, "learning_rate": 9.69789541396032e-06, "loss": 0.5282, "step": 150 },
    { "epoch": 0.14, "grad_norm": 1.825342999834098, "learning_rate": 9.69276293625196e-06, "loss": 0.4162, "step": 151 },
    { "epoch": 0.14, "grad_norm": 1.7815368946800145, "learning_rate": 9.687588608437963e-06, "loss": 0.5089, "step": 152 },
    { "epoch": 0.14, "grad_norm": 1.9315012959714708, "learning_rate": 9.682372476663038e-06, "loss": 0.3959, "step": 153 },
    { "epoch": 0.14, "grad_norm": 2.2305953049147274, "learning_rate": 9.677114587444695e-06, "loss": 0.4668, "step": 154 },
    { "epoch": 0.14, "grad_norm": 2.741881922031329, "learning_rate": 9.671814987672844e-06, "loss": 0.4607, "step": 155 },
    { "epoch": 0.14, "grad_norm": 2.3110309083466585, "learning_rate": 9.666473724609364e-06, "loss": 0.5638, "step": 156 },
    { "epoch": 0.14, "grad_norm": 2.0682482956606467, "learning_rate": 9.661090845887693e-06, "loss": 0.4686, "step": 157 },
    { "epoch": 0.15, "grad_norm": 2.6193571588443016, "learning_rate": 9.655666399512399e-06, "loss": 0.5315, "step": 158 },
    { "epoch": 0.15, "grad_norm": 1.9797453669645546, "learning_rate": 9.650200433858742e-06, "loss": 0.4896, "step": 159 },
    { "epoch": 0.15, "grad_norm": 2.4216613612020654, "learning_rate": 9.64469299767226e-06, "loss": 0.3741, "step": 160 },
    { "epoch": 0.15, "grad_norm": 1.9693170147881809, "learning_rate": 9.639144140068324e-06, "loss": 0.4272, "step": 161 },
    { "epoch": 0.15, "grad_norm": 2.2194466246097613, "learning_rate": 9.633553910531697e-06, "loss": 0.4457, "step": 162 },
    { "epoch": 0.15, "grad_norm": 2.201593460499194, "learning_rate": 9.627922358916103e-06, "loss": 0.4102, "step": 163 },
    { "epoch": 0.15, "grad_norm": 1.6525412697622233, "learning_rate": 9.622249535443773e-06, "loss": 0.3761, "step": 164 },
    { "epoch": 0.15, "grad_norm": 2.2227663902993156, "learning_rate": 9.616535490705003e-06, "loss": 0.4669, "step": 165 },
    { "epoch": 0.15, "grad_norm": 2.7035693121877094, "learning_rate": 9.610780275657699e-06, "loss": 0.5599, "step": 166 },
    { "epoch": 0.15, "grad_norm": 2.011593619843652, "learning_rate": 9.604983941626924e-06, "loss": 0.4761, "step": 167 },
    { "epoch": 0.15, "grad_norm": 2.6713907964698778, "learning_rate": 9.599146540304445e-06, "loss": 0.5458, "step": 168 },
    { "epoch": 0.16, "grad_norm": 2.0578837694185688, "learning_rate": 9.59326812374826e-06, "loss": 0.5034, "step": 169 },
    { "epoch": 0.16, "grad_norm": 2.009264530645865, "learning_rate": 9.587348744382146e-06, "loss": 0.4794, "step": 170 },
    { "epoch": 0.16, "grad_norm": 1.8448925507784202, "learning_rate": 9.581388454995188e-06, "loss": 0.5835, "step": 171 },
    { "epoch": 0.16, "grad_norm": 1.7924002279972235, "learning_rate": 9.575387308741301e-06, "loss": 0.5, "step": 172 },
    { "epoch": 0.16, "grad_norm": 2.1290643907506412, "learning_rate": 9.569345359138771e-06, "loss": 0.3709, "step": 173 },
    { "epoch": 0.16, "grad_norm": 2.218893686302514, "learning_rate": 9.56326266006976e-06, "loss": 0.5468, "step": 174 },
    { "epoch": 0.16, "grad_norm": 2.2371127166937597, "learning_rate": 9.557139265779839e-06, "loss": 0.5076, "step": 175 },
    { "epoch": 0.16, "grad_norm": 2.2443513250129365, "learning_rate": 9.550975230877495e-06, "loss": 0.3882, "step": 176 },
    { "epoch": 0.16, "grad_norm": 2.080847037819759, "learning_rate": 9.544770610333656e-06, "loss": 0.5072, "step": 177 },
    { "epoch": 0.16, "grad_norm": 2.063334530889104, "learning_rate": 9.538525459481185e-06, "loss": 0.5129, "step": 178 },
    { "epoch": 0.16, "grad_norm": 1.8429299549087172, "learning_rate": 9.5322398340144e-06, "loss": 0.5222, "step": 179 },
    { "epoch": 0.17, "grad_norm": 2.5812772666454338, "learning_rate": 9.52591378998857e-06, "loss": 0.5519, "step": 180 },
    { "epoch": 0.17, "grad_norm": 2.058285902666481, "learning_rate": 9.519547383819417e-06, "loss": 0.4551, "step": 181 },
    { "epoch": 0.17, "grad_norm": 1.8818304058002688, "learning_rate": 9.513140672282613e-06, "loss": 0.4273, "step": 182 },
    { "epoch": 0.17, "grad_norm": 1.9209762314258565, "learning_rate": 9.506693712513276e-06, "loss": 0.5037, "step": 183 },
    { "epoch": 0.17, "grad_norm": 2.0870128777895767, "learning_rate": 9.500206562005451e-06, "loss": 0.5833, "step": 184 },
    { "epoch": 0.17, "grad_norm": 2.2623949551946128, "learning_rate": 9.493679278611616e-06, "loss": 0.5183, "step": 185 },
    { "epoch": 0.17, "grad_norm": 2.333429280532828, "learning_rate": 9.487111920542144e-06, "loss": 0.4874, "step": 186 },
    { "epoch": 0.17, "grad_norm": 1.815562707725, "learning_rate": 9.480504546364799e-06, "loss": 0.4692, "step": 187 },
    { "epoch": 0.17, "grad_norm": 1.7660766038392852, "learning_rate": 9.47385721500421e-06, "loss": 0.5067, "step": 188 },
    { "epoch": 0.17, "grad_norm": 1.9680194643495112, "learning_rate": 9.467169985741338e-06, "loss": 0.4563, "step": 189 },
    { "epoch": 0.18, "grad_norm": 1.9920282177444568, "learning_rate": 9.460442918212965e-06, "loss": 0.4578, "step": 190 },
    { "epoch": 0.18, "grad_norm": 2.241206997432981, "learning_rate": 9.453676072411142e-06, "loss": 0.4639, "step": 191 },
    { "epoch": 0.18, "grad_norm": 2.311099672661341, "learning_rate": 9.446869508682666e-06, "loss": 0.5097, "step": 192 },
    { "epoch": 0.18, "grad_norm": 1.7661459736458465, "learning_rate": 9.440023287728536e-06, "loss": 0.5023, "step": 193 },
    { "epoch": 0.18, "grad_norm": 2.305876852824869, "learning_rate": 9.433137470603424e-06, "loss": 0.5889, "step": 194 },
    { "epoch": 0.18, "grad_norm": 1.7528835048955134, "learning_rate": 9.426212118715107e-06, "loss": 0.4261, "step": 195 },
    { "epoch": 0.18, "grad_norm": 1.9984846584908784, "learning_rate": 9.419247293823947e-06, "loss": 0.4611, "step": 196 },
    { "epoch": 0.18, "grad_norm": 2.2838282643195535, "learning_rate": 9.412243058042316e-06, "loss": 0.5117, "step": 197 },
    { "epoch": 0.18, "grad_norm": 1.9369138203254217, "learning_rate": 9.405199473834058e-06, "loss": 0.4893, "step": 198 },
    { "epoch": 0.18, "grad_norm": 1.7502665191769333, "learning_rate": 9.398116604013926e-06, "loss": 0.4881, "step": 199 },
    { "epoch": 0.18, "grad_norm": 1.8704327603370965, "learning_rate": 9.390994511747021e-06, "loss": 0.4546, "step": 200 },
    { "epoch": 0.19, "grad_norm": 2.17508798618509, "learning_rate": 9.383833260548234e-06, "loss": 0.4726, "step": 201 },
    { "epoch": 0.19, "grad_norm": 2.2849265007425497, "learning_rate": 9.37663291428167e-06, "loss": 0.4642, "step": 202 },
    { "epoch": 0.19, "grad_norm": 1.9841051903862645, "learning_rate": 9.36939353716009e-06, "loss": 0.4622, "step": 203 },
    { "epoch": 0.19, "grad_norm": 2.257470318496205, "learning_rate": 9.362115193744329e-06, "loss": 0.4282, "step": 204 },
    { "epoch": 0.19, "grad_norm": 2.392014920409948, "learning_rate": 9.35479794894272e-06, "loss": 0.4345, "step": 205 },
    { "epoch": 0.19, "grad_norm": 1.8052120410615011, "learning_rate": 9.347441868010531e-06, "loss": 0.4247, "step": 206 },
    { "epoch": 0.19, "grad_norm": 1.7695741916190524, "learning_rate": 9.340047016549359e-06, "loss": 0.5194, "step": 207 },
    { "epoch": 0.19, "grad_norm": 1.8450154842082274, "learning_rate": 9.332613460506563e-06, "loss": 0.4131, "step": 208 },
    { "epoch": 0.19, "grad_norm": 1.8662503542524322, "learning_rate": 9.325141266174666e-06, "loss": 0.4947, "step": 209 },
    { "epoch": 0.19, "grad_norm": 4.295232461263357, "learning_rate": 9.317630500190774e-06, "loss": 0.5474, "step": 210 },
    { "epoch": 0.19, "grad_norm": 1.7297698392367244, "learning_rate": 9.310081229535968e-06, "loss": 0.4592, "step": 211 },
    { "epoch": 0.2, "grad_norm": 1.9626826054816364, "learning_rate": 9.302493521534718e-06, "loss": 0.4943, "step": 212 },
    { "epoch": 0.2, "grad_norm": 1.7282048031834156, "learning_rate": 9.294867443854278e-06, "loss": 0.4225, "step": 213 },
    { "epoch": 0.2, "grad_norm": 1.8569486568399847, "learning_rate": 9.287203064504084e-06, "loss": 0.4129, "step": 214 },
    { "epoch": 0.2, "grad_norm": 1.7618256251989388, "learning_rate": 9.279500451835146e-06, "loss": 0.4748, "step": 215 },
    { "epoch": 0.2, "grad_norm": 2.3797244120825227, "learning_rate": 9.271759674539437e-06, "loss": 0.5891, "step": 216 },
    { "epoch": 0.2, "grad_norm": 2.072223692212461, "learning_rate": 9.263980801649286e-06, "loss": 0.5031, "step": 217 },
    { "epoch": 0.2, "grad_norm": 2.050876876872574, "learning_rate": 9.256163902536756e-06, "loss": 0.4236, "step": 218 },
    { "epoch": 0.2, "grad_norm": 2.1198932578024876, "learning_rate": 9.248309046913032e-06, "loss": 0.5258, "step": 219 },
    { "epoch": 0.2, "grad_norm": 2.146695653453272, "learning_rate": 9.24041630482779e-06, "loss": 0.4794, "step": 220 },
    { "epoch": 0.2, "grad_norm": 1.7199343616506693, "learning_rate": 9.232485746668583e-06, "loss": 0.3485, "step": 221 },
    { "epoch": 0.2, "grad_norm": 2.286003830086287, "learning_rate": 9.224517443160205e-06, "loss": 0.526, "step": 222 },
    { "epoch": 0.21, "grad_norm": 1.7514678642956694, "learning_rate": 9.216511465364066e-06, "loss": 0.5042, "step": 223 },
    { "epoch": 0.21, "grad_norm": 2.210433946519186, "learning_rate": 9.208467884677553e-06, "loss": 0.5777, "step": 224 },
    { "epoch": 0.21, "grad_norm": 1.8071033204951692, "learning_rate": 9.200386772833394e-06, "loss": 0.4398, "step": 225 },
    { "epoch": 0.21, "grad_norm": 2.0356702912458835, "learning_rate": 9.192268201899028e-06, "loss": 0.4432, "step": 226 },
    { "epoch": 0.21, "grad_norm": 1.798415887689644, "learning_rate": 9.184112244275949e-06, "loss": 0.4139, "step": 227 },
    { "epoch": 0.21, "grad_norm": 1.9228155662900879, "learning_rate": 9.175918972699063e-06, "loss": 0.4344, "step": 228 },
    { "epoch": 0.21, "grad_norm": 2.0327204788445496, "learning_rate": 9.167688460236049e-06, "loss": 0.4207, "step": 229 },
    { "epoch": 0.21, "grad_norm": 1.5559510625922655, "learning_rate": 9.159420780286699e-06, "loss": 0.3875, "step": 230 },
    { "epoch": 0.21, "grad_norm": 1.7831249479009101, "learning_rate": 9.151116006582259e-06, "loss": 0.5297, "step": 231 },
    { "epoch": 0.21, "grad_norm": 2.1056676282274136, "learning_rate": 9.142774213184785e-06, "loss": 0.4287, "step": 232 },
    { "epoch": 0.21, "grad_norm": 1.7222296800330472, "learning_rate": 9.134395474486471e-06, "loss": 0.494, "step": 233 },
    { "epoch": 0.22, "grad_norm": 1.9376342013761003, "learning_rate": 9.12597986520899e-06, "loss": 0.4693, "step": 234 },
    { "epoch": 0.22, "grad_norm": 1.4953747502341712, "learning_rate": 9.117527460402826e-06, "loss": 0.4201, "step": 235 },
    { "epoch": 0.22, "grad_norm": 1.718097073507747, "learning_rate": 9.109038335446612e-06, "loss": 0.4611, "step": 236 },
    { "epoch": 0.22, "grad_norm": 1.8055791887833874, "learning_rate": 9.100512566046445e-06, "loss": 0.5568, "step": 237 },
    { "epoch": 0.22, "grad_norm": 1.8902275123309493, "learning_rate": 9.091950228235218e-06, "loss": 0.5016, "step": 238 },
    { "epoch": 0.22, "grad_norm": 1.7798148032487224, "learning_rate": 9.083351398371944e-06, "loss": 0.3797, "step": 239 },
    { "epoch": 0.22, "grad_norm": 1.9541566954758047, "learning_rate": 9.074716153141074e-06, "loss": 0.4734, "step": 240 },
    { "epoch": 0.22, "grad_norm": 1.8373280148490558, "learning_rate": 9.066044569551806e-06, "loss": 0.546, "step": 241 },
    { "epoch": 0.22, "grad_norm": 2.439997049046741, "learning_rate": 9.057336724937409e-06, "loss": 0.4715, "step": 242 },
    { "epoch": 0.22, "grad_norm": 2.0915262974626607, "learning_rate": 9.048592696954525e-06, "loss": 0.4606, "step": 243 },
    { "epoch": 0.22, "grad_norm": 1.8438692284406528, "learning_rate": 9.039812563582481e-06, "loss": 0.4807, "step": 244 },
    { "epoch": 0.23, "grad_norm": 2.238913866496149, "learning_rate": 9.030996403122593e-06, "loss": 0.4993, "step": 245 },
    { "epoch": 0.23, "grad_norm": 1.727513240522199, "learning_rate": 9.022144294197465e-06, "loss": 0.4872, "step": 246 },
    { "epoch": 0.23, "grad_norm": 1.8561137203921283, "learning_rate": 9.013256315750291e-06, "loss": 0.4521, "step": 247 },
    { "epoch": 0.23, "grad_norm": 1.9418328007205319, "learning_rate": 9.00433254704415e-06, "loss": 0.4693, "step": 248 },
    { "epoch": 0.23, "grad_norm": 2.1530206936727425, "learning_rate": 8.995373067661297e-06, "loss": 0.4198, "step": 249 },
    { "epoch": 0.23, "grad_norm": 1.750946114305235, "learning_rate": 8.986377957502459e-06, "loss": 0.5011, "step": 250 },
    { "epoch": 0.23, "grad_norm": 1.8983504775289417, "learning_rate": 8.977347296786113e-06, "loss": 0.4639, "step": 251 },
    { "epoch": 0.23, "grad_norm": 2.466069503803939, "learning_rate": 8.968281166047782e-06, "loss": 0.509, "step": 252 },
    { "epoch": 0.23, "grad_norm": 1.8749710619294966, "learning_rate": 8.959179646139307e-06, "loss": 0.4688, "step": 253 },
    { "epoch": 0.23, "grad_norm": 2.5434884020163335, "learning_rate": 8.95004281822813e-06, "loss": 0.4903, "step": 254 },
    { "epoch": 0.24, "grad_norm": 2.3537492058556992, "learning_rate": 8.940870763796575e-06, "loss": 0.4347, "step": 255 },
    { "epoch": 0.24, "grad_norm": 2.046709463687296, "learning_rate": 8.93166356464111e-06, "loss": 0.4785, "step": 256 },
    { "epoch": 0.24, "grad_norm": 1.8894291264166485, "learning_rate": 8.922421302871628e-06, "loss": 0.5522, "step": 257 },
    { "epoch": 0.24, "grad_norm": 1.8650094987487535, "learning_rate": 8.91314406091071e-06, "loss": 0.4594, "step": 258 },
    { "epoch": 0.24, "grad_norm": 2.189918791153672, "learning_rate": 8.903831921492889e-06, "loss": 0.4493, "step": 259 },
    { "epoch": 0.24, "grad_norm": 2.0404519379156523, "learning_rate": 8.894484967663917e-06, "loss": 0.448, "step": 260 },
    { "epoch": 0.24, "grad_norm": 1.800269060842294, "learning_rate": 8.885103282780017e-06, "loss": 0.4584, "step": 261 },
    { "epoch": 0.24, "grad_norm": 2.006281433815957, "learning_rate": 8.875686950507148e-06, "loss": 0.5058, "step": 262 },
    { "epoch": 0.24, "grad_norm": 1.8369002219910917, "learning_rate": 8.866236054820252e-06, "loss": 0.4434, "step": 263 },
    { "epoch": 0.24, "grad_norm": 2.224177043935375, "learning_rate": 8.85675068000251e-06, "loss": 0.5084, "step": 264 },
    { "epoch": 0.24, "grad_norm": 1.869776027369733, "learning_rate": 8.847230910644586e-06, "loss": 0.3914, "step": 265 },
    { "epoch": 0.25, "grad_norm": 1.5520635751992689, "learning_rate": 8.837676831643878e-06, "loss": 0.3972, "step": 266 },
    { "epoch": 0.25, "grad_norm": 1.6861043321143212, "learning_rate": 8.828088528203754e-06, "loss": 0.4617, "step": 267 },
    { "epoch": 0.25, "grad_norm": 1.7990345859922723, "learning_rate": 8.818466085832797e-06, "loss": 0.4946, "step": 268 },
    { "epoch": 0.25, "grad_norm": 1.6043477816641816, "learning_rate": 8.808809590344043e-06, "loss": 0.4647, "step": 269 },
    { "epoch": 0.25, "grad_norm": 1.5615958854636416, "learning_rate": 8.799119127854211e-06, "loss": 0.3511, "step": 270 },
    { "epoch": 0.25, "grad_norm": 2.2697929179032403, "learning_rate": 8.789394784782945e-06, "loss": 0.4179, "step": 271 },
    { "epoch": 0.25, "grad_norm": 1.5996581325163564, "learning_rate": 8.779636647852028e-06, "loss": 0.4466, "step": 272 },
    { "epoch": 0.25, "grad_norm": 1.8067082124141383, "learning_rate": 8.76984480408462e-06, "loss": 0.4125, "step": 273 },
    { "epoch": 0.25, "grad_norm": 2.1459929653678342, "learning_rate": 8.760019340804478e-06, "loss": 0.5071, "step": 274 },
    { "epoch": 0.25, "grad_norm": 2.2872319731726547, "learning_rate": 8.750160345635183e-06, "loss": 0.4584, "step": 275 },
    { "epoch": 0.25, "grad_norm": 1.7407350134278134, "learning_rate": 8.740267906499347e-06, "loss": 0.4991, "step": 276 },
    { "epoch": 0.26, "grad_norm": 2.609979578565858, "learning_rate": 8.73034211161784e-06, "loss": 0.5205, "step": 277 },
    { "epoch": 0.26, "grad_norm": 2.200677272547663, "learning_rate": 8.720383049508998e-06, "loss": 0.5487, "step": 278 },
    { "epoch": 0.26, "grad_norm": 1.6432820372618948, "learning_rate": 8.710390808987834e-06, "loss": 0.4042, "step": 279 },
    { "epoch": 0.26, "grad_norm": 1.7156721753241313, "learning_rate": 8.700365479165246e-06, "loss": 0.3996, "step": 280 },
    { "epoch": 0.26, "grad_norm": 1.9082538060806855, "learning_rate": 8.690307149447222e-06, "loss": 0.4812, "step": 281 },
    { "epoch": 0.26, "grad_norm": 1.63902982998481, "learning_rate": 8.680215909534044e-06, "loss": 0.446, "step": 282 },
    { "epoch": 0.26, "grad_norm": 1.587339989963004, "learning_rate": 8.67009184941949e-06, "loss": 0.3901, "step": 283 },
    { "epoch": 0.26, "grad_norm": 1.6319010966753653, "learning_rate": 8.659935059390029e-06, "loss": 0.4277, "step": 284 },
    { "epoch": 0.26, "grad_norm": 1.7250865003847689, "learning_rate": 8.64974563002401e-06, "loss": 0.3561, "step": 285 },
    { "epoch": 0.26, "grad_norm": 1.7173033701439209, "learning_rate": 8.639523652190863e-06, "loss": 0.3946, "step": 286 },
    { "epoch": 0.26, "grad_norm": 2.0533966512825748, "learning_rate": 8.62926921705029e-06, "loss": 0.4255, "step": 287 },
    { "epoch": 0.27, "grad_norm": 1.9264061742598322, "learning_rate": 8.618982416051437e-06, "loss": 0.4631, "step": 288 },
    { "epoch": 0.27, "grad_norm": 2.0607639449740534, "learning_rate": 8.608663340932105e-06, "loss": 0.4806, "step": 289 },
    { "epoch": 0.27, "grad_norm": 1.9707326291467142, "learning_rate": 8.598312083717897e-06, "loss": 0.4838, "step": 290 },
    { "epoch": 0.27, "grad_norm": 1.8760820163547305, "learning_rate": 8.587928736721432e-06, "loss": 0.5108, "step": 291 },
    { "epoch": 0.27, "grad_norm": 1.7862721737223892, "learning_rate": 8.577513392541496e-06, "loss": 0.3962, "step": 292 },
    { "epoch": 0.27, "grad_norm": 1.9882280047781893, "learning_rate": 8.567066144062232e-06, "loss": 0.5171, "step": 293 },
    { "epoch": 0.27, "grad_norm": 2.0639143960233897, "learning_rate": 8.556587084452305e-06, "loss": 0.5001, "step": 294 },
    { "epoch": 0.27, "grad_norm": 1.8782269179129345, "learning_rate": 8.546076307164068e-06, "loss": 0.4568, "step": 295 },
    { "epoch": 0.27, "grad_norm": 1.6079291594479586, "learning_rate": 8.535533905932739e-06, "loss": 0.4282, "step": 296 },
    { "epoch": 0.27, "grad_norm": 2.0434150812546203, "learning_rate": 8.524959974775551e-06, "loss": 0.4884, "step": 297 },
    { "epoch": 0.27, "grad_norm": 1.9071711134863052, "learning_rate": 8.514354607990927e-06, "loss": 0.4573, "step": 298 },
    { "epoch": 0.28, "grad_norm": 1.8196098537969094, "learning_rate": 8.503717900157632e-06, "loss": 0.4788, "step": 299 },
    { "epoch": 0.28, "grad_norm": 1.9075527663018237, "learning_rate": 8.49304994613393e-06, "loss": 0.5259, "step": 300 },
    { "epoch": 0.28, "grad_norm": 1.6611505782230385, "learning_rate": 8.482350841056739e-06, "loss": 0.4273, "step": 301 },
    { "epoch": 0.28, "grad_norm": 1.8747309724134273, "learning_rate": 8.47162068034078e-06, "loss": 0.4702, "step": 302 },
    { "epoch": 0.28, "grad_norm": 1.9210086107236377, "learning_rate": 8.460859559677735e-06, "loss": 0.4309, "step": 303 },
    { "epoch": 0.28, "grad_norm": 1.7431646203338342, "learning_rate": 8.45006757503538e-06, "loss": 0.4613, "step": 304 },
    { "epoch": 0.28, "grad_norm": 1.5052628264537502, "learning_rate": 8.43924482265674e-06, "loss": 0.3804, "step": 305 },
    { "epoch": 0.28, "grad_norm": 1.9158138263968971, "learning_rate": 8.428391399059228e-06, "loss": 0.5001, "step": 306 },
    { "epoch": 0.28, "grad_norm": 2.480815467739981, "learning_rate": 8.41750740103378e-06, "loss": 0.5295, "step": 307 },
    { "epoch": 0.28, "grad_norm": 2.003610118605416, "learning_rate": 8.406592925643995e-06, "loss": 0.4291, "step": 308 },
    { "epoch": 0.28, "grad_norm": 1.8511717607117526, "learning_rate": 8.395648070225273e-06, "loss": 0.4799, "step": 309 },
    { "epoch": 0.29, "grad_norm": 1.5701507442392661, "learning_rate": 8.384672932383942e-06, "loss": 0.4526, "step": 310 },
    { "epoch": 0.29, "grad_norm": 1.5948064665545478, "learning_rate": 8.373667609996388e-06, "loss": 0.3488, "step": 311 },
    { "epoch": 0.29, "grad_norm": 1.7172848929373001, "learning_rate": 8.362632201208182e-06, "loss": 0.5191, "step": 312 },
    { "epoch": 0.29, "grad_norm": 1.861421603347419, "learning_rate": 8.351566804433207e-06, "loss": 0.4473, "step": 313 },
    { "epoch": 0.29, "grad_norm": 1.8015561005409069, "learning_rate": 8.340471518352781e-06, "loss": 0.4527, "step": 314 },
    { "epoch": 0.29, "grad_norm": 2.1556575012116967, "learning_rate": 8.329346441914774e-06, "loss": 0.5484, "step": 315 },
    { "epoch": 0.29, "grad_norm": 2.0076914952446954, "learning_rate": 8.318191674332724e-06, "loss": 0.4091, "step": 316 },
    { "epoch": 0.29, "grad_norm": 1.867497503681997, "learning_rate": 8.307007315084958e-06, "loss": 0.4939, "step": 317 },
    { "epoch": 0.29, "grad_norm": 2.1262537928980025, "learning_rate": 8.295793463913701e-06, "loss": 0.5834, "step": 318 },
    { "epoch": 0.29, "grad_norm": 1.7637175217775476, "learning_rate": 8.284550220824186e-06, "loss": 0.4526, "step": 319 },
    { "epoch": 0.29, "grad_norm": 1.648339289988361, "learning_rate": 8.273277686083768e-06, "loss": 0.3449, "step": 320 },
    { "epoch": 0.3, "grad_norm": 2.3992238626915308, "learning_rate": 8.261975960221016e-06, "loss": 0.4495, "step": 321 },
    { "epoch": 0.3, "grad_norm": 1.9018619117764128, "learning_rate": 8.250645144024837e-06, "loss": 0.3955, "step": 322 },
    { "epoch": 0.3, "grad_norm": 1.9522016280115226, "learning_rate": 8.23928533854356e-06, "loss": 0.5687, "step": 323 },
    { "epoch": 0.3, "grad_norm": 1.8534830913654115, "learning_rate": 8.227896645084037e-06, "loss": 0.4745, "step": 324 },
    { "epoch": 0.3, "grad_norm": 1.9964611419849239, "learning_rate": 8.216479165210748e-06, "loss": 0.4019, "step": 325 },
    { "epoch": 0.3, "grad_norm": 2.068111314185152, "learning_rate": 8.205033000744895e-06, "loss": 0.5143, "step": 326 },
    { "epoch": 0.3, "grad_norm": 1.5590845818504766, "learning_rate": 8.193558253763479e-06, "loss": 0.3693, "step": 327 },
    { "epoch": 0.3, "grad_norm": 2.186509835454746, "learning_rate": 8.182055026598408e-06, "loss": 0.416, "step": 328 },
    { "epoch": 0.3, "grad_norm": 1.5844801019590957, "learning_rate": 8.170523421835573e-06, "loss": 0.3545, "step": 329 },
    { "epoch": 0.3, "grad_norm": 1.8575577226281546, "learning_rate": 8.158963542313937e-06, "loss": 0.4761, "step": 330 },
    { "epoch": 0.31, "grad_norm": 1.7966713390914204, "learning_rate": 8.14737549112462e-06, "loss": 0.3864, "step": 331 },
    { "epoch": 0.31, "grad_norm": 1.8380592128499222, "learning_rate": 8.135759371609977e-06, "loss": 0.4845, "step": 332 },
    { "epoch": 0.31, "grad_norm": 2.0455274179399447, "learning_rate": 8.12411528736267e-06, "loss": 0.5534, "step": 333 },
    { "epoch": 0.31, "grad_norm": 1.7294266023913523, "learning_rate": 8.112443342224761e-06, "loss": 0.4138, "step": 334 },
    { "epoch": 0.31, "grad_norm": 2.0058011955497626, "learning_rate": 8.100743640286768e-06, "loss": 0.4999, "step": 335 },
    { "epoch": 0.31, "grad_norm": 1.651553365904595, "learning_rate": 8.089016285886747e-06, "loss": 0.4881, "step": 336 },
    { "epoch": 0.31, "grad_norm": 1.773227004412968, "learning_rate": 8.077261383609363e-06, "loss": 0.4598, "step": 337 },
    { "epoch": 0.31, "grad_norm": 1.8790139684478144, "learning_rate": 8.065479038284942e-06, "loss": 0.4663, "step": 338 },
    { "epoch": 0.31, "grad_norm": 1.919118851556668, "learning_rate": 8.05366935498856e-06, "loss": 0.4419, "step": 339 },
    { "epoch": 0.31, "grad_norm": 1.964652133274893, "learning_rate": 8.041832439039085e-06, "loss": 0.4673, "step": 340 },
    { "epoch": 0.31, "grad_norm": 1.7974809835670555, "learning_rate": 8.02996839599825e-06, "loss": 0.4727, "step": 341 },
    { "epoch": 0.32, "grad_norm": 1.8553171988918498, "learning_rate": 8.018077331669707e-06, "loss": 0.4163, "step": 342 },
    { "epoch": 0.32, "grad_norm": 1.9389312069845352, "learning_rate": 8.006159352098082e-06, "loss": 0.4334, "step": 343 },
    { "epoch": 0.32, "grad_norm": 1.6346055321688229, "learning_rate": 7.994214563568036e-06, "loss": 0.4002, "step": 344 },
    { "epoch": 0.32, "grad_norm": 1.599227362292106, "learning_rate": 7.982243072603306e-06, "loss": 0.4076, "step": 345 },
    { "epoch": 0.32, "grad_norm": 1.7368614934087998, "learning_rate": 7.970244985965767e-06, "loss": 0.5056, "step": 346 },
    { "epoch": 0.32, "grad_norm": 1.591309812293656, "learning_rate": 7.958220410654474e-06, "loss": 0.3454, "step": 347 },
    { "epoch": 0.32, "grad_norm": 1.9143496936902584, "learning_rate": 7.946169453904706e-06, "loss": 0.4637, "step": 348 },
    { "epoch": 0.32, "grad_norm": 1.9965493738056397, "learning_rate": 7.93409222318701e-06, "loss": 0.4425, "step": 349 },
    { "epoch": 0.32, "grad_norm": 1.5549050181218969, "learning_rate": 7.921988826206252e-06, "loss": 0.4003, "step": 350 },
    { "epoch": 0.32, "grad_norm": 2.0576285352992456, "learning_rate": 7.909859370900642e-06, "loss": 0.4399, "step": 351 },
    { "epoch": 0.32, "grad_norm": 1.8356947778398593, "learning_rate": 7.89770396544078e-06, "loss": 0.487, "step": 352 },
    { "epoch": 0.33, "grad_norm": 1.8974007514954563, "learning_rate": 7.88552271822869e-06, "loss": 0.4701, "step": 353 },
    { "epoch": 0.33, "grad_norm": 2.3226326302090365, "learning_rate": 7.873315737896856e-06, "loss": 0.4546, "step": 354 },
    { "epoch": 0.33, "grad_norm": 1.9787830221538376, "learning_rate": 7.861083133307248e-06, "loss": 0.5219, "step": 355 },
    { "epoch": 0.33, "grad_norm": 1.8025367170940212, "learning_rate": 7.84882501355035e-06, "loss": 0.5475, "step": 356 },
    { "epoch": 0.33, "grad_norm": 1.9086914362763832, "learning_rate": 7.836541487944194e-06, "loss": 0.5392, "step": 357 },
    { "epoch": 0.33, "grad_norm": 2.0101191131901044, "learning_rate": 7.824232666033382e-06, "loss": 0.4568, "step": 358 },
    { "epoch": 0.33, "grad_norm": 1.6340523028668645, "learning_rate": 7.81189865758811e-06, "loss": 0.4964, "step": 359 },
    { "epoch": 0.33, "grad_norm": 2.2283404114277596, "learning_rate": 7.79953957260318e-06, "loss": 0.5183, "step": 360 },
    { "epoch": 0.33, "eval_loss": 0.4910432696342468, "eval_runtime": 594.926, "eval_samples_per_second": 4.555, "eval_steps_per_second": 0.286, "step": 360 },
    { "epoch": 0.33, "grad_norm": 2.123698396493111, "learning_rate": 7.78715552129704e-06, "loss": 0.4642, "step": 361 },
    { "epoch": 0.33, "grad_norm": 1.9212714053414477, "learning_rate": 7.774746614110775e-06, "loss": 0.4842, "step": 362 },
    { "epoch": 0.33, "grad_norm": 1.7372173704119815, "learning_rate": 7.762312961707142e-06, "loss": 0.4649, "step": 363 },
    { "epoch": 0.34, "grad_norm": 2.077334722606827, "learning_rate": 7.749854674969572e-06, "loss": 0.4999, "step": 364 },
    { "epoch": 0.34, "grad_norm": 1.970176276378124, "learning_rate": 7.737371865001191e-06, "loss": 0.4424, "step": 365 },
    { "epoch": 0.34, "grad_norm": 1.989426659221798, "learning_rate": 7.72486464312382e-06, "loss": 0.4116, "step": 366 },
    { "epoch": 0.34, "grad_norm": 1.9514397016000429, "learning_rate": 7.712333120876983e-06, "loss": 0.4875, "step": 367 },
    { "epoch": 0.34, "grad_norm": 1.6795737086731901, "learning_rate": 7.699777410016919e-06, "loss": 0.4681, "step": 368 },
    { "epoch": 0.34, "grad_norm": 1.7615296207530442, "learning_rate": 7.68719762251558e-06, "loss": 0.4944, "step": 369 },
    { "epoch": 0.34, "grad_norm": 2.107061451712905, "learning_rate": 7.674593870559635e-06, "loss": 0.4358, "step": 370 },
    { "epoch": 0.34, "grad_norm": 1.6660235962385783, "learning_rate": 7.661966266549463e-06, "loss": 0.4869, "step": 371 },
    { "epoch": 0.34, "grad_norm": 1.9951185485725758, "learning_rate": 7.649314923098164e-06, "loss": 0.4729, "step": 372 },
    { "epoch": 0.34, "grad_norm": 1.9058474675851362, "learning_rate": 7.636639953030541e-06, "loss": 0.4598, "step": 373 },
    { "epoch": 0.34, "grad_norm": 1.88590824247099, "learning_rate": 7.623941469382099e-06, "loss": 0.3653, "step": 374 },
    { "epoch": 0.35, "grad_norm": 1.6804952330361298, "learning_rate": 7.6112195853980405e-06, "loss": 0.4879, "step": 375 },
    { "epoch": 0.35, "grad_norm": 1.822142130579958, "learning_rate": 7.598474414532252e-06, "loss": 0.4422, "step": 376 },
    { "epoch": 0.35, "grad_norm": 2.3799371329130317, "learning_rate": 7.585706070446288e-06, "loss": 0.4564, "step": 377 },
    { "epoch": 0.35, "grad_norm": 1.770691949972335, "learning_rate": 7.572914667008371e-06, "loss": 0.4476, "step": 378 },
    { "epoch": 0.35, "grad_norm": 2.1595484976714605, "learning_rate": 7.560100318292356e-06, "loss": 0.3401, "step": 379 },
    { "epoch": 0.35, "grad_norm": 1.5977458132598084, "learning_rate": 7.5472631385767325e-06, "loss": 0.3351, "step": 380 },
    { "epoch": 0.35, "grad_norm": 1.7747990571416914, "learning_rate": 7.5344032423435955e-06, "loss": 0.3917, "step": 381 },
    { "epoch": 0.35, "grad_norm": 1.8120059216772098, "learning_rate": 7.521520744277624e-06, "loss": 0.3841, "step": 382 },
    { "epoch": 0.35, "grad_norm": 1.6871158947809795, "learning_rate": 7.508615759265059e-06, "loss": 0.4564, "step": 383 },
    { "epoch": 0.35, "grad_norm": 1.6166435032638933, "learning_rate": 7.495688402392687e-06, "loss": 0.4139, "step": 384 },
    { "epoch": 0.35, "grad_norm": 1.9846131718538027, "learning_rate": 7.4827387889468e-06, "loss": 0.5126, "step": 385 },
    { "epoch": 0.36, "grad_norm": 1.5748881218512112, "learning_rate": 7.469767034412176e-06, "loss": 0.3554, "step": 386 },
    { "epoch": 0.36, "grad_norm": 1.8364294972416482, "learning_rate": 7.456773254471053e-06, "loss": 0.4038, "step": 387 },
    { "epoch": 0.36, "grad_norm": 1.5067889157844772, "learning_rate": 7.443757565002081e-06, "loss": 0.4199, "step": 388 },
    { "epoch": 0.36, "grad_norm": 1.7982314184205184, "learning_rate": 7.4307200820793116e-06, "loss": 0.5696, "step": 389 },
    { "epoch": 0.36, "grad_norm": 1.8564950286038326, "learning_rate": 7.417660921971141e-06, "loss": 0.4031, "step": 390 },
    { "epoch": 0.36, "grad_norm": 1.7423105060855828, "learning_rate": 7.404580201139287e-06, "loss": 0.364, "step": 391 },
    { "epoch": 0.36, "grad_norm": 1.7521782559338535, "learning_rate": 7.391478036237747e-06, "loss": 0.4695, "step": 392 },
    { "epoch": 0.36, "grad_norm": 1.7234339577319666, "learning_rate": 7.378354544111754e-06, "loss": 0.3918, "step": 393 },
    { "epoch": 0.36, "grad_norm": 1.8347981133237585, "learning_rate": 7.3652098417967385e-06, "loss": 0.4166, "step": 394 },
    { "epoch": 0.36, "grad_norm": 1.9133644782594936, "learning_rate": 7.352044046517286e-06, "loss": 0.3654, "step": 395 },
    { "epoch": 0.36, "grad_norm": 1.8683261855992503, "learning_rate": 7.338857275686084e-06, "loss": 0.4664, "step": 396 },
    { "epoch": 0.37, "grad_norm": 1.7767493778615355, "learning_rate": 7.325649646902887e-06, "loss": 0.5069, "step": 397 },
    { "epoch": 0.37, "grad_norm": 1.5207608762075067, "learning_rate": 7.312421277953455e-06, "loss": 0.4519, "step": 398 },
    { "epoch": 0.37, "grad_norm": 1.558289509724299, "learning_rate": 7.2991722868085116e-06, "loss": 0.4433, "step": 399 },
    { "epoch": 0.37, "grad_norm": 1.6758395841551788, "learning_rate": 7.285902791622689e-06, "loss": 0.4029, "step": 400 },
    { "epoch": 0.37, "grad_norm": 1.7967561396799412, "learning_rate": 7.2726129107334756e-06, "loss": 0.3314, "step": 401 },
    { "epoch": 0.37, "grad_norm": 1.721033476704588, "learning_rate": 7.259302762660158e-06, "loss": 0.4638, "step": 402 },
    { "epoch": 0.37, "grad_norm": 2.296407197054947, "learning_rate": 7.245972466102767e-06, "loss": 0.4548, "step": 403 },
    { "epoch": 0.37, "grad_norm": 1.795981307210963, "learning_rate": 7.2326221399410154e-06, "loss": 0.4076, "step": 404 },
    { "epoch": 0.37, "grad_norm": 1.8949401637558156, "learning_rate": 7.219251903233245e-06, "loss": 0.4093, "step": 405 },
    { "epoch": 0.37, "grad_norm": 1.73684976879178,
|
"learning_rate": 7.205861875215356e-06, |
|
"loss": 0.3819, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.7843482784746565, |
|
"learning_rate": 7.192452175299749e-06, |
|
"loss": 0.3852, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.515979867371627, |
|
"learning_rate": 7.179022923074258e-06, |
|
"loss": 0.3516, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.6687889356941592, |
|
"learning_rate": 7.165574238301085e-06, |
|
"loss": 0.5291, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.318380976106726, |
|
"learning_rate": 7.152106240915735e-06, |
|
"loss": 0.5291, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.6326669832522336, |
|
"learning_rate": 7.138619051025936e-06, |
|
"loss": 0.3583, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8202220733844416, |
|
"learning_rate": 7.125112788910581e-06, |
|
"loss": 0.4199, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.6587100069818803, |
|
"learning_rate": 7.1115875750186484e-06, |
|
"loss": 0.4062, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.0834531329912767, |
|
"learning_rate": 7.098043529968124e-06, |
|
"loss": 0.4267, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9123822757520978, |
|
"learning_rate": 7.084480774544937e-06, |
|
"loss": 0.5463, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.5790511832425012, |
|
"learning_rate": 7.0708994297018725e-06, |
|
"loss": 0.4292, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.609041808151019, |
|
"learning_rate": 7.057299616557494e-06, |
|
"loss": 0.4687, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.0643914858474877, |
|
"learning_rate": 7.0436814563950685e-06, |
|
"loss": 0.4086, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.6647393613676398, |
|
"learning_rate": 7.030045070661484e-06, |
|
"loss": 0.4021, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.8392635961659551, |
|
"learning_rate": 7.016390580966157e-06, |
|
"loss": 0.4092, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.5961711152340379, |
|
"learning_rate": 7.0027181090799635e-06, |
|
"loss": 0.4444, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.9508635361928839, |
|
"learning_rate": 6.989027776934138e-06, |
|
"loss": 0.4403, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.6913630397691195, |
|
"learning_rate": 6.975319706619197e-06, |
|
"loss": 0.3893, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.7460218203007862, |
|
"learning_rate": 6.9615940203838485e-06, |
|
"loss": 0.4421, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.733368109080223, |
|
"learning_rate": 6.947850840633892e-06, |
|
"loss": 0.4468, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.8378497057178176, |
|
"learning_rate": 6.93409028993114e-06, |
|
"loss": 0.4311, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.52179250865843, |
|
"learning_rate": 6.92031249099232e-06, |
|
"loss": 0.3415, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.6773803379432295, |
|
"learning_rate": 6.906517566687974e-06, |
|
"loss": 0.4516, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.6271440674538975, |
|
"learning_rate": 6.8927056400413735e-06, |
|
"loss": 0.3851, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.703735768216468, |
|
"learning_rate": 6.878876834227414e-06, |
|
"loss": 0.4994, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.7864526474999918, |
|
"learning_rate": 6.8650312725715205e-06, |
|
"loss": 0.462, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.7484281417044363, |
|
"learning_rate": 6.85116907854855e-06, |
|
"loss": 0.4013, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.878619986993286, |
|
"learning_rate": 6.8372903757816785e-06, |
|
"loss": 0.3441, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.7713170367925475, |
|
"learning_rate": 6.823395288041315e-06, |
|
"loss": 0.3744, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.6816840368792083, |
|
"learning_rate": 6.809483939243991e-06, |
|
"loss": 0.426, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.6874406652983192, |
|
"learning_rate": 6.795556453451247e-06, |
|
"loss": 0.4653, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8669154711798979, |
|
"learning_rate": 6.781612954868538e-06, |
|
"loss": 0.4462, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.6552135176054872, |
|
"learning_rate": 6.767653567844121e-06, |
|
"loss": 0.406, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.86645573903988, |
|
"learning_rate": 6.7536784168679435e-06, |
|
"loss": 0.397, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.790952614026057, |
|
"learning_rate": 6.739687626570542e-06, |
|
"loss": 0.2901, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9158821336195229, |
|
"learning_rate": 6.725681321721916e-06, |
|
"loss": 0.4875, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.716712729279155, |
|
"learning_rate": 6.711659627230431e-06, |
|
"loss": 0.4671, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.358168373370002, |
|
"learning_rate": 6.697622668141699e-06, |
|
"loss": 0.4856, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.5652289238354264, |
|
"learning_rate": 6.683570569637452e-06, |
|
"loss": 0.3989, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.2252972642864846, |
|
"learning_rate": 6.669503457034445e-06, |
|
"loss": 0.4992, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.3986600415958652, |
|
"learning_rate": 6.6554214557833245e-06, |
|
"loss": 0.4299, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.7075847740067474, |
|
"learning_rate": 6.641324691467514e-06, |
|
"loss": 0.4358, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.389187659662136, |
|
"learning_rate": 6.627213289802098e-06, |
|
"loss": 0.3526, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.756357024413377, |
|
"learning_rate": 6.613087376632691e-06, |
|
"loss": 0.4007, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.0007846755874965, |
|
"learning_rate": 6.5989470779343265e-06, |
|
"loss": 0.5168, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.5519088968464894, |
|
"learning_rate": 6.584792519810326e-06, |
|
"loss": 0.3959, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.095052602192115, |
|
"learning_rate": 6.570623828491175e-06, |
|
"loss": 0.4732, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.5882275503599168, |
|
"learning_rate": 6.556441130333403e-06, |
|
"loss": 0.4358, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.6100778415480488, |
|
"learning_rate": 6.542244551818451e-06, |
|
"loss": 0.3534, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.7528487768698109, |
|
"learning_rate": 6.528034219551542e-06, |
|
"loss": 0.4225, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.115929646962235, |
|
"learning_rate": 6.513810260260559e-06, |
|
"loss": 0.4989, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.843848673295388, |
|
"learning_rate": 6.499572800794911e-06, |
|
"loss": 0.5289, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.5327924766590826, |
|
"learning_rate": 6.485321968124398e-06, |
|
"loss": 0.4567, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.4479520006001745, |
|
"learning_rate": 6.471057889338088e-06, |
|
"loss": 0.4135, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.5507352265630177, |
|
"learning_rate": 6.456780691643172e-06, |
|
"loss": 0.4116, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.4570245443001153, |
|
"learning_rate": 6.442490502363838e-06, |
|
"loss": 0.3086, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8396731671196835, |
|
"learning_rate": 6.428187448940136e-06, |
|
"loss": 0.3928, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.7001256773742297, |
|
"learning_rate": 6.413871658926834e-06, |
|
"loss": 0.3558, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0514844538438757, |
|
"learning_rate": 6.399543259992288e-06, |
|
"loss": 0.5153, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.7729510151345593, |
|
"learning_rate": 6.385202379917297e-06, |
|
"loss": 0.3732, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.75896661140076, |
|
"learning_rate": 6.370849146593973e-06, |
|
"loss": 0.4428, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.278481398853675, |
|
"learning_rate": 6.356483688024589e-06, |
|
"loss": 0.499, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9928357122955027, |
|
"learning_rate": 6.342106132320442e-06, |
|
"loss": 0.5037, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8000026678751535, |
|
"learning_rate": 6.327716607700719e-06, |
|
"loss": 0.4435, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8559089886724838, |
|
"learning_rate": 6.313315242491338e-06, |
|
"loss": 0.438, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0025851627973745, |
|
"learning_rate": 6.298902165123815e-06, |
|
"loss": 0.3831, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.5664343523094808, |
|
"learning_rate": 6.284477504134117e-06, |
|
"loss": 0.3618, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.019729314852934, |
|
"learning_rate": 6.2700413881615045e-06, |
|
"loss": 0.4521, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.5101081475388585, |
|
"learning_rate": 6.255593945947407e-06, |
|
"loss": 0.423, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.564874785306883, |
|
"learning_rate": 6.241135306334254e-06, |
|
"loss": 0.3541, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.8365721139692799, |
|
"learning_rate": 6.226665598264331e-06, |
|
"loss": 0.4385, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.811282555709379, |
|
"learning_rate": 6.21218495077864e-06, |
|
"loss": 0.432, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.7131707973857517, |
|
"learning_rate": 6.197693493015735e-06, |
|
"loss": 0.4757, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.6746091553025404, |
|
"learning_rate": 6.183191354210577e-06, |
|
"loss": 0.3876, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.6680270115475315, |
|
"learning_rate": 6.168678663693383e-06, |
|
"loss": 0.5057, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.783038683063815, |
|
"learning_rate": 6.154155550888467e-06, |
|
"loss": 0.4514, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.7422076759380254, |
|
"learning_rate": 6.139622145313089e-06, |
|
"loss": 0.431, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9480782360670714, |
|
"learning_rate": 6.125078576576305e-06, |
|
"loss": 0.4041, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.601477220996701, |
|
"learning_rate": 6.110524974377802e-06, |
|
"loss": 0.3845, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.1706407426281413, |
|
"learning_rate": 6.095961468506744e-06, |
|
"loss": 0.4361, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.3929696780713714, |
|
"learning_rate": 6.081388188840623e-06, |
|
"loss": 0.3073, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0401261029546944, |
|
"learning_rate": 6.066805265344084e-06, |
|
"loss": 0.4436, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.9148504474098886, |
|
"learning_rate": 6.052212828067787e-06, |
|
"loss": 0.4593, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.7635819099442833, |
|
"learning_rate": 6.037611007147224e-06, |
|
"loss": 0.5092, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.4802426475721033, |
|
"learning_rate": 6.0229999328015786e-06, |
|
"loss": 0.4024, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.4685599882750913, |
|
"learning_rate": 6.008379735332556e-06, |
|
"loss": 0.3088, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.748612085026246, |
|
"learning_rate": 5.993750545123217e-06, |
|
"loss": 0.4406, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.772166754683321, |
|
"learning_rate": 5.979112492636824e-06, |
|
"loss": 0.4471, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.7325520509519283, |
|
"learning_rate": 5.964465708415673e-06, |
|
"loss": 0.3231, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.6030922828091225, |
|
"learning_rate": 5.949810323079927e-06, |
|
"loss": 0.4312, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.1985222157973876, |
|
"learning_rate": 5.935146467326456e-06, |
|
"loss": 0.4116, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.7954988435751924, |
|
"learning_rate": 5.920474271927668e-06, |
|
"loss": 0.3573, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.7965610417403968, |
|
"learning_rate": 5.905793867730344e-06, |
|
"loss": 0.4705, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.4845270873323246, |
|
"learning_rate": 5.891105385654474e-06, |
|
"loss": 0.3947, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.6494063913341859, |
|
"learning_rate": 5.876408956692084e-06, |
|
"loss": 0.3885, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.61982882759216, |
|
"learning_rate": 5.861704711906068e-06, |
|
"loss": 0.3829, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.7422732722633554, |
|
"learning_rate": 5.846992782429028e-06, |
|
"loss": 0.4317, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.8793061898705192, |
|
"learning_rate": 5.832273299462092e-06, |
|
"loss": 0.4528, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.461398015349738, |
|
"learning_rate": 5.8175463942737544e-06, |
|
"loss": 0.3867, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.8164032576204665, |
|
"learning_rate": 5.8028121981987e-06, |
|
"loss": 0.4303, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.681083362899287, |
|
"learning_rate": 5.7880708426366296e-06, |
|
"loss": 0.4281, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.6403401791373944, |
|
"learning_rate": 5.773322459051099e-06, |
|
"loss": 0.4122, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.7904392830657831, |
|
"learning_rate": 5.758567178968336e-06, |
|
"loss": 0.5003, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.6816836281671608, |
|
"learning_rate": 5.7438051339760715e-06, |
|
"loss": 0.4503, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.4737121910087774, |
|
"learning_rate": 5.729036455722369e-06, |
|
"loss": 0.3735, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.1113107777546505, |
|
"learning_rate": 5.714261275914442e-06, |
|
"loss": 0.4567, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.437714366596938, |
|
"learning_rate": 5.6994797263174906e-06, |
|
"loss": 0.4383, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.480294742385032, |
|
"learning_rate": 5.684691938753517e-06, |
|
"loss": 0.3716, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.009485638198127, |
|
"learning_rate": 5.669898045100156e-06, |
|
"loss": 0.3101, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9276848419323365, |
|
"learning_rate": 5.655098177289496e-06, |
|
"loss": 0.4296, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.89516458805726, |
|
"learning_rate": 5.6402924673068994e-06, |
|
"loss": 0.4179, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.5877840417481992, |
|
"learning_rate": 5.625481047189835e-06, |
|
"loss": 0.4067, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.5771628202151042, |
|
"learning_rate": 5.610664049026691e-06, |
|
"loss": 0.4776, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.0167337793713376, |
|
"learning_rate": 5.595841604955601e-06, |
|
"loss": 0.5034, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.6304179843092055, |
|
"learning_rate": 5.581013847163267e-06, |
|
"loss": 0.3563, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.6878087948379197, |
|
"learning_rate": 5.566180907883777e-06, |
|
"loss": 0.4238, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.5753762648228329, |
|
"learning_rate": 5.551342919397429e-06, |
|
"loss": 0.3613, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.854366092949144, |
|
"learning_rate": 5.536500014029548e-06, |
|
"loss": 0.3682, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.7481329815152795, |
|
"learning_rate": 5.521652324149307e-06, |
|
"loss": 0.4116, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.734446987948073, |
|
"learning_rate": 5.506799982168553e-06, |
|
"loss": 0.4758, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.9732915897763694, |
|
"learning_rate": 5.491943120540616e-06, |
|
"loss": 0.3322, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.754698004300824, |
|
"learning_rate": 5.47708187175913e-06, |
|
"loss": 0.4162, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.5763085121064524, |
|
"learning_rate": 5.4622163683568584e-06, |
|
"loss": 0.4129, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.7488374814897032, |
|
"learning_rate": 5.447346742904508e-06, |
|
"loss": 0.3756, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9252531289816768, |
|
"learning_rate": 5.432473128009538e-06, |
|
"loss": 0.3684, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.6637183599770184, |
|
"learning_rate": 5.417595656314997e-06, |
|
"loss": 0.4825, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.592669717139386, |
|
"learning_rate": 5.402714460498318e-06, |
|
"loss": 0.3897, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.1055466691732154, |
|
"learning_rate": 5.387829673270152e-06, |
|
"loss": 0.3825, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.7506770604252968, |
|
"learning_rate": 5.3729414273731784e-06, |
|
"loss": 0.4271, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.3962443967517109, |
|
"learning_rate": 5.358049855580917e-06, |
|
"loss": 0.394, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.7444969274386137, |
|
"learning_rate": 5.343155090696551e-06, |
|
"loss": 0.5035, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.6890656706103564, |
|
"learning_rate": 5.328257265551742e-06, |
|
"loss": 0.4379, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.589046069503504, |
|
"learning_rate": 5.313356513005433e-06, |
|
"loss": 0.4579, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.2484378057944006, |
|
"learning_rate": 5.298452965942687e-06, |
|
"loss": 0.3371, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.871499749767781, |
|
"learning_rate": 5.28354675727348e-06, |
|
"loss": 0.4392, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8492888640142464, |
|
"learning_rate": 5.268638019931524e-06, |
|
"loss": 0.4509, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.6893664867467961, |
|
"learning_rate": 5.25372688687309e-06, |
|
"loss": 0.4249, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.138165869330146, |
|
"learning_rate": 5.2388134910758015e-06, |
|
"loss": 0.5334, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7083901767536924, |
|
"learning_rate": 5.223897965537469e-06, |
|
"loss": 0.3961, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.590896869491855, |
|
"learning_rate": 5.208980443274899e-06, |
|
"loss": 0.395, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.5600514572209423, |
|
"learning_rate": 5.1940610573226955e-06, |
|
"loss": 0.3988, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7810548008575084, |
|
"learning_rate": 5.179139940732091e-06, |
|
"loss": 0.4167, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.7561585348208417, |
|
"learning_rate": 5.1642172265697475e-06, |
|
"loss": 0.4578, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.5113933802960846, |
|
"learning_rate": 5.149293047916576e-06, |
|
"loss": 0.3575, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.4740769283673236, |
|
"learning_rate": 5.134367537866546e-06, |
|
"loss": 0.3786, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.7964337445061287, |
|
"learning_rate": 5.119440829525504e-06, |
|
"loss": 0.436, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.474822981146484, |
|
"learning_rate": 5.104513056009978e-06, |
|
"loss": 0.3078, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.0652463208640217, |
|
"learning_rate": 5.089584350446001e-06, |
|
"loss": 0.3805, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.061096697608077, |
|
"learning_rate": 5.074654845967912e-06, |
|
"loss": 0.4306, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.4104876880344734, |
|
"learning_rate": 5.059724675717177e-06, |
|
"loss": 0.366, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.5017780767437552, |
|
"learning_rate": 5.044793972841203e-06, |
|
"loss": 0.3364, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.5589038243980964, |
|
"learning_rate": 5.029862870492142e-06, |
|
"loss": 0.4908, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8861893390087554, |
|
"learning_rate": 5.0149315018257104e-06, |
|
"loss": 0.5019, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.5951533316204654, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3338, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7630862605909332, |
|
"learning_rate": 4.98506849817429e-06, |
|
"loss": 0.4317, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.1599141486049276, |
|
"learning_rate": 4.97013712950786e-06, |
|
"loss": 0.5757, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.5004945405458627, |
|
"learning_rate": 4.955206027158798e-06, |
|
"loss": 0.4114, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.524002260752503, |
|
"learning_rate": 4.940275324282824e-06, |
|
"loss": 0.4123, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7188355830824718, |
|
"learning_rate": 4.925345154032092e-06, |
|
"loss": 0.4532, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.769161229661791, |
|
"learning_rate": 4.910415649554001e-06, |
|
"loss": 0.4467, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7121837803660678, |
|
"learning_rate": 4.895486943990023e-06, |
|
"loss": 0.4278, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.3657353013619622, |
|
"learning_rate": 4.8805591704745e-06, |
|
"loss": 0.2821, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7857169814598675, |
|
"learning_rate": 4.865632462133456e-06, |
|
"loss": 0.4362, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7123413107341325, |
|
"learning_rate": 4.850706952083426e-06, |
|
"loss": 0.4296, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.66979921252736, |
|
"learning_rate": 4.835782773430255e-06, |
|
"loss": 0.3368, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.7608078910887999, |
|
"learning_rate": 4.8208600592679105e-06, |
|
"loss": 0.4087, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.8553210769178123, |
|
"learning_rate": 4.805938942677305e-06, |
|
"loss": 0.4332, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.7077102226161969, |
|
"learning_rate": 4.791019556725104e-06, |
|
"loss": 0.4783, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.567173583407488, |
|
"learning_rate": 4.776102034462533e-06, |
|
"loss": 0.4127, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.71170113748187, |
|
"learning_rate": 4.7611865089242e-06, |
|
"loss": 0.3824, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.7817717043669978, |
|
"learning_rate": 4.746273113126912e-06, |
|
"loss": 0.3953, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.6662767108236813, |
|
"learning_rate": 4.7313619800684765e-06, |
|
"loss": 0.4033, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.8754863396071395, |
|
"learning_rate": 4.7164532427265206e-06, |
|
"loss": 0.4289, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.5528783522233447, |
|
"learning_rate": 4.701547034057314e-06, |
|
"loss": 0.392, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.4926378728821097, |
|
"learning_rate": 4.686643486994568e-06, |
|
"loss": 0.4548, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.7303457881353712, |
|
"learning_rate": 4.67174273444826e-06, |
|
"loss": 0.3818, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.6238622098504738, |
|
"learning_rate": 4.656844909303449e-06, |
|
"loss": 0.4397, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.4101186566517188, |
|
"learning_rate": 4.641950144419085e-06, |
|
"loss": 0.2988, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.1784078440486314, |
|
"learning_rate": 4.627058572626823e-06, |
|
"loss": 0.3701, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.7501312534120357, |
|
"learning_rate": 4.612170326729849e-06, |
|
"loss": 0.4129, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.48698910341266, |
|
"learning_rate": 4.597285539501684e-06, |
|
"loss": 0.4013, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.6417757683273082, |
|
"learning_rate": 4.5824043436850054e-06, |
|
"loss": 0.3804, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.5281429537537168, |
|
"learning_rate": 4.567526871990462e-06, |
|
"loss": 0.3938, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.5988936524719048, |
|
"learning_rate": 4.552653257095495e-06, |
|
"loss": 0.368, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.258896440894041, |
|
"learning_rate": 4.537783631643143e-06, |
|
"loss": 0.5043, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.7919794024010391, |
|
"learning_rate": 4.522918128240871e-06, |
|
"loss": 0.3857, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.873037865036066, |
|
"learning_rate": 4.508056879459387e-06, |
|
"loss": 0.3815, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.3923805891179883, |
|
"learning_rate": 4.493200017831448e-06, |
|
"loss": 0.3562, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.7449443105770417, |
|
"learning_rate": 4.478347675850692e-06, |
|
"loss": 0.3864, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.5506965557164598, |
|
"learning_rate": 4.463499985970455e-06, |
|
"loss": 0.3961, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.7471922729491982, |
|
"learning_rate": 4.448657080602573e-06, |
|
"loss": 0.4226, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.3267013207814808, |
|
"learning_rate": 4.433819092116223e-06, |
|
"loss": 0.3554, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8265104161059438, |
|
"learning_rate": 4.418986152836735e-06, |
|
"loss": 0.3621, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.6099837359624083, |
|
"learning_rate": 4.404158395044401e-06, |
|
"loss": 0.3619, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.6571840549052268, |
|
"learning_rate": 4.38933595097331e-06, |
|
"loss": 0.4452, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.951655679068924, |
|
"learning_rate": 4.374518952810167e-06, |
|
"loss": 0.4506, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.0878776186312247, |
|
"learning_rate": 4.359707532693102e-06, |
|
"loss": 0.4417, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.082656505070747, |
|
"learning_rate": 4.344901822710505e-06, |
|
"loss": 0.5075, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9874784900014248, |
|
"learning_rate": 4.330101954899847e-06, |
|
"loss": 0.4667, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.414272287045042, |
|
"learning_rate": 4.3153080612464835e-06, |
|
"loss": 0.3666, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.1885859077717607, |
|
"learning_rate": 4.300520273682511e-06, |
|
"loss": 0.4392, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.0519927072192363, |
|
"learning_rate": 4.28573872408556e-06, |
|
"loss": 0.3745, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.956594925966504, |
|
"learning_rate": 4.270963544277633e-06, |
|
"loss": 0.3878, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.7636709472530088, |
|
"learning_rate": 4.256194866023929e-06, |
|
"loss": 0.4661, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9901540818140182, |
|
"learning_rate": 4.241432821031665e-06, |
|
"loss": 0.3897, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.764906598642035, |
|
"learning_rate": 4.226677540948902e-06, |
|
"loss": 0.3544, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.7932823295954536, |
|
"learning_rate": 4.211929157363372e-06, |
|
"loss": 0.4192, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.4835930955379155, |
|
"learning_rate": 4.197187801801301e-06, |
|
"loss": 0.3727, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.7336104464617754, |
|
"learning_rate": 4.182453605726246e-06, |
|
"loss": 0.4964, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8329880443284419, |
|
"learning_rate": 4.167726700537909e-06, |
|
"loss": 0.3721, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.013138218420455, |
|
"learning_rate": 4.153007217570973e-06, |
|
"loss": 0.4649, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8427860818655313, |
|
"learning_rate": 4.138295288093935e-06, |
|
"loss": 0.374, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.6825250019411562, |
|
"learning_rate": 4.123591043307918e-06, |
|
"loss": 0.4285, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.4196800310040327, |
|
"learning_rate": 4.1088946143455275e-06, |
|
"loss": 0.4058, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.9422054561550897, |
|
"learning_rate": 4.094206132269658e-06, |
|
"loss": 0.388, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.7459635361160109, |
|
"learning_rate": 4.079525728072335e-06, |
|
"loss": 0.4262, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.5984050785760113, |
|
"learning_rate": 4.0648535326735464e-06, |
|
"loss": 0.3966, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.7310491470959026, |
|
"learning_rate": 4.050189676920075e-06, |
|
"loss": 0.3646, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.660090655393537, |
|
"learning_rate": 4.035534291584328e-06, |
|
"loss": 0.3155, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.6484172315966699, |
|
"learning_rate": 4.020887507363177e-06, |
|
"loss": 0.3591, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0685621080461996, |
|
"learning_rate": 4.006249454876785e-06, |
|
"loss": 0.4811, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8125872688192208, |
|
"learning_rate": 3.991620264667446e-06, |
|
"loss": 0.4088, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.9152975230896734, |
|
"learning_rate": 3.977000067198422e-06, |
|
"loss": 0.3687, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.007047221638238, |
|
"learning_rate": 3.962388992852778e-06, |
|
"loss": 0.3888, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.884062017423324, |
|
"learning_rate": 3.947787171932215e-06, |
|
"loss": 0.3866, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.813504934485305, |
|
"learning_rate": 3.933194734655916e-06, |
|
"loss": 0.4673, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.7247817556858769, |
|
"learning_rate": 3.918611811159379e-06, |
|
"loss": 0.4553, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.9109345821437327, |
|
"learning_rate": 3.904038531493257e-06, |
|
"loss": 0.4128, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.7269043143853864, |
|
"learning_rate": 3.889475025622199e-06, |
|
"loss": 0.4578, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8714889385967017, |
|
"learning_rate": 3.874921423423697e-06, |
|
"loss": 0.463, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8601128891856535, |
|
"learning_rate": 3.860377854686913e-06, |
|
"loss": 0.3878, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.588587861996327, |
|
"learning_rate": 3.845844449111535e-06, |
|
"loss": 0.3896, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.6959027640047006, |
|
"learning_rate": 3.8313213363066195e-06, |
|
"loss": 0.4031, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.0487519369918483, |
|
"learning_rate": 3.816808645789425e-06, |
|
"loss": 0.5667, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.5283004570346033, |
|
"learning_rate": 3.802306506984266e-06, |
|
"loss": 0.3764, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.5944350505584564, |
|
"learning_rate": 3.7878150492213617e-06, |
|
"loss": 0.398, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9997700960820088, |
|
"learning_rate": 3.7733344017356702e-06, |
|
"loss": 0.4948, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7499616339572772, |
|
"learning_rate": 3.758864693665748e-06, |
|
"loss": 0.3559, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.5788625250459556, |
|
"learning_rate": 3.744406054052594e-06, |
|
"loss": 0.2929, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9465951183623658, |
|
"learning_rate": 3.7299586118384967e-06, |
|
"loss": 0.4659, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.935063815996509, |
|
"learning_rate": 3.715522495865885e-06, |
|
"loss": 0.4169, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7069776267296644, |
|
"learning_rate": 3.701097834876185e-06, |
|
"loss": 0.4085, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7694847688907462, |
|
"learning_rate": 3.6866847575086626e-06, |
|
"loss": 0.3837, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.6799663024285176, |
|
"learning_rate": 3.6722833922992824e-06, |
|
"loss": 0.4804, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7949733183074061, |
|
"learning_rate": 3.6578938676795576e-06, |
|
"loss": 0.4979, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.5398405813591576, |
|
"learning_rate": 3.6435163119754134e-06, |
|
"loss": 0.3302, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.69848168155202, |
|
"learning_rate": 3.6291508534060293e-06, |
|
"loss": 0.4512, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.0085078247743695, |
|
"learning_rate": 3.614797620082703e-06, |
|
"loss": 0.4211, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8349562513355984, |
|
"learning_rate": 3.600456740007714e-06, |
|
"loss": 0.3772, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.5313049832419303, |
|
"learning_rate": 3.586128341073167e-06, |
|
"loss": 0.3375, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.601512381086004, |
|
"learning_rate": 3.5718125510598646e-06, |
|
"loss": 0.3185, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7021346399312534, |
|
"learning_rate": 3.557509497636163e-06, |
|
"loss": 0.3741, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.812220332081257, |
|
"learning_rate": 3.54321930835683e-06, |
|
"loss": 0.4198, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.9490992488522427, |
|
"learning_rate": 3.5289421106619125e-06, |
|
"loss": 0.3687, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.593260907761964, |
|
"learning_rate": 3.5146780318756025e-06, |
|
"loss": 0.4384, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.54237996978636, |
|
"learning_rate": 3.500427199205091e-06, |
|
"loss": 0.3594, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7268772903478766, |
|
"learning_rate": 3.486189739739442e-06, |
|
"loss": 0.4085, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8912939127949164, |
|
"learning_rate": 3.4719657804484607e-06, |
|
"loss": 0.467, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8970124049976202, |
|
"learning_rate": 3.4577554481815513e-06, |
|
"loss": 0.4586, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.9433147628840528, |
|
"learning_rate": 3.4435588696665977e-06, |
|
"loss": 0.3936, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7797627873045827, |
|
"learning_rate": 3.429376171508827e-06, |
|
"loss": 0.4969, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.9262430414234508, |
|
"learning_rate": 3.415207480189676e-06, |
|
"loss": 0.3561, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.2861119104303473, |
|
"learning_rate": 3.401052922065675e-06, |
|
"loss": 0.3552, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.632481046878911, |
|
"learning_rate": 3.386912623367311e-06, |
|
"loss": 0.3602, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.9372946810317673, |
|
"learning_rate": 3.3727867101979037e-06, |
|
"loss": 0.4401, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.5715805933345222, |
|
"learning_rate": 3.3586753085324863e-06, |
|
"loss": 0.348, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.2178424888350166, |
|
"learning_rate": 3.344578544216678e-06, |
|
"loss": 0.4701, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.57523250263701, |
|
"learning_rate": 3.3304965429655567e-06, |
|
"loss": 0.4093, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.4714423430336083, |
|
"learning_rate": 3.3164294303625487e-06, |
|
"loss": 0.3806, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.774836779247577, |
|
"learning_rate": 3.302377331858302e-06, |
|
"loss": 0.4084, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8798583661138206, |
|
"learning_rate": 3.2883403727695695e-06, |
|
"loss": 0.3936, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.9543516934159417, |
|
"learning_rate": 3.2743186782780855e-06, |
|
"loss": 0.3679, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.9366343183808823, |
|
"learning_rate": 3.2603123734294608e-06, |
|
"loss": 0.3919, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.9434599159853194, |
|
"learning_rate": 3.246321583132058e-06, |
|
"loss": 0.4365, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.992091467377912, |
|
"learning_rate": 3.2323464321558806e-06, |
|
"loss": 0.3731, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.6956161944817005, |
|
"learning_rate": 3.2183870451314624e-06, |
|
"loss": 0.3644, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.792655894061527, |
|
"learning_rate": 3.2044435465487545e-06, |
|
"loss": 0.432, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7565960934993627, |
|
"learning_rate": 3.190516060756009e-06, |
|
"loss": 0.3551, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.0251586478522685, |
|
"learning_rate": 3.1766047119586846e-06, |
|
"loss": 0.4069, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.4722903621029912, |
|
"learning_rate": 3.162709624218324e-06, |
|
"loss": 0.3428, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.36831955527062, |
|
"learning_rate": 3.148830921451452e-06, |
|
"loss": 0.4013, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.83177751191809, |
|
"learning_rate": 3.1349687274284803e-06, |
|
"loss": 0.3712, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.375489215381062, |
|
"learning_rate": 3.121123165772588e-06, |
|
"loss": 0.2576, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7824334878491648, |
|
"learning_rate": 3.107294359958628e-06, |
|
"loss": 0.3375, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.754892268091059, |
|
"learning_rate": 3.0934824333120273e-06, |
|
"loss": 0.3291, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8208162050718975, |
|
"learning_rate": 3.0796875090076824e-06, |
|
"loss": 0.3929, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7351771828031815, |
|
"learning_rate": 3.065909710068861e-06, |
|
"loss": 0.4085, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.6384512419654458, |
|
"learning_rate": 3.052149159366109e-06, |
|
"loss": 0.3737, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.484119659153863, |
|
"learning_rate": 3.0384059796161536e-06, |
|
"loss": 0.3597, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7353506217902939, |
|
"learning_rate": 3.024680293380804e-06, |
|
"loss": 0.3506, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.6917973155179618, |
|
"learning_rate": 3.0109722230658634e-06, |
|
"loss": 0.384, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.746371203974182, |
|
"learning_rate": 2.99728189092004e-06, |
|
"loss": 0.3238, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7133582169680055, |
|
"learning_rate": 2.983609419033845e-06, |
|
"loss": 0.3962, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.432909539517969, |
|
"learning_rate": 2.969954929338518e-06, |
|
"loss": 0.2525, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.4916482205359396, |
|
"learning_rate": 2.9563185436049323e-06, |
|
"loss": 0.3378, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8166936097543536, |
|
"learning_rate": 2.942700383442508e-06, |
|
"loss": 0.4769, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.358690049804268, |
|
"learning_rate": 2.929100570298129e-06, |
|
"loss": 0.3305, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.4731450654852187, |
|
"learning_rate": 2.915519225455065e-06, |
|
"loss": 0.3507, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.6254972480142047, |
|
"learning_rate": 2.901956470031877e-06, |
|
"loss": 0.3509, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.5825345498709213, |
|
"learning_rate": 2.888412424981353e-06, |
|
"loss": 0.4081, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.5897743608822972, |
|
"learning_rate": 2.87488721108942e-06, |
|
"loss": 0.3497, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.4379237508967013, |
|
"learning_rate": 2.8613809489740665e-06, |
|
"loss": 0.2969, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.800012440266111, |
|
"learning_rate": 2.847893759084267e-06, |
|
"loss": 0.4193, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.9042276012953723, |
|
"learning_rate": 2.8344257616989145e-06, |
|
"loss": 0.3966, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8274995596459567, |
|
"learning_rate": 2.820977076925744e-06, |
|
"loss": 0.3928, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7082205838669184, |
|
"learning_rate": 2.807547824700252e-06, |
|
"loss": 0.3248, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.142315177939501, |
|
"learning_rate": 2.7941381247846455e-06, |
|
"loss": 0.4237, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.658641685481167, |
|
"learning_rate": 2.780748096766758e-06, |
|
"loss": 0.3241, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.374221636644457, |
|
"learning_rate": 2.7673778600589862e-06, |
|
"loss": 0.3503, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.683744855360216, |
|
"learning_rate": 2.7540275338972345e-06, |
|
"loss": 0.4029, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7431637682180983, |
|
"learning_rate": 2.7406972373398443e-06, |
|
"loss": 0.3403, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.6389313240780434, |
|
"learning_rate": 2.7273870892665257e-06, |
|
"loss": 0.3558, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9951930342344457, |
|
"learning_rate": 2.7140972083773125e-06, |
|
"loss": 0.4519, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.4977291642632802, |
|
"learning_rate": 2.700827713191492e-06, |
|
"loss": 0.3244, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9719128206467786, |
|
"learning_rate": 2.6875787220465466e-06, |
|
"loss": 0.4308, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.4720384180545807, |
|
"eval_runtime": 531.3135, |
|
"eval_samples_per_second": 5.101, |
|
"eval_steps_per_second": 0.32, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9044872002249398, |
|
"learning_rate": 2.6743503530971136e-06, |
|
"loss": 0.3943, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.7774415480268881, |
|
"learning_rate": 2.6611427243139166e-06, |
|
"loss": 0.3772, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.3385371503925785, |
|
"learning_rate": 2.647955953482717e-06, |
|
"loss": 0.3347, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.5686275838880652, |
|
"learning_rate": 2.6347901582032627e-06, |
|
"loss": 0.3345, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.5884350530337958, |
|
"learning_rate": 2.6216454558882486e-06, |
|
"loss": 0.3385, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.2760933804201096, |
|
"learning_rate": 2.6085219637622545e-06, |
|
"loss": 0.4525, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8598949852430244, |
|
"learning_rate": 2.595419798860713e-06, |
|
"loss": 0.4161, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.614789920339542, |
|
"learning_rate": 2.5823390780288604e-06, |
|
"loss": 0.387, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.6825637810384086, |
|
"learning_rate": 2.5692799179206905e-06, |
|
"loss": 0.3596, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.5818504183911322, |
|
"learning_rate": 2.5562424349979198e-06, |
|
"loss": 0.3139, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.9512239910567963, |
|
"learning_rate": 2.5432267455289505e-06, |
|
"loss": 0.4087, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.4346478577174004, |
|
"learning_rate": 2.5302329655878243e-06, |
|
"loss": 0.3677, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.6197744535752765, |
|
"learning_rate": 2.5172612110532013e-06, |
|
"loss": 0.3781, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.0219491074323597, |
|
"learning_rate": 2.5043115976073166e-06, |
|
"loss": 0.497, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.7400391427698392, |
|
"learning_rate": 2.491384240734943e-06, |
|
"loss": 0.3739, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.6890186070354904, |
|
"learning_rate": 2.478479255722378e-06, |
|
"loss": 0.3927, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.5819118237338632, |
|
"learning_rate": 2.4655967576564066e-06, |
|
"loss": 0.4005, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.844066756069027, |
|
"learning_rate": 2.4527368614232683e-06, |
|
"loss": 0.4126, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.947388676610224, |
|
"learning_rate": 2.4398996817076458e-06, |
|
"loss": 0.4356, |
|
"step": 739 |
    },
    { "epoch": 0.68, "grad_norm": 1.64465771062421, "learning_rate": 2.4270853329916305e-06, "loss": 0.3574, "step": 740 },
    { "epoch": 0.68, "grad_norm": 1.7772779084636083, "learning_rate": 2.414293929553713e-06, "loss": 0.357, "step": 741 },
    { "epoch": 0.68, "grad_norm": 1.559427759832433, "learning_rate": 2.4015255854677487e-06, "loss": 0.3445, "step": 742 },
    { "epoch": 0.68, "grad_norm": 1.3649092735699666, "learning_rate": 2.388780414601959e-06, "loss": 0.3573, "step": 743 },
    { "epoch": 0.69, "grad_norm": 1.7111213233065832, "learning_rate": 2.3760585306179013e-06, "loss": 0.3266, "step": 744 },
    { "epoch": 0.69, "grad_norm": 1.6863130092798397, "learning_rate": 2.3633600469694608e-06, "loss": 0.4125, "step": 745 },
    { "epoch": 0.69, "grad_norm": 2.2514447035959724, "learning_rate": 2.3506850769018363e-06, "loss": 0.3657, "step": 746 },
    { "epoch": 0.69, "grad_norm": 1.4130571629239113, "learning_rate": 2.3380337334505374e-06, "loss": 0.3493, "step": 747 },
    { "epoch": 0.69, "grad_norm": 1.4412927726602656, "learning_rate": 2.3254061294403664e-06, "loss": 0.2888, "step": 748 },
    { "epoch": 0.69, "grad_norm": 1.6984121235842002, "learning_rate": 2.3128023774844196e-06, "loss": 0.288, "step": 749 },
    { "epoch": 0.69, "grad_norm": 1.9026809021111732, "learning_rate": 2.300222589983084e-06, "loss": 0.4988, "step": 750 },
    { "epoch": 0.69, "grad_norm": 1.7493875713221276, "learning_rate": 2.2876668791230196e-06, "loss": 0.371, "step": 751 },
    { "epoch": 0.69, "grad_norm": 2.2417503401806425, "learning_rate": 2.2751353568761814e-06, "loss": 0.4444, "step": 752 },
    { "epoch": 0.69, "grad_norm": 1.7447814422023238, "learning_rate": 2.2626281349988106e-06, "loss": 0.385, "step": 753 },
    { "epoch": 0.69, "grad_norm": 2.379646006684634, "learning_rate": 2.2501453250304282e-06, "loss": 0.2819, "step": 754 },
    { "epoch": 0.7, "grad_norm": 1.700160730403637, "learning_rate": 2.2376870382928608e-06, "loss": 0.3979, "step": 755 },
    { "epoch": 0.7, "grad_norm": 1.4562605513649785, "learning_rate": 2.2252533858892278e-06, "loss": 0.3582, "step": 756 },
    { "epoch": 0.7, "grad_norm": 1.8929935611713011, "learning_rate": 2.212844478702962e-06, "loss": 0.4928, "step": 757 },
    { "epoch": 0.7, "grad_norm": 1.8464390400413067, "learning_rate": 2.200460427396819e-06, "loss": 0.3976, "step": 758 },
    { "epoch": 0.7, "grad_norm": 1.5935828621108423, "learning_rate": 2.1881013424118925e-06, "loss": 0.318, "step": 759 },
    { "epoch": 0.7, "grad_norm": 1.641770016765562, "learning_rate": 2.175767333966618e-06, "loss": 0.3763, "step": 760 },
    { "epoch": 0.7, "grad_norm": 1.9210613798451146, "learning_rate": 2.1634585120558076e-06, "loss": 0.3928, "step": 761 },
    { "epoch": 0.7, "grad_norm": 1.7068856262802559, "learning_rate": 2.1511749864496533e-06, "loss": 0.4222, "step": 762 },
    { "epoch": 0.7, "grad_norm": 1.794733425784197, "learning_rate": 2.138916866692754e-06, "loss": 0.5005, "step": 763 },
    { "epoch": 0.7, "grad_norm": 1.6616396879522488, "learning_rate": 2.1266842621031434e-06, "loss": 0.3553, "step": 764 },
    { "epoch": 0.71, "grad_norm": 1.8785861775392818, "learning_rate": 2.1144772817713106e-06, "loss": 0.4732, "step": 765 },
    { "epoch": 0.71, "grad_norm": 1.5125823752054015, "learning_rate": 2.1022960345592226e-06, "loss": 0.3431, "step": 766 },
    { "epoch": 0.71, "grad_norm": 1.9035924379439653, "learning_rate": 2.09014062909936e-06, "loss": 0.3238, "step": 767 },
    { "epoch": 0.71, "grad_norm": 2.2467574366537604, "learning_rate": 2.0780111737937495e-06, "loss": 0.5255, "step": 768 },
    { "epoch": 0.71, "grad_norm": 1.6757524552649274, "learning_rate": 2.0659077768129897e-06, "loss": 0.3495, "step": 769 },
    { "epoch": 0.71, "grad_norm": 1.8178528858642133, "learning_rate": 2.0538305460952944e-06, "loss": 0.3654, "step": 770 },
    { "epoch": 0.71, "grad_norm": 1.9355937874790798, "learning_rate": 2.041779589345527e-06, "loss": 0.4529, "step": 771 },
    { "epoch": 0.71, "grad_norm": 1.521367156724969, "learning_rate": 2.029755014034234e-06, "loss": 0.301, "step": 772 },
    { "epoch": 0.71, "grad_norm": 1.826744504300451, "learning_rate": 2.0177569273966944e-06, "loss": 0.3665, "step": 773 },
    { "epoch": 0.71, "grad_norm": 1.7149354203694684, "learning_rate": 2.0057854364319644e-06, "loss": 0.3673, "step": 774 },
    { "epoch": 0.71, "grad_norm": 1.4674644295610002, "learning_rate": 1.9938406479019186e-06, "loss": 0.3403, "step": 775 },
    { "epoch": 0.72, "grad_norm": 1.5070539943420556, "learning_rate": 1.9819226683302934e-06, "loss": 0.3795, "step": 776 },
    { "epoch": 0.72, "grad_norm": 1.4784387385558468, "learning_rate": 1.9700316040017513e-06, "loss": 0.4026, "step": 777 },
    { "epoch": 0.72, "grad_norm": 1.5868950806618929, "learning_rate": 1.9581675609609176e-06, "loss": 0.3108, "step": 778 },
    { "epoch": 0.72, "grad_norm": 1.592305830159606, "learning_rate": 1.9463306450114417e-06, "loss": 0.3549, "step": 779 },
    { "epoch": 0.72, "grad_norm": 1.655256097625306, "learning_rate": 1.934520961715058e-06, "loss": 0.4225, "step": 780 },
    { "epoch": 0.72, "grad_norm": 1.5809519234724267, "learning_rate": 1.922738616390639e-06, "loss": 0.3112, "step": 781 },
    { "epoch": 0.72, "grad_norm": 1.9427582694211518, "learning_rate": 1.910983714113253e-06, "loss": 0.421, "step": 782 },
    { "epoch": 0.72, "grad_norm": 1.4899484568922368, "learning_rate": 1.8992563597132324e-06, "loss": 0.3075, "step": 783 },
    { "epoch": 0.72, "grad_norm": 1.6154594194570981, "learning_rate": 1.8875566577752403e-06, "loss": 0.3521, "step": 784 },
    { "epoch": 0.72, "grad_norm": 1.3640422359449944, "learning_rate": 1.8758847126373304e-06, "loss": 0.3433, "step": 785 },
    { "epoch": 0.72, "grad_norm": 1.8143589393862407, "learning_rate": 1.8642406283900238e-06, "loss": 0.3839, "step": 786 },
    { "epoch": 0.73, "grad_norm": 1.5446087202063494, "learning_rate": 1.8526245088753802e-06, "loss": 0.2688, "step": 787 },
    { "epoch": 0.73, "grad_norm": 1.6333832866166789, "learning_rate": 1.8410364576860645e-06, "loss": 0.3158, "step": 788 },
    { "epoch": 0.73, "grad_norm": 1.7966540146020227, "learning_rate": 1.8294765781644285e-06, "loss": 0.411, "step": 789 },
    { "epoch": 0.73, "grad_norm": 1.4442265911590433, "learning_rate": 1.8179449734015946e-06, "loss": 0.44, "step": 790 },
    { "epoch": 0.73, "grad_norm": 2.0035737445307986, "learning_rate": 1.8064417462365225e-06, "loss": 0.386, "step": 791 },
    { "epoch": 0.73, "grad_norm": 1.8535330938944725, "learning_rate": 1.7949669992551056e-06, "loss": 0.4503, "step": 792 },
    { "epoch": 0.73, "grad_norm": 1.4981921904993794, "learning_rate": 1.7835208347892536e-06, "loss": 0.3836, "step": 793 },
    { "epoch": 0.73, "grad_norm": 1.9171239926769839, "learning_rate": 1.7721033549159656e-06, "loss": 0.3972, "step": 794 },
    { "epoch": 0.73, "grad_norm": 1.5637043438891536, "learning_rate": 1.7607146614564418e-06, "loss": 0.3937, "step": 795 },
    { "epoch": 0.73, "grad_norm": 1.6447122243659613, "learning_rate": 1.7493548559751638e-06, "loss": 0.3466, "step": 796 },
    { "epoch": 0.73, "grad_norm": 2.0092846561255793, "learning_rate": 1.7380240397789838e-06, "loss": 0.4356, "step": 797 },
    { "epoch": 0.74, "grad_norm": 1.5957076368272978, "learning_rate": 1.7267223139162343e-06, "loss": 0.351, "step": 798 },
    { "epoch": 0.74, "grad_norm": 1.8330485102111689, "learning_rate": 1.7154497791758156e-06, "loss": 0.383, "step": 799 },
    { "epoch": 0.74, "grad_norm": 1.7558593725919067, "learning_rate": 1.704206536086301e-06, "loss": 0.4745, "step": 800 },
    { "epoch": 0.74, "grad_norm": 1.8130146446382733, "learning_rate": 1.692992684915043e-06, "loss": 0.3835, "step": 801 },
    { "epoch": 0.74, "grad_norm": 1.8881982094164256, "learning_rate": 1.681808325667278e-06, "loss": 0.4934, "step": 802 },
    { "epoch": 0.74, "grad_norm": 1.5165095033729978, "learning_rate": 1.6706535580852268e-06, "loss": 0.3863, "step": 803 },
    { "epoch": 0.74, "grad_norm": 1.774357680731744, "learning_rate": 1.6595284816472196e-06, "loss": 0.3748, "step": 804 },
    { "epoch": 0.74, "grad_norm": 1.767710277007722, "learning_rate": 1.6484331955667948e-06, "loss": 0.3393, "step": 805 },
    { "epoch": 0.74, "grad_norm": 1.8448330342374666, "learning_rate": 1.6373677987918196e-06, "loss": 0.4351, "step": 806 },
    { "epoch": 0.74, "grad_norm": 1.85389177211945, "learning_rate": 1.6263323900036126e-06, "loss": 0.3646, "step": 807 },
    { "epoch": 0.74, "grad_norm": 1.4324306759275551, "learning_rate": 1.615327067616057e-06, "loss": 0.2679, "step": 808 },
    { "epoch": 0.75, "grad_norm": 1.542343302777527, "learning_rate": 1.6043519297747284e-06, "loss": 0.4538, "step": 809 },
    { "epoch": 0.75, "grad_norm": 1.4664118852248411, "learning_rate": 1.5934070743560066e-06, "loss": 0.2817, "step": 810 },
    { "epoch": 0.75, "grad_norm": 1.3646102425906645, "learning_rate": 1.5824925989662216e-06, "loss": 0.3436, "step": 811 },
    { "epoch": 0.75, "grad_norm": 1.726338790957311, "learning_rate": 1.5716086009407739e-06, "loss": 0.4175, "step": 812 },
    { "epoch": 0.75, "grad_norm": 1.7959210588836272, "learning_rate": 1.56075517734326e-06, "loss": 0.3795, "step": 813 },
    { "epoch": 0.75, "grad_norm": 1.7311804415302914, "learning_rate": 1.549932424964622e-06, "loss": 0.4281, "step": 814 },
    { "epoch": 0.75, "grad_norm": 1.8431199283255997, "learning_rate": 1.5391404403222676e-06, "loss": 0.3554, "step": 815 },
    { "epoch": 0.75, "grad_norm": 1.6029303411198748, "learning_rate": 1.528379319659221e-06, "loss": 0.399, "step": 816 },
    { "epoch": 0.75, "grad_norm": 2.041057354884482, "learning_rate": 1.517649158943263e-06, "loss": 0.3892, "step": 817 },
    { "epoch": 0.75, "grad_norm": 2.200908102889503, "learning_rate": 1.5069500538660714e-06, "loss": 0.3841, "step": 818 },
    { "epoch": 0.75, "grad_norm": 1.5822240990320229, "learning_rate": 1.4962820998423683e-06, "loss": 0.3568, "step": 819 },
    { "epoch": 0.76, "grad_norm": 2.1371703772175765, "learning_rate": 1.4856453920090742e-06, "loss": 0.4127, "step": 820 },
    { "epoch": 0.76, "grad_norm": 1.474872780748052, "learning_rate": 1.4750400252244511e-06, "loss": 0.3988, "step": 821 },
    { "epoch": 0.76, "grad_norm": 1.7498309567196768, "learning_rate": 1.4644660940672628e-06, "loss": 0.3476, "step": 822 },
    { "epoch": 0.76, "grad_norm": 1.8541907978098617, "learning_rate": 1.4539236928359319e-06, "loss": 0.434, "step": 823 },
    { "epoch": 0.76, "grad_norm": 1.942758452810042, "learning_rate": 1.4434129155476961e-06, "loss": 0.364, "step": 824 },
    { "epoch": 0.76, "grad_norm": 1.5720835349530153, "learning_rate": 1.4329338559377692e-06, "loss": 0.335, "step": 825 },
    { "epoch": 0.76, "grad_norm": 1.5728894698285647, "learning_rate": 1.4224866074585052e-06, "loss": 0.4565, "step": 826 },
    { "epoch": 0.76, "grad_norm": 1.7418556788519788, "learning_rate": 1.412071263278571e-06, "loss": 0.4165, "step": 827 },
    { "epoch": 0.76, "grad_norm": 2.304963198709215, "learning_rate": 1.4016879162821046e-06, "loss": 0.4437, "step": 828 },
    { "epoch": 0.76, "grad_norm": 1.6243364242282008, "learning_rate": 1.3913366590678967e-06, "loss": 0.465, "step": 829 },
    { "epoch": 0.76, "grad_norm": 1.95589187270623, "learning_rate": 1.381017583948563e-06, "loss": 0.4478, "step": 830 },
    { "epoch": 0.77, "grad_norm": 1.5997871441952878, "learning_rate": 1.370730782949713e-06, "loss": 0.3457, "step": 831 },
    { "epoch": 0.77, "grad_norm": 2.8336248474819126, "learning_rate": 1.3604763478091375e-06, "loss": 0.3157, "step": 832 },
    { "epoch": 0.77, "grad_norm": 1.7356602754382147, "learning_rate": 1.3502543699759918e-06, "loss": 0.3872, "step": 833 },
    { "epoch": 0.77, "grad_norm": 1.546716942025288, "learning_rate": 1.340064940609972e-06, "loss": 0.3602, "step": 834 },
    { "epoch": 0.77, "grad_norm": 1.8094889043425166, "learning_rate": 1.3299081505805088e-06, "loss": 0.3067, "step": 835 },
    { "epoch": 0.77, "grad_norm": 1.6451477176671583, "learning_rate": 1.319784090465958e-06, "loss": 0.2976, "step": 836 },
    { "epoch": 0.77, "grad_norm": 1.9811172173472407, "learning_rate": 1.3096928505527812e-06, "loss": 0.4255, "step": 837 },
    { "epoch": 0.77, "grad_norm": 2.082775157955807, "learning_rate": 1.2996345208347566e-06, "loss": 0.4362, "step": 838 },
    { "epoch": 0.77, "grad_norm": 1.8098736692974182, "learning_rate": 1.2896091910121667e-06, "loss": 0.3936, "step": 839 },
    { "epoch": 0.77, "grad_norm": 1.9547737166972166, "learning_rate": 1.2796169504910028e-06, "loss": 0.381, "step": 840 },
    { "epoch": 0.78, "grad_norm": 1.6251866261565813, "learning_rate": 1.2696578883821614e-06, "loss": 0.4156, "step": 841 },
    { "epoch": 0.78, "grad_norm": 1.8001878571293004, "learning_rate": 1.2597320935006541e-06, "loss": 0.3925, "step": 842 },
    { "epoch": 0.78, "grad_norm": 1.61611282004936, "learning_rate": 1.2498396543648196e-06, "loss": 0.3515, "step": 843 },
    { "epoch": 0.78, "grad_norm": 1.7985809369413097, "learning_rate": 1.2399806591955228e-06, "loss": 0.3292, "step": 844 },
    { "epoch": 0.78, "grad_norm": 1.5964590321964889, "learning_rate": 1.2301551959153812e-06, "loss": 0.3877, "step": 845 },
    { "epoch": 0.78, "grad_norm": 1.5734877825215414, "learning_rate": 1.2203633521479735e-06, "loss": 0.4054, "step": 846 },
    { "epoch": 0.78, "grad_norm": 1.7916360058254706, "learning_rate": 1.210605215217056e-06, "loss": 0.4528, "step": 847 },
    { "epoch": 0.78, "grad_norm": 2.033617142829962, "learning_rate": 1.2008808721457882e-06, "loss": 0.4323, "step": 848 },
    { "epoch": 0.78, "grad_norm": 1.7334991655343743, "learning_rate": 1.191190409655959e-06, "loss": 0.4516, "step": 849 },
    { "epoch": 0.78, "grad_norm": 2.0862219093692516, "learning_rate": 1.181533914167205e-06, "loss": 0.3788, "step": 850 },
    { "epoch": 0.78, "grad_norm": 1.7005854621422525, "learning_rate": 1.1719114717962476e-06, "loss": 0.3615, "step": 851 },
    { "epoch": 0.79, "grad_norm": 1.9223633799154827, "learning_rate": 1.1623231683561249e-06, "loss": 0.4191, "step": 852 },
    { "epoch": 0.79, "grad_norm": 1.5355028370021262, "learning_rate": 1.1527690893554154e-06, "loss": 0.4084, "step": 853 },
    { "epoch": 0.79, "grad_norm": 1.3409107385851025, "learning_rate": 1.143249319997491e-06, "loss": 0.3211, "step": 854 },
    { "epoch": 0.79, "grad_norm": 1.5436127305633105, "learning_rate": 1.1337639451797493e-06, "loss": 0.3983, "step": 855 },
    { "epoch": 0.79, "grad_norm": 1.5214360728755154, "learning_rate": 1.1243130494928533e-06, "loss": 0.3446, "step": 856 },
    { "epoch": 0.79, "grad_norm": 1.5727956777063972, "learning_rate": 1.1148967172199848e-06, "loss": 0.3665, "step": 857 },
    { "epoch": 0.79, "grad_norm": 1.6801572434918612, "learning_rate": 1.1055150323360852e-06, "loss": 0.3841, "step": 858 },
    { "epoch": 0.79, "grad_norm": 1.7269406372047564, "learning_rate": 1.0961680785071117e-06, "loss": 0.4971, "step": 859 },
    { "epoch": 0.79, "grad_norm": 1.661213058624383, "learning_rate": 1.0868559390892903e-06, "loss": 0.3375, "step": 860 },
    { "epoch": 0.79, "grad_norm": 1.7080284683248386, "learning_rate": 1.0775786971283725e-06, "loss": 0.351, "step": 861 },
    { "epoch": 0.79, "grad_norm": 1.4422313394158195, "learning_rate": 1.06833643535889e-06, "loss": 0.398, "step": 862 },
    { "epoch": 0.8, "grad_norm": 1.5369162182430969, "learning_rate": 1.0591292362034255e-06, "loss": 0.4384, "step": 863 },
    { "epoch": 0.8, "grad_norm": 1.4444746013578487, "learning_rate": 1.0499571817718707e-06, "loss": 0.2999, "step": 864 },
    { "epoch": 0.8, "grad_norm": 1.997456484534967, "learning_rate": 1.0408203538606948e-06, "loss": 0.3597, "step": 865 },
    { "epoch": 0.8, "grad_norm": 1.6400301763903629, "learning_rate": 1.0317188339522188e-06, "loss": 0.3436, "step": 866 },
    { "epoch": 0.8, "grad_norm": 1.6663652100357638, "learning_rate": 1.0226527032138877e-06, "loss": 0.3255, "step": 867 },
    { "epoch": 0.8, "grad_norm": 1.4660952325217926, "learning_rate": 1.0136220424975434e-06, "loss": 0.3129, "step": 868 },
    { "epoch": 0.8, "grad_norm": 2.068851578412713, "learning_rate": 1.0046269323387037e-06, "loss": 0.3851, "step": 869 },
    { "epoch": 0.8, "grad_norm": 1.4622200769565, "learning_rate": 9.956674529558518e-07, "loss": 0.3648, "step": 870 },
    { "epoch": 0.8, "grad_norm": 1.6996589952235117, "learning_rate": 9.867436842497103e-07, "loss": 0.473, "step": 871 },
    { "epoch": 0.8, "grad_norm": 1.6621520206328781, "learning_rate": 9.778557058025357e-07, "loss": 0.3517, "step": 872 },
    { "epoch": 0.8, "grad_norm": 1.3571771711435245, "learning_rate": 9.69003596877408e-07, "loss": 0.3822, "step": 873 },
    { "epoch": 0.81, "grad_norm": 1.868120941242638, "learning_rate": 9.601874364175206e-07, "loss": 0.3294, "step": 874 },
    { "epoch": 0.81, "grad_norm": 1.6696207498752886, "learning_rate": 9.514073030454763e-07, "loss": 0.3205, "step": 875 },
    { "epoch": 0.81, "grad_norm": 2.0728764939030246, "learning_rate": 9.426632750625919e-07, "loss": 0.4558, "step": 876 },
    { "epoch": 0.81, "grad_norm": 1.880333599994576, "learning_rate": 9.339554304481952e-07, "loss": 0.4021, "step": 877 },
    { "epoch": 0.81, "grad_norm": 1.9734543015644022, "learning_rate": 9.252838468589265e-07, "loss": 0.3466, "step": 878 },
    { "epoch": 0.81, "grad_norm": 1.3057676108168481, "learning_rate": 9.166486016280562e-07, "loss": 0.3686, "step": 879 },
    { "epoch": 0.81, "grad_norm": 1.6983493502731448, "learning_rate": 9.080497717647841e-07, "loss": 0.4109, "step": 880 },
    { "epoch": 0.81, "grad_norm": 1.6293001419023172, "learning_rate": 8.994874339535569e-07, "loss": 0.288, "step": 881 },
    { "epoch": 0.81, "grad_norm": 1.4963241842091834, "learning_rate": 8.909616645533886e-07, "loss": 0.2871, "step": 882 },
    { "epoch": 0.81, "grad_norm": 1.4822389606951585, "learning_rate": 8.824725395971745e-07, "loss": 0.3439, "step": 883 },
    { "epoch": 0.81, "grad_norm": 1.7177199805665169, "learning_rate": 8.740201347910133e-07, "loss": 0.3232, "step": 884 },
    { "epoch": 0.82, "grad_norm": 1.5014572884433388, "learning_rate": 8.656045255135314e-07, "loss": 0.3401, "step": 885 },
    { "epoch": 0.82, "grad_norm": 1.5310901313894647, "learning_rate": 8.572257868152173e-07, "loss": 0.3108, "step": 886 },
    { "epoch": 0.82, "grad_norm": 1.8246210773553824, "learning_rate": 8.488839934177424e-07, "loss": 0.3941, "step": 887 },
    { "epoch": 0.82, "grad_norm": 2.074647338585339, "learning_rate": 8.405792197133023e-07, "loss": 0.3768, "step": 888 },
    { "epoch": 0.82, "grad_norm": 1.6233824464176518, "learning_rate": 8.323115397639514e-07, "loss": 0.3695, "step": 889 },
    { "epoch": 0.82, "grad_norm": 1.9626326451926888, "learning_rate": 8.240810273009381e-07, "loss": 0.3497, "step": 890 },
    { "epoch": 0.82, "grad_norm": 1.72705310438082, "learning_rate": 8.15887755724053e-07, "loss": 0.4354, "step": 891 },
    { "epoch": 0.82, "grad_norm": 2.2193068673729988, "learning_rate": 8.077317981009731e-07, "loss": 0.4535, "step": 892 },
    { "epoch": 0.82, "grad_norm": 1.412893314116447, "learning_rate": 7.996132271666062e-07, "loss": 0.3713, "step": 893 },
    { "epoch": 0.82, "grad_norm": 1.439263478672628, "learning_rate": 7.915321153224487e-07, "loss": 0.2963, "step": 894 },
    { "epoch": 0.82, "grad_norm": 1.742179158576329, "learning_rate": 7.834885346359361e-07, "loss": 0.3215, "step": 895 },
    { "epoch": 0.83, "grad_norm": 1.5193201275472255, "learning_rate": 7.754825568397955e-07, "loss": 0.3238, "step": 896 },
    { "epoch": 0.83, "grad_norm": 1.9407844197719575, "learning_rate": 7.675142533314173e-07, "loss": 0.2893, "step": 897 },
    { "epoch": 0.83, "grad_norm": 1.7124951515190545, "learning_rate": 7.595836951722107e-07, "loss": 0.3607, "step": 898 },
    { "epoch": 0.83, "grad_norm": 1.6206596440963452, "learning_rate": 7.516909530869687e-07, "loss": 0.3338, "step": 899 },
    { "epoch": 0.83, "grad_norm": 1.8379118207091338, "learning_rate": 7.438360974632442e-07, "loss": 0.4015, "step": 900 },
    { "epoch": 0.83, "grad_norm": 2.0108442161255153, "learning_rate": 7.360191983507154e-07, "loss": 0.3681, "step": 901 },
    { "epoch": 0.83, "grad_norm": 1.5768823402693721, "learning_rate": 7.282403254605636e-07, "loss": 0.318, "step": 902 },
    { "epoch": 0.83, "grad_norm": 1.8501393420883374, "learning_rate": 7.204995481648547e-07, "loss": 0.3474, "step": 903 },
    { "epoch": 0.83, "grad_norm": 1.8000174106515199, "learning_rate": 7.127969354959164e-07, "loss": 0.3711, "step": 904 },
    { "epoch": 0.83, "grad_norm": 1.627552766175395, "learning_rate": 7.051325561457217e-07, "loss": 0.3521, "step": 905 },
    { "epoch": 0.84, "grad_norm": 1.7703400679821946, "learning_rate": 6.975064784652829e-07, "loss": 0.2869, "step": 906 },
    { "epoch": 0.84, "grad_norm": 1.9555640812703932, "learning_rate": 6.899187704640326e-07, "loss": 0.4446, "step": 907 },
    { "epoch": 0.84, "grad_norm": 2.3485869295221904, "learning_rate": 6.823694998092272e-07, "loss": 0.3514, "step": 908 },
    { "epoch": 0.84, "grad_norm": 1.6365626256545733, "learning_rate": 6.748587338253338e-07, "loss": 0.3263, "step": 909 },
    { "epoch": 0.84, "grad_norm": 1.8406884572978746, "learning_rate": 6.673865394934376e-07, "loss": 0.3831, "step": 910 },
    { "epoch": 0.84, "grad_norm": 1.8441311582841047, "learning_rate": 6.59952983450643e-07, "loss": 0.3672, "step": 911 },
    { "epoch": 0.84, "grad_norm": 1.7850511624040863, "learning_rate": 6.525581319894703e-07, "loss": 0.344, "step": 912 },
    { "epoch": 0.84, "grad_norm": 1.8426852257973754, "learning_rate": 6.452020510572799e-07, "loss": 0.3709, "step": 913 },
    { "epoch": 0.84, "grad_norm": 1.8933625260612084, "learning_rate": 6.378848062556742e-07, "loss": 0.3625, "step": 914 },
    { "epoch": 0.84, "grad_norm": 1.4364799820160647, "learning_rate": 6.306064628399111e-07, "loss": 0.3232, "step": 915 },
    { "epoch": 0.84, "grad_norm": 1.8362485371909967, "learning_rate": 6.23367085718331e-07, "loss": 0.3444, "step": 916 },
    { "epoch": 0.85, "grad_norm": 1.9245874517833783, "learning_rate": 6.161667394517684e-07, "loss": 0.3453, "step": 917 },
    { "epoch": 0.85, "grad_norm": 2.082501618335563, "learning_rate": 6.0900548825298e-07, "loss": 0.3834, "step": 918 },
    { "epoch": 0.85, "grad_norm": 1.6265615291277613, "learning_rate": 6.018833959860753e-07, "loss": 0.3871, "step": 919 },
    { "epoch": 0.85, "grad_norm": 1.5527250089612845, "learning_rate": 5.948005261659434e-07, "loss": 0.2978, "step": 920 },
    { "epoch": 0.85, "grad_norm": 1.7448719082955784, "learning_rate": 5.877569419576851e-07, "loss": 0.2905, "step": 921 },
    { "epoch": 0.85, "grad_norm": 1.5352526453200945, "learning_rate": 5.807527061760543e-07, "loss": 0.3547, "step": 922 },
    { "epoch": 0.85, "grad_norm": 2.010400248545671, "learning_rate": 5.73787881284893e-07, "loss": 0.3434, "step": 923 },
    { "epoch": 0.85, "grad_norm": 2.08981264322849, "learning_rate": 5.668625293965774e-07, "loss": 0.3871, "step": 924 },
    { "epoch": 0.85, "grad_norm": 1.544640538751419, "learning_rate": 5.599767122714628e-07, "loss": 0.3843, "step": 925 },
    { "epoch": 0.85, "grad_norm": 1.50324978941004, "learning_rate": 5.531304913173357e-07, "loss": 0.3062, "step": 926 },
    { "epoch": 0.85, "grad_norm": 1.7101075222047506, "learning_rate": 5.463239275888599e-07, "loss": 0.3505, "step": 927 },
    { "epoch": 0.86, "grad_norm": 2.0135553262312933, "learning_rate": 5.395570817870361e-07, "loss": 0.4003, "step": 928 },
    { "epoch": 0.86, "grad_norm": 1.64919292631168, "learning_rate": 5.328300142586628e-07, "loss": 0.4257, "step": 929 },
    { "epoch": 0.86, "grad_norm": 1.497483142992759, "learning_rate": 5.261427849957928e-07, "loss": 0.3368, "step": 930 },
    { "epoch": 0.86, "grad_norm": 1.578537761793323, "learning_rate": 5.194954536352021e-07, "loss": 0.3491, "step": 931 },
    { "epoch": 0.86, "grad_norm": 2.144696531083044, "learning_rate": 5.128880794578573e-07, "loss": 0.4249, "step": 932 },
    { "epoch": 0.86, "grad_norm": 2.216467711281768, "learning_rate": 5.063207213883858e-07, "loss": 0.4191, "step": 933 },
    { "epoch": 0.86, "grad_norm": 1.513179703987502, "learning_rate": 4.99793437994549e-07, "loss": 0.2815, "step": 934 },
    { "epoch": 0.86, "grad_norm": 2.0210242568879364, "learning_rate": 4.933062874867267e-07, "loss": 0.5077, "step": 935 },
    { "epoch": 0.86, "grad_norm": 1.5808310904304832, "learning_rate": 4.868593277173878e-07, "loss": 0.3387, "step": 936 },
    { "epoch": 0.86, "grad_norm": 1.9352852569545673, "learning_rate": 4.804526161805834e-07, "loss": 0.4165, "step": 937 },
    { "epoch": 0.86, "grad_norm": 1.4953454574921947, "learning_rate": 4.7408621001143076e-07, "loss": 0.3195, "step": 938 },
    { "epoch": 0.87, "grad_norm": 1.7650338603997144, "learning_rate": 4.6776016598560125e-07, "loss": 0.2577, "step": 939 },
    { "epoch": 0.87, "grad_norm": 1.5257201063214694, "learning_rate": 4.614745405188159e-07, "loss": 0.4102, "step": 940 },
    { "epoch": 0.87, "grad_norm": 1.5975131026855944, "learning_rate": 4.5522938966634514e-07, "loss": 0.2455, "step": 941 },
    { "epoch": 0.87, "grad_norm": 1.7214214108104902, "learning_rate": 4.490247691225058e-07, "loss": 0.3679, "step": 942 },
    { "epoch": 0.87, "grad_norm": 1.9125664855083362, "learning_rate": 4.428607342201635e-07, "loss": 0.4053, "step": 943 },
    { "epoch": 0.87, "grad_norm": 2.23497833527835, "learning_rate": 4.3673733993024103e-07, "loss": 0.4223, "step": 944 },
    { "epoch": 0.87, "grad_norm": 1.7278794126523196, "learning_rate": 4.3065464086123055e-07, "loss": 0.3194, "step": 945 },
    { "epoch": 0.87, "grad_norm": 1.3338372611487148, "learning_rate": 4.2461269125869896e-07, "loss": 0.3989, "step": 946 },
    { "epoch": 0.87, "grad_norm": 1.5824737042344048, "learning_rate": 4.1861154500481285e-07, "loss": 0.3773, "step": 947 },
    { "epoch": 0.87, "grad_norm": 1.5501936663824547, "learning_rate": 4.1265125561785466e-07, "loss": 0.3073, "step": 948 },
    { "epoch": 0.87, "grad_norm": 1.6744791716497447, "learning_rate": 4.067318762517419e-07, "loss": 0.3962, "step": 949 },
    { "epoch": 0.88, "grad_norm": 1.6176701058731642, "learning_rate": 4.0085345969555656e-07, "loss": 0.2961, "step": 950 },
    { "epoch": 0.88, "grad_norm": 1.668829643612793, "learning_rate": 3.9501605837307613e-07, "loss": 0.3983, "step": 951 },
    { "epoch": 0.88, "grad_norm": 1.6415929342466784, "learning_rate": 3.8921972434230183e-07, "loss": 0.3354, "step": 952 },
    { "epoch": 0.88, "grad_norm": 1.9107002582221606, "learning_rate": 3.8346450929499735e-07, "loss": 0.2923, "step": 953 },
    { "epoch": 0.88, "grad_norm": 1.5837382493413166, "learning_rate": 3.7775046455622853e-07, "loss": 0.3572, "step": 954 },
    { "epoch": 0.88, "grad_norm": 1.9578171945494112, "learning_rate": 3.7207764108389835e-07, "loss": 0.4438, "step": 955 },
    { "epoch": 0.88, "grad_norm": 2.012312836551855, "learning_rate": 3.664460894683036e-07, "loss": 0.3921, "step": 956 },
    { "epoch": 0.88, "grad_norm": 1.573634527392608, "learning_rate": 3.6085585993167804e-07, "loss": 0.3537, "step": 957 },
    { "epoch": 0.88, "grad_norm": 1.87219129280588, "learning_rate": 3.553070023277405e-07, "loss": 0.394, "step": 958 },
    { "epoch": 0.88, "grad_norm": 1.908976251642539, "learning_rate": 3.4979956614125956e-07, "loss": 0.3465, "step": 959 },
    { "epoch": 0.88, "grad_norm": 1.7447296830368977, "learning_rate": 3.4433360048760357e-07, "loss": 0.4396, "step": 960 },
    { "epoch": 0.89, "grad_norm": 1.4938127648980004, "learning_rate": 3.389091541123074e-07, "loss": 0.3833, "step": 961 },
    { "epoch": 0.89, "grad_norm": 1.6511435214606442, "learning_rate": 3.3352627539063707e-07, "loss": 0.2598, "step": 962 },
    { "epoch": 0.89, "grad_norm": 1.9044840155234906, "learning_rate": 3.2818501232715794e-07, "loss": 0.4915, "step": 963 },
    { "epoch": 0.89, "grad_norm": 1.6336211800573899, "learning_rate": 3.2288541255530546e-07, "loss": 0.3413, "step": 964 },
    { "epoch": 0.89, "grad_norm": 1.352176707831654, "learning_rate": 3.1762752333696303e-07, "loss": 0.368, "step": 965 },
    { "epoch": 0.89, "grad_norm": 1.515583060896109, "learning_rate": 3.124113915620375e-07, "loss": 0.3465, "step": 966 },
    { "epoch": 0.89, "grad_norm": 1.454044534368895, "learning_rate": 3.072370637480415e-07, "loss": 0.2625, "step": 967 },
    { "epoch": 0.89, "grad_norm": 1.767987541706428, "learning_rate": 3.0210458603968264e-07, "loss": 0.3992, "step": 968 },
    { "epoch": 0.89, "grad_norm": 1.7273964826434631, "learning_rate": 2.970140042084474e-07, "loss": 0.3708, "step": 969 },
    { "epoch": 0.89, "grad_norm": 1.647723427115109, "learning_rate": 2.919653636521935e-07, "loss": 0.3525, "step": 970 },
    { "epoch": 0.89, "grad_norm": 1.8493344960971922, "learning_rate": 2.8695870939474626e-07, "loss": 0.3229, "step": 971 },
    { "epoch": 0.9, "grad_norm": 1.794125649715343, "learning_rate": 2.8199408608549696e-07, "loss": 0.3222, "step": 972 },
    { "epoch": 0.9, "grad_norm": 1.9721188133776415, "learning_rate": 2.7707153799900685e-07, "loss": 0.3836, "step": 973 },
    { "epoch": 0.9, "grad_norm": 1.6417411860470836, "learning_rate": 2.721911090346052e-07, "loss": 0.3736, "step": 974 },
    { "epoch": 0.9, "grad_norm": 1.938690974748052, "learning_rate": 2.673528427160066e-07, "loss": 0.3205, "step": 975 },
    { "epoch": 0.9, "grad_norm": 1.7687245607064443, "learning_rate": 2.6255678219091754e-07, "loss": 0.368, "step": 976 },
    { "epoch": 0.9, "grad_norm": 1.9023761904635534, "learning_rate": 2.578029702306506e-07, "loss": 0.3177, "step": 977 },
    { "epoch": 0.9, "grad_norm": 1.4839406363342937, "learning_rate": 2.530914492297487e-07, "loss": 0.3611, "step": 978 },
    { "epoch": 0.9, "grad_norm": 2.3385861771964014, "learning_rate": 2.484222612056025e-07, "loss": 0.3316, "step": 979 },
    { "epoch": 0.9, "grad_norm": 1.9746739789044454, "learning_rate": 2.437954477980753e-07, "loss": 0.3969, "step": 980 },
    { "epoch": 0.9, "grad_norm": 1.6887922769065924, "learning_rate": 2.392110502691353e-07, "loss": 0.3968, "step": 981 },
    { "epoch": 0.91, "grad_norm": 2.175353517627243, "learning_rate": 2.3466910950248334e-07, "loss": 0.3524, "step": 982 },
    { "epoch": 0.91, "grad_norm": 1.9590405768224393, "learning_rate": 2.3016966600319158e-07, "loss": 0.4014, "step": 983 },
    { "epoch": 0.91, "grad_norm": 1.7539702537980744, "learning_rate": 2.2571275989734075e-07, "loss": 0.3558, "step": 984 },
    { "epoch": 0.91, "grad_norm": 1.8410012610197661, "learning_rate": 2.212984309316646e-07, "loss": 0.4289, "step": 985 },
    { "epoch": 0.91, "grad_norm": 1.74770502941488, "learning_rate": 2.1692671847319046e-07, "loss": 0.4004, "step": 986 },
    { "epoch": 0.91, "grad_norm": 1.444442338435704, "learning_rate": 2.125976615088926e-07, "loss": 0.3453, "step": 987 },
    { "epoch": 0.91, "grad_norm": 1.8660604720500005, "learning_rate": 2.083112986453445e-07, "loss": 0.3741, "step": 988 },
    { "epoch": 0.91, "grad_norm": 1.6568713310219376, "learning_rate": 2.040676681083703e-07, "loss": 0.4042, "step": 989 },
    { "epoch": 0.91, "grad_norm": 1.387457724388811, "learning_rate": 1.998668077427096e-07, "loss": 0.2919, "step": 990 },
    { "epoch": 0.91, "grad_norm": 1.3853710037974833, "learning_rate": 1.9570875501167651e-07, "loss": 0.2827, "step": 991 },
    { "epoch": 0.91, "grad_norm": 1.5185303113021482, "learning_rate": 1.9159354699682496e-07, "loss": 0.3902, "step": 992 },
    { "epoch": 0.92, "grad_norm": 1.9015197180656451, "learning_rate": 1.8752122039762011e-07, "loss": 0.3062, "step": 993 },
    { "epoch": 0.92, "grad_norm": 2.281701595570607, "learning_rate": 1.8349181153111074e-07, "loss": 0.4513, "step": 994 },
    { "epoch": 0.92, "grad_norm": 1.5802871368090137, "learning_rate": 1.7950535633160404e-07, "loss": 0.3702, "step": 995 },
    { "epoch": 0.92, "grad_norm": 1.5604086315052326, "learning_rate": 1.7556189035034642e-07, "loss": 0.3407, "step": 996 },
    { "epoch": 0.92, "grad_norm": 1.4540871479317765, "learning_rate": 1.7166144875520762e-07, "loss": 0.2945, "step": 997 },
    { "epoch": 0.92, "grad_norm": 1.6711052975721497, "learning_rate": 1.6780406633036094e-07, "loss": 0.3555, "step": 998 },
    { "epoch": 0.92, "grad_norm": 1.4918015703526908, "learning_rate": 1.6398977747598245e-07, "loss": 0.3745, "step": 999 },
    { "epoch": 0.92, "grad_norm": 1.523355764764963, "learning_rate": 1.6021861620793666e-07, "loss": 0.2958, "step": 1000 },
    { "epoch": 0.92, "grad_norm": 1.933124025444002, "learning_rate": 1.564906161574764e-07, "loss": 0.3181, "step": 1001 },
    { "epoch": 0.92, "grad_norm": 1.5606282051722882, "learning_rate": 1.5280581057094346e-07, "loss": 0.2937, "step": 1002 },
    { "epoch": 0.92, "grad_norm": 1.9267494105876177, "learning_rate": 1.4916423230946885e-07, "loss": 0.4209, "step": 1003 },
    { "epoch": 0.93, "grad_norm": 1.547866759100206, "learning_rate": 1.4556591384868368e-07, "loss": 0.3583, "step": 1004 },
    { "epoch": 0.93, "grad_norm": 2.162739247908966, "learning_rate": 1.4201088727842648e-07, "loss": 0.4472, "step": 1005 },
    { "epoch": 0.93, "grad_norm": 2.074387460144302, "learning_rate": 1.3849918430245911e-07, "loss": 0.4204, "step": 1006 },
    { "epoch": 0.93, "grad_norm": 1.75405906319246, "learning_rate": 1.3503083623818413e-07, "loss": 0.3286, "step": 1007 },
    { "epoch": 0.93, "grad_norm": 2.208827466327546, "learning_rate": 1.316058740163617e-07, "loss": 0.4257, "step": 1008 },
    { "epoch": 0.93, "grad_norm": 1.9131618964245363, "learning_rate": 1.282243281808393e-07, "loss": 0.3653, "step": 1009 },
    { "epoch": 0.93, "grad_norm": 1.6704670693024548, "learning_rate": 1.2488622888827517e-07, "loss": 0.4076, "step": 1010 },
    { "epoch": 0.93, "grad_norm": 1.7005183654466214, "learning_rate": 1.2159160590787145e-07, "loss": 0.4183, "step": 1011 },
    { "epoch": 0.93, "grad_norm": 1.7590126284604837, "learning_rate": 1.1834048862110815e-07, "loss": 0.3497, "step": 1012 },
    { "epoch": 0.93, "grad_norm": 1.8495558159166359, "learning_rate": 1.1513290602148175e-07, "loss": 0.3989, "step": 1013 },
    { "epoch": 0.93, "grad_norm": 1.5491493514285797, "learning_rate": 1.1196888671424378e-07, "loss": 0.4034, "step": 1014 },
    { "epoch": 0.94, "grad_norm": 1.696992836044335, "learning_rate": 1.0884845891614926e-07, "loss": 0.3906, "step": 1015 },
    { "epoch": 0.94, "grad_norm": 1.6104580839191522, "learning_rate": 1.0577165045520532e-07, "loss": 0.343, "step": 1016 },
    { "epoch": 0.94, "grad_norm": 1.7381991868528064, "learning_rate": 1.0273848877041804e-07, "loss": 0.4373, "step": 1017 },
    { "epoch": 0.94, "grad_norm": 2.0710854401837224, "learning_rate": 9.974900091155425e-08, "loss": 0.406, "step": 1018 },
    { "epoch": 0.94, "grad_norm": 2.594545766782277, "learning_rate": 9.680321353889577e-08, "loss": 0.3871, "step": 1019 },
    { "epoch": 0.94, "grad_norm": 1.9064797609695343, "learning_rate": 9.390115292300161e-08, "loss": 0.4445, "step": 1020 },
    { "epoch": 0.94, "grad_norm": 1.8137268442191112, "learning_rate": 9.10428449444778e-08, "loss": 0.3649, "step": 1021 },
    { "epoch": 0.94, "grad_norm": 1.632472777376261, "learning_rate": 8.822831509374297e-08, "loss": 0.3455, "step": 1022 },
    { "epoch": 0.94, "grad_norm": 1.7487886157086556, "learning_rate": 8.545758847080144e-08, "loss": 0.3152, "step": 1023 },
    { "epoch": 0.94, "grad_norm": 1.4612562835731007, "learning_rate": 8.273068978501997e-08, "loss": 0.3597, "step": 1024 },
    { "epoch": 0.94, "grad_norm": 1.7201161847179576, "learning_rate": 8.004764335490856e-08, "loss": 0.3958, "step": 1025 },
    { "epoch": 0.95, "grad_norm": 1.8017582089862216, "learning_rate": 7.74084731079e-08, "loss": 0.3913, "step": 1026 },
    { "epoch": 0.95, "grad_norm": 1.3234352702837793, "learning_rate": 7.481320258014124e-08, "loss": 0.3034, "step": 1027 },
    { "epoch": 0.95, "grad_norm": 1.3724778893397676, "learning_rate": 7.226185491628069e-08, "loss": 0.2954, "step": 1028 },
    { "epoch": 0.95, "grad_norm": 1.6969184913162358, "learning_rate": 6.975445286926064e-08, "loss": 0.3897, "step": 1029 },
    { "epoch": 0.95, "grad_norm": 1.9317477089546637, "learning_rate": 6.729101880011746e-08, "loss": 0.377, "step": 1030 },
    { "epoch": 0.95, "grad_norm": 1.554878777893729, "learning_rate": 6.48715746777806e-08, "loss": 0.4087, "step": 1031 },
    { "epoch": 0.95, "grad_norm": 1.7534864024515837, "learning_rate": 6.2496142078875e-08, "loss": 0.4967, "step": 1032 },
    { "epoch": 0.95, "grad_norm": 2.4804685848961765, "learning_rate": 6.016474218753287e-08, "loss": 0.4207, "step": 1033 },
    { "epoch": 0.95, "grad_norm": 1.612223174759301, "learning_rate": 5.787739579520113e-08, "loss": 0.4037, "step": 1034 },
    { "epoch": 0.95, "grad_norm": 1.4804110785714588, "learning_rate": 5.5634123300457585e-08, "loss": 0.2889, "step": 1035 },
    { "epoch": 0.95, "grad_norm": 1.8113911538397998, "learning_rate": 5.343494470882671e-08, "loss": 0.362, "step": 1036 },
    { "epoch": 0.96, "grad_norm": 1.7167779730798833, "learning_rate": 5.127987963260583e-08, "loss": 0.3482, "step": 1037 },
    { "epoch": 0.96, "grad_norm": 1.4150092955119626, "learning_rate": 4.916894729068644e-08, "loss": 0.2178, "step": 1038 },
    { "epoch": 0.96, "grad_norm": 1.633077629302677, "learning_rate": 4.7102166508383173e-08, "loss": 0.3825, "step": 1039 },
    { "epoch": 0.96, "grad_norm": 1.425048677014493, "learning_rate": 4.5079555717267855e-08, "loss": 0.3305, "step": 1040 },
    { "epoch": 0.96, "grad_norm": 1.9174655190388756, "learning_rate": 4.31011329550024e-08, "loss": 0.3396, "step": 1041 },
    { "epoch": 0.96, "grad_norm": 1.3843933529043337, "learning_rate": 4.1166915865180625e-08, "loss": 0.2631, "step": 1042 },
    { "epoch": 0.96, "grad_norm": 1.4653796486037136, "learning_rate": 3.927692169716946e-08, "loss": 0.3227, "step": 1043 },
    { "epoch": 0.96, "grad_norm": 1.635515966199329, "learning_rate": 3.743116730595575e-08, "loss": 0.3638, "step": 1044 },
    { "epoch": 0.96, "grad_norm": 2.0640494844558477, "learning_rate": 3.562966915199473e-08, "loss": 0.363, "step": 1045 },
    { "epoch": 0.96, "grad_norm": 1.5455021482175313, "learning_rate": 3.387244330106454e-08, "loss": 0.3479, "step": 1046 },
    { "epoch": 0.96, "grad_norm": 1.8431655117744339, "learning_rate": 3.2159505424122495e-08, "loss": 0.328, "step": 1047 },
    { "epoch": 0.97, "grad_norm": 1.4359979387464212, "learning_rate": 3.049087079716462e-08, "loss": 0.3124, "step": 1048 },
    { "epoch": 0.97, "grad_norm": 1.680594322782353, "learning_rate": 2.8866554301091863e-08, "loss": 0.4445, "step": 1049 },
    { "epoch": 0.97, "grad_norm": 1.6887895844276681, "learning_rate": 2.7286570421574677e-08, "loss": 0.3209, "step": 1050 },
    { "epoch": 0.97, "grad_norm": 1.7841936111238237, "learning_rate": 2.575093324892364e-08, "loss": 0.3856, "step": 1051 },
    { "epoch": 0.97, "grad_norm": 1.7816368074530717, "learning_rate": 2.42596564779668e-08, "loss": 0.4697, "step": 1052 },
    { "epoch": 0.97, "grad_norm": 1.5805844941387572, "learning_rate": 2.281275340792477e-08, "loss": 0.368, "step": 1053 },
    { "epoch": 0.97, "grad_norm": 1.637820628172062, "learning_rate": 2.141023694229305e-08, "loss": 0.3718, "step": 1054 },
    { "epoch": 0.97, "grad_norm": 1.417147920836824, "learning_rate": 2.0052119588727105e-08, "loss": 0.3546, "step": 1055 },
    { "epoch": 0.97, "grad_norm": 1.989312613003882, "learning_rate": 1.873841345893135e-08, "loss": 0.4529, "step": 1056 },
    { "epoch": 0.97, "grad_norm": 1.537277887843072, "learning_rate": 1.746913026854924e-08, "loss": 0.3583, "step": 1057 },
    { "epoch": 0.98, "grad_norm": 1.8191477288789326, "learning_rate": 1.6244281337060575e-08, "loss": 0.3171, "step": 1058 },
    { "epoch": 0.98, "grad_norm": 1.827847238198048, "learning_rate": 1.506387758768102e-08, "loss": 0.3793, "step": 1059 },
    { "epoch": 0.98, "grad_norm": 1.7778694252802711, "learning_rate": 1.3927929547261632e-08, "loss": 0.3753, "step": 1060 },
    { "epoch": 0.98, "grad_norm": 1.543634711451779, "learning_rate": 1.2836447346198932e-08, "loss": 0.338, "step": 1061 },
    { "epoch": 0.98, "grad_norm": 2.16029561176065, "learning_rate": 1.1789440718341094e-08, "loss": 0.3576, "step": 1062 },
    { "epoch": 0.98, "grad_norm": 2.2006580842983734, "learning_rate": 1.0786919000903562e-08, "loss": 0.3864, "step": 1063 },
    { "epoch": 0.98, "grad_norm": 1.7044121149329359, "learning_rate": 9.828891134385787e-09, "loss": 0.344, "step": 1064 },
    { "epoch": 0.98, "grad_norm": 1.4977033409076637, "learning_rate": 8.915365662488517e-09, "loss": 0.3146, "step": 1065 },
    { "epoch": 0.98, "grad_norm": 1.4691930868903902, "learning_rate": 8.046350732041075e-09, "loss": 0.3099, "step": 1066 },
    { "epoch": 0.98, "grad_norm": 1.5513156194536561, "learning_rate": 7.221854092926972e-09, "loss": 0.341, "step": 1067 },
    { "epoch": 0.98, "grad_norm": 1.703611443801933, "learning_rate": 6.441883098015633e-09, "loss": 0.3734, "step": 1068 },
    { "epoch": 0.99, "grad_norm": 2.1506089060675455, "learning_rate": 5.706444703096337e-09, "loss": 0.3859, "step": 1069 },
    { "epoch": 0.99, "grad_norm": 1.621345533853751, "learning_rate": 5.015545466814931e-09, "loss": 0.405, "step": 1070 },
    { "epoch": 0.99, "grad_norm": 1.8186009796771267, "learning_rate": 4.369191550617768e-09, "loss": 0.3964, "step": 1071 },
    { "epoch": 0.99, "grad_norm": 1.8157548418762393, "learning_rate": 3.7673887186961965e-09, "loss": 0.4728, "step": 1072 },
    { "epoch": 0.99, "grad_norm": 2.070715007431859, "learning_rate": 3.210142337932709e-09, "loss": 0.464, "step": 1073 },
    { "epoch": 0.99, "grad_norm": 1.6191854446760734, "learning_rate": 2.6974573778565383e-09, "loss": 0.3648, "step": 1074 },
    { "epoch": 0.99, "grad_norm": 1.63053598233112, "learning_rate": 2.229338410597026e-09, "loss": 0.4364, "step": 1075 },
    { "epoch": 0.99, "grad_norm": 1.5220833342814482, "learning_rate": 1.8057896108436557e-09, "loss": 0.4131, "step": 1076 },
    { "epoch": 0.99, "grad_norm": 1.9238625872095938, "learning_rate": 1.4268147558088585e-09, "loss": 0.4702, "step": 1077 },
    { "epoch": 0.99, "grad_norm": 1.6227133478707174, "learning_rate": 1.0924172251941533e-09, "loss": 0.3781, "step": 1078 },
    { "epoch": 0.99, "grad_norm": 1.5844245279524536, "learning_rate": 8.026000011596147e-10, "loss": 0.367, "step": 1079 },
    { "epoch": 1.0, "grad_norm": 1.8755295726944123, "learning_rate": 5.573656682977824e-10, "loss": 0.3954, "step": 1080 },
    { "epoch": 1.0, "eval_loss": 0.4678705930709839, "eval_runtime": 533.0785, "eval_samples_per_second": 5.084, "eval_steps_per_second": 0.319, "step": 1080 },
    { "epoch": 1.0, "grad_norm": 1.95867908444184, "learning_rate": 3.567164136120127e-10, "loss": 0.3425, "step": 1081 },
    { "epoch": 1.0, "grad_norm": 1.9895722241127884, "learning_rate": 2.0065402649371845e-10, "loss": 0.3204, "step": 1082 },
    { "epoch": 1.0, "grad_norm": 1.5770678955265505, "learning_rate": 8.917989870849131e-11, "loss": 0.3511, "step": 1083 },
    { "epoch": 1.0, "grad_norm": 1.7159701303303903, "learning_rate": 2.2295024383889307e-11, "loss": 0.2967, "step": 1084 },
    { "epoch": 1.0, "grad_norm": 1.9974547783910455, "learning_rate": 0.0, "loss": 0.3133, "step": 1085 },
    { "epoch": 1.0, "step": 1085, "total_flos": 2242775013654528.0, "train_loss": 0.4361060568951242, "train_runtime": 100289.4425, "train_samples_per_second": 0.346, "train_steps_per_second": 0.011 }
  ],
  "logging_steps": 1.0,
  "max_steps": 1085,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100.0,
  "total_flos": 2242775013654528.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}