{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9995792131285504, |
|
"eval_steps": 500, |
|
"global_step": 1188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001683147485798443, |
|
"grad_norm": 0.17560942471027374, |
|
"learning_rate": 0.0, |
|
"loss": 2.613, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003366294971596886, |
|
"grad_norm": 0.15861666202545166, |
|
"learning_rate": 2.7894294565112984e-06, |
|
"loss": 2.6655, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.005049442457395329, |
|
"grad_norm": 0.1817302405834198, |
|
"learning_rate": 4.421141086977404e-06, |
|
"loss": 2.55, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.006732589943193772, |
|
"grad_norm": 0.17854492366313934, |
|
"learning_rate": 5.578858913022597e-06, |
|
"loss": 2.7908, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.008415737428992216, |
|
"grad_norm": 0.17169038951396942, |
|
"learning_rate": 6.47685462377997e-06, |
|
"loss": 2.6868, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010098884914790659, |
|
"grad_norm": 0.18368647992610931, |
|
"learning_rate": 7.210570543488702e-06, |
|
"loss": 2.5874, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.011782032400589101, |
|
"grad_norm": 0.19648714363574982, |
|
"learning_rate": 7.830918514469461e-06, |
|
"loss": 2.6633, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.013465179886387544, |
|
"grad_norm": 0.18358571827411652, |
|
"learning_rate": 8.368288369533896e-06, |
|
"loss": 2.6355, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.015148327372185988, |
|
"grad_norm": 0.19153611361980438, |
|
"learning_rate": 8.842282173954808e-06, |
|
"loss": 2.6633, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.016831474857984433, |
|
"grad_norm": 0.20646820962429047, |
|
"learning_rate": 9.26628408029127e-06, |
|
"loss": 2.7268, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018514622343782875, |
|
"grad_norm": 0.18688935041427612, |
|
"learning_rate": 9.64984045981344e-06, |
|
"loss": 2.7832, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.020197769829581318, |
|
"grad_norm": 0.1985747218132019, |
|
"learning_rate": 1e-05, |
|
"loss": 2.738, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02188091731537976, |
|
"grad_norm": 0.19321100413799286, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6206, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.023564064801178203, |
|
"grad_norm": 0.1875382661819458, |
|
"learning_rate": 1e-05, |
|
"loss": 2.7153, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.025247212286976645, |
|
"grad_norm": 0.18803201615810394, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5359, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.026930359772775088, |
|
"grad_norm": 0.19693922996520996, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6082, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.028613507258573534, |
|
"grad_norm": 0.20534300804138184, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5317, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.030296654744371977, |
|
"grad_norm": 0.22174465656280518, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6067, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03197980223017042, |
|
"grad_norm": 0.1947612464427948, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6824, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.033662949715968865, |
|
"grad_norm": 0.19715926051139832, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6868, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.035346097201767304, |
|
"grad_norm": 0.19586338102817535, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6206, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03702924468756575, |
|
"grad_norm": 0.19280074536800385, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6023, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03871239217336419, |
|
"grad_norm": 0.19658198952674866, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6384, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.040395539659162635, |
|
"grad_norm": 0.17433768510818481, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5305, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.042078687144961074, |
|
"grad_norm": 0.18013380467891693, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6519, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04376183463075952, |
|
"grad_norm": 0.1933555006980896, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5591, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.045444982116557966, |
|
"grad_norm": 0.18386027216911316, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6169, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.047128129602356406, |
|
"grad_norm": 0.18173415958881378, |
|
"learning_rate": 1e-05, |
|
"loss": 2.623, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04881127708815485, |
|
"grad_norm": 0.19154761731624603, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5981, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05049442457395329, |
|
"grad_norm": 0.2001664638519287, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5066, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05217757205975174, |
|
"grad_norm": 0.15573543310165405, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6013, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.053860719545550176, |
|
"grad_norm": 0.16071979701519012, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4634, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05554386703134862, |
|
"grad_norm": 0.1769736260175705, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5491, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05722701451714707, |
|
"grad_norm": 0.17623937129974365, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4399, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05891016200294551, |
|
"grad_norm": 0.17367449402809143, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5464, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06059330948874395, |
|
"grad_norm": 0.14842955768108368, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4174, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06227645697454239, |
|
"grad_norm": 0.17405100166797638, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5303, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06395960446034084, |
|
"grad_norm": 0.145203098654747, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6428, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06564275194613928, |
|
"grad_norm": 0.1542726755142212, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5618, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06732589943193773, |
|
"grad_norm": 0.14489781856536865, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6885, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06900904691773617, |
|
"grad_norm": 0.14798486232757568, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5322, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.07069219440353461, |
|
"grad_norm": 0.15226829051971436, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6011, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07237534188933305, |
|
"grad_norm": 0.14561522006988525, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5657, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0740584893751315, |
|
"grad_norm": 0.13787826895713806, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6011, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07574163686092994, |
|
"grad_norm": 0.14005698263645172, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4673, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07742478434672838, |
|
"grad_norm": 0.13822345435619354, |
|
"learning_rate": 1e-05, |
|
"loss": 2.512, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07910793183252683, |
|
"grad_norm": 0.1284177154302597, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5625, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.08079107931832527, |
|
"grad_norm": 0.1279960423707962, |
|
"learning_rate": 1e-05, |
|
"loss": 2.46, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08247422680412371, |
|
"grad_norm": 0.12479826807975769, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5706, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.08415737428992215, |
|
"grad_norm": 0.12982836365699768, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5098, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0858405217757206, |
|
"grad_norm": 0.13269256055355072, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4688, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08752366926151904, |
|
"grad_norm": 0.11713477969169617, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6226, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.08920681674731748, |
|
"grad_norm": 0.11179152131080627, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4224, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.09088996423311593, |
|
"grad_norm": 0.12146276980638504, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4639, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.09257311171891437, |
|
"grad_norm": 0.12470445781946182, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5195, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09425625920471281, |
|
"grad_norm": 0.11872275173664093, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5186, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.09593940669051125, |
|
"grad_norm": 0.11616484075784683, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5581, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0976225541763097, |
|
"grad_norm": 0.1075875386595726, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5693, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.09930570166210814, |
|
"grad_norm": 0.10176095366477966, |
|
"learning_rate": 1e-05, |
|
"loss": 2.521, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.10098884914790658, |
|
"grad_norm": 0.1076890155673027, |
|
"learning_rate": 1e-05, |
|
"loss": 2.53, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10267199663370503, |
|
"grad_norm": 0.09105601906776428, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3733, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.10435514411950347, |
|
"grad_norm": 0.09733142703771591, |
|
"learning_rate": 1e-05, |
|
"loss": 2.416, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.10603829160530191, |
|
"grad_norm": 0.09099874645471573, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3774, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.10772143909110035, |
|
"grad_norm": 0.0884426161646843, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4136, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1094045865768988, |
|
"grad_norm": 0.08939989656209946, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4482, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11108773406269724, |
|
"grad_norm": 0.09078355878591537, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5256, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.11277088154849568, |
|
"grad_norm": 0.08570227026939392, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4954, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.11445402903429414, |
|
"grad_norm": 0.0766797736287117, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3694, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.11613717652009257, |
|
"grad_norm": 0.08015618473291397, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4724, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.11782032400589101, |
|
"grad_norm": 0.08956343680620193, |
|
"learning_rate": 1e-05, |
|
"loss": 2.47, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11950347149168945, |
|
"grad_norm": 0.08134786039590836, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4482, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1211866189774879, |
|
"grad_norm": 0.07923366874456406, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4182, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.12286976646328635, |
|
"grad_norm": 0.07909434288740158, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3711, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.12455291394908478, |
|
"grad_norm": 0.07540368288755417, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3962, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.12623606143488322, |
|
"grad_norm": 0.06906846165657043, |
|
"learning_rate": 1e-05, |
|
"loss": 2.519, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12791920892068168, |
|
"grad_norm": 0.07301697880029678, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5537, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.12960235640648013, |
|
"grad_norm": 0.07182423770427704, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4807, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.13128550389227855, |
|
"grad_norm": 0.06827539950609207, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5796, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.132968651378077, |
|
"grad_norm": 0.07280007749795914, |
|
"learning_rate": 1e-05, |
|
"loss": 2.499, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.13465179886387546, |
|
"grad_norm": 0.07410164177417755, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13633494634967389, |
|
"grad_norm": 0.07245635986328125, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4685, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.13801809383547234, |
|
"grad_norm": 0.06992876529693604, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4634, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.13970124132127076, |
|
"grad_norm": 0.07322832196950912, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4949, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.14138438880706922, |
|
"grad_norm": 0.06528163701295853, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3982, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.14306753629286767, |
|
"grad_norm": 0.06972632557153702, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4268, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1447506837786661, |
|
"grad_norm": 0.062493499368429184, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4309, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.14643383126446455, |
|
"grad_norm": 0.07086165249347687, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4373, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.148116978750263, |
|
"grad_norm": 0.06631726026535034, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4141, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.14980012623606143, |
|
"grad_norm": 0.07114582508802414, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3546, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.15148327372185988, |
|
"grad_norm": 0.06932078301906586, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4758, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15316642120765833, |
|
"grad_norm": 0.06153389438986778, |
|
"learning_rate": 1e-05, |
|
"loss": 2.481, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.15484956869345676, |
|
"grad_norm": 0.06216192990541458, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4421, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1565327161792552, |
|
"grad_norm": 0.06554314494132996, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3008, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.15821586366505366, |
|
"grad_norm": 0.06210967153310776, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2554, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1598990111508521, |
|
"grad_norm": 0.06851295381784439, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5356, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.16158215863665054, |
|
"grad_norm": 0.06121644005179405, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4299, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 0.06593657284975052, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3811, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.16494845360824742, |
|
"grad_norm": 0.06456276774406433, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3574, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.16663160109404587, |
|
"grad_norm": 0.061866894364356995, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4758, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1683147485798443, |
|
"grad_norm": 0.058500371873378754, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4133, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16999789606564275, |
|
"grad_norm": 0.06366603821516037, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3328, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.1716810435514412, |
|
"grad_norm": 0.061924271285533905, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4047, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.17336419103723963, |
|
"grad_norm": 0.057471342384815216, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4333, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.17504733852303808, |
|
"grad_norm": 0.05482906475663185, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3499, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.17673048600883653, |
|
"grad_norm": 0.056116051971912384, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4653, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.17841363349463496, |
|
"grad_norm": 0.052277661859989166, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4653, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.1800967809804334, |
|
"grad_norm": 0.06346592307090759, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3549, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.18177992846623187, |
|
"grad_norm": 0.06070290133357048, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2886, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.1834630759520303, |
|
"grad_norm": 0.055994004011154175, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4692, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.18514622343782874, |
|
"grad_norm": 0.05782800912857056, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3303, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18682937092362717, |
|
"grad_norm": 0.05491410568356514, |
|
"learning_rate": 1e-05, |
|
"loss": 2.47, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.18851251840942562, |
|
"grad_norm": 0.060252465307712555, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5464, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.19019566589522408, |
|
"grad_norm": 0.05614893510937691, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3457, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.1918788133810225, |
|
"grad_norm": 0.051146939396858215, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3918, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.19356196086682095, |
|
"grad_norm": 0.05474052205681801, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3689, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1952451083526194, |
|
"grad_norm": 0.052064936608076096, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5073, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.19692825583841783, |
|
"grad_norm": 0.06184034049510956, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4248, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.19861140332421628, |
|
"grad_norm": 0.05613533779978752, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5742, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.20029455081001474, |
|
"grad_norm": 0.05547456443309784, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3884, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.20197769829581316, |
|
"grad_norm": 0.05933033674955368, |
|
"learning_rate": 1e-05, |
|
"loss": 2.45, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.20366084578161162, |
|
"grad_norm": 0.058600571006536484, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3875, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.20534399326741007, |
|
"grad_norm": 0.0554657019674778, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3215, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2070271407532085, |
|
"grad_norm": 0.05604475364089012, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3329, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.20871028823900695, |
|
"grad_norm": 0.06094202771782875, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4177, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2103934357248054, |
|
"grad_norm": 0.05517999082803726, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3247, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21207658321060383, |
|
"grad_norm": 0.05678452178835869, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3481, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.21375973069640228, |
|
"grad_norm": 0.05295870825648308, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3694, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2154428781822007, |
|
"grad_norm": 0.05118125304579735, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4102, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.21712602566799916, |
|
"grad_norm": 0.05659961327910423, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3104, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2188091731537976, |
|
"grad_norm": 0.05049075558781624, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4949, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22049232063959603, |
|
"grad_norm": 0.05323097109794617, |
|
"learning_rate": 1e-05, |
|
"loss": 2.323, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2221754681253945, |
|
"grad_norm": 0.05309610068798065, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5203, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.22385861561119294, |
|
"grad_norm": 0.05474167317152023, |
|
"learning_rate": 1e-05, |
|
"loss": 2.408, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.22554176309699137, |
|
"grad_norm": 0.056433092802762985, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3779, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.22722491058278982, |
|
"grad_norm": 0.047424182295799255, |
|
"learning_rate": 1e-05, |
|
"loss": 2.45, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.22890805806858827, |
|
"grad_norm": 0.05422671511769295, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3397, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2305912055543867, |
|
"grad_norm": 0.05421329662203789, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3779, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.23227435304018515, |
|
"grad_norm": 0.057494040578603745, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4509, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2339575005259836, |
|
"grad_norm": 0.0516960434615612, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3647, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.23564064801178203, |
|
"grad_norm": 0.049899645149707794, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4844, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23732379549758048, |
|
"grad_norm": 0.05162065476179123, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3613, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2390069429833789, |
|
"grad_norm": 0.05812832713127136, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4548, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.24069009046917736, |
|
"grad_norm": 0.04910556599497795, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3274, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.2423732379549758, |
|
"grad_norm": 0.05346587672829628, |
|
"learning_rate": 1e-05, |
|
"loss": 2.325, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.24405638544077424, |
|
"grad_norm": 0.0495002381503582, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4131, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2457395329265727, |
|
"grad_norm": 0.05076875165104866, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3887, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.24742268041237114, |
|
"grad_norm": 0.050955574959516525, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4517, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.24910582789816957, |
|
"grad_norm": 0.05082906410098076, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3401, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.250788975383968, |
|
"grad_norm": 0.052096717059612274, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3218, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.25247212286976645, |
|
"grad_norm": 0.052378151565790176, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4246, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2541552703555649, |
|
"grad_norm": 0.04881056025624275, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3435, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.25583841784136335, |
|
"grad_norm": 0.05233067274093628, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4761, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2575215653271618, |
|
"grad_norm": 0.05231297388672829, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4065, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.25920471281296026, |
|
"grad_norm": 0.04649129509925842, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4175, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.26088786029875866, |
|
"grad_norm": 0.05354660376906395, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4731, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2625710077845571, |
|
"grad_norm": 0.05071151629090309, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4421, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.26425415527035556, |
|
"grad_norm": 0.04953297600150108, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3134, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.265937302756154, |
|
"grad_norm": 0.051142722368240356, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3335, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.26762045024195247, |
|
"grad_norm": 0.05187085270881653, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4387, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.2693035977277509, |
|
"grad_norm": 0.04968629032373428, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4905, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2709867452135493, |
|
"grad_norm": 0.053009629249572754, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4441, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.27266989269934777, |
|
"grad_norm": 0.04917874187231064, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4763, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2743530401851462, |
|
"grad_norm": 0.048884451389312744, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4248, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.2760361876709447, |
|
"grad_norm": 0.049946676939725876, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5173, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.27771933515674313, |
|
"grad_norm": 0.052534863352775574, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4558, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.2794024826425415, |
|
"grad_norm": 0.05162844434380531, |
|
"learning_rate": 1e-05, |
|
"loss": 2.405, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.28108563012834, |
|
"grad_norm": 0.049985259771347046, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3542, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.28276877761413843, |
|
"grad_norm": 0.05239354074001312, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3721, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.2844519250999369, |
|
"grad_norm": 0.05592744052410126, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2701, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.28613507258573534, |
|
"grad_norm": 0.052739113569259644, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4216, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2878182200715338, |
|
"grad_norm": 0.04806948080658913, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3884, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.2895013675573322, |
|
"grad_norm": 0.04990949481725693, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4419, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.29118451504313064, |
|
"grad_norm": 0.050067439675331116, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4331, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.2928676625289291, |
|
"grad_norm": 0.0507354810833931, |
|
"learning_rate": 1e-05, |
|
"loss": 2.406, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.29455081001472755, |
|
"grad_norm": 0.0538686104118824, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4182, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.296233957500526, |
|
"grad_norm": 0.05205219238996506, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3401, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.2979171049863244, |
|
"grad_norm": 0.04672086611390114, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3149, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.29960025247212285, |
|
"grad_norm": 0.051963068544864655, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2537, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3012833999579213, |
|
"grad_norm": 0.053639005869627, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4353, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.30296654744371976, |
|
"grad_norm": 0.05326982960104942, |
|
"learning_rate": 1e-05, |
|
"loss": 2.334, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3046496949295182, |
|
"grad_norm": 0.05361334979534149, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4224, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.30633284241531666, |
|
"grad_norm": 0.05790587514638901, |
|
"learning_rate": 1e-05, |
|
"loss": 2.334, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.30801598990111506, |
|
"grad_norm": 0.04790763929486275, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5073, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3096991373869135, |
|
"grad_norm": 0.054103124886751175, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3483, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.31138228487271197, |
|
"grad_norm": 0.05902162939310074, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3301, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3130654323585104, |
|
"grad_norm": 0.04853544384241104, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5566, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3147485798443089, |
|
"grad_norm": 0.055288165807724, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2903, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3164317273301073, |
|
"grad_norm": 0.05180734023451805, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4285, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3181148748159057, |
|
"grad_norm": 0.04889997839927673, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2542, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.3197980223017042, |
|
"grad_norm": 0.051011502742767334, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2893, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.32148116978750263, |
|
"grad_norm": 0.04864371567964554, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5225, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.3231643172733011, |
|
"grad_norm": 0.05374041944742203, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4504, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.32484746475909954, |
|
"grad_norm": 0.05158041790127754, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4683, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 0.05630083382129669, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2415, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3282137597306964, |
|
"grad_norm": 0.05439196154475212, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3684, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.32989690721649484, |
|
"grad_norm": 0.05023415759205818, |
|
"learning_rate": 1e-05, |
|
"loss": 2.415, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3315800547022933, |
|
"grad_norm": 0.05531445890665054, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4626, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.33326320218809175, |
|
"grad_norm": 0.05087656155228615, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3936, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.3349463496738902, |
|
"grad_norm": 0.05231088399887085, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3779, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.3366294971596886, |
|
"grad_norm": 0.0514984093606472, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3967, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33831264464548705, |
|
"grad_norm": 0.05334719642996788, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4604, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3399957921312855, |
|
"grad_norm": 0.054843124002218246, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3538, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.34167893961708395, |
|
"grad_norm": 0.04888272285461426, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4844, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.3433620871028824, |
|
"grad_norm": 0.054122187197208405, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3291, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.34504523458868086, |
|
"grad_norm": 0.054561201483011246, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3218, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.34672838207447926, |
|
"grad_norm": 0.04919834062457085, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4478, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3484115295602777, |
|
"grad_norm": 0.050551943480968475, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3755, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.35009467704607616, |
|
"grad_norm": 0.05242514982819557, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3922, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.3517778245318746, |
|
"grad_norm": 0.06077054515480995, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3218, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.35346097201767307, |
|
"grad_norm": 0.061367545276880264, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2957, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35514411950347147, |
|
"grad_norm": 0.0511772483587265, |
|
"learning_rate": 1e-05, |
|
"loss": 2.374, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.3568272669892699, |
|
"grad_norm": 0.0496203638613224, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4182, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3585104144750684, |
|
"grad_norm": 0.061339233070611954, |
|
"learning_rate": 1e-05, |
|
"loss": 2.406, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.3601935619608668, |
|
"grad_norm": 0.052460432052612305, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4309, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.3618767094466653, |
|
"grad_norm": 0.055436089634895325, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4141, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.36355985693246373, |
|
"grad_norm": 0.05396036058664322, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2705, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.36524300441826213, |
|
"grad_norm": 0.04853086173534393, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4473, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3669261519040606, |
|
"grad_norm": 0.051015399396419525, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5115, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.36860929938985904, |
|
"grad_norm": 0.05526035279035568, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3123, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.3702924468756575, |
|
"grad_norm": 0.056169234216213226, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3447, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.37197559436145594, |
|
"grad_norm": 0.05238133668899536, |
|
"learning_rate": 1e-05, |
|
"loss": 2.26, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.37365874184725434, |
|
"grad_norm": 0.05587685480713844, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3083, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3753418893330528, |
|
"grad_norm": 0.050364553928375244, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3459, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.37702503681885124, |
|
"grad_norm": 0.0506574809551239, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4246, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.3787081843046497, |
|
"grad_norm": 0.05842865630984306, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2617, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.38039133179044815, |
|
"grad_norm": 0.05097496882081032, |
|
"learning_rate": 1e-05, |
|
"loss": 2.52, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3820744792762466, |
|
"grad_norm": 0.05665278434753418, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2715, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.383757626762045, |
|
"grad_norm": 0.053350359201431274, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3101, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.38544077424784345, |
|
"grad_norm": 0.05481604114174843, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3347, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3871239217336419, |
|
"grad_norm": 0.06036606431007385, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2991, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38880706921944036, |
|
"grad_norm": 0.0606355145573616, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4226, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3904902167052388, |
|
"grad_norm": 0.052770137786865234, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4539, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.39217336419103727, |
|
"grad_norm": 0.050006203353405, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3477, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.39385651167683566, |
|
"grad_norm": 0.05640649050474167, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3123, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.3955396591626341, |
|
"grad_norm": 0.050969429314136505, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4534, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.39722280664843257, |
|
"grad_norm": 0.05676101893186569, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3481, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.398905954134231, |
|
"grad_norm": 0.05844707787036896, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3638, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4005891016200295, |
|
"grad_norm": 0.053074926137924194, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3904, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4022722491058279, |
|
"grad_norm": 0.04979414492845535, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3855, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.4039553965916263, |
|
"grad_norm": 0.05607665330171585, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3569, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4056385440774248, |
|
"grad_norm": 0.05964501202106476, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3459, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.40732169156322323, |
|
"grad_norm": 0.05849093198776245, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3213, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.4090048390490217, |
|
"grad_norm": 0.053846072405576706, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4436, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.41068798653482014, |
|
"grad_norm": 0.054448988288640976, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3716, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 0.05229583755135536, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4099, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.414054281506417, |
|
"grad_norm": 0.05479966476559639, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4026, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.41573742899221544, |
|
"grad_norm": 0.061799049377441406, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4072, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.4174205764780139, |
|
"grad_norm": 0.061452727764844894, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2833, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.41910372396381235, |
|
"grad_norm": 0.05868072435259819, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3833, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.4207868714496108, |
|
"grad_norm": 0.05926290899515152, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4224700189354092, |
|
"grad_norm": 0.058858342468738556, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3152, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.42415316642120765, |
|
"grad_norm": 0.058599065989255905, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2827, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.4258363139070061, |
|
"grad_norm": 0.060381706804037094, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3024, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.42751946139280456, |
|
"grad_norm": 0.05441940575838089, |
|
"learning_rate": 1e-05, |
|
"loss": 2.446, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.429202608878603, |
|
"grad_norm": 0.05750846117734909, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3958, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.4308857563644014, |
|
"grad_norm": 0.060346368700265884, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2395, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.43256890385019986, |
|
"grad_norm": 0.056383710354566574, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3518, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.4342520513359983, |
|
"grad_norm": 0.057746805250644684, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2834, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.43593519882179677, |
|
"grad_norm": 0.051562029868364334, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3677, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.4376183463075952, |
|
"grad_norm": 0.059988316148519516, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3372, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.43930149379339367, |
|
"grad_norm": 0.05852155759930611, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3875, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.44098464127919207, |
|
"grad_norm": 0.06629418581724167, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4194, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.4426677887649905, |
|
"grad_norm": 0.061044465750455856, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2466, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.444350936250789, |
|
"grad_norm": 0.056285977363586426, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3105, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.44603408373658743, |
|
"grad_norm": 0.06135227158665657, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3853, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.4477172312223859, |
|
"grad_norm": 0.05644640699028969, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3888, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.4494003787081843, |
|
"grad_norm": 0.06326981633901596, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3132, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.45108352619398273, |
|
"grad_norm": 0.05710430070757866, |
|
"learning_rate": 1e-05, |
|
"loss": 2.365, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.4527666736797812, |
|
"grad_norm": 0.05607946217060089, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4648, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.45444982116557964, |
|
"grad_norm": 0.057825781404972076, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4189, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4561329686513781, |
|
"grad_norm": 0.06380680948495865, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3188, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.45781611613717654, |
|
"grad_norm": 0.06377760320901871, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2896, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.45949926362297494, |
|
"grad_norm": 0.06210333853960037, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3663, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.4611824111087734, |
|
"grad_norm": 0.06039275974035263, |
|
"learning_rate": 1e-05, |
|
"loss": 2.408, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.46286555859457185, |
|
"grad_norm": 0.05442138388752937, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3843, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4645487060803703, |
|
"grad_norm": 0.06208937615156174, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4355, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.46623185356616875, |
|
"grad_norm": 0.0619891993701458, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3196, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.4679150010519672, |
|
"grad_norm": 0.059192296117544174, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3237, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.4695981485377656, |
|
"grad_norm": 0.06284468621015549, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3694, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.47128129602356406, |
|
"grad_norm": 0.06121189519762993, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3606, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4729644435093625, |
|
"grad_norm": 0.061919402331113815, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3381, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.47464759099516096, |
|
"grad_norm": 0.0676443800330162, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3624, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.4763307384809594, |
|
"grad_norm": 0.060140665620565414, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4541, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.4780138859667578, |
|
"grad_norm": 0.062285441905260086, |
|
"learning_rate": 1e-05, |
|
"loss": 2.323, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.47969703345255627, |
|
"grad_norm": 0.06063227355480194, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3596, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4813801809383547, |
|
"grad_norm": 0.05906851589679718, |
|
"learning_rate": 1e-05, |
|
"loss": 2.458, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.48306332842415317, |
|
"grad_norm": 0.05862203240394592, |
|
"learning_rate": 1e-05, |
|
"loss": 2.291, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.4847464759099516, |
|
"grad_norm": 0.0629325732588768, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2634, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.4864296233957501, |
|
"grad_norm": 0.06464157998561859, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2531, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.4881127708815485, |
|
"grad_norm": 0.0547555610537529, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5339, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 0.0606168657541275, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2886, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.4914790658531454, |
|
"grad_norm": 0.058814577758312225, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3337, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.49316221333894383, |
|
"grad_norm": 0.0691385492682457, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2904, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.4948453608247423, |
|
"grad_norm": 0.06522157788276672, |
|
"learning_rate": 1e-05, |
|
"loss": 2.469, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.4965285083105407, |
|
"grad_norm": 0.05957287177443504, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4095, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.49821165579633914, |
|
"grad_norm": 0.06277060508728027, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4697, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.4998948032821376, |
|
"grad_norm": 0.06802426278591156, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2517, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.501577950767936, |
|
"grad_norm": 0.06365792453289032, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2942, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5032610982537344, |
|
"grad_norm": 0.06624794751405716, |
|
"learning_rate": 1e-05, |
|
"loss": 2.283, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.5049442457395329, |
|
"grad_norm": 0.05979595705866814, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4387, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5066273932253313, |
|
"grad_norm": 0.06187634915113449, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4205, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.5083105407111298, |
|
"grad_norm": 0.06389462947845459, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2775, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.5099936881969283, |
|
"grad_norm": 0.05831071361899376, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3892, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.5116768356827267, |
|
"grad_norm": 0.06568494439125061, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3087, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.5133599831685252, |
|
"grad_norm": 0.062109317630529404, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3268, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5150431306543236, |
|
"grad_norm": 0.061168327927589417, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3093, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.5167262781401221, |
|
"grad_norm": 0.061159648001194, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3315, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.5184094256259205, |
|
"grad_norm": 0.06269169598817825, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3442, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.520092573111719, |
|
"grad_norm": 0.06711502373218536, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2008, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.5217757205975173, |
|
"grad_norm": 0.0663105845451355, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3502, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5234588680833158, |
|
"grad_norm": 0.06040646880865097, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3414, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.5251420155691142, |
|
"grad_norm": 0.06823603063821793, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3392, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.5268251630549127, |
|
"grad_norm": 0.05944176763296127, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3193, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.5285083105407111, |
|
"grad_norm": 0.06610157340765, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2288, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.5301914580265096, |
|
"grad_norm": 0.06880299746990204, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3529, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.531874605512308, |
|
"grad_norm": 0.06061836704611778, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3533, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.5335577529981065, |
|
"grad_norm": 0.06552371382713318, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3579, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.5352409004839049, |
|
"grad_norm": 0.06967922300100327, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2983, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.5369240479697034, |
|
"grad_norm": 0.06997574120759964, |
|
"learning_rate": 1e-05, |
|
"loss": 2.355, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.5386071954555018, |
|
"grad_norm": 0.0654403418302536, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4258, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5402903429413002, |
|
"grad_norm": 0.06031208485364914, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4011, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.5419734904270986, |
|
"grad_norm": 0.06496379524469376, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2429, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.5436566379128971, |
|
"grad_norm": 0.06525281816720963, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3254, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.5453397853986955, |
|
"grad_norm": 0.07553514093160629, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2953, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.547022932884494, |
|
"grad_norm": 0.06429509073495865, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3319, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5487060803702924, |
|
"grad_norm": 0.0657946914434433, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3501, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.5503892278560909, |
|
"grad_norm": 0.06548567861318588, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2781, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.5520723753418894, |
|
"grad_norm": 0.06299672275781631, |
|
"learning_rate": 1e-05, |
|
"loss": 2.377, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.5537555228276878, |
|
"grad_norm": 0.06381850689649582, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3945, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.5554386703134863, |
|
"grad_norm": 0.06497140228748322, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3496, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5571218177992847, |
|
"grad_norm": 0.06588133424520493, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3955, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.558804965285083, |
|
"grad_norm": 0.06468643248081207, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2893, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.5604881127708815, |
|
"grad_norm": 0.07278285920619965, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3179, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.56217126025668, |
|
"grad_norm": 0.06992325931787491, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3588, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5638544077424784, |
|
"grad_norm": 0.06566626578569412, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4763, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5655375552282769, |
|
"grad_norm": 0.0633927658200264, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4685, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.5672207027140753, |
|
"grad_norm": 0.06903122365474701, |
|
"learning_rate": 1e-05, |
|
"loss": 2.311, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.5689038501998738, |
|
"grad_norm": 0.06421441584825516, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3589, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.5705869976856722, |
|
"grad_norm": 0.07122648507356644, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3798, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.5722701451714707, |
|
"grad_norm": 0.06518077105283737, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4546, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5739532926572691, |
|
"grad_norm": 0.07509720325469971, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3341, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.5756364401430676, |
|
"grad_norm": 0.06559302657842636, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3127, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.5773195876288659, |
|
"grad_norm": 0.06652245670557022, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3997, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.5790027351146644, |
|
"grad_norm": 0.07472145557403564, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3237, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.5806858826004628, |
|
"grad_norm": 0.07624109089374542, |
|
"learning_rate": 1e-05, |
|
"loss": 2.186, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5823690300862613, |
|
"grad_norm": 0.06387084722518921, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2717, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5840521775720597, |
|
"grad_norm": 0.06857839971780777, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3726, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5857353250578582, |
|
"grad_norm": 0.06429892778396606, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4109, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5874184725436566, |
|
"grad_norm": 0.0720372200012207, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3291, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.5891016200294551, |
|
"grad_norm": 0.0749678909778595, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3369, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5907847675152536, |
|
"grad_norm": 0.0645705908536911, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3894, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.592467915001052, |
|
"grad_norm": 0.06680341064929962, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3335, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.5941510624868505, |
|
"grad_norm": 0.07383781671524048, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2733, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.5958342099726488, |
|
"grad_norm": 0.07338624447584152, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2236, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.5975173574584473, |
|
"grad_norm": 0.06998410820960999, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2552, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5992005049442457, |
|
"grad_norm": 0.06697436422109604, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4231, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6008836524300442, |
|
"grad_norm": 0.06693920493125916, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3296, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.6025667999158426, |
|
"grad_norm": 0.06306028366088867, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4009, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.6042499474016411, |
|
"grad_norm": 0.0724472776055336, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2986, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.6059330948874395, |
|
"grad_norm": 0.06711563467979431, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3755, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.607616242373238, |
|
"grad_norm": 0.07287666201591492, |
|
"learning_rate": 1e-05, |
|
"loss": 2.325, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.6092993898590364, |
|
"grad_norm": 0.07494334876537323, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2673, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.6109825373448349, |
|
"grad_norm": 0.07399529218673706, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3134, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.6126656848306333, |
|
"grad_norm": 0.06705833226442337, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3772, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.6143488323164318, |
|
"grad_norm": 0.07528689503669739, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3872, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6160319798022301, |
|
"grad_norm": 0.06814612448215485, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2527, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.6177151272880286, |
|
"grad_norm": 0.06929857283830643, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4138, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.619398274773827, |
|
"grad_norm": 0.07336314767599106, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4197, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.6210814222596255, |
|
"grad_norm": 0.07009201496839523, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3943, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.6227645697454239, |
|
"grad_norm": 0.07367721945047379, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3044, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6244477172312224, |
|
"grad_norm": 0.07029354572296143, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3018, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6261308647170208, |
|
"grad_norm": 0.07852700352668762, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3727, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6278140122028193, |
|
"grad_norm": 0.0764508917927742, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1992, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6294971596886177, |
|
"grad_norm": 0.0799420177936554, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2693, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.6311803071744162, |
|
"grad_norm": 0.06878554075956345, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4749, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6328634546602147, |
|
"grad_norm": 0.07085944712162018, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3435, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.634546602146013, |
|
"grad_norm": 0.06489285826683044, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3257, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6362297496318114, |
|
"grad_norm": 0.06664973497390747, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5022, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6379128971176099, |
|
"grad_norm": 0.07660377770662308, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3269, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6395960446034084, |
|
"grad_norm": 0.06934674084186554, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4021, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6412791920892068, |
|
"grad_norm": 0.07515530288219452, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3157, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6429623395750053, |
|
"grad_norm": 0.07302498072385788, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3892, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6446454870608037, |
|
"grad_norm": 0.07303425669670105, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3765, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6463286345466022, |
|
"grad_norm": 0.07705460488796234, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2684, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6480117820324006, |
|
"grad_norm": 0.07487067580223083, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3733, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6496949295181991, |
|
"grad_norm": 0.06538619101047516, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3789, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6513780770039975, |
|
"grad_norm": 0.07406684756278992, |
|
"learning_rate": 1e-05, |
|
"loss": 2.332, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 0.07246539741754532, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2302, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6547443719755943, |
|
"grad_norm": 0.07304323464632034, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3708, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6564275194613928, |
|
"grad_norm": 0.07457181811332703, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2991, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6581106669471912, |
|
"grad_norm": 0.07300930470228195, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2423, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.6597938144329897, |
|
"grad_norm": 0.07508236914873123, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2642, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.6614769619187881, |
|
"grad_norm": 0.07481173425912857, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.6631601094045866, |
|
"grad_norm": 0.06851742416620255, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4534, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.664843256890385, |
|
"grad_norm": 0.07536716759204865, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3264, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6665264043761835, |
|
"grad_norm": 0.07752048969268799, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4158, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.6682095518619819, |
|
"grad_norm": 0.06357281655073166, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4956, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.6698926993477804, |
|
"grad_norm": 0.08333004266023636, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3921, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.6715758468335787, |
|
"grad_norm": 0.06873282790184021, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3611, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.6732589943193772, |
|
"grad_norm": 0.07533644139766693, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3708, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6749421418051756, |
|
"grad_norm": 0.07756076753139496, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3003, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.6766252892909741, |
|
"grad_norm": 0.06644177436828613, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4331, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6783084367767725, |
|
"grad_norm": 0.07512148469686508, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2881, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.679991584262571, |
|
"grad_norm": 0.08939874172210693, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1564, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.6816747317483695, |
|
"grad_norm": 0.07984601706266403, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3967, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6833578792341679, |
|
"grad_norm": 0.0724392980337143, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2859, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6850410267199664, |
|
"grad_norm": 0.07025589793920517, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3027, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6867241742057648, |
|
"grad_norm": 0.07863828539848328, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3286, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.6884073216915633, |
|
"grad_norm": 0.07466793060302734, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2849, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6900904691773617, |
|
"grad_norm": 0.07291209697723389, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3931, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6917736166631601, |
|
"grad_norm": 0.072298564016819, |
|
"learning_rate": 1e-05, |
|
"loss": 2.377, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.6934567641489585, |
|
"grad_norm": 0.06996294856071472, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3503, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.695139911634757, |
|
"grad_norm": 0.07319701462984085, |
|
"learning_rate": 1e-05, |
|
"loss": 2.345, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6968230591205554, |
|
"grad_norm": 0.0768033117055893, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3679, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6985062066063539, |
|
"grad_norm": 0.07401002943515778, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3435, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7001893540921523, |
|
"grad_norm": 0.07700485736131668, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3428, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.7018725015779508, |
|
"grad_norm": 0.07446201890707016, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4133, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.7035556490637492, |
|
"grad_norm": 0.06801878660917282, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3665, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.7052387965495477, |
|
"grad_norm": 0.07989214360713959, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3303, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.7069219440353461, |
|
"grad_norm": 0.07385462522506714, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3608, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7086050915211446, |
|
"grad_norm": 0.06808451563119888, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4851, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7102882390069429, |
|
"grad_norm": 0.07354162633419037, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3005, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.7119713864927414, |
|
"grad_norm": 0.07730504870414734, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2815, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7136545339785398, |
|
"grad_norm": 0.08045239001512527, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2695, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7153376814643383, |
|
"grad_norm": 0.07997512817382812, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3608, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7170208289501367, |
|
"grad_norm": 0.07076172530651093, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3411, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7187039764359352, |
|
"grad_norm": 0.07223929464817047, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3452, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7203871239217337, |
|
"grad_norm": 0.07667456567287445, |
|
"learning_rate": 1e-05, |
|
"loss": 2.333, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.7220702714075321, |
|
"grad_norm": 0.07509643584489822, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3701, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7237534188933306, |
|
"grad_norm": 0.08230644464492798, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3577, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.725436566379129, |
|
"grad_norm": 0.06938886642456055, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4573, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.7271197138649275, |
|
"grad_norm": 0.07415178418159485, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2834, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7288028613507258, |
|
"grad_norm": 0.0821278989315033, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2744, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7304860088365243, |
|
"grad_norm": 0.07293502986431122, |
|
"learning_rate": 1e-05, |
|
"loss": 2.313, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7321691563223227, |
|
"grad_norm": 0.07829819619655609, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3849, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7338523038081212, |
|
"grad_norm": 0.07795297354459763, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2466, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7355354512939196, |
|
"grad_norm": 0.06956803798675537, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4038, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7372185987797181, |
|
"grad_norm": 0.07948347926139832, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3042, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.7389017462655165, |
|
"grad_norm": 0.08074218034744263, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3314, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.740584893751315, |
|
"grad_norm": 0.08029188960790634, |
|
"learning_rate": 1e-05, |
|
"loss": 2.312, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7422680412371134, |
|
"grad_norm": 0.0783049538731575, |
|
"learning_rate": 1e-05, |
|
"loss": 2.307, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.7439511887229119, |
|
"grad_norm": 0.08203115314245224, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3081, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.7456343362087103, |
|
"grad_norm": 0.08666986972093582, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3721, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.7473174836945087, |
|
"grad_norm": 0.08097022771835327, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1912, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.7490006311803071, |
|
"grad_norm": 0.08272138237953186, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3562, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7506837786661056, |
|
"grad_norm": 0.08114828914403915, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3569, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.752366926151904, |
|
"grad_norm": 0.07786712795495987, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3772, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.7540500736377025, |
|
"grad_norm": 0.07603191584348679, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2748, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.7557332211235009, |
|
"grad_norm": 0.08364319056272507, |
|
"learning_rate": 1e-05, |
|
"loss": 2.334, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.7574163686092994, |
|
"grad_norm": 0.07968125492334366, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3225, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7590995160950978, |
|
"grad_norm": 0.08204993605613708, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3107, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.7607826635808963, |
|
"grad_norm": 0.08319111168384552, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3994, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.7624658110666948, |
|
"grad_norm": 0.07812530547380447, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2771, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.7641489585524932, |
|
"grad_norm": 0.07962696999311447, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3094, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.7658321060382917, |
|
"grad_norm": 0.0815802663564682, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3169, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.76751525352409, |
|
"grad_norm": 0.08460783958435059, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2443, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.7691984010098885, |
|
"grad_norm": 0.07976390421390533, |
|
"learning_rate": 1e-05, |
|
"loss": 2.26, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.7708815484956869, |
|
"grad_norm": 0.08143635839223862, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2517, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.7725646959814854, |
|
"grad_norm": 0.08004558831453323, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3276, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.7742478434672838, |
|
"grad_norm": 0.0831751599907875, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2842, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7759309909530823, |
|
"grad_norm": 0.07613930851221085, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3958, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.7776141384388807, |
|
"grad_norm": 0.08161590993404388, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3287, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.7792972859246792, |
|
"grad_norm": 0.08616164326667786, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3098, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.7809804334104776, |
|
"grad_norm": 0.08720822632312775, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1388, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.7826635808962761, |
|
"grad_norm": 0.08598899841308594, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3005, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7843467283820745, |
|
"grad_norm": 0.07982167601585388, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3049, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.7860298758678729, |
|
"grad_norm": 0.08733374625444412, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2747, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7877130233536713, |
|
"grad_norm": 0.08848235011100769, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4331, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7893961708394698, |
|
"grad_norm": 0.08619164675474167, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2881, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7910793183252682, |
|
"grad_norm": 0.08046075701713562, |
|
"learning_rate": 1e-05, |
|
"loss": 2.397, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7927624658110667, |
|
"grad_norm": 0.08469874411821365, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3225, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.7944456132968651, |
|
"grad_norm": 0.08878640830516815, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2832, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.7961287607826636, |
|
"grad_norm": 0.08530005067586899, |
|
"learning_rate": 1e-05, |
|
"loss": 2.28, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.797811908268462, |
|
"grad_norm": 0.08089161664247513, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2822, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.7994950557542605, |
|
"grad_norm": 0.0770372822880745, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4031, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.801178203240059, |
|
"grad_norm": 0.08313820511102676, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4009, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.8028613507258574, |
|
"grad_norm": 0.08684401214122772, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4563, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.8045444982116557, |
|
"grad_norm": 0.08352997899055481, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3242, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.8062276456974542, |
|
"grad_norm": 0.08148252218961716, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3096, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.8079107931832527, |
|
"grad_norm": 0.08157838881015778, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3108, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8095939406690511, |
|
"grad_norm": 0.08561182022094727, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2327, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8112770881548496, |
|
"grad_norm": 0.09177689999341965, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2129, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.812960235640648, |
|
"grad_norm": 0.08262176811695099, |
|
"learning_rate": 1e-05, |
|
"loss": 2.397, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8146433831264465, |
|
"grad_norm": 0.08541447669267654, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2419, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.08732729405164719, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3328, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8180096780980434, |
|
"grad_norm": 0.08658833056688309, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2793, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.8196928255838418, |
|
"grad_norm": 0.0789208933711052, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4072, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.8213759730696403, |
|
"grad_norm": 0.07870952039957047, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4082, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8230591205554386, |
|
"grad_norm": 0.07583601027727127, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3833, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 0.08982661366462708, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2766, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8264254155270355, |
|
"grad_norm": 0.08841705322265625, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2581, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.828108563012834, |
|
"grad_norm": 0.08784886449575424, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2352, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.8297917104986324, |
|
"grad_norm": 0.08765432238578796, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1957, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8314748579844309, |
|
"grad_norm": 0.09070983529090881, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2451, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.8331580054702293, |
|
"grad_norm": 0.08307146281003952, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3645, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8348411529560278, |
|
"grad_norm": 0.07774417847394943, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3921, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.8365243004418262, |
|
"grad_norm": 0.08441779762506485, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2974, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.8382074479276247, |
|
"grad_norm": 0.08773106336593628, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3984, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.8398905954134231, |
|
"grad_norm": 0.08157604187726974, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2946, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.8415737428992216, |
|
"grad_norm": 0.09280236810445786, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3628, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8432568903850199, |
|
"grad_norm": 0.08737549185752869, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2593, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.8449400378708184, |
|
"grad_norm": 0.08917705714702606, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2435, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.8466231853566168, |
|
"grad_norm": 0.08589258790016174, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2869, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.8483063328424153, |
|
"grad_norm": 0.08363740891218185, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1512, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.8499894803282138, |
|
"grad_norm": 0.09710842370986938, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3042, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8516726278140122, |
|
"grad_norm": 0.09031599014997482, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2406, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.8533557752998107, |
|
"grad_norm": 0.08941849321126938, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2725, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.8550389227856091, |
|
"grad_norm": 0.08926845341920853, |
|
"learning_rate": 1e-05, |
|
"loss": 2.323, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.8567220702714076, |
|
"grad_norm": 0.08846578001976013, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3394, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.858405217757206, |
|
"grad_norm": 0.08452317863702774, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4158, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8600883652430045, |
|
"grad_norm": 0.08531490713357925, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3113, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.8617715127288028, |
|
"grad_norm": 0.08221501857042313, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3826, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.8634546602146013, |
|
"grad_norm": 0.08809410035610199, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2666, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.8651378077003997, |
|
"grad_norm": 0.0881451964378357, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4678, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.8668209551861982, |
|
"grad_norm": 0.0958879366517067, |
|
"learning_rate": 1e-05, |
|
"loss": 2.17, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8685041026719966, |
|
"grad_norm": 0.08498766273260117, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4021, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.8701872501577951, |
|
"grad_norm": 0.09182509779930115, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2476, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.8718703976435935, |
|
"grad_norm": 0.08831535279750824, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3013, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.873553545129392, |
|
"grad_norm": 0.08792266249656677, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2463, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.8752366926151904, |
|
"grad_norm": 0.0804978460073471, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5151, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8769198401009889, |
|
"grad_norm": 0.09397967159748077, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2487, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.8786029875867873, |
|
"grad_norm": 0.08882005512714386, |
|
"learning_rate": 1e-05, |
|
"loss": 2.225, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.8802861350725857, |
|
"grad_norm": 0.08365931361913681, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4277, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.8819692825583841, |
|
"grad_norm": 0.08842651546001434, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3884, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.8836524300441826, |
|
"grad_norm": 0.08760154247283936, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2576, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.885335577529981, |
|
"grad_norm": 0.07843348383903503, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4143, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.8870187250157795, |
|
"grad_norm": 0.09312726557254791, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2472, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.888701872501578, |
|
"grad_norm": 0.09460542351007462, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2043, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.8903850199873764, |
|
"grad_norm": 0.09200920909643173, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3562, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.8920681674731749, |
|
"grad_norm": 0.08051000535488129, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4146, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8937513149589733, |
|
"grad_norm": 0.09969057142734528, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3342, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.8954344624447718, |
|
"grad_norm": 0.08616895228624344, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3381, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.8971176099305702, |
|
"grad_norm": 0.09115055203437805, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2377, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.8988007574163686, |
|
"grad_norm": 0.10309138149023056, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1418, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.900483904902167, |
|
"grad_norm": 0.09327155351638794, |
|
"learning_rate": 1e-05, |
|
"loss": 2.312, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.9021670523879655, |
|
"grad_norm": 0.09104789048433304, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2759, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.9038501998737639, |
|
"grad_norm": 0.08858876675367355, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4138, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.9055333473595624, |
|
"grad_norm": 0.08850864320993423, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3915, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.9072164948453608, |
|
"grad_norm": 0.09071122854948044, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4199, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.9088996423311593, |
|
"grad_norm": 0.08702193200588226, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3079, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9105827898169577, |
|
"grad_norm": 0.09564194083213806, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2996, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9122659373027562, |
|
"grad_norm": 0.08906988054513931, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3958, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9139490847885546, |
|
"grad_norm": 0.08117242157459259, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5557, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9156322322743531, |
|
"grad_norm": 0.09870729595422745, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3542, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9173153797601514, |
|
"grad_norm": 0.0906287208199501, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2866, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9189985272459499, |
|
"grad_norm": 0.08649491518735886, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3547, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9206816747317483, |
|
"grad_norm": 0.09572413563728333, |
|
"learning_rate": 1e-05, |
|
"loss": 2.377, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9223648222175468, |
|
"grad_norm": 0.08862059563398361, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3452, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.9240479697033452, |
|
"grad_norm": 0.09061957150697708, |
|
"learning_rate": 1e-05, |
|
"loss": 2.264, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.9257311171891437, |
|
"grad_norm": 0.10327678918838501, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3362, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9274142646749421, |
|
"grad_norm": 0.10101998597383499, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2091, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.9290974121607406, |
|
"grad_norm": 0.08099676668643951, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3779, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.930780559646539, |
|
"grad_norm": 0.09572342783212662, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2186, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.9324637071323375, |
|
"grad_norm": 0.10440348833799362, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2717, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.934146854618136, |
|
"grad_norm": 0.09859239310026169, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2964, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9358300021039344, |
|
"grad_norm": 0.08539914339780807, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3541, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.9375131495897328, |
|
"grad_norm": 0.09667155891656876, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2412, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.9391962970755312, |
|
"grad_norm": 0.09381328523159027, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1632, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.9408794445613297, |
|
"grad_norm": 0.10293637216091156, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2969, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.9425625920471281, |
|
"grad_norm": 0.08901844918727875, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2806, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9442457395329266, |
|
"grad_norm": 0.09931071847677231, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2671, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.945928887018725, |
|
"grad_norm": 0.08619210124015808, |
|
"learning_rate": 1e-05, |
|
"loss": 2.428, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.9476120345045235, |
|
"grad_norm": 0.08460855484008789, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2412, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.9492951819903219, |
|
"grad_norm": 0.09682973474264145, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3339, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.9509783294761204, |
|
"grad_norm": 0.10189709812402725, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2268, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9526614769619188, |
|
"grad_norm": 0.10271991789340973, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1819, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.9543446244477173, |
|
"grad_norm": 0.0901963859796524, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3029, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.9560277719335156, |
|
"grad_norm": 0.09148905426263809, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3362, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.9577109194193141, |
|
"grad_norm": 0.10434332489967346, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3037, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.9593940669051125, |
|
"grad_norm": 0.0956675261259079, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3442, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.961077214390911, |
|
"grad_norm": 0.09394146502017975, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2913, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.9627603618767094, |
|
"grad_norm": 0.09179794043302536, |
|
"learning_rate": 1e-05, |
|
"loss": 2.21, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.9644435093625079, |
|
"grad_norm": 0.09866604208946228, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2721, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.9661266568483063, |
|
"grad_norm": 0.10069537162780762, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1637, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.9678098043341048, |
|
"grad_norm": 0.0923682376742363, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2343, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9694929518199032, |
|
"grad_norm": 0.08836492151021957, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3794, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.9711760993057017, |
|
"grad_norm": 0.0894513726234436, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2378, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.9728592467915002, |
|
"grad_norm": 0.08647426962852478, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3589, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.9745423942772985, |
|
"grad_norm": 0.11035202443599701, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2371, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.976225541763097, |
|
"grad_norm": 0.09551876783370972, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3353, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9779086892488954, |
|
"grad_norm": 0.0911082923412323, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3264, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.10280529409646988, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2351, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.9812749842204923, |
|
"grad_norm": 0.09424940496683121, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3464, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.9829581317062908, |
|
"grad_norm": 0.092115618288517, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2799, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.9846412791920892, |
|
"grad_norm": 0.09771659225225449, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3777, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.9863244266778877, |
|
"grad_norm": 0.09877105802297592, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3613, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.9880075741636861, |
|
"grad_norm": 0.09816967695951462, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2925, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.9896907216494846, |
|
"grad_norm": 0.0874725803732872, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3154, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.991373869135283, |
|
"grad_norm": 0.09336823225021362, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3933, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.9930570166210814, |
|
"grad_norm": 0.10439187288284302, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3655, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9947401641068798, |
|
"grad_norm": 0.09005751460790634, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2971, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.9964233115926783, |
|
"grad_norm": 0.10612068325281143, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3584, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.9981064590784767, |
|
"grad_norm": 0.09101177752017975, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4402, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.9997896065642752, |
|
"grad_norm": 0.09874800592660904, |
|
"learning_rate": 1e-05, |
|
"loss": 2.326, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.0014727540500736, |
|
"grad_norm": 0.1025647521018982, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4041, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.003155901535872, |
|
"grad_norm": 0.11109832674264908, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2881, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.0048390490216705, |
|
"grad_norm": 0.09670565277338028, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2003, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.0065221965074689, |
|
"grad_norm": 0.09513822942972183, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3225, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.0082053439932674, |
|
"grad_norm": 0.11121483892202377, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4143, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.0098884914790658, |
|
"grad_norm": 0.09941378980875015, |
|
"learning_rate": 1e-05, |
|
"loss": 2.333, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0115716389648644, |
|
"grad_norm": 0.09730757772922516, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3638, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.0132547864506627, |
|
"grad_norm": 0.10626422613859177, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2303, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.0149379339364613, |
|
"grad_norm": 0.0958971306681633, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3906, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.0166210814222596, |
|
"grad_norm": 0.10065159201622009, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3425, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.0183042289080582, |
|
"grad_norm": 0.08671624213457108, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2742, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.0199873763938565, |
|
"grad_norm": 0.09528376907110214, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3765, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.0216705238796548, |
|
"grad_norm": 0.09153752028942108, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2983, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.0233536713654534, |
|
"grad_norm": 0.10145740956068039, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1774, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.0250368188512518, |
|
"grad_norm": 0.09908965229988098, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3479, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.0267199663370503, |
|
"grad_norm": 0.09253786504268646, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3228, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0284031138228487, |
|
"grad_norm": 0.094690702855587, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2864, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.0300862613086472, |
|
"grad_norm": 0.09160283952951431, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4285, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.0317694087944456, |
|
"grad_norm": 0.10157333314418793, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1316, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.0334525562802441, |
|
"grad_norm": 0.10498999804258347, |
|
"learning_rate": 1e-05, |
|
"loss": 2.373, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.0351357037660425, |
|
"grad_norm": 0.09599211066961288, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3511, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.036818851251841, |
|
"grad_norm": 0.1121436059474945, |
|
"learning_rate": 1e-05, |
|
"loss": 2.127, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.0385019987376394, |
|
"grad_norm": 0.10269173234701157, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2659, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.040185146223438, |
|
"grad_norm": 0.0945139229297638, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3281, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.0418682937092363, |
|
"grad_norm": 0.09318878501653671, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3247, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.0435514411950346, |
|
"grad_norm": 0.10471779108047485, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3098, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0452345886808332, |
|
"grad_norm": 0.10514305531978607, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3647, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.0469177361666315, |
|
"grad_norm": 0.09875541925430298, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4204, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.04860088365243, |
|
"grad_norm": 0.10112539678812027, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3269, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.0502840311382284, |
|
"grad_norm": 0.09719318896532059, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3223, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.051967178624027, |
|
"grad_norm": 0.09615301340818405, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2798, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0536503261098253, |
|
"grad_norm": 0.09600812941789627, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2738, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.055333473595624, |
|
"grad_norm": 0.09326303005218506, |
|
"learning_rate": 1e-05, |
|
"loss": 2.23, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.0570166210814222, |
|
"grad_norm": 0.09689430892467499, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2582, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.0586997685672208, |
|
"grad_norm": 0.10389314591884613, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3733, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.0603829160530192, |
|
"grad_norm": 0.09320785105228424, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3315, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0620660635388175, |
|
"grad_norm": 0.10638166218996048, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4058, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.063749211024616, |
|
"grad_norm": 0.09525519609451294, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2803, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.0654323585104144, |
|
"grad_norm": 0.09904535114765167, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3613, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.067115505996213, |
|
"grad_norm": 0.10914106667041779, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3955, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.0687986534820113, |
|
"grad_norm": 0.10424593091011047, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2332, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.0704818009678099, |
|
"grad_norm": 0.10360780358314514, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3127, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.0721649484536082, |
|
"grad_norm": 0.11223631352186203, |
|
"learning_rate": 1e-05, |
|
"loss": 2.201, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.0738480959394068, |
|
"grad_norm": 0.09491337090730667, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3129, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.0755312434252051, |
|
"grad_norm": 0.09244826436042786, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3728, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.0772143909110037, |
|
"grad_norm": 0.0922231450676918, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3225, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.078897538396802, |
|
"grad_norm": 0.10818596929311752, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3104, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.0805806858826004, |
|
"grad_norm": 0.09497258812189102, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3176, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.082263833368399, |
|
"grad_norm": 0.10034379363059998, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3943, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.0839469808541973, |
|
"grad_norm": 0.10024038702249527, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3127, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.0856301283399958, |
|
"grad_norm": 0.10074039548635483, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2351, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.0873132758257942, |
|
"grad_norm": 0.09631813317537308, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3101, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.0889964233115927, |
|
"grad_norm": 0.10632781684398651, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3669, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.090679570797391, |
|
"grad_norm": 0.10795175284147263, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3064, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.0923627182831896, |
|
"grad_norm": 0.11120691895484924, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2911, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.094045865768988, |
|
"grad_norm": 0.10034749656915665, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3696, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0957290132547866, |
|
"grad_norm": 0.10955310612916946, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3464, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.097412160740585, |
|
"grad_norm": 0.09739572554826736, |
|
"learning_rate": 1e-05, |
|
"loss": 2.325, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.0990953082263832, |
|
"grad_norm": 0.10152111947536469, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3745, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.1007784557121818, |
|
"grad_norm": 0.10103686153888702, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3303, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.1024616031979801, |
|
"grad_norm": 0.1003558412194252, |
|
"learning_rate": 1e-05, |
|
"loss": 2.312, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.1041447506837787, |
|
"grad_norm": 0.10518987476825714, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3444, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.105827898169577, |
|
"grad_norm": 0.09896016865968704, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2532, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.1075110456553756, |
|
"grad_norm": 0.09725090116262436, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3625, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.109194193141174, |
|
"grad_norm": 0.09022284299135208, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3743, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.1108773406269725, |
|
"grad_norm": 0.10471490770578384, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3416, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1125604881127709, |
|
"grad_norm": 0.10991263389587402, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3214, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.1142436355985694, |
|
"grad_norm": 0.10231148451566696, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2832, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.1159267830843678, |
|
"grad_norm": 0.09433937072753906, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2645, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.117609930570166, |
|
"grad_norm": 0.13238666951656342, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2483, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.1192930780559647, |
|
"grad_norm": 0.10956214368343353, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2321, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.120976225541763, |
|
"grad_norm": 0.11065597832202911, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1869, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.1226593730275616, |
|
"grad_norm": 0.10971678793430328, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1855, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.12434252051336, |
|
"grad_norm": 0.11080143600702286, |
|
"learning_rate": 1e-05, |
|
"loss": 2.198, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.1260256679991585, |
|
"grad_norm": 0.10381001979112625, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3384, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.1277088154849568, |
|
"grad_norm": 0.1026921421289444, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2458, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1293919629707554, |
|
"grad_norm": 0.10585295408964157, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1859, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.1310751104565537, |
|
"grad_norm": 0.10650487244129181, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2662, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.1327582579423523, |
|
"grad_norm": 0.10717649012804031, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3088, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.1344414054281506, |
|
"grad_norm": 0.10479724407196045, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3042, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.136124552913949, |
|
"grad_norm": 0.10629065334796906, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3481, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.1378077003997475, |
|
"grad_norm": 0.10375174880027771, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3845, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.1394908478855459, |
|
"grad_norm": 0.10122872143983841, |
|
"learning_rate": 1e-05, |
|
"loss": 2.335, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.1411739953713445, |
|
"grad_norm": 0.09846247732639313, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4028, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.11501342058181763, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2419, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.1445402903429414, |
|
"grad_norm": 0.11248493194580078, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1294, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1462234378287397, |
|
"grad_norm": 0.1141652762889862, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2842, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.1479065853145383, |
|
"grad_norm": 0.10232444107532501, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1798, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.1495897328003366, |
|
"grad_norm": 0.10624698549509048, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2474, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.1512728802861352, |
|
"grad_norm": 0.10583934187889099, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2917, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.1529560277719335, |
|
"grad_norm": 0.10667344182729721, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2581, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.1546391752577319, |
|
"grad_norm": 0.10415381193161011, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3325, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.1563223227435304, |
|
"grad_norm": 0.109574094414711, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3306, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.1580054702293288, |
|
"grad_norm": 0.10537154227495193, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3396, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.1596886177151273, |
|
"grad_norm": 0.10670781880617142, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2518, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.1613717652009257, |
|
"grad_norm": 0.10296822339296341, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3911, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1630549126867242, |
|
"grad_norm": 0.10323610156774521, |
|
"learning_rate": 1e-05, |
|
"loss": 2.415, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.1647380601725226, |
|
"grad_norm": 0.09952528029680252, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3674, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.1664212076583211, |
|
"grad_norm": 0.10683920234441757, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1606, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.1681043551441195, |
|
"grad_norm": 0.10594907402992249, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3633, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.169787502629918, |
|
"grad_norm": 0.1164483055472374, |
|
"learning_rate": 1e-05, |
|
"loss": 2.272, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.1714706501157164, |
|
"grad_norm": 0.1053275316953659, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3361, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.1731537976015147, |
|
"grad_norm": 0.11722961068153381, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1008, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.1748369450873133, |
|
"grad_norm": 0.11388476192951202, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3129, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.1765200925731116, |
|
"grad_norm": 0.1149948239326477, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3503, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.1782032400589102, |
|
"grad_norm": 0.09305736422538757, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3811, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1798863875447085, |
|
"grad_norm": 0.1027708575129509, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3262, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.181569535030507, |
|
"grad_norm": 0.1058826595544815, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2576, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.1832526825163054, |
|
"grad_norm": 0.1003696396946907, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2759, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.184935830002104, |
|
"grad_norm": 0.11113473027944565, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4163, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.1866189774879023, |
|
"grad_norm": 0.10945228487253189, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2725, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.188302124973701, |
|
"grad_norm": 0.1079326868057251, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3048, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.1899852724594993, |
|
"grad_norm": 0.10752802342176437, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2145, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.1916684199452976, |
|
"grad_norm": 0.10588284581899643, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3025, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.1933515674310962, |
|
"grad_norm": 0.1051083654165268, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3198, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.1950347149168945, |
|
"grad_norm": 0.11915988475084305, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2456, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.196717862402693, |
|
"grad_norm": 0.10947719216346741, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3479, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.1984010098884914, |
|
"grad_norm": 0.11522776633501053, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2898, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.20008415737429, |
|
"grad_norm": 0.10741020739078522, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3198, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.2017673048600883, |
|
"grad_norm": 0.10589215159416199, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2812, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.2034504523458869, |
|
"grad_norm": 0.10151232033967972, |
|
"learning_rate": 1e-05, |
|
"loss": 2.429, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.2051335998316852, |
|
"grad_norm": 0.11951622366905212, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1932, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.2068167473174838, |
|
"grad_norm": 0.11722715198993683, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2356, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.2084998948032821, |
|
"grad_norm": 0.11441315710544586, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2891, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.2101830422890805, |
|
"grad_norm": 0.10936987400054932, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2843, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.211866189774879, |
|
"grad_norm": 0.12374020367860794, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2944, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2135493372606774, |
|
"grad_norm": 0.11024117469787598, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2595, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.215232484746476, |
|
"grad_norm": 0.09707245975732803, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3867, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.2169156322322743, |
|
"grad_norm": 0.11022404581308365, |
|
"learning_rate": 1e-05, |
|
"loss": 2.375, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.2185987797180728, |
|
"grad_norm": 0.10732002556324005, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3674, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.2202819272038712, |
|
"grad_norm": 0.11548677086830139, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3284, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.2219650746896698, |
|
"grad_norm": 0.10313412547111511, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4128, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.223648222175468, |
|
"grad_norm": 0.12717945873737335, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2847, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.2253313696612667, |
|
"grad_norm": 0.11565182358026505, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2695, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.227014517147065, |
|
"grad_norm": 0.10489466041326523, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3394, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.2286976646328633, |
|
"grad_norm": 0.11056289076805115, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4165, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.230380812118662, |
|
"grad_norm": 0.12048956751823425, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2289, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.2320639596044602, |
|
"grad_norm": 0.10263136774301529, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3306, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.2337471070902588, |
|
"grad_norm": 0.11179950088262558, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3481, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.2354302545760572, |
|
"grad_norm": 0.10484311729669571, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2703, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"grad_norm": 0.1182483434677124, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2328, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.238796549547654, |
|
"grad_norm": 0.11377429217100143, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3657, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.2404796970334526, |
|
"grad_norm": 0.11151503771543503, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3542, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.242162844519251, |
|
"grad_norm": 0.12628555297851562, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2634, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.2438459920050495, |
|
"grad_norm": 0.10311713814735413, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2717, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.2455291394908479, |
|
"grad_norm": 0.12768767774105072, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1725, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2472122869766462, |
|
"grad_norm": 0.12390502542257309, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1708, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.2488954344624448, |
|
"grad_norm": 0.10566207021474838, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3469, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.2505785819482433, |
|
"grad_norm": 0.10176009684801102, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3159, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.2522617294340417, |
|
"grad_norm": 0.10881732404232025, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2966, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.25394487691984, |
|
"grad_norm": 0.11917608976364136, |
|
"learning_rate": 1e-05, |
|
"loss": 2.395, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.2556280244056386, |
|
"grad_norm": 0.09600858390331268, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3479, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.257311171891437, |
|
"grad_norm": 0.11550504714250565, |
|
"learning_rate": 1e-05, |
|
"loss": 2.301, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.2589943193772355, |
|
"grad_norm": 0.10588584840297699, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4163, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.2606774668630338, |
|
"grad_norm": 0.10998673737049103, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3379, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.2623606143488324, |
|
"grad_norm": 0.10513128340244293, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3795, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2640437618346307, |
|
"grad_norm": 0.11185754835605621, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2583, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.265726909320429, |
|
"grad_norm": 0.10794227570295334, |
|
"learning_rate": 1e-05, |
|
"loss": 2.285, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.2674100568062276, |
|
"grad_norm": 0.12522459030151367, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2292, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.2690932042920262, |
|
"grad_norm": 0.11628364026546478, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3342, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.2707763517778246, |
|
"grad_norm": 0.12842795252799988, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1455, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.272459499263623, |
|
"grad_norm": 0.11268262565135956, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2241, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.2741426467494215, |
|
"grad_norm": 0.11674508452415466, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2677, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.2758257942352198, |
|
"grad_norm": 0.11475373059511185, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4075, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.2775089417210184, |
|
"grad_norm": 0.11378497630357742, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3032, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.2791920892068167, |
|
"grad_norm": 0.10426255315542221, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2488, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2808752366926153, |
|
"grad_norm": 0.11820263415575027, |
|
"learning_rate": 1e-05, |
|
"loss": 2.197, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.2825583841784136, |
|
"grad_norm": 0.10741489380598068, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2811, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.284241531664212, |
|
"grad_norm": 0.115534208714962, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3105, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.2859246791500105, |
|
"grad_norm": 0.1159248948097229, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2963, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.287607826635809, |
|
"grad_norm": 0.11940732598304749, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3274, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.2892909741216074, |
|
"grad_norm": 0.11882008612155914, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2405, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.2909741216074058, |
|
"grad_norm": 0.10939499735832214, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3008, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.2926572690932043, |
|
"grad_norm": 0.11414020508527756, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3164, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.2943404165790027, |
|
"grad_norm": 0.11446741968393326, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2524, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.2960235640648012, |
|
"grad_norm": 0.12233757227659225, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3997, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2977067115505996, |
|
"grad_norm": 0.11746780574321747, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2241, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.2993898590363981, |
|
"grad_norm": 0.12653754651546478, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2181, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.3010730065221965, |
|
"grad_norm": 0.11092430353164673, |
|
"learning_rate": 1e-05, |
|
"loss": 2.194, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.3027561540079948, |
|
"grad_norm": 0.11273445188999176, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2821, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.3044393014937934, |
|
"grad_norm": 0.10755831003189087, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3381, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.306122448979592, |
|
"grad_norm": 0.10324183851480484, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4531, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.3078055964653903, |
|
"grad_norm": 0.1238187626004219, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2378, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.3094887439511886, |
|
"grad_norm": 0.10919329524040222, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3157, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.3111718914369872, |
|
"grad_norm": 0.11661651730537415, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3889, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.3128550389227855, |
|
"grad_norm": 0.11324804276227951, |
|
"learning_rate": 1e-05, |
|
"loss": 2.366, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.314538186408584, |
|
"grad_norm": 0.11539211124181747, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2661, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.3162213338943825, |
|
"grad_norm": 0.12013803422451019, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2388, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.317904481380181, |
|
"grad_norm": 0.1297876238822937, |
|
"learning_rate": 1e-05, |
|
"loss": 2.338, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.3195876288659794, |
|
"grad_norm": 0.11792443692684174, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3162, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.3212707763517777, |
|
"grad_norm": 0.11543410271406174, |
|
"learning_rate": 1e-05, |
|
"loss": 2.325, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.3229539238375763, |
|
"grad_norm": 0.11507069319486618, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3389, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.3246370713233748, |
|
"grad_norm": 0.11883421987295151, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3784, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.3263202188091732, |
|
"grad_norm": 0.11997753381729126, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2183, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.3280033662949715, |
|
"grad_norm": 0.12312667816877365, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2661, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.32968651378077, |
|
"grad_norm": 0.1280994415283203, |
|
"learning_rate": 1e-05, |
|
"loss": 2.235, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.3313696612665684, |
|
"grad_norm": 0.12460897862911224, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2775, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.333052808752367, |
|
"grad_norm": 0.11441405862569809, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2642, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.3347359562381653, |
|
"grad_norm": 0.1078685000538826, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3174, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.3364191037239639, |
|
"grad_norm": 0.11945922672748566, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3101, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.3381022512097622, |
|
"grad_norm": 0.11506087332963943, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3167, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.3397853986955606, |
|
"grad_norm": 0.12365138530731201, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3044, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.3414685461813591, |
|
"grad_norm": 0.12331211566925049, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2058, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.3431516936671577, |
|
"grad_norm": 0.12298640608787537, |
|
"learning_rate": 1e-05, |
|
"loss": 2.21, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.344834841152956, |
|
"grad_norm": 0.12047012150287628, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2781, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.3465179886387544, |
|
"grad_norm": 0.12428031861782074, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3032, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.348201136124553, |
|
"grad_norm": 0.1128249540925026, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3135, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.3498842836103513, |
|
"grad_norm": 0.12616464495658875, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1487, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.3515674310961499, |
|
"grad_norm": 0.11388704925775528, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2346, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.3532505785819482, |
|
"grad_norm": 0.10213828831911087, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2859, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.3549337260677468, |
|
"grad_norm": 0.1226121038198471, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2183, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.356616873553545, |
|
"grad_norm": 0.11445735394954681, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3784, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.3583000210393434, |
|
"grad_norm": 0.11648505181074142, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3442, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.359983168525142, |
|
"grad_norm": 0.1296563744544983, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2469, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.3616663160109406, |
|
"grad_norm": 0.12322400510311127, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2915, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.363349463496739, |
|
"grad_norm": 0.11419309675693512, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3024, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3650326109825373, |
|
"grad_norm": 0.12253374606370926, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2969, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.3667157584683358, |
|
"grad_norm": 0.1254422962665558, |
|
"learning_rate": 1e-05, |
|
"loss": 2.364, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.3683989059541342, |
|
"grad_norm": 0.12984994053840637, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2936, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.3700820534399327, |
|
"grad_norm": 0.1182006224989891, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2673, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.371765200925731, |
|
"grad_norm": 0.12920832633972168, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1582, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.3734483484115296, |
|
"grad_norm": 0.1216689869761467, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3479, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.375131495897328, |
|
"grad_norm": 0.12459319084882736, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1868, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.3768146433831263, |
|
"grad_norm": 0.11144936084747314, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3663, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.3784977908689249, |
|
"grad_norm": 0.1110294982790947, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3164, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.3801809383547234, |
|
"grad_norm": 0.11903022974729538, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2589, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3818640858405218, |
|
"grad_norm": 0.10610275715589523, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4153, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.3835472333263201, |
|
"grad_norm": 0.11972808837890625, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3901, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.3852303808121187, |
|
"grad_norm": 0.10772975534200668, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3555, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.386913528297917, |
|
"grad_norm": 0.11757270246744156, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2677, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.3885966757837156, |
|
"grad_norm": 0.1217508539557457, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2267, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.390279823269514, |
|
"grad_norm": 0.10996967554092407, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3965, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.3919629707553125, |
|
"grad_norm": 0.13068005442619324, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1991, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.3936461182411108, |
|
"grad_norm": 0.12149260193109512, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2775, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.3953292657269092, |
|
"grad_norm": 0.1100870743393898, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2571, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.3970124132127077, |
|
"grad_norm": 0.10005280375480652, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2808, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3986955606985063, |
|
"grad_norm": 0.11633820086717606, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3215, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.4003787081843047, |
|
"grad_norm": 0.11901983618736267, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4236, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.402061855670103, |
|
"grad_norm": 0.11173246055841446, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3457, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.4037450031559016, |
|
"grad_norm": 0.10333243012428284, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2659, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.4054281506417, |
|
"grad_norm": 0.13903972506523132, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1946, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.4071112981274985, |
|
"grad_norm": 0.11832322925329208, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3223, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.4087944456132968, |
|
"grad_norm": 0.10906493663787842, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4316, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.4104775930990954, |
|
"grad_norm": 0.10980133712291718, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3525, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.4121607405848937, |
|
"grad_norm": 0.12958386540412903, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3081, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.413843888070692, |
|
"grad_norm": 0.1342059075832367, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3564, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.4155270355564906, |
|
"grad_norm": 0.1362716406583786, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2435, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.4172101830422892, |
|
"grad_norm": 0.10814797878265381, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3373, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.4188933305280875, |
|
"grad_norm": 0.111182801425457, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2921, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.4205764780138859, |
|
"grad_norm": 0.11161399632692337, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3816, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.4222596254996844, |
|
"grad_norm": 0.1261526495218277, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4082, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.4239427729854828, |
|
"grad_norm": 0.10805182158946991, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3622, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.4256259204712813, |
|
"grad_norm": 0.12294517457485199, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3638, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.4273090679570797, |
|
"grad_norm": 0.10903607308864594, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3484, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.4289922154428782, |
|
"grad_norm": 0.12460491806268692, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2046, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.4306753629286766, |
|
"grad_norm": 0.13793089985847473, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2437, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4323585104144752, |
|
"grad_norm": 0.11700379103422165, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2288, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.4340416579002735, |
|
"grad_norm": 0.11343109607696533, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2501, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.435724805386072, |
|
"grad_norm": 0.10918331891298294, |
|
"learning_rate": 1e-05, |
|
"loss": 2.47, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.4374079528718704, |
|
"grad_norm": 0.12782573699951172, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2281, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.4390911003576687, |
|
"grad_norm": 0.12039442360401154, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2766, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.4407742478434673, |
|
"grad_norm": 0.13949096202850342, |
|
"learning_rate": 1e-05, |
|
"loss": 2.198, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.4424573953292656, |
|
"grad_norm": 0.13327306509017944, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2253, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.4441405428150642, |
|
"grad_norm": 0.1229238212108612, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3147, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.4458236903008626, |
|
"grad_norm": 0.13407859206199646, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2532, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.4475068377866611, |
|
"grad_norm": 0.1280384659767151, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3174, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4491899852724595, |
|
"grad_norm": 0.1532362997531891, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1671, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.450873132758258, |
|
"grad_norm": 0.1134854182600975, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3607, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.4525562802440564, |
|
"grad_norm": 0.11682198196649551, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4041, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.454239427729855, |
|
"grad_norm": 0.11356412619352341, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2756, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.4559225752156533, |
|
"grad_norm": 0.11278104037046432, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2983, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.4576057227014516, |
|
"grad_norm": 0.13442599773406982, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2593, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.4592888701872502, |
|
"grad_norm": 0.1254800707101822, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3213, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.4609720176730487, |
|
"grad_norm": 0.12374315410852432, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4221, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.462655165158847, |
|
"grad_norm": 0.13577024638652802, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2473, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.4643383126446454, |
|
"grad_norm": 0.12822799384593964, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3057, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.466021460130444, |
|
"grad_norm": 0.1283286213874817, |
|
"learning_rate": 1e-05, |
|
"loss": 2.374, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.4677046076162423, |
|
"grad_norm": 0.12054271996021271, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3369, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.469387755102041, |
|
"grad_norm": 0.127189502120018, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3167, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.4710709025878392, |
|
"grad_norm": 0.12767814099788666, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2695, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.4727540500736378, |
|
"grad_norm": 0.12026406079530716, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3313, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.4744371975594361, |
|
"grad_norm": 0.13317981362342834, |
|
"learning_rate": 1e-05, |
|
"loss": 2.209, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.4761203450452345, |
|
"grad_norm": 0.12904947996139526, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2344, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.477803492531033, |
|
"grad_norm": 0.13126946985721588, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2888, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.4794866400168316, |
|
"grad_norm": 0.128869891166687, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1996, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.48116978750263, |
|
"grad_norm": 0.1279861181974411, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1873, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4828529349884283, |
|
"grad_norm": 0.11732237040996552, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3259, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.4845360824742269, |
|
"grad_norm": 0.1279248595237732, |
|
"learning_rate": 1e-05, |
|
"loss": 2.386, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.4862192299600252, |
|
"grad_norm": 0.13578535616397858, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2937, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.4879023774458238, |
|
"grad_norm": 0.13534606993198395, |
|
"learning_rate": 1e-05, |
|
"loss": 2.239, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.489585524931622, |
|
"grad_norm": 0.12359879165887833, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3572, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.4912686724174207, |
|
"grad_norm": 0.1236250028014183, |
|
"learning_rate": 1e-05, |
|
"loss": 2.188, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.492951819903219, |
|
"grad_norm": 0.12695659697055817, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2637, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.4946349673890174, |
|
"grad_norm": 0.1281343400478363, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2961, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.496318114874816, |
|
"grad_norm": 0.12446150928735733, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3362, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.4980012623606145, |
|
"grad_norm": 0.12564988434314728, |
|
"learning_rate": 1e-05, |
|
"loss": 2.288, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4996844098464128, |
|
"grad_norm": 0.14049400389194489, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2867, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.5013675573322112, |
|
"grad_norm": 0.12252961844205856, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3511, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.5030507048180097, |
|
"grad_norm": 0.15993735194206238, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0931, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.504733852303808, |
|
"grad_norm": 0.13673749566078186, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2998, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.5064169997896064, |
|
"grad_norm": 0.11770147830247879, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2883, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.508100147275405, |
|
"grad_norm": 0.11792504787445068, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1893, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.5097832947612035, |
|
"grad_norm": 0.1405222862958908, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2645, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.5114664422470019, |
|
"grad_norm": 0.1401311457157135, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2085, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.5131495897328002, |
|
"grad_norm": 0.14068666100502014, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2711, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.5148327372185988, |
|
"grad_norm": 0.12995976209640503, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2883, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5165158847043974, |
|
"grad_norm": 0.12454178184270859, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2515, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.5181990321901957, |
|
"grad_norm": 0.12165191769599915, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3621, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.519882179675994, |
|
"grad_norm": 0.1413601189851761, |
|
"learning_rate": 1e-05, |
|
"loss": 2.27, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.5215653271617926, |
|
"grad_norm": 0.13545894622802734, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3008, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.523248474647591, |
|
"grad_norm": 0.12211872637271881, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3921, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.5249316221333893, |
|
"grad_norm": 0.13053253293037415, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2434, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.5266147696191879, |
|
"grad_norm": 0.12977124750614166, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2366, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.5282979171049864, |
|
"grad_norm": 0.13451719284057617, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3154, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.5299810645907848, |
|
"grad_norm": 0.11067184805870056, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3296, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.531664212076583, |
|
"grad_norm": 0.12281223386526108, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2479, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5333473595623817, |
|
"grad_norm": 0.12240397185087204, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3416, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.5350305070481802, |
|
"grad_norm": 0.14465166628360748, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1801, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.5367136545339786, |
|
"grad_norm": 0.1263197958469391, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2583, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.538396802019777, |
|
"grad_norm": 0.14653970301151276, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2939, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.5400799495055755, |
|
"grad_norm": 0.1311267763376236, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2517, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.5417630969913738, |
|
"grad_norm": 0.13173674046993256, |
|
"learning_rate": 1e-05, |
|
"loss": 2.309, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.5434462444771722, |
|
"grad_norm": 0.13140322268009186, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1447, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.5451293919629707, |
|
"grad_norm": 0.12431302666664124, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3315, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.5468125394487693, |
|
"grad_norm": 0.14358630776405334, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2634, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.5484956869345676, |
|
"grad_norm": 0.1297353357076645, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2489, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.550178834420366, |
|
"grad_norm": 0.12963449954986572, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1533, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.5518619819061645, |
|
"grad_norm": 0.11558603495359421, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2688, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.553545129391963, |
|
"grad_norm": 0.14222054183483124, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2385, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.5552282768777614, |
|
"grad_norm": 0.1376868486404419, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2051, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.5569114243635598, |
|
"grad_norm": 0.12993879616260529, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3445, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.5585945718493583, |
|
"grad_norm": 0.14503213763237, |
|
"learning_rate": 1e-05, |
|
"loss": 2.215, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.560277719335157, |
|
"grad_norm": 0.1302722692489624, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1945, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.561960866820955, |
|
"grad_norm": 0.13545845448970795, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3059, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.5636440143067536, |
|
"grad_norm": 0.12279404699802399, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3511, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.5653271617925522, |
|
"grad_norm": 0.13220550119876862, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2837, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5670103092783505, |
|
"grad_norm": 0.1407599151134491, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2905, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.5686934567641488, |
|
"grad_norm": 0.12597431242465973, |
|
"learning_rate": 1e-05, |
|
"loss": 2.366, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.5703766042499474, |
|
"grad_norm": 0.12998835742473602, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1067, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.572059751735746, |
|
"grad_norm": 0.14708921313285828, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2687, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.5737428992215443, |
|
"grad_norm": 0.13333402574062347, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3381, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.5754260467073427, |
|
"grad_norm": 0.14774633944034576, |
|
"learning_rate": 1e-05, |
|
"loss": 2.163, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.5771091941931412, |
|
"grad_norm": 0.1283462792634964, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3892, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.5787923416789398, |
|
"grad_norm": 0.12011823058128357, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2758, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.580475489164738, |
|
"grad_norm": 0.11618427187204361, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2545, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.5821586366505365, |
|
"grad_norm": 0.12683863937854767, |
|
"learning_rate": 1e-05, |
|
"loss": 2.291, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.583841784136335, |
|
"grad_norm": 0.13158243894577026, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3066, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.5855249316221334, |
|
"grad_norm": 0.13269281387329102, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3442, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.5872080791079317, |
|
"grad_norm": 0.14047692716121674, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3092, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.5888912265937303, |
|
"grad_norm": 0.1387140154838562, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1482, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.5905743740795288, |
|
"grad_norm": 0.13907848298549652, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3484, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.5922575215653272, |
|
"grad_norm": 0.13114407658576965, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2195, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.5939406690511255, |
|
"grad_norm": 0.1368924379348755, |
|
"learning_rate": 1e-05, |
|
"loss": 2.322, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.595623816536924, |
|
"grad_norm": 0.141913041472435, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2336, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.5973069640227227, |
|
"grad_norm": 0.13295848667621613, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3081, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.5989901115085208, |
|
"grad_norm": 0.12306110560894012, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3354, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6006732589943193, |
|
"grad_norm": 0.12122649699449539, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2839, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.602356406480118, |
|
"grad_norm": 0.13046576082706451, |
|
"learning_rate": 1e-05, |
|
"loss": 2.385, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.6040395539659162, |
|
"grad_norm": 0.1272476315498352, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4153, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.6057227014517146, |
|
"grad_norm": 0.13073799014091492, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2854, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.6074058489375131, |
|
"grad_norm": 0.12583526968955994, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3318, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.6090889964233117, |
|
"grad_norm": 0.1474972665309906, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2542, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.61077214390911, |
|
"grad_norm": 0.13445797562599182, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3645, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.6124552913949084, |
|
"grad_norm": 0.13466110825538635, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3394, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.614138438880707, |
|
"grad_norm": 0.13525816798210144, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2471, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.6158215863665055, |
|
"grad_norm": 0.1377459019422531, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2478, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.6175047338523036, |
|
"grad_norm": 0.1405583918094635, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2146, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.6191878813381022, |
|
"grad_norm": 0.11743167042732239, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3555, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.6208710288239008, |
|
"grad_norm": 0.13644517958164215, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2155, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.6225541763096991, |
|
"grad_norm": 0.12609997391700745, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2593, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.6242373237954975, |
|
"grad_norm": 0.13276560604572296, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1737, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.625920471281296, |
|
"grad_norm": 0.13567714393138885, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3336, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.6276036187670946, |
|
"grad_norm": 0.12559200823307037, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3494, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.629286766252893, |
|
"grad_norm": 0.13090649247169495, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1851, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.6309699137386913, |
|
"grad_norm": 0.15777987241744995, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2205, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.1433715522289276, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2295, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6343362087102884, |
|
"grad_norm": 0.1218508929014206, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3762, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.6360193561960865, |
|
"grad_norm": 0.14540942013263702, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2139, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.637702503681885, |
|
"grad_norm": 0.14829136431217194, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2871, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.6393856511676836, |
|
"grad_norm": 0.12728969752788544, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2917, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.641068798653482, |
|
"grad_norm": 0.1471221148967743, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2012, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.6427519461392803, |
|
"grad_norm": 0.13320200145244598, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2771, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.644435093625079, |
|
"grad_norm": 0.1363966464996338, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3086, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.6461182411108775, |
|
"grad_norm": 0.13870568573474884, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2898, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.6478013885966758, |
|
"grad_norm": 0.15152350068092346, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2994, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"grad_norm": 0.13830937445163727, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2108, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.6511676835682727, |
|
"grad_norm": 0.15544220805168152, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4043, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.6528508310540713, |
|
"grad_norm": 0.13135483860969543, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2373, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.6545339785398696, |
|
"grad_norm": 0.12355194985866547, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4163, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.656217126025668, |
|
"grad_norm": 0.14110660552978516, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2031, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.6579002735114665, |
|
"grad_norm": 0.13077346980571747, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3601, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.6595834209972649, |
|
"grad_norm": 0.14212660491466522, |
|
"learning_rate": 1e-05, |
|
"loss": 2.197, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.6612665684830632, |
|
"grad_norm": 0.12336140871047974, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4146, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.6629497159688618, |
|
"grad_norm": 0.15291054546833038, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2764, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.6646328634546603, |
|
"grad_norm": 0.1272605061531067, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2703, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.6663160109404587, |
|
"grad_norm": 0.13462689518928528, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3188, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.667999158426257, |
|
"grad_norm": 0.13290910422801971, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2172, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.6696823059120556, |
|
"grad_norm": 0.15105758607387543, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2156, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.6713654533978541, |
|
"grad_norm": 0.13150456547737122, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3362, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.6730486008836525, |
|
"grad_norm": 0.13139204680919647, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3833, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.6747317483694508, |
|
"grad_norm": 0.14886420965194702, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1893, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.6764148958552494, |
|
"grad_norm": 0.13227102160453796, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4055, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.6780980433410477, |
|
"grad_norm": 0.12545333802700043, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3311, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.679781190826846, |
|
"grad_norm": 0.13391169905662537, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3022, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.6814643383126446, |
|
"grad_norm": 0.13013269007205963, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2318, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.6831474857984432, |
|
"grad_norm": 0.1331031173467636, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3022, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6848306332842415, |
|
"grad_norm": 0.14438873529434204, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2388, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.6865137807700399, |
|
"grad_norm": 0.1422380954027176, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3145, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.6881969282558384, |
|
"grad_norm": 0.13909044861793518, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2249, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.689880075741637, |
|
"grad_norm": 0.14147858321666718, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3179, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.6915632232274354, |
|
"grad_norm": 0.13203288614749908, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1912, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.6932463707132337, |
|
"grad_norm": 0.14461839199066162, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1982, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.6949295181990323, |
|
"grad_norm": 0.14539021253585815, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2917, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.6966126656848306, |
|
"grad_norm": 0.14774973690509796, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2639, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.698295813170629, |
|
"grad_norm": 0.14927157759666443, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1956, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.6999789606564275, |
|
"grad_norm": 0.1286613643169403, |
|
"learning_rate": 1e-05, |
|
"loss": 2.292, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.701662108142226, |
|
"grad_norm": 0.12883049249649048, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2573, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.7033452556280244, |
|
"grad_norm": 0.14129754900932312, |
|
"learning_rate": 1e-05, |
|
"loss": 2.334, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.7050284031138228, |
|
"grad_norm": 0.13216479122638702, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2664, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.7067115505996213, |
|
"grad_norm": 0.12611788511276245, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3159, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.7083946980854199, |
|
"grad_norm": 0.14012207090854645, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4026, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.7100778455712182, |
|
"grad_norm": 0.14449255168437958, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3313, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.7117609930570166, |
|
"grad_norm": 0.15093393623828888, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2075, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.7134441405428151, |
|
"grad_norm": 0.15169350802898407, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1926, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.7151272880286135, |
|
"grad_norm": 0.13613849878311157, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3394, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.7168104355144118, |
|
"grad_norm": 0.13525283336639404, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2234, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.7184935830002104, |
|
"grad_norm": 0.1529736965894699, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1866, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.720176730486009, |
|
"grad_norm": 0.13723863661289215, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3027, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.7218598779718073, |
|
"grad_norm": 0.16251115500926971, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3428, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.7235430254576056, |
|
"grad_norm": 0.1440790742635727, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3298, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.7252261729434042, |
|
"grad_norm": 0.13486018776893616, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3826, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.7269093204292028, |
|
"grad_norm": 0.15616028010845184, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0817, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.728592467915001, |
|
"grad_norm": 0.15306299924850464, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2601, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.7302756154007994, |
|
"grad_norm": 0.14421014487743378, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1998, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.731958762886598, |
|
"grad_norm": 0.14438478648662567, |
|
"learning_rate": 1e-05, |
|
"loss": 2.262, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.7336419103723963, |
|
"grad_norm": 0.13325351476669312, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2852, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7353250578581947, |
|
"grad_norm": 0.14232920110225677, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3147, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.7370082053439933, |
|
"grad_norm": 0.1394515186548233, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2781, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.7386913528297918, |
|
"grad_norm": 0.12838682532310486, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2827, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.7403745003155902, |
|
"grad_norm": 0.15612417459487915, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3108, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.7420576478013885, |
|
"grad_norm": 0.14740139245986938, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2412, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.743740795287187, |
|
"grad_norm": 0.1541980355978012, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3156, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.7454239427729856, |
|
"grad_norm": 0.14056488871574402, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1829, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.747107090258784, |
|
"grad_norm": 0.143393874168396, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2717, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.7487902377445823, |
|
"grad_norm": 0.14296631515026093, |
|
"learning_rate": 1e-05, |
|
"loss": 2.342, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.7504733852303809, |
|
"grad_norm": 0.13753627240657806, |
|
"learning_rate": 1e-05, |
|
"loss": 2.324, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.7521565327161792, |
|
"grad_norm": 0.13361461460590363, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3549, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.7538396802019776, |
|
"grad_norm": 0.16176526248455048, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0996, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.7555228276877761, |
|
"grad_norm": 0.14512574672698975, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3289, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.7572059751735747, |
|
"grad_norm": 0.14329467713832855, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2429, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.758889122659373, |
|
"grad_norm": 0.1415308713912964, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2976, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.7605722701451714, |
|
"grad_norm": 0.13017630577087402, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3142, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.76225541763097, |
|
"grad_norm": 0.14865103363990784, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2659, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.7639385651167685, |
|
"grad_norm": 0.13973674178123474, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1975, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.7656217126025668, |
|
"grad_norm": 0.12378077954053879, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4469, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.7673048600883652, |
|
"grad_norm": 0.13462629914283752, |
|
"learning_rate": 1e-05, |
|
"loss": 2.332, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7689880075741637, |
|
"grad_norm": 0.14375431835651398, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2834, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.770671155059962, |
|
"grad_norm": 0.1413864940404892, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2769, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.7723543025457604, |
|
"grad_norm": 0.15052342414855957, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2522, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.774037450031559, |
|
"grad_norm": 0.15616975724697113, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1501, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.7757205975173576, |
|
"grad_norm": 0.16257071495056152, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1545, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.777403745003156, |
|
"grad_norm": 0.13512100279331207, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2218, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.7790868924889542, |
|
"grad_norm": 0.1581428200006485, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1865, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.7807700399747528, |
|
"grad_norm": 0.13829343020915985, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3337, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.7824531874605514, |
|
"grad_norm": 0.16639141738414764, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2325, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.7841363349463497, |
|
"grad_norm": 0.1412006914615631, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3389, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.785819482432148, |
|
"grad_norm": 0.13130658864974976, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3376, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.7875026299179466, |
|
"grad_norm": 0.1495353728532791, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2666, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.789185777403745, |
|
"grad_norm": 0.15077506005764008, |
|
"learning_rate": 1e-05, |
|
"loss": 2.228, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.7908689248895433, |
|
"grad_norm": 0.1426386535167694, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2727, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.7925520723753419, |
|
"grad_norm": 0.14268244802951813, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3643, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.7942352198611404, |
|
"grad_norm": 0.14923584461212158, |
|
"learning_rate": 1e-05, |
|
"loss": 2.333, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.7959183673469388, |
|
"grad_norm": 0.15571311116218567, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3171, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.7976015148327371, |
|
"grad_norm": 0.13931907713413239, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2164, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.7992846623185357, |
|
"grad_norm": 0.1513443887233734, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2885, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.8009678098043342, |
|
"grad_norm": 0.14123128354549408, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3517, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.8026509572901326, |
|
"grad_norm": 0.16668306291103363, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1907, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.804334104775931, |
|
"grad_norm": 0.14049063622951508, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4216, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.8060172522617295, |
|
"grad_norm": 0.13806495070457458, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3367, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.8077003997475278, |
|
"grad_norm": 0.14562048017978668, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2303, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.8093835472333262, |
|
"grad_norm": 0.16803675889968872, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2404, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.8110666947191247, |
|
"grad_norm": 0.14971864223480225, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1941, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.8127498422049233, |
|
"grad_norm": 0.162116140127182, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2034, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.8144329896907216, |
|
"grad_norm": 0.1417408138513565, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2991, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.81611613717652, |
|
"grad_norm": 0.14334024488925934, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3796, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.8177992846623185, |
|
"grad_norm": 0.13600003719329834, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2322, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.8194824321481171, |
|
"grad_norm": 0.1557435244321823, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2151, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.8211655796339155, |
|
"grad_norm": 0.14444471895694733, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2778, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.8228487271197138, |
|
"grad_norm": 0.15237338840961456, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1863, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.8245318746055124, |
|
"grad_norm": 0.1488647758960724, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1194, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.8262150220913107, |
|
"grad_norm": 0.14532509446144104, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3018, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.827898169577109, |
|
"grad_norm": 0.1438300609588623, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3542, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.8295813170629076, |
|
"grad_norm": 0.13162897527217865, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3762, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.8312644645487062, |
|
"grad_norm": 0.14388734102249146, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3097, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.8329476120345045, |
|
"grad_norm": 0.1633898764848709, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1975, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.8346307595203029, |
|
"grad_norm": 0.14513400197029114, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3562, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.8363139070061014, |
|
"grad_norm": 0.1562061607837677, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2384, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.8379970544919, |
|
"grad_norm": 0.14833082258701324, |
|
"learning_rate": 1e-05, |
|
"loss": 2.199, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.8396802019776983, |
|
"grad_norm": 0.14182843267917633, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2632, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.8413633494634967, |
|
"grad_norm": 0.16517210006713867, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2719, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.8430464969492952, |
|
"grad_norm": 0.1563366949558258, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2285, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.8447296444350936, |
|
"grad_norm": 0.1349581480026245, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2998, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.846412791920892, |
|
"grad_norm": 0.14647842943668365, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2588, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.8480959394066905, |
|
"grad_norm": 0.1527308076620102, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1945, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.849779086892489, |
|
"grad_norm": 0.16208425164222717, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1692, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.8514622343782874, |
|
"grad_norm": 0.15897248685359955, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3582, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8531453818640857, |
|
"grad_norm": 0.14687612652778625, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3057, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.8548285293498843, |
|
"grad_norm": 0.1631488800048828, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2521, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.8565116768356829, |
|
"grad_norm": 0.14686156809329987, |
|
"learning_rate": 1e-05, |
|
"loss": 2.313, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.8581948243214812, |
|
"grad_norm": 0.162966787815094, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1968, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.8598779718072795, |
|
"grad_norm": 0.15387648344039917, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3059, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.861561119293078, |
|
"grad_norm": 0.1489906907081604, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2195, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.8632442667788764, |
|
"grad_norm": 0.14351260662078857, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2656, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.8649274142646748, |
|
"grad_norm": 0.16010256111621857, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3252, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.8666105617504734, |
|
"grad_norm": 0.14475148916244507, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2878, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.868293709236272, |
|
"grad_norm": 0.14097367227077484, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3716, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8699768567220703, |
|
"grad_norm": 0.15699978172779083, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1678, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.8716600042078686, |
|
"grad_norm": 0.1370065063238144, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3315, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.8733431516936672, |
|
"grad_norm": 0.1498231291770935, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2949, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.8750262991794657, |
|
"grad_norm": 0.13267523050308228, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3535, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.876709446665264, |
|
"grad_norm": 0.1453379988670349, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2791, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.8783925941510624, |
|
"grad_norm": 0.15499484539031982, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2085, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.880075741636861, |
|
"grad_norm": 0.14418251812458038, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2793, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.8817588891226595, |
|
"grad_norm": 0.13686548173427582, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4175, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.8834420366084577, |
|
"grad_norm": 0.17202888429164886, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2196, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.8851251840942562, |
|
"grad_norm": 0.1437048763036728, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2688, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8868083315800548, |
|
"grad_norm": 0.13868288695812225, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2971, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.8884914790658531, |
|
"grad_norm": 0.133874773979187, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3228, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.8901746265516515, |
|
"grad_norm": 0.15967018902301788, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2346, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 1.89185777403745, |
|
"grad_norm": 0.15074019134044647, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3577, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.8935409215232486, |
|
"grad_norm": 0.13931475579738617, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3789, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.895224069009047, |
|
"grad_norm": 0.15354882180690765, |
|
"learning_rate": 1e-05, |
|
"loss": 2.184, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.8969072164948453, |
|
"grad_norm": 0.15907764434814453, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3638, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 1.8985903639806438, |
|
"grad_norm": 0.13138049840927124, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4543, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.9002735114664424, |
|
"grad_norm": 0.14568856358528137, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3064, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 1.9019566589522405, |
|
"grad_norm": 0.1426182985305786, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3223, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.903639806438039, |
|
"grad_norm": 0.13313454389572144, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3953, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 1.9053229539238377, |
|
"grad_norm": 0.16987952589988708, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1274, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.907006101409636, |
|
"grad_norm": 0.1408863216638565, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3242, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 1.9086892488954343, |
|
"grad_norm": 0.14704225957393646, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3687, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.910372396381233, |
|
"grad_norm": 0.18410103023052216, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1222, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.9120555438670315, |
|
"grad_norm": 0.13889069855213165, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3165, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.9137386913528298, |
|
"grad_norm": 0.1532329022884369, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2913, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 1.9154218388386282, |
|
"grad_norm": 0.14806988835334778, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2239, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.9171049863244267, |
|
"grad_norm": 0.14964371919631958, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2639, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 1.9187881338102253, |
|
"grad_norm": 0.15137715637683868, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3096, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.9204712812960234, |
|
"grad_norm": 0.15892736613750458, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3163, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 1.922154428781822, |
|
"grad_norm": 0.15544387698173523, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1825, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.9238375762676205, |
|
"grad_norm": 0.14712852239608765, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2659, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 1.9255207237534189, |
|
"grad_norm": 0.1436305195093155, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3101, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.9272038712392172, |
|
"grad_norm": 0.16642406582832336, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2156, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.9288870187250158, |
|
"grad_norm": 0.16517338156700134, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2561, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.9305701662108143, |
|
"grad_norm": 0.1337500959634781, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3818, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 1.9322533136966127, |
|
"grad_norm": 0.15977586805820465, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2377, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.933936461182411, |
|
"grad_norm": 0.14951424300670624, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2269, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 1.9356196086682096, |
|
"grad_norm": 0.13450993597507477, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3442, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.9373027561540082, |
|
"grad_norm": 0.16469308733940125, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3123, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 1.9389859036398063, |
|
"grad_norm": 0.14135532081127167, |
|
"learning_rate": 1e-05, |
|
"loss": 2.387, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.9406690511256048, |
|
"grad_norm": 0.13864876329898834, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2661, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 1.9423521986114034, |
|
"grad_norm": 0.16291983425617218, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2617, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.9440353460972017, |
|
"grad_norm": 0.13341820240020752, |
|
"learning_rate": 1e-05, |
|
"loss": 2.4299, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.945718493583, |
|
"grad_norm": 0.15701517462730408, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2211, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.9474016410687987, |
|
"grad_norm": 0.16075365245342255, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1801, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 1.9490847885545972, |
|
"grad_norm": 0.15631234645843506, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2152, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.9507679360403956, |
|
"grad_norm": 0.16927126049995422, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1776, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 1.952451083526194, |
|
"grad_norm": 0.15192179381847382, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2812, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9541342310119925, |
|
"grad_norm": 0.145833820104599, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3124, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 1.955817378497791, |
|
"grad_norm": 0.16952313482761383, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1085, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.9575005259835891, |
|
"grad_norm": 0.1629469394683838, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2267, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 1.9591836734693877, |
|
"grad_norm": 0.16672489047050476, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3783, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.9608668209551863, |
|
"grad_norm": 0.14810308814048767, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3723, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.9625499684409846, |
|
"grad_norm": 0.1435479074716568, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2615, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.964233115926783, |
|
"grad_norm": 0.149140864610672, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2134, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 1.9659162634125815, |
|
"grad_norm": 0.17785809934139252, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1993, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.96759941089838, |
|
"grad_norm": 0.15931861102581024, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1807, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.9692825583841784, |
|
"grad_norm": 0.16015268862247467, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2737, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9709657058699768, |
|
"grad_norm": 0.14189362525939941, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3416, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 1.9726488533557753, |
|
"grad_norm": 0.1655077338218689, |
|
"learning_rate": 1e-05, |
|
"loss": 2.184, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.974332000841574, |
|
"grad_norm": 0.17838408052921295, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2466, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 1.9760151483273722, |
|
"grad_norm": 0.16605247557163239, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2019, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.9776982958131706, |
|
"grad_norm": 0.15444627404212952, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2382, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.9793814432989691, |
|
"grad_norm": 0.15730591118335724, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3335, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.9810645907847675, |
|
"grad_norm": 0.17332051694393158, |
|
"learning_rate": 1e-05, |
|
"loss": 2.17, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 1.9827477382705658, |
|
"grad_norm": 0.15129022300243378, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2584, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.9844308857563644, |
|
"grad_norm": 0.16302135586738586, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1904, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 1.986114033242163, |
|
"grad_norm": 0.14117322862148285, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3611, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9877971807279613, |
|
"grad_norm": 0.14415599405765533, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3503, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 1.9894803282137596, |
|
"grad_norm": 0.15894141793251038, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2253, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.9911634756995582, |
|
"grad_norm": 0.15063215792179108, |
|
"learning_rate": 1e-05, |
|
"loss": 2.303, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 1.9928466231853568, |
|
"grad_norm": 0.15843670070171356, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2959, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.9945297706711551, |
|
"grad_norm": 0.1457902193069458, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3396, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.9962129181569535, |
|
"grad_norm": 0.1694038361310959, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3169, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.997896065642752, |
|
"grad_norm": 0.16121593117713928, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2754, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 1.9995792131285504, |
|
"grad_norm": 0.16226674616336823, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2498, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.9995792131285504, |
|
"step": 1188, |
|
"total_flos": 2.494777289795961e+18, |
|
"train_loss": 2.3332799018834174, |
|
"train_runtime": 81586.2049, |
|
"train_samples_per_second": 0.932, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1188, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.494777289795961e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|