{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9982466393921683,
  "eval_steps": 500,
  "global_step": 427,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0023378141437755697,
      "grad_norm": 27.288526825189052,
      "learning_rate": 0.0,
      "loss": 2.2488,
      "step": 1
    },
    {
      "epoch": 0.004675628287551139,
      "grad_norm": 18.402597061645437,
      "learning_rate": 1.3511907721365987e-06,
      "loss": 1.7216,
      "step": 2
    },
    {
      "epoch": 0.0070134424313267095,
      "grad_norm": 20.827793127279097,
      "learning_rate": 2.1415867051569737e-06,
      "loss": 1.8096,
      "step": 3
    },
    {
      "epoch": 0.009351256575102279,
      "grad_norm": 17.887138033549412,
      "learning_rate": 2.7023815442731975e-06,
      "loss": 1.7273,
      "step": 4
    },
    {
      "epoch": 0.011689070718877849,
      "grad_norm": 13.059331628307975,
      "learning_rate": 3.137367815376517e-06,
      "loss": 1.3944,
      "step": 5
    },
    {
      "epoch": 0.014026884862653419,
      "grad_norm": 12.915220380638836,
      "learning_rate": 3.4927774772935725e-06,
      "loss": 1.3309,
      "step": 6
    },
    {
      "epoch": 0.01636469900642899,
      "grad_norm": 9.438170554483921,
      "learning_rate": 3.7932720647964956e-06,
      "loss": 1.1377,
      "step": 7
    },
    {
      "epoch": 0.018702513150204558,
      "grad_norm": 22.34498398581149,
      "learning_rate": 4.053572316409796e-06,
      "loss": 1.276,
      "step": 8
    },
    {
      "epoch": 0.02104032729398013,
      "grad_norm": 11.688684740044469,
      "learning_rate": 4.2831734103139475e-06,
      "loss": 1.1626,
      "step": 9
    },
    {
      "epoch": 0.023378141437755698,
      "grad_norm": 7.399035066119634,
      "learning_rate": 4.488558587513117e-06,
      "loss": 1.0172,
      "step": 10
    },
    {
      "epoch": 0.02571595558153127,
      "grad_norm": 6.844826809549996,
      "learning_rate": 4.674352079940294e-06,
      "loss": 1.0159,
      "step": 11
    },
    {
      "epoch": 0.028053769725306838,
      "grad_norm": 7.435042287761434,
      "learning_rate": 4.843968249430172e-06,
      "loss": 1.0191,
      "step": 12
    },
    {
      "epoch": 0.030391583869082407,
      "grad_norm": 8.142142185574825,
      "learning_rate": 5e-06,
      "loss": 1.0605,
      "step": 13
    },
    {
      "epoch": 0.03272939801285798,
      "grad_norm": 6.515223941306322,
      "learning_rate": 5e-06,
      "loss": 0.9758,
      "step": 14
    },
    {
      "epoch": 0.03506721215663355,
      "grad_norm": 6.0290403123052965,
      "learning_rate": 4.98792270531401e-06,
      "loss": 0.995,
      "step": 15
    },
    {
      "epoch": 0.037405026300409115,
      "grad_norm": 5.319952909218166,
      "learning_rate": 4.97584541062802e-06,
      "loss": 0.9515,
      "step": 16
    },
    {
      "epoch": 0.03974284044418469,
      "grad_norm": 5.748688589731486,
      "learning_rate": 4.963768115942029e-06,
      "loss": 1.0102,
      "step": 17
    },
    {
      "epoch": 0.04208065458796026,
      "grad_norm": 5.855208805312462,
      "learning_rate": 4.951690821256039e-06,
      "loss": 0.9919,
      "step": 18
    },
    {
      "epoch": 0.04441846873173583,
      "grad_norm": 5.223174635069425,
      "learning_rate": 4.939613526570048e-06,
      "loss": 0.9656,
      "step": 19
    },
    {
      "epoch": 0.046756282875511396,
      "grad_norm": 6.065299105647371,
      "learning_rate": 4.927536231884059e-06,
      "loss": 0.9285,
      "step": 20
    },
    {
      "epoch": 0.049094097019286964,
      "grad_norm": 6.07241545277926,
      "learning_rate": 4.915458937198068e-06,
      "loss": 1.0017,
      "step": 21
    },
    {
      "epoch": 0.05143191116306254,
      "grad_norm": 5.246067572533348,
      "learning_rate": 4.903381642512078e-06,
      "loss": 1.0095,
      "step": 22
    },
    {
      "epoch": 0.05376972530683811,
      "grad_norm": 5.990327031501364,
      "learning_rate": 4.891304347826087e-06,
      "loss": 0.9395,
      "step": 23
    },
    {
      "epoch": 0.056107539450613676,
      "grad_norm": 5.165210682799403,
      "learning_rate": 4.879227053140097e-06,
      "loss": 0.9407,
      "step": 24
    },
    {
      "epoch": 0.058445353594389245,
      "grad_norm": 5.126974516324422,
      "learning_rate": 4.867149758454107e-06,
      "loss": 0.949,
      "step": 25
    },
    {
      "epoch": 0.06078316773816481,
      "grad_norm": 5.363449994215859,
      "learning_rate": 4.855072463768117e-06,
      "loss": 0.9574,
      "step": 26
    },
    {
      "epoch": 0.06312098188194039,
      "grad_norm": 5.071572733466881,
      "learning_rate": 4.8429951690821256e-06,
      "loss": 0.9008,
      "step": 27
    },
    {
      "epoch": 0.06545879602571596,
      "grad_norm": 5.410969697138812,
      "learning_rate": 4.830917874396135e-06,
      "loss": 0.9422,
      "step": 28
    },
    {
      "epoch": 0.06779661016949153,
      "grad_norm": 4.602908185422313,
      "learning_rate": 4.818840579710145e-06,
      "loss": 0.8725,
      "step": 29
    },
    {
      "epoch": 0.0701344243132671,
      "grad_norm": 4.8619656541333836,
      "learning_rate": 4.806763285024155e-06,
      "loss": 0.9414,
      "step": 30
    },
    {
      "epoch": 0.07247223845704266,
      "grad_norm": 5.730566605120532,
      "learning_rate": 4.794685990338165e-06,
      "loss": 0.9039,
      "step": 31
    },
    {
      "epoch": 0.07481005260081823,
      "grad_norm": 5.038586687201418,
      "learning_rate": 4.782608695652174e-06,
      "loss": 0.9208,
      "step": 32
    },
    {
      "epoch": 0.0771478667445938,
      "grad_norm": 5.0552868730926335,
      "learning_rate": 4.770531400966184e-06,
      "loss": 0.8567,
      "step": 33
    },
    {
      "epoch": 0.07948568088836938,
      "grad_norm": 4.718130108871858,
      "learning_rate": 4.758454106280194e-06,
      "loss": 0.9145,
      "step": 34
    },
    {
      "epoch": 0.08182349503214495,
      "grad_norm": 5.492137838168964,
      "learning_rate": 4.746376811594204e-06,
      "loss": 0.8846,
      "step": 35
    },
    {
      "epoch": 0.08416130917592052,
      "grad_norm": 4.796280317690393,
      "learning_rate": 4.7342995169082125e-06,
      "loss": 0.8973,
      "step": 36
    },
    {
      "epoch": 0.08649912331969609,
      "grad_norm": 5.097877561946411,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.9225,
      "step": 37
    },
    {
      "epoch": 0.08883693746347165,
      "grad_norm": 5.149693059570453,
      "learning_rate": 4.710144927536232e-06,
      "loss": 0.9087,
      "step": 38
    },
    {
      "epoch": 0.09117475160724722,
      "grad_norm": 4.769756789814799,
      "learning_rate": 4.698067632850242e-06,
      "loss": 0.8372,
      "step": 39
    },
    {
      "epoch": 0.09351256575102279,
      "grad_norm": 4.303036243240873,
      "learning_rate": 4.6859903381642516e-06,
      "loss": 0.899,
      "step": 40
    },
    {
      "epoch": 0.09585037989479836,
      "grad_norm": 5.053977102743315,
      "learning_rate": 4.673913043478261e-06,
      "loss": 0.8242,
      "step": 41
    },
    {
      "epoch": 0.09818819403857393,
      "grad_norm": 4.448509206619331,
      "learning_rate": 4.661835748792271e-06,
      "loss": 0.8513,
      "step": 42
    },
    {
      "epoch": 0.1005260081823495,
      "grad_norm": 4.8321447335981595,
      "learning_rate": 4.649758454106281e-06,
      "loss": 0.8224,
      "step": 43
    },
    {
      "epoch": 0.10286382232612508,
      "grad_norm": 5.416510574830531,
      "learning_rate": 4.637681159420291e-06,
      "loss": 0.9078,
      "step": 44
    },
    {
      "epoch": 0.10520163646990065,
      "grad_norm": 5.548877279459332,
      "learning_rate": 4.6256038647342995e-06,
      "loss": 0.9292,
      "step": 45
    },
    {
      "epoch": 0.10753945061367622,
      "grad_norm": 5.023304416916682,
      "learning_rate": 4.613526570048309e-06,
      "loss": 0.8678,
      "step": 46
    },
    {
      "epoch": 0.10987726475745178,
      "grad_norm": 5.3492127097713995,
      "learning_rate": 4.601449275362319e-06,
      "loss": 0.8999,
      "step": 47
    },
    {
      "epoch": 0.11221507890122735,
      "grad_norm": 4.59060914495858,
      "learning_rate": 4.589371980676329e-06,
      "loss": 0.8611,
      "step": 48
    },
    {
      "epoch": 0.11455289304500292,
      "grad_norm": 4.659978410728117,
      "learning_rate": 4.5772946859903385e-06,
      "loss": 0.8553,
      "step": 49
    },
    {
      "epoch": 0.11689070718877849,
      "grad_norm": 4.869606947497931,
      "learning_rate": 4.565217391304348e-06,
      "loss": 0.8353,
      "step": 50
    },
    {
      "epoch": 0.11922852133255406,
      "grad_norm": 4.595092369703616,
      "learning_rate": 4.553140096618358e-06,
      "loss": 0.8852,
      "step": 51
    },
    {
      "epoch": 0.12156633547632963,
      "grad_norm": 4.846379704504629,
      "learning_rate": 4.541062801932368e-06,
      "loss": 0.8516,
      "step": 52
    },
    {
      "epoch": 0.12390414962010521,
      "grad_norm": 4.762019560202168,
      "learning_rate": 4.5289855072463775e-06,
      "loss": 0.8621,
      "step": 53
    },
    {
      "epoch": 0.12624196376388078,
      "grad_norm": 4.823419642191392,
      "learning_rate": 4.516908212560387e-06,
      "loss": 0.849,
      "step": 54
    },
    {
      "epoch": 0.12857977790765635,
      "grad_norm": 4.678982332878243,
      "learning_rate": 4.504830917874396e-06,
      "loss": 0.8595,
      "step": 55
    },
    {
      "epoch": 0.1309175920514319,
      "grad_norm": 4.695208099270892,
      "learning_rate": 4.492753623188406e-06,
      "loss": 0.918,
      "step": 56
    },
    {
      "epoch": 0.13325540619520748,
      "grad_norm": 4.692012801651267,
      "learning_rate": 4.480676328502416e-06,
      "loss": 0.8879,
      "step": 57
    },
    {
      "epoch": 0.13559322033898305,
      "grad_norm": 4.459981999724462,
      "learning_rate": 4.4685990338164255e-06,
      "loss": 0.8878,
      "step": 58
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 4.2801494436823,
      "learning_rate": 4.456521739130435e-06,
      "loss": 0.8695,
      "step": 59
    },
    {
      "epoch": 0.1402688486265342,
      "grad_norm": 4.86123317504702,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.839,
      "step": 60
    },
    {
      "epoch": 0.14260666277030976,
      "grad_norm": 4.621713656381368,
      "learning_rate": 4.432367149758455e-06,
      "loss": 0.8264,
      "step": 61
    },
    {
      "epoch": 0.14494447691408532,
      "grad_norm": 4.437318825045428,
      "learning_rate": 4.4202898550724645e-06,
      "loss": 0.8575,
      "step": 62
    },
    {
      "epoch": 0.1472822910578609,
      "grad_norm": 4.191896550350781,
      "learning_rate": 4.408212560386474e-06,
      "loss": 0.8231,
      "step": 63
    },
    {
      "epoch": 0.14962010520163646,
      "grad_norm": 4.934485372283743,
      "learning_rate": 4.396135265700483e-06,
      "loss": 0.908,
      "step": 64
    },
    {
      "epoch": 0.15195791934541203,
      "grad_norm": 5.164473939972992,
      "learning_rate": 4.384057971014493e-06,
      "loss": 0.8157,
      "step": 65
    },
    {
      "epoch": 0.1542957334891876,
      "grad_norm": 4.6359554134854655,
      "learning_rate": 4.371980676328503e-06,
      "loss": 0.8553,
      "step": 66
    },
    {
      "epoch": 0.15663354763296317,
      "grad_norm": 4.586287061115779,
      "learning_rate": 4.3599033816425124e-06,
      "loss": 0.8434,
      "step": 67
    },
    {
      "epoch": 0.15897136177673876,
      "grad_norm": 4.8424129486531,
      "learning_rate": 4.347826086956522e-06,
      "loss": 0.8788,
      "step": 68
    },
    {
      "epoch": 0.16130917592051433,
      "grad_norm": 6.155570830239365,
      "learning_rate": 4.335748792270532e-06,
      "loss": 0.9687,
      "step": 69
    },
    {
      "epoch": 0.1636469900642899,
      "grad_norm": 4.812494847678857,
      "learning_rate": 4.323671497584541e-06,
      "loss": 0.9001,
      "step": 70
    },
    {
      "epoch": 0.16598480420806547,
      "grad_norm": 4.5207315366098255,
      "learning_rate": 4.3115942028985515e-06,
      "loss": 0.8164,
      "step": 71
    },
    {
      "epoch": 0.16832261835184104,
      "grad_norm": 4.468118689699742,
      "learning_rate": 4.299516908212561e-06,
      "loss": 0.828,
      "step": 72
    },
    {
      "epoch": 0.1706604324956166,
      "grad_norm": 4.957803820804726,
      "learning_rate": 4.28743961352657e-06,
      "loss": 0.8509,
      "step": 73
    },
    {
      "epoch": 0.17299824663939217,
      "grad_norm": 4.994668979616406,
      "learning_rate": 4.27536231884058e-06,
      "loss": 0.8264,
      "step": 74
    },
    {
      "epoch": 0.17533606078316774,
      "grad_norm": 5.051317651149785,
      "learning_rate": 4.26328502415459e-06,
      "loss": 0.8575,
      "step": 75
    },
    {
      "epoch": 0.1776738749269433,
      "grad_norm": 4.982871471593161,
      "learning_rate": 4.251207729468599e-06,
      "loss": 0.7766,
      "step": 76
    },
    {
      "epoch": 0.18001168907071888,
      "grad_norm": 4.812654963388801,
      "learning_rate": 4.239130434782609e-06,
      "loss": 0.842,
      "step": 77
    },
    {
      "epoch": 0.18234950321449445,
      "grad_norm": 4.849638872368005,
      "learning_rate": 4.227053140096619e-06,
      "loss": 0.8493,
      "step": 78
    },
    {
      "epoch": 0.18468731735827001,
      "grad_norm": 4.941754403496056,
      "learning_rate": 4.214975845410628e-06,
      "loss": 0.8705,
      "step": 79
    },
    {
      "epoch": 0.18702513150204558,
      "grad_norm": 4.118521255369774,
      "learning_rate": 4.202898550724638e-06,
      "loss": 0.9022,
      "step": 80
    },
    {
      "epoch": 0.18936294564582115,
      "grad_norm": 5.048580106033392,
      "learning_rate": 4.190821256038647e-06,
      "loss": 0.8431,
      "step": 81
    },
    {
      "epoch": 0.19170075978959672,
      "grad_norm": 5.383766123063546,
      "learning_rate": 4.178743961352658e-06,
      "loss": 0.8892,
      "step": 82
    },
    {
      "epoch": 0.1940385739333723,
      "grad_norm": 4.850111002487489,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.9147,
      "step": 83
    },
    {
      "epoch": 0.19637638807714786,
      "grad_norm": 4.703827358788699,
      "learning_rate": 4.154589371980677e-06,
      "loss": 0.8407,
      "step": 84
    },
    {
      "epoch": 0.19871420222092342,
      "grad_norm": 4.5132494951253275,
      "learning_rate": 4.142512077294686e-06,
      "loss": 0.859,
      "step": 85
    },
    {
      "epoch": 0.201052016364699,
      "grad_norm": 4.425801289741148,
      "learning_rate": 4.130434782608696e-06,
      "loss": 0.8643,
      "step": 86
    },
    {
      "epoch": 0.2033898305084746,
      "grad_norm": 4.6519866473202285,
      "learning_rate": 4.118357487922706e-06,
      "loss": 0.8559,
      "step": 87
    },
    {
      "epoch": 0.20572764465225016,
      "grad_norm": 4.271767242791549,
      "learning_rate": 4.106280193236716e-06,
      "loss": 0.8115,
      "step": 88
    },
    {
      "epoch": 0.20806545879602573,
      "grad_norm": 5.056579518750136,
      "learning_rate": 4.0942028985507246e-06,
      "loss": 0.8447,
      "step": 89
    },
    {
      "epoch": 0.2104032729398013,
      "grad_norm": 4.075100416572746,
      "learning_rate": 4.082125603864734e-06,
      "loss": 0.7837,
      "step": 90
    },
    {
      "epoch": 0.21274108708357686,
      "grad_norm": 4.393779666632264,
      "learning_rate": 4.070048309178744e-06,
      "loss": 0.8368,
      "step": 91
    },
    {
      "epoch": 0.21507890122735243,
      "grad_norm": 4.322824034406939,
      "learning_rate": 4.057971014492754e-06,
      "loss": 0.7942,
      "step": 92
    },
    {
      "epoch": 0.217416715371128,
      "grad_norm": 4.691982719838354,
      "learning_rate": 4.045893719806764e-06,
      "loss": 0.8384,
      "step": 93
    },
    {
      "epoch": 0.21975452951490357,
      "grad_norm": 4.749714290659545,
      "learning_rate": 4.033816425120773e-06,
      "loss": 0.86,
      "step": 94
    },
    {
      "epoch": 0.22209234365867914,
      "grad_norm": 4.49073749526097,
      "learning_rate": 4.021739130434783e-06,
      "loss": 0.8796,
      "step": 95
    },
    {
      "epoch": 0.2244301578024547,
      "grad_norm": 4.612026680332374,
      "learning_rate": 4.009661835748793e-06,
      "loss": 0.7836,
      "step": 96
    },
    {
      "epoch": 0.22676797194623027,
      "grad_norm": 4.5466671401291165,
      "learning_rate": 3.997584541062803e-06,
      "loss": 0.8213,
      "step": 97
    },
    {
      "epoch": 0.22910578609000584,
      "grad_norm": 4.578959418279228,
      "learning_rate": 3.9855072463768115e-06,
      "loss": 0.8302,
      "step": 98
    },
    {
      "epoch": 0.2314436002337814,
      "grad_norm": 4.471310182272502,
      "learning_rate": 3.973429951690821e-06,
      "loss": 0.8386,
      "step": 99
    },
    {
      "epoch": 0.23378141437755698,
      "grad_norm": 4.444066950873127,
      "learning_rate": 3.961352657004831e-06,
      "loss": 0.8672,
      "step": 100
    },
    {
      "epoch": 0.23611922852133255,
      "grad_norm": 4.08994098536812,
      "learning_rate": 3.949275362318841e-06,
      "loss": 0.7914,
      "step": 101
    },
    {
      "epoch": 0.23845704266510812,
      "grad_norm": 5.867972858556011,
      "learning_rate": 3.9371980676328506e-06,
      "loss": 0.834,
      "step": 102
    },
    {
      "epoch": 0.24079485680888368,
      "grad_norm": 4.33178424044995,
      "learning_rate": 3.92512077294686e-06,
      "loss": 0.8312,
      "step": 103
    },
    {
      "epoch": 0.24313267095265925,
      "grad_norm": 4.422360019571021,
      "learning_rate": 3.91304347826087e-06,
      "loss": 0.8054,
      "step": 104
    },
    {
      "epoch": 0.24547048509643482,
      "grad_norm": 4.540760031449362,
      "learning_rate": 3.90096618357488e-06,
      "loss": 0.8011,
      "step": 105
    },
    {
      "epoch": 0.24780829924021042,
      "grad_norm": 4.577644817701169,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.7851,
      "step": 106
    },
    {
      "epoch": 0.25014611338398596,
      "grad_norm": 4.750903595759052,
      "learning_rate": 3.8768115942028985e-06,
      "loss": 0.8496,
      "step": 107
    },
    {
      "epoch": 0.25248392752776155,
      "grad_norm": 4.744977001781623,
      "learning_rate": 3.864734299516908e-06,
      "loss": 0.8218,
      "step": 108
    },
    {
      "epoch": 0.2548217416715371,
      "grad_norm": 4.548950141262851,
      "learning_rate": 3.852657004830918e-06,
      "loss": 0.8053,
      "step": 109
    },
    {
      "epoch": 0.2571595558153127,
      "grad_norm": 4.44828603075951,
      "learning_rate": 3.840579710144928e-06,
      "loss": 0.8231,
      "step": 110
    },
    {
      "epoch": 0.25949736995908823,
      "grad_norm": 4.672161591073822,
      "learning_rate": 3.8285024154589375e-06,
      "loss": 0.8389,
      "step": 111
    },
    {
      "epoch": 0.2618351841028638,
      "grad_norm": 4.526274586937092,
      "learning_rate": 3.816425120772947e-06,
      "loss": 0.8683,
      "step": 112
    },
    {
      "epoch": 0.26417299824663937,
      "grad_norm": 4.603415978914653,
      "learning_rate": 3.804347826086957e-06,
      "loss": 0.8206,
      "step": 113
    },
    {
      "epoch": 0.26651081239041496,
      "grad_norm": 4.343843088593362,
      "learning_rate": 3.792270531400967e-06,
      "loss": 0.823,
      "step": 114
    },
    {
      "epoch": 0.2688486265341905,
      "grad_norm": 4.131180727748698,
      "learning_rate": 3.780193236714976e-06,
      "loss": 0.7964,
      "step": 115
    },
    {
      "epoch": 0.2711864406779661,
      "grad_norm": 5.611563677944062,
      "learning_rate": 3.768115942028986e-06,
      "loss": 0.8529,
      "step": 116
    },
    {
      "epoch": 0.2735242548217417,
      "grad_norm": 4.315382063517201,
      "learning_rate": 3.7560386473429956e-06,
      "loss": 0.7849,
      "step": 117
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 4.3301657812789776,
      "learning_rate": 3.743961352657005e-06,
      "loss": 0.8392,
      "step": 118
    },
    {
      "epoch": 0.27819988310929283,
      "grad_norm": 4.763659062354643,
      "learning_rate": 3.7318840579710147e-06,
      "loss": 0.7846,
      "step": 119
    },
    {
      "epoch": 0.2805376972530684,
      "grad_norm": 4.531318611414816,
      "learning_rate": 3.7198067632850245e-06,
      "loss": 0.8335,
      "step": 120
    },
    {
      "epoch": 0.28287551139684397,
      "grad_norm": 4.4418077648050485,
      "learning_rate": 3.707729468599034e-06,
      "loss": 0.7858,
      "step": 121
    },
    {
      "epoch": 0.2852133255406195,
      "grad_norm": 4.39068842397474,
      "learning_rate": 3.6956521739130436e-06,
      "loss": 0.8408,
      "step": 122
    },
    {
      "epoch": 0.2875511396843951,
      "grad_norm": 4.585137838540199,
      "learning_rate": 3.6835748792270538e-06,
      "loss": 0.8316,
      "step": 123
    },
    {
      "epoch": 0.28988895382817065,
      "grad_norm": 4.319672080062613,
      "learning_rate": 3.6714975845410635e-06,
      "loss": 0.8241,
      "step": 124
    },
    {
      "epoch": 0.29222676797194624,
      "grad_norm": 4.131090234388279,
      "learning_rate": 3.659420289855073e-06,
      "loss": 0.7416,
      "step": 125
    },
    {
      "epoch": 0.2945645821157218,
      "grad_norm": 4.081456252490184,
      "learning_rate": 3.6473429951690826e-06,
      "loss": 0.7958,
      "step": 126
    },
    {
      "epoch": 0.2969023962594974,
      "grad_norm": 4.090503599319394,
      "learning_rate": 3.635265700483092e-06,
      "loss": 0.8096,
      "step": 127
    },
    {
      "epoch": 0.2992402104032729,
      "grad_norm": 4.129285724564573,
      "learning_rate": 3.6231884057971017e-06,
      "loss": 0.7918,
      "step": 128
    },
    {
      "epoch": 0.3015780245470485,
      "grad_norm": 4.506022555765926,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.8333,
      "step": 129
    },
    {
      "epoch": 0.30391583869082406,
      "grad_norm": 4.151575198600969,
      "learning_rate": 3.5990338164251208e-06,
      "loss": 0.7713,
      "step": 130
    },
    {
      "epoch": 0.30625365283459965,
      "grad_norm": 4.614683656771631,
      "learning_rate": 3.5869565217391305e-06,
      "loss": 0.8298,
      "step": 131
    },
    {
      "epoch": 0.3085914669783752,
      "grad_norm": 4.6094981031628075,
      "learning_rate": 3.5748792270531403e-06,
      "loss": 0.8217,
      "step": 132
    },
    {
      "epoch": 0.3109292811221508,
      "grad_norm": 4.2999582776551675,
      "learning_rate": 3.5628019323671496e-06,
      "loss": 0.7968,
      "step": 133
    },
    {
      "epoch": 0.31326709526592633,
      "grad_norm": 4.864198700798981,
      "learning_rate": 3.55072463768116e-06,
      "loss": 0.8141,
      "step": 134
    },
    {
      "epoch": 0.31560490940970193,
      "grad_norm": 4.601546334463328,
      "learning_rate": 3.5386473429951696e-06,
      "loss": 0.7925,
      "step": 135
    },
    {
      "epoch": 0.3179427235534775,
      "grad_norm": 4.089485101723296,
      "learning_rate": 3.5265700483091793e-06,
      "loss": 0.7873,
      "step": 136
    },
    {
      "epoch": 0.32028053769725306,
      "grad_norm": 4.0777367885745806,
      "learning_rate": 3.5144927536231887e-06,
      "loss": 0.7985,
      "step": 137
    },
    {
      "epoch": 0.32261835184102866,
      "grad_norm": 4.832689220436005,
      "learning_rate": 3.5024154589371984e-06,
      "loss": 0.8306,
      "step": 138
    },
    {
      "epoch": 0.3249561659848042,
      "grad_norm": 4.888417681228503,
      "learning_rate": 3.490338164251208e-06,
      "loss": 0.8353,
      "step": 139
    },
    {
      "epoch": 0.3272939801285798,
      "grad_norm": 4.28948650105686,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 0.8057,
      "step": 140
    },
    {
      "epoch": 0.32963179427235534,
      "grad_norm": 4.203178774124529,
      "learning_rate": 3.4661835748792273e-06,
      "loss": 0.7788,
      "step": 141
    },
    {
      "epoch": 0.33196960841613093,
      "grad_norm": 4.637106026831514,
      "learning_rate": 3.4541062801932366e-06,
      "loss": 0.8521,
      "step": 142
    },
    {
      "epoch": 0.3343074225599065,
      "grad_norm": 4.350395114537057,
      "learning_rate": 3.4420289855072464e-06,
      "loss": 0.7968,
      "step": 143
    },
    {
      "epoch": 0.33664523670368207,
      "grad_norm": 4.474607456827939,
      "learning_rate": 3.4299516908212565e-06,
      "loss": 0.8208,
      "step": 144
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 4.100288353060924,
      "learning_rate": 3.4178743961352663e-06,
      "loss": 0.8165,
      "step": 145
    },
    {
      "epoch": 0.3413208649912332,
      "grad_norm": 4.6247997756094845,
      "learning_rate": 3.4057971014492756e-06,
      "loss": 0.8294,
      "step": 146
    },
    {
      "epoch": 0.34365867913500875,
      "grad_norm": 4.525169765596723,
      "learning_rate": 3.3937198067632854e-06,
      "loss": 0.7713,
      "step": 147
    },
    {
      "epoch": 0.34599649327878435,
      "grad_norm": 4.442206881786442,
      "learning_rate": 3.381642512077295e-06,
      "loss": 0.82,
      "step": 148
    },
    {
      "epoch": 0.3483343074225599,
      "grad_norm": 4.225556484795958,
      "learning_rate": 3.3695652173913045e-06,
      "loss": 0.7886,
      "step": 149
    },
    {
      "epoch": 0.3506721215663355,
      "grad_norm": 4.268743583707888,
      "learning_rate": 3.3574879227053142e-06,
      "loss": 0.7762,
      "step": 150
    },
    {
      "epoch": 0.353009935710111,
      "grad_norm": 4.338428118785664,
      "learning_rate": 3.345410628019324e-06,
      "loss": 0.7719,
      "step": 151
    },
    {
      "epoch": 0.3553477498538866,
      "grad_norm": 4.188696391446484,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.7745,
      "step": 152
    },
    {
      "epoch": 0.35768556399766216,
      "grad_norm": 4.310914121193176,
      "learning_rate": 3.321256038647343e-06,
      "loss": 0.8188,
      "step": 153
    },
    {
      "epoch": 0.36002337814143776,
      "grad_norm": 4.391656829031555,
      "learning_rate": 3.3091787439613533e-06,
      "loss": 0.8148,
      "step": 154
    },
    {
      "epoch": 0.36236119228521335,
      "grad_norm": 4.104259018738402,
      "learning_rate": 3.2971014492753626e-06,
      "loss": 0.8519,
      "step": 155
    },
    {
      "epoch": 0.3646990064289889,
      "grad_norm": 4.25629990334181,
      "learning_rate": 3.2850241545893724e-06,
      "loss": 0.7983,
      "step": 156
    },
    {
      "epoch": 0.3670368205727645,
      "grad_norm": 4.134990269789036,
      "learning_rate": 3.272946859903382e-06,
      "loss": 0.7852,
      "step": 157
    },
    {
      "epoch": 0.36937463471654003,
      "grad_norm": 4.0420970622040135,
      "learning_rate": 3.2608695652173914e-06,
      "loss": 0.7992,
      "step": 158
    },
    {
      "epoch": 0.3717124488603156,
      "grad_norm": 4.341222672754024,
      "learning_rate": 3.248792270531401e-06,
      "loss": 0.7704,
      "step": 159
    },
    {
      "epoch": 0.37405026300409117,
      "grad_norm": 4.115523347634753,
      "learning_rate": 3.236714975845411e-06,
      "loss": 0.791,
      "step": 160
    },
    {
      "epoch": 0.37638807714786676,
      "grad_norm": 4.136587110231359,
      "learning_rate": 3.2246376811594203e-06,
      "loss": 0.7752,
      "step": 161
    },
    {
      "epoch": 0.3787258912916423,
      "grad_norm": 4.504460772929252,
      "learning_rate": 3.21256038647343e-06,
      "loss": 0.8176,
      "step": 162
    },
    {
      "epoch": 0.3810637054354179,
      "grad_norm": 4.629377407126395,
      "learning_rate": 3.20048309178744e-06,
      "loss": 0.8275,
      "step": 163
    },
    {
      "epoch": 0.38340151957919344,
      "grad_norm": 4.422477761962599,
      "learning_rate": 3.188405797101449e-06,
      "loss": 0.7847,
      "step": 164
    },
    {
      "epoch": 0.38573933372296904,
      "grad_norm": 3.9888038106102153,
      "learning_rate": 3.1763285024154593e-06,
      "loss": 0.7939,
      "step": 165
    },
    {
      "epoch": 0.3880771478667446,
      "grad_norm": 4.125918892903183,
      "learning_rate": 3.164251207729469e-06,
      "loss": 0.7717,
      "step": 166
    },
    {
      "epoch": 0.3904149620105202,
      "grad_norm": 6.885413034719951,
      "learning_rate": 3.152173913043479e-06,
      "loss": 0.8514,
      "step": 167
    },
    {
      "epoch": 0.3927527761542957,
      "grad_norm": 4.446340003037039,
      "learning_rate": 3.140096618357488e-06,
      "loss": 0.813,
      "step": 168
    },
    {
      "epoch": 0.3950905902980713,
      "grad_norm": 3.9959566422822346,
      "learning_rate": 3.128019323671498e-06,
      "loss": 0.7776,
      "step": 169
    },
    {
      "epoch": 0.39742840444184685,
      "grad_norm": 4.627421389612255,
      "learning_rate": 3.1159420289855073e-06,
      "loss": 0.8395,
      "step": 170
    },
    {
      "epoch": 0.39976621858562245,
      "grad_norm": 4.118715295949323,
      "learning_rate": 3.103864734299517e-06,
      "loss": 0.7824,
      "step": 171
    },
    {
      "epoch": 0.402104032729398,
      "grad_norm": 4.109354113391549,
      "learning_rate": 3.0917874396135268e-06,
      "loss": 0.7961,
      "step": 172
    },
    {
      "epoch": 0.4044418468731736,
      "grad_norm": 4.439845150489727,
      "learning_rate": 3.079710144927536e-06,
      "loss": 0.8035,
      "step": 173
    },
    {
      "epoch": 0.4067796610169492,
      "grad_norm": 4.358250626799815,
      "learning_rate": 3.067632850241546e-06,
      "loss": 0.7829,
      "step": 174
    },
    {
      "epoch": 0.4091174751607247,
      "grad_norm": 4.43053050152037,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.7554,
      "step": 175
    },
    {
      "epoch": 0.4114552893045003,
      "grad_norm": 4.324105830729812,
      "learning_rate": 3.043478260869566e-06,
      "loss": 0.7763,
      "step": 176
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 4.505708676229393,
      "learning_rate": 3.031400966183575e-06,
      "loss": 0.8052,
      "step": 177
    },
    {
      "epoch": 0.41613091759205145,
      "grad_norm": 4.198009455233572,
      "learning_rate": 3.019323671497585e-06,
      "loss": 0.8036,
      "step": 178
    },
    {
      "epoch": 0.418468731735827,
      "grad_norm": 4.255888057785401,
      "learning_rate": 3.0072463768115946e-06,
      "loss": 0.8675,
      "step": 179
    },
    {
      "epoch": 0.4208065458796026,
      "grad_norm": 4.166498155365259,
      "learning_rate": 2.995169082125604e-06,
      "loss": 0.8099,
      "step": 180
    },
    {
      "epoch": 0.42314436002337813,
      "grad_norm": 4.471408293419965,
      "learning_rate": 2.9830917874396137e-06,
      "loss": 0.8025,
      "step": 181
    },
    {
      "epoch": 0.4254821741671537,
      "grad_norm": 4.910816764257679,
      "learning_rate": 2.9710144927536235e-06,
      "loss": 0.7702,
      "step": 182
    },
    {
      "epoch": 0.42781998831092927,
      "grad_norm": 4.071039233797094,
      "learning_rate": 2.958937198067633e-06,
      "loss": 0.8143,
      "step": 183
    },
    {
      "epoch": 0.43015780245470486,
      "grad_norm": 4.738565032335615,
      "learning_rate": 2.9468599033816426e-06,
      "loss": 0.8158,
      "step": 184
    },
    {
      "epoch": 0.4324956165984804,
      "grad_norm": 4.2936029356268195,
      "learning_rate": 2.9347826086956528e-06,
      "loss": 0.7874,
      "step": 185
    },
    {
      "epoch": 0.434833430742256,
      "grad_norm": 4.206590096270031,
      "learning_rate": 2.922705314009662e-06,
      "loss": 0.7997,
      "step": 186
    },
    {
      "epoch": 0.43717124488603154,
      "grad_norm": 4.2051171328892085,
      "learning_rate": 2.910628019323672e-06,
      "loss": 0.787,
      "step": 187
    },
    {
      "epoch": 0.43950905902980714,
      "grad_norm": 4.245918333471198,
      "learning_rate": 2.8985507246376816e-06,
      "loss": 0.7997,
      "step": 188
    },
    {
      "epoch": 0.4418468731735827,
      "grad_norm": 4.179370789694772,
      "learning_rate": 2.886473429951691e-06,
      "loss": 0.7759,
      "step": 189
    },
    {
      "epoch": 0.4441846873173583,
      "grad_norm": 4.41515451612343,
      "learning_rate": 2.8743961352657007e-06,
      "loss": 0.7886,
      "step": 190
    },
    {
      "epoch": 0.4465225014611338,
      "grad_norm": 4.227222440463386,
      "learning_rate": 2.8623188405797105e-06,
      "loss": 0.8294,
      "step": 191
    },
    {
      "epoch": 0.4488603156049094,
      "grad_norm": 4.095256731977333,
      "learning_rate": 2.85024154589372e-06,
      "loss": 0.7604,
      "step": 192
    },
    {
      "epoch": 0.451198129748685,
      "grad_norm": 4.286339845869899,
      "learning_rate": 2.8381642512077295e-06,
      "loss": 0.8237,
      "step": 193
    },
    {
      "epoch": 0.45353594389246055,
      "grad_norm": 4.141328341649525,
      "learning_rate": 2.8260869565217393e-06,
      "loss": 0.7961,
      "step": 194
    },
    {
      "epoch": 0.45587375803623614,
      "grad_norm": 4.522982085235291,
      "learning_rate": 2.8140096618357486e-06,
      "loss": 0.7918,
      "step": 195
    },
    {
      "epoch": 0.4582115721800117,
      "grad_norm": 4.9933547683547745,
      "learning_rate": 2.801932367149759e-06,
      "loss": 0.8151,
      "step": 196
    },
    {
      "epoch": 0.4605493863237873,
      "grad_norm": 3.8642864404581463,
      "learning_rate": 2.7898550724637686e-06,
      "loss": 0.7411,
      "step": 197
    },
    {
      "epoch": 0.4628872004675628,
      "grad_norm": 4.304180579272247,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.7975,
      "step": 198
    },
    {
      "epoch": 0.4652250146113384,
      "grad_norm": 4.246581554029021,
      "learning_rate": 2.7657004830917877e-06,
      "loss": 0.829,
      "step": 199
    },
    {
      "epoch": 0.46756282875511396,
      "grad_norm": 4.257923593734172,
      "learning_rate": 2.7536231884057974e-06,
      "loss": 0.7475,
      "step": 200
    },
    {
      "epoch": 0.46990064289888955,
      "grad_norm": 4.001585884428085,
      "learning_rate": 2.7415458937198068e-06,
      "loss": 0.7866,
      "step": 201
    },
    {
      "epoch": 0.4722384570426651,
      "grad_norm": 4.064057741085377,
      "learning_rate": 2.7294685990338165e-06,
      "loss": 0.7861,
      "step": 202
    },
    {
      "epoch": 0.4745762711864407,
      "grad_norm": 4.10748108128691,
      "learning_rate": 2.7173913043478263e-06,
      "loss": 0.7735,
      "step": 203
    },
    {
      "epoch": 0.47691408533021623,
      "grad_norm": 3.9433247912828455,
      "learning_rate": 2.7053140096618356e-06,
      "loss": 0.7494,
      "step": 204
    },
    {
      "epoch": 0.4792518994739918,
      "grad_norm": 4.368990885761068,
      "learning_rate": 2.6932367149758454e-06,
      "loss": 0.7961,
      "step": 205
    },
    {
      "epoch": 0.48158971361776737,
      "grad_norm": 4.323297445539955,
      "learning_rate": 2.6811594202898555e-06,
      "loss": 0.7498,
      "step": 206
    },
    {
      "epoch": 0.48392752776154296,
      "grad_norm": 4.276797241413841,
      "learning_rate": 2.6690821256038653e-06,
      "loss": 0.79,
      "step": 207
    },
    {
      "epoch": 0.4862653419053185,
      "grad_norm": 4.29615738858519,
      "learning_rate": 2.6570048309178746e-06,
      "loss": 0.7762,
      "step": 208
    },
    {
      "epoch": 0.4886031560490941,
      "grad_norm": 4.24658335062537,
      "learning_rate": 2.6449275362318844e-06,
      "loss": 0.7547,
      "step": 209
    },
    {
      "epoch": 0.49094097019286964,
      "grad_norm": 4.140652638469078,
      "learning_rate": 2.632850241545894e-06,
      "loss": 0.7568,
      "step": 210
    },
    {
      "epoch": 0.49327878433664524,
      "grad_norm": 4.355835930781116,
      "learning_rate": 2.6207729468599035e-06,
      "loss": 0.8005,
      "step": 211
    },
    {
      "epoch": 0.49561659848042083,
      "grad_norm": 4.1002906789316045,
      "learning_rate": 2.6086956521739132e-06,
      "loss": 0.7791,
      "step": 212
    },
    {
      "epoch": 0.4979544126241964,
      "grad_norm": 4.210038749172179,
      "learning_rate": 2.596618357487923e-06,
      "loss": 0.7777,
      "step": 213
    },
    {
      "epoch": 0.5002922267679719,
      "grad_norm": 4.1435757469488985,
      "learning_rate": 2.5845410628019323e-06,
      "loss": 0.7824,
      "step": 214
    },
    {
      "epoch": 0.5026300409117476,
      "grad_norm": 4.309944612009968,
      "learning_rate": 2.572463768115942e-06,
      "loss": 0.7625,
      "step": 215
    },
    {
      "epoch": 0.5049678550555231,
      "grad_norm": 4.662526042139382,
      "learning_rate": 2.5603864734299523e-06,
      "loss": 0.7873,
      "step": 216
    },
    {
      "epoch": 0.5073056691992986,
      "grad_norm": 4.473614799031895,
      "learning_rate": 2.5483091787439616e-06,
      "loss": 0.7737,
      "step": 217
    },
    {
      "epoch": 0.5096434833430742,
      "grad_norm": 4.54082051832202,
      "learning_rate": 2.5362318840579714e-06,
      "loss": 0.782,
      "step": 218
    },
    {
      "epoch": 0.5119812974868498,
      "grad_norm": 3.9808775866846817,
      "learning_rate": 2.524154589371981e-06,
      "loss": 0.7592,
      "step": 219
    },
    {
      "epoch": 0.5143191116306254,
      "grad_norm": 4.233088111283031,
      "learning_rate": 2.5120772946859904e-06,
      "loss": 0.774,
      "step": 220
    },
    {
      "epoch": 0.5166569257744009,
      "grad_norm": 4.179314655537464,
      "learning_rate": 2.5e-06,
      "loss": 0.7936,
      "step": 221
    },
    {
      "epoch": 0.5189947399181765,
      "grad_norm": 4.808766886416466,
      "learning_rate": 2.48792270531401e-06,
      "loss": 0.7961,
      "step": 222
    },
    {
      "epoch": 0.5213325540619521,
      "grad_norm": 4.088801764052967,
      "learning_rate": 2.4758454106280193e-06,
      "loss": 0.7693,
      "step": 223
    },
    {
      "epoch": 0.5236703682057277,
      "grad_norm": 4.1844548782576005,
      "learning_rate": 2.4637681159420295e-06,
      "loss": 0.7961,
      "step": 224
    },
    {
      "epoch": 0.5260081823495032,
      "grad_norm": 3.909844659514703,
      "learning_rate": 2.451690821256039e-06,
      "loss": 0.7304,
      "step": 225
    },
    {
      "epoch": 0.5283459964932787,
      "grad_norm": 3.7096435860994346,
      "learning_rate": 2.4396135265700486e-06,
      "loss": 0.7712,
      "step": 226
    },
    {
      "epoch": 0.5306838106370544,
      "grad_norm": 4.0389484559123305,
      "learning_rate": 2.4275362318840583e-06,
      "loss": 0.7711,
      "step": 227
    },
    {
      "epoch": 0.5330216247808299,
      "grad_norm": 4.171802534409844,
      "learning_rate": 2.4154589371980677e-06,
      "loss": 0.7768,
      "step": 228
    },
    {
      "epoch": 0.5353594389246055,
      "grad_norm": 4.636520882862149,
      "learning_rate": 2.4033816425120774e-06,
      "loss": 0.7832,
      "step": 229
    },
    {
      "epoch": 0.537697253068381,
      "grad_norm": 4.2073440647978675,
      "learning_rate": 2.391304347826087e-06,
      "loss": 0.7816,
      "step": 230
    },
    {
      "epoch": 0.5400350672121567,
      "grad_norm": 4.115009346971059,
      "learning_rate": 2.379227053140097e-06,
      "loss": 0.7152,
      "step": 231
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 4.47134068227285,
      "learning_rate": 2.3671497584541063e-06,
      "loss": 0.7898,
      "step": 232
    },
    {
      "epoch": 0.5447106954997077,
      "grad_norm": 4.78251740854767,
      "learning_rate": 2.355072463768116e-06,
      "loss": 0.8101,
      "step": 233
    },
    {
      "epoch": 0.5470485096434834,
      "grad_norm": 4.735288223469208,
      "learning_rate": 2.3429951690821258e-06,
      "loss": 0.7864,
      "step": 234
    },
    {
      "epoch": 0.5493863237872589,
      "grad_norm": 4.445520808429391,
      "learning_rate": 2.3309178743961355e-06,
      "loss": 0.7986,
      "step": 235
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 4.83504723163877,
      "learning_rate": 2.3188405797101453e-06,
      "loss": 0.8231,
      "step": 236
    },
    {
      "epoch": 0.55406195207481,
      "grad_norm": 3.9498177063802897,
      "learning_rate": 2.3067632850241546e-06,
      "loss": 0.7834,
      "step": 237
    },
    {
      "epoch": 0.5563997662185857,
      "grad_norm": 4.190234074575243,
      "learning_rate": 2.2946859903381644e-06,
      "loss": 0.7839,
      "step": 238
    },
    {
      "epoch": 0.5587375803623612,
      "grad_norm": 4.76462271734834,
      "learning_rate": 2.282608695652174e-06,
      "loss": 0.8258,
      "step": 239
    },
    {
      "epoch": 0.5610753945061367,
      "grad_norm": 4.369965626736373,
      "learning_rate": 2.270531400966184e-06,
      "loss": 0.7927,
      "step": 240
    },
    {
      "epoch": 0.5634132086499123,
      "grad_norm": 4.423067504974851,
      "learning_rate": 2.2584541062801937e-06,
      "loss": 0.8181,
      "step": 241
    },
    {
      "epoch": 0.5657510227936879,
      "grad_norm": 4.117514088831818,
      "learning_rate": 2.246376811594203e-06,
      "loss": 0.7471,
      "step": 242
    },
    {
      "epoch": 0.5680888369374635,
      "grad_norm": 4.208191494707427,
      "learning_rate": 2.2342995169082127e-06,
      "loss": 0.7936,
      "step": 243
    },
    {
      "epoch": 0.570426651081239,
      "grad_norm": 4.30348767627021,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.8087,
      "step": 244
    },
    {
      "epoch": 0.5727644652250146,
      "grad_norm": 4.08781387103947,
      "learning_rate": 2.2101449275362323e-06,
      "loss": 0.7712,
      "step": 245
    },
    {
      "epoch": 0.5751022793687902,
      "grad_norm": 4.255214633571236,
      "learning_rate": 2.1980676328502416e-06,
      "loss": 0.7327,
      "step": 246
    },
    {
      "epoch": 0.5774400935125658,
      "grad_norm": 4.249395888532918,
      "learning_rate": 2.1859903381642513e-06,
      "loss": 0.8115,
      "step": 247
    },
    {
      "epoch": 0.5797779076563413,
      "grad_norm": 4.048350886158577,
      "learning_rate": 2.173913043478261e-06,
      "loss": 0.7629,
      "step": 248
    },
    {
      "epoch": 0.5821157218001168,
      "grad_norm": 4.286991029118236,
      "learning_rate": 2.1618357487922704e-06,
      "loss": 0.7748,
      "step": 249
    },
    {
      "epoch": 0.5844535359438925,
      "grad_norm": 4.473519294462659,
      "learning_rate": 2.1497584541062806e-06,
      "loss": 0.7786,
      "step": 250
    },
    {
      "epoch": 0.586791350087668,
      "grad_norm": 4.511510327301669,
      "learning_rate": 2.13768115942029e-06,
      "loss": 0.8125,
      "step": 251
    },
    {
      "epoch": 0.5891291642314436,
      "grad_norm": 4.198745204040387,
      "learning_rate": 2.1256038647342997e-06,
      "loss": 0.7843,
      "step": 252
    },
    {
      "epoch": 0.5914669783752192,
      "grad_norm": 4.3568648354588655,
      "learning_rate": 2.1135265700483095e-06,
      "loss": 0.7346,
      "step": 253
    },
    {
      "epoch": 0.5938047925189948,
      "grad_norm": 3.8942460823301412,
      "learning_rate": 2.101449275362319e-06,
      "loss": 0.7879,
      "step": 254
    },
    {
      "epoch": 0.5961426066627703,
      "grad_norm": 4.221148903821956,
      "learning_rate": 2.089371980676329e-06,
      "loss": 0.799,
      "step": 255
    },
    {
      "epoch": 0.5984804208065458,
      "grad_norm": 4.041691704636457,
      "learning_rate": 2.0772946859903383e-06,
      "loss": 0.767,
      "step": 256
    },
    {
      "epoch": 0.6008182349503215,
      "grad_norm": 4.03197715174544,
      "learning_rate": 2.065217391304348e-06,
      "loss": 0.7487,
      "step": 257
    },
    {
      "epoch": 0.603156049094097,
      "grad_norm": 4.082902353599498,
      "learning_rate": 2.053140096618358e-06,
      "loss": 0.7874,
      "step": 258
    },
    {
      "epoch": 0.6054938632378726,
      "grad_norm": 3.7781639431570557,
      "learning_rate": 2.041062801932367e-06,
      "loss": 0.7721,
      "step": 259
    },
    {
      "epoch": 0.6078316773816481,
      "grad_norm": 4.280421267303715,
      "learning_rate": 2.028985507246377e-06,
      "loss": 0.783,
      "step": 260
    },
    {
      "epoch": 0.6101694915254238,
      "grad_norm": 4.073869260462684,
      "learning_rate": 2.0169082125603867e-06,
      "loss": 0.7759,
      "step": 261
    },
    {
      "epoch": 0.6125073056691993,
      "grad_norm": 3.935130784068012,
      "learning_rate": 2.0048309178743964e-06,
      "loss": 0.7669,
      "step": 262
    },
    {
      "epoch": 0.6148451198129748,
      "grad_norm": 4.40643829592683,
      "learning_rate": 1.9927536231884058e-06,
      "loss": 0.7572,
      "step": 263
    },
    {
      "epoch": 0.6171829339567504,
      "grad_norm": 4.337844456783807,
      "learning_rate": 1.9806763285024155e-06,
      "loss": 0.7605,
      "step": 264
    },
    {
      "epoch": 0.619520748100526,
      "grad_norm": 4.281102087431204,
      "learning_rate": 1.9685990338164253e-06,
      "loss": 0.7393,
      "step": 265
    },
    {
      "epoch": 0.6218585622443016,
      "grad_norm": 4.23207914041172,
      "learning_rate": 1.956521739130435e-06,
      "loss": 0.7794,
      "step": 266
    },
    {
      "epoch": 0.6241963763880771,
      "grad_norm": 3.9282868393703896,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.7782,
      "step": 267
    },
    {
      "epoch": 0.6265341905318527,
      "grad_norm": 4.098138917146235,
      "learning_rate": 1.932367149758454e-06,
      "loss": 0.7725,
      "step": 268
    },
    {
      "epoch": 0.6288720046756283,
      "grad_norm": 4.141313603560724,
      "learning_rate": 1.920289855072464e-06,
      "loss": 0.7785,
      "step": 269
    },
    {
      "epoch": 0.6312098188194039,
      "grad_norm": 4.611198038918517,
      "learning_rate": 1.9082125603864736e-06,
      "loss": 0.8185,
      "step": 270
    },
    {
      "epoch": 0.6335476329631794,
      "grad_norm": 4.452172749748544,
      "learning_rate": 1.8961352657004834e-06,
      "loss": 0.7703,
      "step": 271
    },
    {
      "epoch": 0.635885447106955,
      "grad_norm": 4.454099100217199,
      "learning_rate": 1.884057971014493e-06,
      "loss": 0.7756,
      "step": 272
    },
    {
      "epoch": 0.6382232612507306,
      "grad_norm": 4.159216947583455,
      "learning_rate": 1.8719806763285025e-06,
      "loss": 0.7358,
      "step": 273
    },
    {
      "epoch": 0.6405610753945061,
      "grad_norm": 4.0088196320012885,
      "learning_rate": 1.8599033816425122e-06,
      "loss": 0.8002,
      "step": 274
    },
    {
      "epoch": 0.6428988895382817,
      "grad_norm": 4.197686175636046,
      "learning_rate": 1.8478260869565218e-06,
      "loss": 0.7998,
      "step": 275
    },
    {
      "epoch": 0.6452367036820573,
      "grad_norm": 4.373828840174765,
      "learning_rate": 1.8357487922705318e-06,
      "loss": 0.742,
      "step": 276
    },
    {
      "epoch": 0.6475745178258329,
      "grad_norm": 4.212073348085054,
      "learning_rate": 1.8236714975845413e-06,
      "loss": 0.7678,
      "step": 277
    },
    {
      "epoch": 0.6499123319696084,
      "grad_norm": 3.972532257275605,
      "learning_rate": 1.8115942028985508e-06,
      "loss": 0.7757,
      "step": 278
    },
    {
      "epoch": 0.6522501461133839,
      "grad_norm": 4.141324887414669,
      "learning_rate": 1.7995169082125604e-06,
      "loss": 0.7447,
      "step": 279
    },
    {
      "epoch": 0.6545879602571596,
      "grad_norm": 4.319306461683,
      "learning_rate": 1.7874396135265702e-06,
      "loss": 0.7669,
      "step": 280
    },
    {
      "epoch": 0.6569257744009351,
      "grad_norm": 4.13159761798667,
      "learning_rate": 1.77536231884058e-06,
      "loss": 0.753,
      "step": 281
    },
    {
      "epoch": 0.6592635885447107,
      "grad_norm": 4.261205598617194,
      "learning_rate": 1.7632850241545897e-06,
      "loss": 0.7867,
      "step": 282
    },
    {
      "epoch": 0.6616014026884862,
      "grad_norm": 4.043224440888056,
      "learning_rate": 1.7512077294685992e-06,
      "loss": 0.7634,
      "step": 283
    },
    {
      "epoch": 0.6639392168322619,
      "grad_norm": 4.221366014724788,
      "learning_rate": 1.7391304347826088e-06,
      "loss": 0.8032,
      "step": 284
    },
    {
      "epoch": 0.6662770309760374,
      "grad_norm": 4.2120362159497935,
      "learning_rate": 1.7270531400966183e-06,
      "loss": 0.7449,
      "step": 285
    },
    {
      "epoch": 0.668614845119813,
      "grad_norm": 4.330019099169185,
      "learning_rate": 1.7149758454106283e-06,
      "loss": 0.7641,
      "step": 286
    },
    {
      "epoch": 0.6709526592635885,
      "grad_norm": 4.234551345137344,
      "learning_rate": 1.7028985507246378e-06,
      "loss": 0.7785,
      "step": 287
    },
    {
      "epoch": 0.6732904734073641,
      "grad_norm": 4.789360597178873,
      "learning_rate": 1.6908212560386476e-06,
      "loss": 0.7517,
      "step": 288
    },
    {
      "epoch": 0.6756282875511397,
      "grad_norm": 4.087545337483895,
      "learning_rate": 1.6787439613526571e-06,
      "loss": 0.7398,
      "step": 289
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 4.048928229662754,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.7759,
      "step": 290
    },
    {
      "epoch": 0.6803039158386909,
      "grad_norm": 4.258228190717208,
      "learning_rate": 1.6545893719806766e-06,
      "loss": 0.7816,
      "step": 291
    },
    {
      "epoch": 0.6826417299824664,
      "grad_norm": 4.207730290983508,
      "learning_rate": 1.6425120772946862e-06,
      "loss": 0.7492,
      "step": 292
    },
    {
      "epoch": 0.684979544126242,
      "grad_norm": 4.211632269620855,
      "learning_rate": 1.6304347826086957e-06,
      "loss": 0.8045,
      "step": 293
    },
    {
      "epoch": 0.6873173582700175,
      "grad_norm": 4.2791266083196575,
      "learning_rate": 1.6183574879227055e-06,
      "loss": 0.7686,
      "step": 294
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 4.400251918863611,
      "learning_rate": 1.606280193236715e-06,
      "loss": 0.7346,
      "step": 295
    },
    {
      "epoch": 0.6919929865575687,
      "grad_norm": 3.8930859729711,
      "learning_rate": 1.5942028985507246e-06,
      "loss": 0.7476,
      "step": 296
    },
    {
      "epoch": 0.6943308007013442,
      "grad_norm": 4.179140087181349,
      "learning_rate": 1.5821256038647345e-06,
      "loss": 0.7758,
      "step": 297
    },
    {
      "epoch": 0.6966686148451198,
      "grad_norm": 4.1025982230247005,
      "learning_rate": 1.570048309178744e-06,
      "loss": 0.764,
      "step": 298
    },
    {
      "epoch": 0.6990064289888954,
      "grad_norm": 4.54359763623282,
      "learning_rate": 1.5579710144927536e-06,
      "loss": 0.813,
      "step": 299
    },
    {
      "epoch": 0.701344243132671,
      "grad_norm": 3.8868646182191333,
      "learning_rate": 1.5458937198067634e-06,
      "loss": 0.7809,
      "step": 300
    },
    {
      "epoch": 0.7036820572764465,
      "grad_norm": 4.027087287618028,
      "learning_rate": 1.533816425120773e-06,
      "loss": 0.7421,
      "step": 301
    },
    {
      "epoch": 0.706019871420222,
      "grad_norm": 4.221180533576584,
      "learning_rate": 1.521739130434783e-06,
      "loss": 0.7437,
      "step": 302
    },
    {
      "epoch": 0.7083576855639977,
      "grad_norm": 4.025585601097397,
      "learning_rate": 1.5096618357487924e-06,
      "loss": 0.7587,
      "step": 303
    },
    {
      "epoch": 0.7106954997077732,
      "grad_norm": 4.082415548970675,
      "learning_rate": 1.497584541062802e-06,
      "loss": 0.7437,
      "step": 304
    },
    {
      "epoch": 0.7130333138515488,
      "grad_norm": 3.9885030268207764,
      "learning_rate": 1.4855072463768117e-06,
      "loss": 0.7342,
      "step": 305
    },
    {
      "epoch": 0.7153711279953243,
      "grad_norm": 4.110847006439374,
      "learning_rate": 1.4734299516908213e-06,
      "loss": 0.7643,
      "step": 306
    },
    {
      "epoch": 0.7177089421391,
      "grad_norm": 4.018479338411149,
      "learning_rate": 1.461352657004831e-06,
      "loss": 0.7524,
      "step": 307
    },
    {
      "epoch": 0.7200467562828755,
      "grad_norm": 3.8679633701250835,
      "learning_rate": 1.4492753623188408e-06,
      "loss": 0.7854,
      "step": 308
    },
    {
      "epoch": 0.722384570426651,
      "grad_norm": 4.308222321507237,
      "learning_rate": 1.4371980676328504e-06,
      "loss": 0.7805,
      "step": 309
    },
    {
      "epoch": 0.7247223845704267,
      "grad_norm": 3.8916559653506018,
      "learning_rate": 1.42512077294686e-06,
      "loss": 0.6789,
      "step": 310
    },
    {
      "epoch": 0.7270601987142022,
      "grad_norm": 4.208472724847014,
      "learning_rate": 1.4130434782608697e-06,
      "loss": 0.7624,
      "step": 311
    },
    {
      "epoch": 0.7293980128579778,
      "grad_norm": 4.541098999570629,
      "learning_rate": 1.4009661835748794e-06,
      "loss": 0.7754,
      "step": 312
    },
    {
      "epoch": 0.7317358270017533,
      "grad_norm": 3.894542881557041,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.7327,
      "step": 313
    },
    {
      "epoch": 0.734073641145529,
      "grad_norm": 4.316419064602019,
      "learning_rate": 1.3768115942028987e-06,
      "loss": 0.7785,
      "step": 314
    },
    {
      "epoch": 0.7364114552893045,
      "grad_norm": 3.840444616763943,
      "learning_rate": 1.3647342995169083e-06,
      "loss": 0.7296,
      "step": 315
    },
    {
      "epoch": 0.7387492694330801,
      "grad_norm": 4.0101608921412835,
      "learning_rate": 1.3526570048309178e-06,
      "loss": 0.7199,
      "step": 316
    },
    {
      "epoch": 0.7410870835768556,
      "grad_norm": 4.02178577481216,
      "learning_rate": 1.3405797101449278e-06,
      "loss": 0.7662,
      "step": 317
    },
    {
      "epoch": 0.7434248977206313,
      "grad_norm": 3.955088131738884,
      "learning_rate": 1.3285024154589373e-06,
      "loss": 0.7196,
      "step": 318
    },
    {
      "epoch": 0.7457627118644068,
      "grad_norm": 4.130879922008592,
      "learning_rate": 1.316425120772947e-06,
      "loss": 0.787,
      "step": 319
    },
    {
      "epoch": 0.7481005260081823,
      "grad_norm": 4.0739088224040705,
      "learning_rate": 1.3043478260869566e-06,
      "loss": 0.7509,
      "step": 320
    },
    {
      "epoch": 0.7504383401519579,
      "grad_norm": 4.2499948389358595,
      "learning_rate": 1.2922705314009662e-06,
      "loss": 0.7373,
      "step": 321
    },
    {
      "epoch": 0.7527761542957335,
      "grad_norm": 4.048557241149405,
      "learning_rate": 1.2801932367149761e-06,
      "loss": 0.781,
      "step": 322
    },
    {
      "epoch": 0.7551139684395091,
      "grad_norm": 4.2499198906024205,
      "learning_rate": 1.2681159420289857e-06,
      "loss": 0.7674,
      "step": 323
    },
    {
      "epoch": 0.7574517825832846,
      "grad_norm": 4.1878094914635255,
      "learning_rate": 1.2560386473429952e-06,
      "loss": 0.726,
      "step": 324
    },
    {
      "epoch": 0.7597895967270601,
      "grad_norm": 4.531895242987001,
      "learning_rate": 1.243961352657005e-06,
      "loss": 0.7849,
      "step": 325
    },
    {
      "epoch": 0.7621274108708358,
      "grad_norm": 4.042779330179229,
      "learning_rate": 1.2318840579710147e-06,
      "loss": 0.7532,
      "step": 326
    },
    {
      "epoch": 0.7644652250146113,
      "grad_norm": 3.9930786810311254,
      "learning_rate": 1.2198067632850243e-06,
      "loss": 0.7286,
      "step": 327
    },
    {
      "epoch": 0.7668030391583869,
      "grad_norm": 5.948998810978814,
      "learning_rate": 1.2077294685990338e-06,
      "loss": 0.8127,
      "step": 328
    },
    {
      "epoch": 0.7691408533021625,
      "grad_norm": 4.144487299852383,
      "learning_rate": 1.1956521739130436e-06,
      "loss": 0.7691,
      "step": 329
    },
    {
      "epoch": 0.7714786674459381,
      "grad_norm": 4.128733708034505,
      "learning_rate": 1.1835748792270531e-06,
      "loss": 0.768,
      "step": 330
    },
    {
      "epoch": 0.7738164815897136,
      "grad_norm": 4.530767375631303,
      "learning_rate": 1.1714975845410629e-06,
      "loss": 0.7798,
      "step": 331
    },
    {
      "epoch": 0.7761542957334892,
      "grad_norm": 3.9238729668993835,
      "learning_rate": 1.1594202898550726e-06,
      "loss": 0.7642,
      "step": 332
    },
    {
      "epoch": 0.7784921098772648,
      "grad_norm": 4.111531783109019,
      "learning_rate": 1.1473429951690822e-06,
      "loss": 0.7616,
      "step": 333
    },
    {
      "epoch": 0.7808299240210403,
      "grad_norm": 4.140234356572554,
      "learning_rate": 1.135265700483092e-06,
      "loss": 0.8308,
      "step": 334
    },
    {
      "epoch": 0.7831677381648159,
      "grad_norm": 4.5225578335616845,
      "learning_rate": 1.1231884057971015e-06,
      "loss": 0.7688,
      "step": 335
    },
    {
      "epoch": 0.7855055523085914,
      "grad_norm": 4.253055596048113,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.7823,
      "step": 336
    },
    {
      "epoch": 0.7878433664523671,
      "grad_norm": 4.214973850734774,
      "learning_rate": 1.0990338164251208e-06,
      "loss": 0.7015,
      "step": 337
    },
    {
      "epoch": 0.7901811805961426,
      "grad_norm": 4.242093529547378,
      "learning_rate": 1.0869565217391306e-06,
      "loss": 0.7902,
      "step": 338
    },
    {
      "epoch": 0.7925189947399182,
      "grad_norm": 4.27860016507252,
      "learning_rate": 1.0748792270531403e-06,
      "loss": 0.7893,
      "step": 339
    },
    {
      "epoch": 0.7948568088836937,
      "grad_norm": 4.193517659739712,
      "learning_rate": 1.0628019323671499e-06,
      "loss": 0.7932,
      "step": 340
    },
    {
      "epoch": 0.7971946230274694,
      "grad_norm": 3.861888360541971,
      "learning_rate": 1.0507246376811594e-06,
      "loss": 0.7271,
      "step": 341
    },
    {
      "epoch": 0.7995324371712449,
      "grad_norm": 4.044324859369637,
      "learning_rate": 1.0386473429951692e-06,
      "loss": 0.7651,
      "step": 342
    },
    {
      "epoch": 0.8018702513150204,
      "grad_norm": 4.143848474405527,
      "learning_rate": 1.026570048309179e-06,
      "loss": 0.7991,
      "step": 343
    },
    {
      "epoch": 0.804208065458796,
      "grad_norm": 4.543740361976109,
      "learning_rate": 1.0144927536231885e-06,
      "loss": 0.7871,
      "step": 344
    },
    {
      "epoch": 0.8065458796025716,
      "grad_norm": 4.053324740509495,
      "learning_rate": 1.0024154589371982e-06,
      "loss": 0.7181,
      "step": 345
    },
    {
      "epoch": 0.8088836937463472,
      "grad_norm": 3.91170761323185,
      "learning_rate": 9.903381642512078e-07,
      "loss": 0.7167,
      "step": 346
    },
    {
      "epoch": 0.8112215078901227,
      "grad_norm": 3.9769619064751174,
      "learning_rate": 9.782608695652175e-07,
      "loss": 0.7152,
      "step": 347
    },
    {
      "epoch": 0.8135593220338984,
      "grad_norm": 4.141477101296879,
      "learning_rate": 9.66183574879227e-07,
      "loss": 0.806,
      "step": 348
    },
    {
      "epoch": 0.8158971361776739,
      "grad_norm": 3.9266793661338566,
      "learning_rate": 9.541062801932368e-07,
      "loss": 0.74,
      "step": 349
    },
    {
      "epoch": 0.8182349503214494,
      "grad_norm": 3.905819434278297,
      "learning_rate": 9.420289855072465e-07,
      "loss": 0.7621,
      "step": 350
    },
    {
      "epoch": 0.820572764465225,
      "grad_norm": 4.271457136544383,
      "learning_rate": 9.299516908212561e-07,
      "loss": 0.7108,
      "step": 351
    },
    {
      "epoch": 0.8229105786090006,
      "grad_norm": 3.9018935668444907,
      "learning_rate": 9.178743961352659e-07,
      "loss": 0.7326,
      "step": 352
    },
    {
      "epoch": 0.8252483927527762,
      "grad_norm": 3.842764627332658,
      "learning_rate": 9.057971014492754e-07,
      "loss": 0.769,
      "step": 353
    },
    {
      "epoch": 0.8275862068965517,
      "grad_norm": 4.12270406926976,
      "learning_rate": 8.937198067632851e-07,
      "loss": 0.7462,
      "step": 354
    },
    {
      "epoch": 0.8299240210403273,
      "grad_norm": 4.25238665717318,
      "learning_rate": 8.816425120772948e-07,
      "loss": 0.7417,
      "step": 355
    },
    {
      "epoch": 0.8322618351841029,
      "grad_norm": 4.10405871770544,
      "learning_rate": 8.695652173913044e-07,
      "loss": 0.7769,
      "step": 356
    },
    {
      "epoch": 0.8345996493278784,
      "grad_norm": 4.132898802117579,
      "learning_rate": 8.574879227053141e-07,
      "loss": 0.7334,
      "step": 357
    },
    {
      "epoch": 0.836937463471654,
      "grad_norm": 3.9812833871444573,
      "learning_rate": 8.454106280193238e-07,
      "loss": 0.7437,
      "step": 358
    },
    {
      "epoch": 0.8392752776154295,
      "grad_norm": 4.819360178352156,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.7594,
      "step": 359
    },
    {
      "epoch": 0.8416130917592052,
      "grad_norm": 4.27077723520544,
      "learning_rate": 8.212560386473431e-07,
      "loss": 0.7282,
      "step": 360
    },
    {
      "epoch": 0.8439509059029807,
      "grad_norm": 4.234704486935872,
      "learning_rate": 8.091787439613527e-07,
      "loss": 0.7844,
      "step": 361
    },
    {
      "epoch": 0.8462887200467563,
      "grad_norm": 3.660518143878683,
      "learning_rate": 7.971014492753623e-07,
      "loss": 0.6846,
      "step": 362
    },
    {
      "epoch": 0.8486265341905318,
      "grad_norm": 4.382898231252646,
      "learning_rate": 7.85024154589372e-07,
      "loss": 0.7378,
      "step": 363
    },
    {
      "epoch": 0.8509643483343075,
      "grad_norm": 4.03693007471031,
      "learning_rate": 7.729468599033817e-07,
      "loss": 0.7321,
      "step": 364
    },
    {
      "epoch": 0.853302162478083,
      "grad_norm": 4.061417655548705,
      "learning_rate": 7.608695652173914e-07,
      "loss": 0.7427,
      "step": 365
    },
    {
      "epoch": 0.8556399766218585,
      "grad_norm": 4.033537459659518,
      "learning_rate": 7.48792270531401e-07,
      "loss": 0.7631,
      "step": 366
    },
    {
      "epoch": 0.8579777907656342,
      "grad_norm": 3.8672964986217377,
      "learning_rate": 7.367149758454106e-07,
      "loss": 0.7277,
      "step": 367
    },
    {
      "epoch": 0.8603156049094097,
      "grad_norm": 4.1614750880483795,
      "learning_rate": 7.246376811594204e-07,
      "loss": 0.7821,
      "step": 368
    },
    {
      "epoch": 0.8626534190531853,
      "grad_norm": 4.0347237221296846,
      "learning_rate": 7.1256038647343e-07,
      "loss": 0.7229,
      "step": 369
    },
    {
      "epoch": 0.8649912331969608,
      "grad_norm": 4.419235250329394,
      "learning_rate": 7.004830917874397e-07,
      "loss": 0.7912,
      "step": 370
    },
    {
      "epoch": 0.8673290473407365,
      "grad_norm": 4.0395927745176925,
      "learning_rate": 6.884057971014494e-07,
      "loss": 0.7781,
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.869666861484512, |
|
"grad_norm": 4.323154501136669, |
|
"learning_rate": 6.763285024154589e-07, |
|
"loss": 0.7489, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.8720046756282875, |
|
"grad_norm": 4.0036925914792, |
|
"learning_rate": 6.642512077294687e-07, |
|
"loss": 0.7488, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8743424897720631, |
|
"grad_norm": 4.081792943103691, |
|
"learning_rate": 6.521739130434783e-07, |
|
"loss": 0.7506, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8766803039158387, |
|
"grad_norm": 3.961593904598705, |
|
"learning_rate": 6.400966183574881e-07, |
|
"loss": 0.7365, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8790181180596143, |
|
"grad_norm": 5.343637922572841, |
|
"learning_rate": 6.280193236714976e-07, |
|
"loss": 0.8142, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8813559322033898, |
|
"grad_norm": 4.234613953777181, |
|
"learning_rate": 6.159420289855074e-07, |
|
"loss": 0.7685, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8836937463471654, |
|
"grad_norm": 3.914888154011919, |
|
"learning_rate": 6.038647342995169e-07, |
|
"loss": 0.7442, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.886031560490941, |
|
"grad_norm": 3.998960956090034, |
|
"learning_rate": 5.917874396135266e-07, |
|
"loss": 0.7724, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8883693746347165, |
|
"grad_norm": 3.7467228875291885, |
|
"learning_rate": 5.797101449275363e-07, |
|
"loss": 0.7157, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8907071887784921, |
|
"grad_norm": 3.921411494491602, |
|
"learning_rate": 5.67632850241546e-07, |
|
"loss": 0.7604, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.8930450029222676, |
|
"grad_norm": 4.171395377831423, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.7498, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8953828170660433, |
|
"grad_norm": 4.1347642411133725, |
|
"learning_rate": 5.434782608695653e-07, |
|
"loss": 0.7472, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8977206312098188, |
|
"grad_norm": 4.092973708302494, |
|
"learning_rate": 5.314009661835749e-07, |
|
"loss": 0.7237, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.9000584453535944, |
|
"grad_norm": 3.9933326706118875, |
|
"learning_rate": 5.193236714975846e-07, |
|
"loss": 0.7389, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.90239625949737, |
|
"grad_norm": 3.8068860103615174, |
|
"learning_rate": 5.072463768115942e-07, |
|
"loss": 0.7177, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.9047340736411456, |
|
"grad_norm": 4.25980749026596, |
|
"learning_rate": 4.951690821256039e-07, |
|
"loss": 0.758, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.9070718877849211, |
|
"grad_norm": 3.8688206778681278, |
|
"learning_rate": 4.830917874396135e-07, |
|
"loss": 0.7577, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.9094097019286966, |
|
"grad_norm": 4.072604714599362, |
|
"learning_rate": 4.7101449275362324e-07, |
|
"loss": 0.7655, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.9117475160724723, |
|
"grad_norm": 4.216731514011164, |
|
"learning_rate": 4.5893719806763294e-07, |
|
"loss": 0.7572, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9140853302162478, |
|
"grad_norm": 4.204400645393741, |
|
"learning_rate": 4.4685990338164254e-07, |
|
"loss": 0.7595, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.9164231443600234, |
|
"grad_norm": 4.327014987328045, |
|
"learning_rate": 4.347826086956522e-07, |
|
"loss": 0.7347, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.9187609585037989, |
|
"grad_norm": 4.381847799007514, |
|
"learning_rate": 4.227053140096619e-07, |
|
"loss": 0.7505, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.9210987726475746, |
|
"grad_norm": 4.019350453750999, |
|
"learning_rate": 4.1062801932367154e-07, |
|
"loss": 0.7488, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.9234365867913501, |
|
"grad_norm": 3.958102022071496, |
|
"learning_rate": 3.9855072463768114e-07, |
|
"loss": 0.7436, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.9257744009351256, |
|
"grad_norm": 4.3569068621437745, |
|
"learning_rate": 3.8647342995169085e-07, |
|
"loss": 0.7323, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9281122150789012, |
|
"grad_norm": 3.9242746982918777, |
|
"learning_rate": 3.743961352657005e-07, |
|
"loss": 0.7255, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9304500292226768, |
|
"grad_norm": 3.91121815410949, |
|
"learning_rate": 3.623188405797102e-07, |
|
"loss": 0.7471, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9327878433664524, |
|
"grad_norm": 3.973005041304068, |
|
"learning_rate": 3.5024154589371985e-07, |
|
"loss": 0.6823, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9351256575102279, |
|
"grad_norm": 3.988161090830406, |
|
"learning_rate": 3.3816425120772945e-07, |
|
"loss": 0.6871, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9374634716540035, |
|
"grad_norm": 4.296337191130102, |
|
"learning_rate": 3.2608695652173915e-07, |
|
"loss": 0.7236, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.9398012857977791, |
|
"grad_norm": 4.3179225277967515, |
|
"learning_rate": 3.140096618357488e-07, |
|
"loss": 0.7582, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9421390999415546, |
|
"grad_norm": 4.191674727829652, |
|
"learning_rate": 3.0193236714975846e-07, |
|
"loss": 0.7238, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9444769140853302, |
|
"grad_norm": 3.8257966103380765, |
|
"learning_rate": 2.8985507246376816e-07, |
|
"loss": 0.7475, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.9468147282291058, |
|
"grad_norm": 4.06630469936539, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"loss": 0.7109, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9491525423728814, |
|
"grad_norm": 4.583718694034358, |
|
"learning_rate": 2.6570048309178746e-07, |
|
"loss": 0.7623, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9514903565166569, |
|
"grad_norm": 3.9553370864295694, |
|
"learning_rate": 2.536231884057971e-07, |
|
"loss": 0.7911, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9538281706604325, |
|
"grad_norm": 4.221184826167876, |
|
"learning_rate": 2.4154589371980677e-07, |
|
"loss": 0.7322, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9561659848042081, |
|
"grad_norm": 4.196761181297048, |
|
"learning_rate": 2.2946859903381647e-07, |
|
"loss": 0.7476, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9585037989479837, |
|
"grad_norm": 4.185489684411542, |
|
"learning_rate": 2.173913043478261e-07, |
|
"loss": 0.7548, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9608416130917592, |
|
"grad_norm": 4.371686498083367, |
|
"learning_rate": 2.0531400966183577e-07, |
|
"loss": 0.7328, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9631794272355347, |
|
"grad_norm": 4.314986686818614, |
|
"learning_rate": 1.9323671497584542e-07, |
|
"loss": 0.7304, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"grad_norm": 3.9822912414587806, |
|
"learning_rate": 1.811594202898551e-07, |
|
"loss": 0.7395, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9678550555230859, |
|
"grad_norm": 4.218523033535868, |
|
"learning_rate": 1.6908212560386473e-07, |
|
"loss": 0.7302, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9701928696668615, |
|
"grad_norm": 4.092187481356195, |
|
"learning_rate": 1.570048309178744e-07, |
|
"loss": 0.7351, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.972530683810637, |
|
"grad_norm": 4.184125537002853, |
|
"learning_rate": 1.4492753623188408e-07, |
|
"loss": 0.7413, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9748684979544127, |
|
"grad_norm": 3.889649663413063, |
|
"learning_rate": 1.3285024154589373e-07, |
|
"loss": 0.7365, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9772063120981882, |
|
"grad_norm": 4.139378543594781, |
|
"learning_rate": 1.2077294685990338e-07, |
|
"loss": 0.7626, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9795441262419637, |
|
"grad_norm": 4.016007817051792, |
|
"learning_rate": 1.0869565217391305e-07, |
|
"loss": 0.7428, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9818819403857393, |
|
"grad_norm": 4.31935746465498, |
|
"learning_rate": 9.661835748792271e-08, |
|
"loss": 0.7886, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9842197545295149, |
|
"grad_norm": 4.305755648868578, |
|
"learning_rate": 8.454106280193236e-08, |
|
"loss": 0.7552, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9865575686732905, |
|
"grad_norm": 4.324910095691635, |
|
"learning_rate": 7.246376811594204e-08, |
|
"loss": 0.7465, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.988895382817066, |
|
"grad_norm": 3.694300442393254, |
|
"learning_rate": 6.038647342995169e-08, |
|
"loss": 0.7093, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9912331969608417, |
|
"grad_norm": 3.829444377626212, |
|
"learning_rate": 4.8309178743961356e-08, |
|
"loss": 0.7241, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9935710111046172, |
|
"grad_norm": 4.179549227414933, |
|
"learning_rate": 3.623188405797102e-08, |
|
"loss": 0.7663, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9959088252483927, |
|
"grad_norm": 3.832323286806212, |
|
"learning_rate": 2.4154589371980678e-08, |
|
"loss": 0.7859, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9982466393921683, |
|
"grad_norm": 4.123264294362188, |
|
"learning_rate": 1.2077294685990339e-08, |
|
"loss": 0.7678, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.9982466393921683, |
|
"step": 427, |
|
"total_flos": 77746305761280.0, |
|
"train_loss": 0.8145518043281323, |
|
"train_runtime": 5184.5045, |
|
"train_samples_per_second": 10.56, |
|
"train_steps_per_second": 0.082 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 427, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 77746305761280.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|