{ "best_metric": 0.29094862937927246, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.7761966364812419, "eval_steps": 25, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00517464424320828, "grad_norm": 137.61221313476562, "learning_rate": 5.555555555555556e-06, "loss": 45.3923, "step": 1 }, { "epoch": 0.00517464424320828, "eval_loss": 0.879862904548645, "eval_runtime": 2.4641, "eval_samples_per_second": 20.291, "eval_steps_per_second": 5.276, "step": 1 }, { "epoch": 0.01034928848641656, "grad_norm": 145.64358520507812, "learning_rate": 1.1111111111111112e-05, "loss": 47.4644, "step": 2 }, { "epoch": 0.015523932729624839, "grad_norm": 143.3395233154297, "learning_rate": 1.6666666666666667e-05, "loss": 46.5953, "step": 3 }, { "epoch": 0.02069857697283312, "grad_norm": 131.87200927734375, "learning_rate": 2.2222222222222223e-05, "loss": 46.5328, "step": 4 }, { "epoch": 0.0258732212160414, "grad_norm": 114.46612548828125, "learning_rate": 2.777777777777778e-05, "loss": 44.6468, "step": 5 }, { "epoch": 0.031047865459249677, "grad_norm": 86.16083526611328, "learning_rate": 3.3333333333333335e-05, "loss": 40.958, "step": 6 }, { "epoch": 0.03622250970245795, "grad_norm": 135.6624755859375, "learning_rate": 3.888888888888889e-05, "loss": 39.1718, "step": 7 }, { "epoch": 0.04139715394566624, "grad_norm": 88.87834167480469, "learning_rate": 4.4444444444444447e-05, "loss": 35.5996, "step": 8 }, { "epoch": 0.04657179818887452, "grad_norm": 134.31365966796875, "learning_rate": 5e-05, "loss": 44.2182, "step": 9 }, { "epoch": 0.0517464424320828, "grad_norm": 112.15583038330078, "learning_rate": 5.555555555555556e-05, "loss": 40.9168, "step": 10 }, { "epoch": 0.056921086675291076, "grad_norm": 116.23684692382812, "learning_rate": 6.111111111111112e-05, "loss": 40.278, "step": 11 }, { "epoch": 0.062095730918499355, "grad_norm": 91.58599853515625, "learning_rate": 6.666666666666667e-05, "loss": 39.5289, "step": 12 }, { "epoch": 0.06727037516170763, "grad_norm": 84.65013122558594, "learning_rate": 7.222222222222222e-05, "loss": 34.301, "step": 13 }, { "epoch": 0.0724450194049159, "grad_norm": 63.37664031982422, "learning_rate": 7.777777777777778e-05, "loss": 30.2999, "step": 14 }, { "epoch": 0.07761966364812418, "grad_norm": 39.683807373046875, "learning_rate": 8.333333333333334e-05, "loss": 29.2968, "step": 15 }, { "epoch": 0.08279430789133248, "grad_norm": 31.897132873535156, "learning_rate": 8.888888888888889e-05, "loss": 28.7925, "step": 16 }, { "epoch": 0.08796895213454076, "grad_norm": 34.73341751098633, "learning_rate": 9.444444444444444e-05, "loss": 28.2201, "step": 17 }, { "epoch": 0.09314359637774904, "grad_norm": 33.116825103759766, "learning_rate": 0.0001, "loss": 27.4134, "step": 18 }, { "epoch": 0.09831824062095731, "grad_norm": 29.32647132873535, "learning_rate": 9.999884773765534e-05, "loss": 27.2748, "step": 19 }, { "epoch": 0.1034928848641656, "grad_norm": 34.262603759765625, "learning_rate": 9.999539100963065e-05, "loss": 26.7995, "step": 20 }, { "epoch": 0.10866752910737387, "grad_norm": 34.93130111694336, "learning_rate": 9.998962999295068e-05, "loss": 30.0288, "step": 21 }, { "epoch": 0.11384217335058215, "grad_norm": 45.51692581176758, "learning_rate": 9.998156498264669e-05, "loss": 32.0179, "step": 22 }, { "epoch": 0.11901681759379043, "grad_norm": 53.52436828613281, "learning_rate": 9.997119639174122e-05, "loss": 31.3302, "step": 23 }, { "epoch": 0.12419146183699871, "grad_norm": 53.14868927001953, "learning_rate": 9.995852475122702e-05, "loss": 31.3536, "step": 24 }, { "epoch": 0.129366106080207, "grad_norm": 55.426658630371094, "learning_rate": 9.994355071003984e-05, "loss": 30.1863, "step": 25 }, { "epoch": 0.129366106080207, "eval_loss": 0.4283739924430847, "eval_runtime": 2.4658, "eval_samples_per_second": 20.277, "eval_steps_per_second": 5.272, "step": 25 }, { "epoch": 0.13454075032341525, "grad_norm": 59.29961395263672, "learning_rate": 9.992627503502517e-05, "loss": 25.8217, "step": 26 }, { "epoch": 0.13971539456662355, "grad_norm": 32.18626403808594, "learning_rate": 9.990669861089904e-05, "loss": 24.9423, "step": 27 }, { "epoch": 0.1448900388098318, "grad_norm": 42.07598876953125, "learning_rate": 9.988482244020256e-05, "loss": 24.4099, "step": 28 }, { "epoch": 0.1500646830530401, "grad_norm": 32.522953033447266, "learning_rate": 9.986064764325079e-05, "loss": 23.9485, "step": 29 }, { "epoch": 0.15523932729624837, "grad_norm": 32.758941650390625, "learning_rate": 9.983417545807521e-05, "loss": 23.9998, "step": 30 }, { "epoch": 0.16041397153945666, "grad_norm": 30.58071517944336, "learning_rate": 9.980540724036031e-05, "loss": 23.4842, "step": 31 }, { "epoch": 0.16558861578266496, "grad_norm": 29.740413665771484, "learning_rate": 9.977434446337431e-05, "loss": 23.3358, "step": 32 }, { "epoch": 0.17076326002587322, "grad_norm": 31.934289932250977, "learning_rate": 9.974098871789359e-05, "loss": 24.1982, "step": 33 }, { "epoch": 0.1759379042690815, "grad_norm": 52.297630310058594, "learning_rate": 9.970534171212117e-05, "loss": 29.5956, "step": 34 }, { "epoch": 0.18111254851228978, "grad_norm": 44.05955123901367, "learning_rate": 9.966740527159945e-05, "loss": 28.9523, "step": 35 }, { "epoch": 0.18628719275549807, "grad_norm": 48.28145217895508, "learning_rate": 9.962718133911648e-05, "loss": 27.3425, "step": 36 }, { "epoch": 0.19146183699870634, "grad_norm": 50.967891693115234, "learning_rate": 9.958467197460662e-05, "loss": 28.0913, "step": 37 }, { "epoch": 0.19663648124191463, "grad_norm": 48.36716842651367, "learning_rate": 9.953987935504497e-05, "loss": 24.9412, "step": 38 }, { "epoch": 0.2018111254851229, "grad_norm": 34.47467803955078, "learning_rate": 9.949280577433593e-05, "loss": 22.663, "step": 39 }, { "epoch": 0.2069857697283312, "grad_norm": 24.475858688354492, "learning_rate": 9.944345364319571e-05, "loss": 22.2558, "step": 40 }, { "epoch": 0.21216041397153945, "grad_norm": 24.5921630859375, "learning_rate": 9.939182548902883e-05, "loss": 23.2109, "step": 41 }, { "epoch": 0.21733505821474774, "grad_norm": 22.380992889404297, "learning_rate": 9.933792395579877e-05, "loss": 22.2931, "step": 42 }, { "epoch": 0.222509702457956, "grad_norm": 27.86241340637207, "learning_rate": 9.928175180389254e-05, "loss": 21.9427, "step": 43 }, { "epoch": 0.2276843467011643, "grad_norm": 25.076570510864258, "learning_rate": 9.922331190997922e-05, "loss": 21.5344, "step": 44 }, { "epoch": 0.23285899094437257, "grad_norm": 26.751014709472656, "learning_rate": 9.916260726686278e-05, "loss": 21.4259, "step": 45 }, { "epoch": 0.23803363518758086, "grad_norm": 34.77674865722656, "learning_rate": 9.909964098332879e-05, "loss": 24.7821, "step": 46 }, { "epoch": 0.24320827943078913, "grad_norm": 45.179378509521484, "learning_rate": 9.903441628398511e-05, "loss": 25.4269, "step": 47 }, { "epoch": 0.24838292367399742, "grad_norm": 49.48162078857422, "learning_rate": 9.896693650909686e-05, "loss": 26.9395, "step": 48 }, { "epoch": 0.2535575679172057, "grad_norm": 51.22860336303711, "learning_rate": 9.889720511441532e-05, "loss": 24.3481, "step": 49 }, { "epoch": 0.258732212160414, "grad_norm": 45.01136779785156, "learning_rate": 9.882522567100093e-05, "loss": 25.6337, "step": 50 }, { "epoch": 0.258732212160414, "eval_loss": 0.3623690903186798, "eval_runtime": 2.4658, "eval_samples_per_second": 20.278, "eval_steps_per_second": 5.272, "step": 50 }, { "epoch": 0.26390685640362227, "grad_norm": 34.13395690917969, "learning_rate": 9.875100186504046e-05, "loss": 22.0404, "step": 51 }, { "epoch": 0.2690815006468305, "grad_norm": 25.99323272705078, "learning_rate": 9.867453749765821e-05, "loss": 21.5824, "step": 52 }, { "epoch": 0.2742561448900388, "grad_norm": 25.306379318237305, "learning_rate": 9.859583648472133e-05, "loss": 21.0068, "step": 53 }, { "epoch": 0.2794307891332471, "grad_norm": 24.23813247680664, "learning_rate": 9.851490285663937e-05, "loss": 20.8086, "step": 54 }, { "epoch": 0.2846054333764554, "grad_norm": 26.623321533203125, "learning_rate": 9.84317407581577e-05, "loss": 20.7912, "step": 55 }, { "epoch": 0.2897800776196636, "grad_norm": 24.135229110717773, "learning_rate": 9.834635444814545e-05, "loss": 21.1925, "step": 56 }, { "epoch": 0.2949547218628719, "grad_norm": 23.138614654541016, "learning_rate": 9.825874829937722e-05, "loss": 20.337, "step": 57 }, { "epoch": 0.3001293661060802, "grad_norm": 25.857755661010742, "learning_rate": 9.816892679830937e-05, "loss": 20.4409, "step": 58 }, { "epoch": 0.3053040103492885, "grad_norm": 51.40500259399414, "learning_rate": 9.807689454485e-05, "loss": 27.1275, "step": 59 }, { "epoch": 0.31047865459249674, "grad_norm": 46.01802444458008, "learning_rate": 9.798265625212358e-05, "loss": 24.8525, "step": 60 }, { "epoch": 0.31565329883570503, "grad_norm": 44.5273323059082, "learning_rate": 9.788621674622949e-05, "loss": 25.6639, "step": 61 }, { "epoch": 0.3208279430789133, "grad_norm": 55.26118469238281, "learning_rate": 9.778758096599488e-05, "loss": 24.8307, "step": 62 }, { "epoch": 0.3260025873221216, "grad_norm": 39.436317443847656, "learning_rate": 9.76867539627218e-05, "loss": 23.664, "step": 63 }, { "epoch": 0.3311772315653299, "grad_norm": 27.185758590698242, "learning_rate": 9.758374089992841e-05, "loss": 20.8368, "step": 64 }, { "epoch": 0.33635187580853815, "grad_norm": 24.435218811035156, "learning_rate": 9.747854705308464e-05, "loss": 20.0758, "step": 65 }, { "epoch": 0.34152652005174644, "grad_norm": 24.6842041015625, "learning_rate": 9.737117780934197e-05, "loss": 20.2967, "step": 66 }, { "epoch": 0.34670116429495473, "grad_norm": 24.836183547973633, "learning_rate": 9.726163866725763e-05, "loss": 19.7435, "step": 67 }, { "epoch": 0.351875808538163, "grad_norm": 23.984460830688477, "learning_rate": 9.714993523651283e-05, "loss": 19.9251, "step": 68 }, { "epoch": 0.35705045278137126, "grad_norm": 24.694488525390625, "learning_rate": 9.703607323762569e-05, "loss": 19.8217, "step": 69 }, { "epoch": 0.36222509702457956, "grad_norm": 24.874622344970703, "learning_rate": 9.692005850165816e-05, "loss": 20.1879, "step": 70 }, { "epoch": 0.36739974126778785, "grad_norm": 27.704814910888672, "learning_rate": 9.680189696991742e-05, "loss": 21.443, "step": 71 }, { "epoch": 0.37257438551099614, "grad_norm": 47.33317565917969, "learning_rate": 9.668159469365163e-05, "loss": 24.7285, "step": 72 }, { "epoch": 0.3777490297542044, "grad_norm": 39.55653762817383, "learning_rate": 9.655915783374005e-05, "loss": 23.3597, "step": 73 }, { "epoch": 0.3829236739974127, "grad_norm": 41.66143798828125, "learning_rate": 9.643459266037744e-05, "loss": 22.6492, "step": 74 }, { "epoch": 0.38809831824062097, "grad_norm": 61.841583251953125, "learning_rate": 9.630790555275313e-05, "loss": 23.7358, "step": 75 }, { "epoch": 0.38809831824062097, "eval_loss": 0.3401211202144623, "eval_runtime": 2.4648, "eval_samples_per_second": 20.286, "eval_steps_per_second": 5.274, "step": 75 }, { "epoch": 0.39327296248382926, "grad_norm": 47.54359817504883, "learning_rate": 9.617910299872416e-05, "loss": 20.79, "step": 76 }, { "epoch": 0.3984476067270375, "grad_norm": 28.126712799072266, "learning_rate": 9.604819159448309e-05, "loss": 20.1379, "step": 77 }, { "epoch": 0.4036222509702458, "grad_norm": 29.2869930267334, "learning_rate": 9.591517804422023e-05, "loss": 19.5183, "step": 78 }, { "epoch": 0.4087968952134541, "grad_norm": 30.21017074584961, "learning_rate": 9.578006915978022e-05, "loss": 19.2306, "step": 79 }, { "epoch": 0.4139715394566624, "grad_norm": 23.017995834350586, "learning_rate": 9.564287186031333e-05, "loss": 19.4107, "step": 80 }, { "epoch": 0.4191461836998706, "grad_norm": 24.11803436279297, "learning_rate": 9.550359317192096e-05, "loss": 19.2999, "step": 81 }, { "epoch": 0.4243208279430789, "grad_norm": 25.78137969970703, "learning_rate": 9.536224022729591e-05, "loss": 19.0366, "step": 82 }, { "epoch": 0.4294954721862872, "grad_norm": 23.27781867980957, "learning_rate": 9.521882026535708e-05, "loss": 18.6447, "step": 83 }, { "epoch": 0.4346701164294955, "grad_norm": 43.739017486572266, "learning_rate": 9.50733406308788e-05, "loss": 23.4598, "step": 84 }, { "epoch": 0.4398447606727037, "grad_norm": 39.98222351074219, "learning_rate": 9.492580877411456e-05, "loss": 22.9503, "step": 85 }, { "epoch": 0.445019404915912, "grad_norm": 39.081974029541016, "learning_rate": 9.477623225041565e-05, "loss": 23.2451, "step": 86 }, { "epoch": 0.4501940491591203, "grad_norm": 45.064517974853516, "learning_rate": 9.462461871984411e-05, "loss": 22.8303, "step": 87 }, { "epoch": 0.4553686934023286, "grad_norm": 35.90961456298828, "learning_rate": 9.447097594678046e-05, "loss": 20.3165, "step": 88 }, { "epoch": 0.46054333764553684, "grad_norm": 29.820219039916992, "learning_rate": 9.431531179952613e-05, "loss": 19.7648, "step": 89 }, { "epoch": 0.46571798188874514, "grad_norm": 24.659387588500977, "learning_rate": 9.415763424990047e-05, "loss": 19.644, "step": 90 }, { "epoch": 0.47089262613195343, "grad_norm": 23.654937744140625, "learning_rate": 9.39979513728325e-05, "loss": 18.7199, "step": 91 }, { "epoch": 0.4760672703751617, "grad_norm": 25.759111404418945, "learning_rate": 9.383627134594741e-05, "loss": 19.023, "step": 92 }, { "epoch": 0.48124191461837, "grad_norm": 26.290327072143555, "learning_rate": 9.367260244914768e-05, "loss": 19.474, "step": 93 }, { "epoch": 0.48641655886157825, "grad_norm": 22.56080436706543, "learning_rate": 9.350695306418922e-05, "loss": 18.6015, "step": 94 }, { "epoch": 0.49159120310478654, "grad_norm": 24.438447952270508, "learning_rate": 9.333933167425194e-05, "loss": 18.6668, "step": 95 }, { "epoch": 0.49676584734799484, "grad_norm": 26.50576400756836, "learning_rate": 9.316974686350542e-05, "loss": 20.2677, "step": 96 }, { "epoch": 0.5019404915912031, "grad_norm": 34.5866584777832, "learning_rate": 9.299820731666933e-05, "loss": 22.8894, "step": 97 }, { "epoch": 0.5071151358344114, "grad_norm": 37.455406188964844, "learning_rate": 9.282472181856854e-05, "loss": 22.4006, "step": 98 }, { "epoch": 0.5122897800776197, "grad_norm": 42.7640380859375, "learning_rate": 9.264929925368338e-05, "loss": 22.2345, "step": 99 }, { "epoch": 0.517464424320828, "grad_norm": 38.7821044921875, "learning_rate": 9.247194860569454e-05, "loss": 20.2589, "step": 100 }, { "epoch": 0.517464424320828, "eval_loss": 0.30999085307121277, "eval_runtime": 2.4658, "eval_samples_per_second": 20.278, "eval_steps_per_second": 5.272, "step": 100 }, { "epoch": 0.5226390685640362, "grad_norm": 36.86570739746094, "learning_rate": 9.229267895702307e-05, "loss": 19.0464, "step": 101 }, { "epoch": 0.5278137128072445, "grad_norm": 25.674314498901367, "learning_rate": 9.211149948836523e-05, "loss": 18.7845, "step": 102 }, { "epoch": 0.5329883570504528, "grad_norm": 26.45655632019043, "learning_rate": 9.192841947822232e-05, "loss": 18.6392, "step": 103 }, { "epoch": 0.538163001293661, "grad_norm": 21.524782180786133, "learning_rate": 9.17434483024255e-05, "loss": 17.6237, "step": 104 }, { "epoch": 0.5433376455368694, "grad_norm": 22.33011817932129, "learning_rate": 9.155659543365574e-05, "loss": 18.1102, "step": 105 }, { "epoch": 0.5485122897800776, "grad_norm": 25.29583168029785, "learning_rate": 9.136787044095856e-05, "loss": 17.8555, "step": 106 }, { "epoch": 0.553686934023286, "grad_norm": 23.13666343688965, "learning_rate": 9.117728298925407e-05, "loss": 17.7725, "step": 107 }, { "epoch": 0.5588615782664942, "grad_norm": 29.024078369140625, "learning_rate": 9.0984842838842e-05, "loss": 19.2546, "step": 108 }, { "epoch": 0.5640362225097024, "grad_norm": 50.17615509033203, "learning_rate": 9.079055984490186e-05, "loss": 23.3918, "step": 109 }, { "epoch": 0.5692108667529108, "grad_norm": 35.480079650878906, "learning_rate": 9.059444395698823e-05, "loss": 22.019, "step": 110 }, { "epoch": 0.574385510996119, "grad_norm": 38.16156005859375, "learning_rate": 9.039650521852124e-05, "loss": 20.7054, "step": 111 }, { "epoch": 0.5795601552393272, "grad_norm": 46.26918411254883, "learning_rate": 9.019675376627223e-05, "loss": 22.4658, "step": 112 }, { "epoch": 0.5847347994825356, "grad_norm": 39.86201095581055, "learning_rate": 8.99951998298446e-05, "loss": 19.7684, "step": 113 }, { "epoch": 0.5899094437257438, "grad_norm": 31.37214469909668, "learning_rate": 8.979185373114996e-05, "loss": 18.5229, "step": 114 }, { "epoch": 0.5950840879689522, "grad_norm": 23.724746704101562, "learning_rate": 8.958672588387953e-05, "loss": 18.7853, "step": 115 }, { "epoch": 0.6002587322121604, "grad_norm": 20.492137908935547, "learning_rate": 8.937982679297084e-05, "loss": 17.696, "step": 116 }, { "epoch": 0.6054333764553687, "grad_norm": 21.25743293762207, "learning_rate": 8.917116705406973e-05, "loss": 18.5685, "step": 117 }, { "epoch": 0.610608020698577, "grad_norm": 20.935260772705078, "learning_rate": 8.89607573529878e-05, "loss": 17.7144, "step": 118 }, { "epoch": 0.6157826649417852, "grad_norm": 22.177967071533203, "learning_rate": 8.8748608465155e-05, "loss": 17.7657, "step": 119 }, { "epoch": 0.6209573091849935, "grad_norm": 23.44055938720703, "learning_rate": 8.853473125506803e-05, "loss": 17.5972, "step": 120 }, { "epoch": 0.6261319534282018, "grad_norm": 28.071083068847656, "learning_rate": 8.831913667573379e-05, "loss": 20.4785, "step": 121 }, { "epoch": 0.6313065976714101, "grad_norm": 41.40524673461914, "learning_rate": 8.810183576810856e-05, "loss": 22.6656, "step": 122 }, { "epoch": 0.6364812419146184, "grad_norm": 35.51282501220703, "learning_rate": 8.788283966053244e-05, "loss": 20.7936, "step": 123 }, { "epoch": 0.6416558861578266, "grad_norm": 36.20588302612305, "learning_rate": 8.766215956815959e-05, "loss": 20.6696, "step": 124 }, { "epoch": 0.6468305304010349, "grad_norm": 45.860923767089844, "learning_rate": 8.743980679238385e-05, "loss": 20.0276, "step": 125 }, { "epoch": 0.6468305304010349, "eval_loss": 0.3056069612503052, "eval_runtime": 2.4651, "eval_samples_per_second": 20.283, "eval_steps_per_second": 5.274, "step": 125 }, { "epoch": 0.6520051746442432, "grad_norm": 29.0579891204834, "learning_rate": 8.721579272025989e-05, "loss": 18.0009, "step": 126 }, { "epoch": 0.6571798188874515, "grad_norm": 24.840801239013672, "learning_rate": 8.699012882392018e-05, "loss": 18.2828, "step": 127 }, { "epoch": 0.6623544631306598, "grad_norm": 21.309526443481445, "learning_rate": 8.676282665998736e-05, "loss": 17.479, "step": 128 }, { "epoch": 0.6675291073738681, "grad_norm": 19.316205978393555, "learning_rate": 8.653389786898255e-05, "loss": 17.4983, "step": 129 }, { "epoch": 0.6727037516170763, "grad_norm": 19.56000328063965, "learning_rate": 8.630335417472909e-05, "loss": 17.2595, "step": 130 }, { "epoch": 0.6778783958602846, "grad_norm": 21.257116317749023, "learning_rate": 8.607120738375219e-05, "loss": 17.2479, "step": 131 }, { "epoch": 0.6830530401034929, "grad_norm": 22.04469108581543, "learning_rate": 8.583746938467436e-05, "loss": 17.3352, "step": 132 }, { "epoch": 0.6882276843467011, "grad_norm": 22.032026290893555, "learning_rate": 8.560215214760647e-05, "loss": 17.1702, "step": 133 }, { "epoch": 0.6934023285899095, "grad_norm": 35.43321228027344, "learning_rate": 8.53652677235348e-05, "loss": 21.7454, "step": 134 }, { "epoch": 0.6985769728331177, "grad_norm": 35.82448196411133, "learning_rate": 8.512682824370386e-05, "loss": 21.3486, "step": 135 }, { "epoch": 0.703751617076326, "grad_norm": 33.74680709838867, "learning_rate": 8.48868459189952e-05, "loss": 19.9599, "step": 136 }, { "epoch": 0.7089262613195343, "grad_norm": 34.64817428588867, "learning_rate": 8.464533303930195e-05, "loss": 20.4086, "step": 137 }, { "epoch": 0.7141009055627425, "grad_norm": 30.78453826904297, "learning_rate": 8.440230197289955e-05, "loss": 19.2218, "step": 138 }, { "epoch": 0.7192755498059509, "grad_norm": 21.971385955810547, "learning_rate": 8.415776516581229e-05, "loss": 17.5228, "step": 139 }, { "epoch": 0.7244501940491591, "grad_norm": 25.183547973632812, "learning_rate": 8.391173514117591e-05, "loss": 17.9851, "step": 140 }, { "epoch": 0.7296248382923674, "grad_norm": 21.053089141845703, "learning_rate": 8.366422449859635e-05, "loss": 17.2393, "step": 141 }, { "epoch": 0.7347994825355757, "grad_norm": 20.95396614074707, "learning_rate": 8.34152459135044e-05, "loss": 16.9388, "step": 142 }, { "epoch": 0.7399741267787839, "grad_norm": 20.432722091674805, "learning_rate": 8.316481213650668e-05, "loss": 16.9324, "step": 143 }, { "epoch": 0.7451487710219923, "grad_norm": 21.420108795166016, "learning_rate": 8.291293599273253e-05, "loss": 17.3103, "step": 144 }, { "epoch": 0.7503234152652005, "grad_norm": 22.44611167907715, "learning_rate": 8.265963038117736e-05, "loss": 16.4267, "step": 145 }, { "epoch": 0.7554980595084088, "grad_norm": 29.85533905029297, "learning_rate": 8.240490827404196e-05, "loss": 19.9061, "step": 146 }, { "epoch": 0.7606727037516171, "grad_norm": 36.1021614074707, "learning_rate": 8.21487827160682e-05, "loss": 21.6027, "step": 147 }, { "epoch": 0.7658473479948253, "grad_norm": 35.79337692260742, "learning_rate": 8.189126682387103e-05, "loss": 21.6038, "step": 148 }, { "epoch": 0.7710219922380336, "grad_norm": 39.69446563720703, "learning_rate": 8.163237378526669e-05, "loss": 21.0512, "step": 149 }, { "epoch": 0.7761966364812419, "grad_norm": 37.87336730957031, "learning_rate": 8.137211685859739e-05, "loss": 20.3793, "step": 150 }, { "epoch": 0.7761966364812419, "eval_loss": 0.29094862937927246, "eval_runtime": 2.466, "eval_samples_per_second": 20.276, "eval_steps_per_second": 5.272, "step": 150 } ], "logging_steps": 1, "max_steps": 457, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.500900642665267e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }