{ "best_metric": 11.907214164733887, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 1.2698412698412698, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012698412698412698, "grad_norm": 0.06955482065677643, "learning_rate": 5.000000000000001e-07, "loss": 11.9294, "step": 1 }, { "epoch": 0.012698412698412698, "eval_loss": 11.907544136047363, "eval_runtime": 0.7222, "eval_samples_per_second": 184.151, "eval_steps_per_second": 47.076, "step": 1 }, { "epoch": 0.025396825396825397, "grad_norm": 0.07735534012317657, "learning_rate": 1.0000000000000002e-06, "loss": 11.9145, "step": 2 }, { "epoch": 0.0380952380952381, "grad_norm": 0.07554805278778076, "learning_rate": 1.5e-06, "loss": 11.9167, "step": 3 }, { "epoch": 0.050793650793650794, "grad_norm": 0.061444349586963654, "learning_rate": 2.0000000000000003e-06, "loss": 11.9136, "step": 4 }, { "epoch": 0.06349206349206349, "grad_norm": 0.06736243516206741, "learning_rate": 2.5e-06, "loss": 11.9108, "step": 5 }, { "epoch": 0.0761904761904762, "grad_norm": 0.06474306434392929, "learning_rate": 3e-06, "loss": 11.9145, "step": 6 }, { "epoch": 0.08888888888888889, "grad_norm": 0.07440496981143951, "learning_rate": 3.5e-06, "loss": 11.903, "step": 7 }, { "epoch": 0.10158730158730159, "grad_norm": 0.07693646103143692, "learning_rate": 4.000000000000001e-06, "loss": 11.9106, "step": 8 }, { "epoch": 0.11428571428571428, "grad_norm": 0.08138622343540192, "learning_rate": 4.5e-06, "loss": 11.9101, "step": 9 }, { "epoch": 0.12698412698412698, "grad_norm": 0.07329347729682922, "learning_rate": 5e-06, "loss": 11.9046, "step": 10 }, { "epoch": 0.13968253968253969, "grad_norm": 0.09403933584690094, "learning_rate": 4.99847706754774e-06, "loss": 11.9114, "step": 11 }, { "epoch": 0.1523809523809524, "grad_norm": 0.09672688692808151, "learning_rate": 4.993910125649561e-06, "loss": 11.9026, "step": 12 }, { "epoch": 0.16507936507936508, "grad_norm": 0.0922018513083458, "learning_rate": 4.986304738420684e-06, "loss": 11.9029, "step": 13 }, { "epoch": 0.17777777777777778, "grad_norm": 0.09022434055805206, "learning_rate": 4.975670171853926e-06, "loss": 11.9119, "step": 14 }, { "epoch": 0.19047619047619047, "grad_norm": 0.0931537076830864, "learning_rate": 4.962019382530521e-06, "loss": 11.9057, "step": 15 }, { "epoch": 0.20317460317460317, "grad_norm": 0.12171955406665802, "learning_rate": 4.9453690018345144e-06, "loss": 11.8854, "step": 16 }, { "epoch": 0.21587301587301588, "grad_norm": 0.11611567437648773, "learning_rate": 4.925739315689991e-06, "loss": 11.8884, "step": 17 }, { "epoch": 0.22857142857142856, "grad_norm": 0.15797491371631622, "learning_rate": 4.903154239845798e-06, "loss": 11.8931, "step": 18 }, { "epoch": 0.24126984126984127, "grad_norm": 0.19213564693927765, "learning_rate": 4.8776412907378845e-06, "loss": 11.8706, "step": 19 }, { "epoch": 0.25396825396825395, "grad_norm": 0.06431716680526733, "learning_rate": 4.849231551964771e-06, "loss": 11.9236, "step": 20 }, { "epoch": 0.26666666666666666, "grad_norm": 0.06191357225179672, "learning_rate": 4.817959636416969e-06, "loss": 11.911, "step": 21 }, { "epoch": 0.27936507936507937, "grad_norm": 0.059085745364427567, "learning_rate": 4.783863644106502e-06, "loss": 11.9261, "step": 22 }, { "epoch": 0.2920634920634921, "grad_norm": 0.06924884766340256, "learning_rate": 4.746985115747918e-06, "loss": 11.916, "step": 23 }, { "epoch": 0.3047619047619048, "grad_norm": 0.08857474476099014, "learning_rate": 4.707368982147318e-06, "loss": 11.9093, "step": 24 }, { "epoch": 0.31746031746031744, "grad_norm": 0.08003026992082596, "learning_rate": 4.665063509461098e-06, "loss": 11.9074, "step": 25 }, { "epoch": 0.33015873015873015, "grad_norm": 0.07391119003295898, "learning_rate": 4.620120240391065e-06, "loss": 11.9155, "step": 26 }, { "epoch": 0.34285714285714286, "grad_norm": 0.09267326444387436, "learning_rate": 4.572593931387604e-06, "loss": 11.9004, "step": 27 }, { "epoch": 0.35555555555555557, "grad_norm": 0.073005810379982, "learning_rate": 4.522542485937369e-06, "loss": 11.9071, "step": 28 }, { "epoch": 0.3682539682539683, "grad_norm": 0.0769127830862999, "learning_rate": 4.470026884016805e-06, "loss": 11.9119, "step": 29 }, { "epoch": 0.38095238095238093, "grad_norm": 0.06750453263521194, "learning_rate": 4.415111107797445e-06, "loss": 11.9088, "step": 30 }, { "epoch": 0.39365079365079364, "grad_norm": 0.08498089015483856, "learning_rate": 4.357862063693486e-06, "loss": 11.9037, "step": 31 }, { "epoch": 0.40634920634920635, "grad_norm": 0.0716579332947731, "learning_rate": 4.2983495008466285e-06, "loss": 11.9094, "step": 32 }, { "epoch": 0.41904761904761906, "grad_norm": 0.07841960340738297, "learning_rate": 4.236645926147493e-06, "loss": 11.9058, "step": 33 }, { "epoch": 0.43174603174603177, "grad_norm": 0.0928238183259964, "learning_rate": 4.172826515897146e-06, "loss": 11.9035, "step": 34 }, { "epoch": 0.4444444444444444, "grad_norm": 0.0969252958893776, "learning_rate": 4.106969024216348e-06, "loss": 11.9045, "step": 35 }, { "epoch": 0.45714285714285713, "grad_norm": 0.09704837203025818, "learning_rate": 4.039153688314146e-06, "loss": 11.9084, "step": 36 }, { "epoch": 0.46984126984126984, "grad_norm": 0.16046535968780518, "learning_rate": 3.969463130731183e-06, "loss": 11.8861, "step": 37 }, { "epoch": 0.48253968253968255, "grad_norm": 0.19013437628746033, "learning_rate": 3.897982258676867e-06, "loss": 11.8702, "step": 38 }, { "epoch": 0.49523809523809526, "grad_norm": 0.06615746766328812, "learning_rate": 3.824798160583012e-06, "loss": 11.926, "step": 39 }, { "epoch": 0.5079365079365079, "grad_norm": 0.07797185331583023, "learning_rate": 3.7500000000000005e-06, "loss": 11.9191, "step": 40 }, { "epoch": 0.5206349206349207, "grad_norm": 0.05537969619035721, "learning_rate": 3.6736789069647273e-06, "loss": 11.9241, "step": 41 }, { "epoch": 0.5333333333333333, "grad_norm": 0.06839220970869064, "learning_rate": 3.595927866972694e-06, "loss": 11.9077, "step": 42 }, { "epoch": 0.546031746031746, "grad_norm": 0.07454146444797516, "learning_rate": 3.516841607689501e-06, "loss": 11.9059, "step": 43 }, { "epoch": 0.5587301587301587, "grad_norm": 0.06679458171129227, "learning_rate": 3.436516483539781e-06, "loss": 11.9227, "step": 44 }, { "epoch": 0.5714285714285714, "grad_norm": 0.07610110938549042, "learning_rate": 3.3550503583141726e-06, "loss": 11.9148, "step": 45 }, { "epoch": 0.5841269841269842, "grad_norm": 0.08206192404031754, "learning_rate": 3.272542485937369e-06, "loss": 11.9083, "step": 46 }, { "epoch": 0.5968253968253968, "grad_norm": 0.07684988528490067, "learning_rate": 3.189093389542498e-06, "loss": 11.9023, "step": 47 }, { "epoch": 0.6095238095238096, "grad_norm": 0.0734647810459137, "learning_rate": 3.1048047389991693e-06, "loss": 11.9137, "step": 48 }, { "epoch": 0.6222222222222222, "grad_norm": 0.07913163304328918, "learning_rate": 3.019779227044398e-06, "loss": 11.9117, "step": 49 }, { "epoch": 0.6349206349206349, "grad_norm": 0.08139149099588394, "learning_rate": 2.9341204441673267e-06, "loss": 11.9072, "step": 50 }, { "epoch": 0.6349206349206349, "eval_loss": 11.907240867614746, "eval_runtime": 0.6961, "eval_samples_per_second": 191.061, "eval_steps_per_second": 48.843, "step": 50 }, { "epoch": 0.6476190476190476, "grad_norm": 0.0918021872639656, "learning_rate": 2.847932752400164e-06, "loss": 11.911, "step": 51 }, { "epoch": 0.6603174603174603, "grad_norm": 0.10645179450511932, "learning_rate": 2.761321158169134e-06, "loss": 11.9001, "step": 52 }, { "epoch": 0.6730158730158731, "grad_norm": 0.09653421491384506, "learning_rate": 2.6743911843603134e-06, "loss": 11.9115, "step": 53 }, { "epoch": 0.6857142857142857, "grad_norm": 0.08707209676504135, "learning_rate": 2.587248741756253e-06, "loss": 11.9098, "step": 54 }, { "epoch": 0.6984126984126984, "grad_norm": 0.12399673461914062, "learning_rate": 2.5e-06, "loss": 11.8801, "step": 55 }, { "epoch": 0.7111111111111111, "grad_norm": 0.14888297021389008, "learning_rate": 2.4127512582437486e-06, "loss": 11.8865, "step": 56 }, { "epoch": 0.7238095238095238, "grad_norm": 0.22429193556308746, "learning_rate": 2.325608815639687e-06, "loss": 11.8665, "step": 57 }, { "epoch": 0.7365079365079366, "grad_norm": 0.07361437380313873, "learning_rate": 2.238678841830867e-06, "loss": 11.9164, "step": 58 }, { "epoch": 0.7492063492063492, "grad_norm": 0.0654204860329628, "learning_rate": 2.1520672475998374e-06, "loss": 11.9189, "step": 59 }, { "epoch": 0.7619047619047619, "grad_norm": 0.066929891705513, "learning_rate": 2.0658795558326745e-06, "loss": 11.9177, "step": 60 }, { "epoch": 0.7746031746031746, "grad_norm": 0.07191995531320572, "learning_rate": 1.9802207729556023e-06, "loss": 11.9053, "step": 61 }, { "epoch": 0.7873015873015873, "grad_norm": 0.06091824918985367, "learning_rate": 1.895195261000831e-06, "loss": 11.9175, "step": 62 }, { "epoch": 0.8, "grad_norm": 0.0758826956152916, "learning_rate": 1.8109066104575023e-06, "loss": 11.9064, "step": 63 }, { "epoch": 0.8126984126984127, "grad_norm": 0.059873126447200775, "learning_rate": 1.7274575140626318e-06, "loss": 11.9099, "step": 64 }, { "epoch": 0.8253968253968254, "grad_norm": 0.07108975946903229, "learning_rate": 1.6449496416858285e-06, "loss": 11.9052, "step": 65 }, { "epoch": 0.8380952380952381, "grad_norm": 0.05853763967752457, "learning_rate": 1.56348351646022e-06, "loss": 11.9186, "step": 66 }, { "epoch": 0.8507936507936508, "grad_norm": 0.08004617691040039, "learning_rate": 1.4831583923105e-06, "loss": 11.9209, "step": 67 }, { "epoch": 0.8634920634920635, "grad_norm": 0.07879564166069031, "learning_rate": 1.4040721330273063e-06, "loss": 11.9119, "step": 68 }, { "epoch": 0.8761904761904762, "grad_norm": 0.08058635145425797, "learning_rate": 1.3263210930352737e-06, "loss": 11.9255, "step": 69 }, { "epoch": 0.8888888888888888, "grad_norm": 0.10125672817230225, "learning_rate": 1.2500000000000007e-06, "loss": 11.9015, "step": 70 }, { "epoch": 0.9015873015873016, "grad_norm": 0.0930691808462143, "learning_rate": 1.1752018394169882e-06, "loss": 11.9045, "step": 71 }, { "epoch": 0.9142857142857143, "grad_norm": 0.10147691518068314, "learning_rate": 1.1020177413231334e-06, "loss": 11.9, "step": 72 }, { "epoch": 0.926984126984127, "grad_norm": 0.09452462196350098, "learning_rate": 1.0305368692688175e-06, "loss": 11.8952, "step": 73 }, { "epoch": 0.9396825396825397, "grad_norm": 0.12057901173830032, "learning_rate": 9.608463116858544e-07, "loss": 11.8964, "step": 74 }, { "epoch": 0.9523809523809523, "grad_norm": 0.1297084391117096, "learning_rate": 8.930309757836517e-07, "loss": 11.9046, "step": 75 }, { "epoch": 0.9650793650793651, "grad_norm": 0.20787179470062256, "learning_rate": 8.271734841028553e-07, "loss": 11.8712, "step": 76 }, { "epoch": 0.9777777777777777, "grad_norm": 0.06375299394130707, "learning_rate": 7.633540738525066e-07, "loss": 11.9155, "step": 77 }, { "epoch": 0.9904761904761905, "grad_norm": 0.07694946974515915, "learning_rate": 7.016504991533727e-07, "loss": 11.9059, "step": 78 }, { "epoch": 1.0031746031746032, "grad_norm": 0.1421554982662201, "learning_rate": 6.421379363065142e-07, "loss": 14.8775, "step": 79 }, { "epoch": 1.0158730158730158, "grad_norm": 0.08853979408740997, "learning_rate": 5.848888922025553e-07, "loss": 11.8709, "step": 80 }, { "epoch": 1.0285714285714285, "grad_norm": 0.05940350517630577, "learning_rate": 5.299731159831953e-07, "loss": 11.9567, "step": 81 }, { "epoch": 1.0412698412698413, "grad_norm": 0.06864917278289795, "learning_rate": 4.774575140626317e-07, "loss": 11.9147, "step": 82 }, { "epoch": 1.053968253968254, "grad_norm": 0.07743862271308899, "learning_rate": 4.27406068612396e-07, "loss": 11.9113, "step": 83 }, { "epoch": 1.0666666666666667, "grad_norm": 0.07003061473369598, "learning_rate": 3.798797596089351e-07, "loss": 11.9181, "step": 84 }, { "epoch": 1.0793650793650793, "grad_norm": 0.06729806214570999, "learning_rate": 3.3493649053890325e-07, "loss": 11.9099, "step": 85 }, { "epoch": 1.0920634920634922, "grad_norm": 0.06610359251499176, "learning_rate": 2.9263101785268253e-07, "loss": 11.9, "step": 86 }, { "epoch": 1.1047619047619048, "grad_norm": 0.08963724225759506, "learning_rate": 2.53014884252083e-07, "loss": 11.8981, "step": 87 }, { "epoch": 1.1174603174603175, "grad_norm": 0.07728394865989685, "learning_rate": 2.1613635589349756e-07, "loss": 11.9115, "step": 88 }, { "epoch": 1.1301587301587301, "grad_norm": 0.06739490479230881, "learning_rate": 1.8204036358303173e-07, "loss": 11.9023, "step": 89 }, { "epoch": 1.1428571428571428, "grad_norm": 0.07926109433174133, "learning_rate": 1.507684480352292e-07, "loss": 11.9129, "step": 90 }, { "epoch": 1.1555555555555554, "grad_norm": 0.0931377112865448, "learning_rate": 1.223587092621162e-07, "loss": 11.8987, "step": 91 }, { "epoch": 1.1682539682539683, "grad_norm": 0.09764450788497925, "learning_rate": 9.684576015420277e-08, "loss": 11.9014, "step": 92 }, { "epoch": 1.180952380952381, "grad_norm": 0.09037233144044876, "learning_rate": 7.426068431000883e-08, "loss": 11.8993, "step": 93 }, { "epoch": 1.1936507936507936, "grad_norm": 0.08760291337966919, "learning_rate": 5.463099816548578e-08, "loss": 11.902, "step": 94 }, { "epoch": 1.2063492063492063, "grad_norm": 0.10378438234329224, "learning_rate": 3.798061746947995e-08, "loss": 11.9043, "step": 95 }, { "epoch": 1.2190476190476192, "grad_norm": 0.129079669713974, "learning_rate": 2.4329828146074096e-08, "loss": 11.8942, "step": 96 }, { "epoch": 1.2317460317460318, "grad_norm": 0.19534048438072205, "learning_rate": 1.3695261579316776e-08, "loss": 11.8815, "step": 97 }, { "epoch": 1.2444444444444445, "grad_norm": 0.19826172292232513, "learning_rate": 6.089874350439507e-09, "loss": 11.872, "step": 98 }, { "epoch": 1.2571428571428571, "grad_norm": 0.07281696051359177, "learning_rate": 1.5229324522605949e-09, "loss": 11.921, "step": 99 }, { "epoch": 1.2698412698412698, "grad_norm": 0.07083765417337418, "learning_rate": 0.0, "loss": 11.9176, "step": 100 }, { "epoch": 1.2698412698412698, "eval_loss": 11.907214164733887, "eval_runtime": 0.7217, "eval_samples_per_second": 184.294, "eval_steps_per_second": 47.113, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 150268674048.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }