{ "best_metric": 2.3959906101226807, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.04893265643158603, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009786531286317206, "grad_norm": 0.8316358923912048, "learning_rate": 5e-05, "loss": 2.4264, "step": 1 }, { "epoch": 0.0009786531286317206, "eval_loss": 2.862811326980591, "eval_runtime": 3.8608, "eval_samples_per_second": 12.951, "eval_steps_per_second": 3.367, "step": 1 }, { "epoch": 0.0019573062572634413, "grad_norm": 0.9247562885284424, "learning_rate": 0.0001, "loss": 2.5591, "step": 2 }, { "epoch": 0.002935959385895162, "grad_norm": 0.8996179699897766, "learning_rate": 9.990365154573717e-05, "loss": 2.611, "step": 3 }, { "epoch": 0.0039146125145268825, "grad_norm": 0.6197950839996338, "learning_rate": 9.961501876182148e-05, "loss": 2.4587, "step": 4 }, { "epoch": 0.004893265643158603, "grad_norm": 0.5590848326683044, "learning_rate": 9.913533761814537e-05, "loss": 2.5131, "step": 5 }, { "epoch": 0.005871918771790324, "grad_norm": 0.8712125420570374, "learning_rate": 9.846666218300807e-05, "loss": 2.4739, "step": 6 }, { "epoch": 0.006850571900422044, "grad_norm": 0.6620206832885742, "learning_rate": 9.761185582727977e-05, "loss": 2.4627, "step": 7 }, { "epoch": 0.007829225029053765, "grad_norm": 0.49366331100463867, "learning_rate": 9.657457896300791e-05, "loss": 2.443, "step": 8 }, { "epoch": 0.008807878157685486, "grad_norm": 0.48205986618995667, "learning_rate": 9.535927336897098e-05, "loss": 2.4574, "step": 9 }, { "epoch": 0.009786531286317205, "grad_norm": 0.530735194683075, "learning_rate": 9.397114317029975e-05, "loss": 2.4442, "step": 10 }, { "epoch": 0.010765184414948926, "grad_norm": 0.5516226291656494, "learning_rate": 9.241613255361455e-05, "loss": 2.3873, "step": 11 }, { "epoch": 0.011743837543580648, "grad_norm": 0.6918548941612244, "learning_rate": 9.070090031310558e-05, "loss": 2.3429, "step": 12 }, { "epoch": 0.012722490672212369, "grad_norm": 0.5481558442115784, "learning_rate": 8.883279133655399e-05, "loss": 2.1984, "step": 13 }, { "epoch": 0.013701143800844088, "grad_norm": 0.6193399429321289, "learning_rate": 8.681980515339464e-05, "loss": 2.3338, "step": 14 }, { "epoch": 0.014679796929475809, "grad_norm": 0.4581556022167206, "learning_rate": 8.467056167950311e-05, "loss": 2.2973, "step": 15 }, { "epoch": 0.01565845005810753, "grad_norm": 0.32524430751800537, "learning_rate": 8.239426430539243e-05, "loss": 2.2362, "step": 16 }, { "epoch": 0.01663710318673925, "grad_norm": 0.2754608690738678, "learning_rate": 8.000066048588211e-05, "loss": 2.3678, "step": 17 }, { "epoch": 0.017615756315370972, "grad_norm": 0.2892760634422302, "learning_rate": 7.75e-05, "loss": 2.3839, "step": 18 }, { "epoch": 0.01859440944400269, "grad_norm": 0.3418925106525421, "learning_rate": 7.490299105985507e-05, "loss": 2.3053, "step": 19 }, { "epoch": 0.01957306257263441, "grad_norm": 0.3567667305469513, "learning_rate": 7.222075445642904e-05, "loss": 2.3271, "step": 20 }, { "epoch": 0.020551715701266134, "grad_norm": 0.3940460979938507, "learning_rate": 6.946477593864228e-05, "loss": 2.3585, "step": 21 }, { "epoch": 0.021530368829897853, "grad_norm": 0.43177926540374756, "learning_rate": 6.664685702961344e-05, "loss": 2.3478, "step": 22 }, { "epoch": 0.022509021958529572, "grad_norm": 0.45484182238578796, "learning_rate": 6.377906449072578e-05, "loss": 2.3482, "step": 23 }, { "epoch": 0.023487675087161295, "grad_norm": 0.4482674300670624, "learning_rate": 6.087367864990233e-05, "loss": 2.3099, "step": 24 }, { "epoch": 0.024466328215793014, "grad_norm": 0.5691775679588318, "learning_rate": 5.794314081535644e-05, "loss": 2.4265, "step": 25 }, { "epoch": 0.024466328215793014, "eval_loss": 2.410771131515503, "eval_runtime": 3.4876, "eval_samples_per_second": 14.336, "eval_steps_per_second": 3.727, "step": 25 }, { "epoch": 0.025444981344424737, "grad_norm": 0.19927345216274261, "learning_rate": 5.500000000000001e-05, "loss": 2.1858, "step": 26 }, { "epoch": 0.026423634473056457, "grad_norm": 0.24525891244411469, "learning_rate": 5.205685918464356e-05, "loss": 2.2758, "step": 27 }, { "epoch": 0.027402287601688176, "grad_norm": 0.28260111808776855, "learning_rate": 4.912632135009769e-05, "loss": 2.3156, "step": 28 }, { "epoch": 0.0283809407303199, "grad_norm": 0.30988043546676636, "learning_rate": 4.6220935509274235e-05, "loss": 2.3343, "step": 29 }, { "epoch": 0.029359593858951618, "grad_norm": 0.3335058093070984, "learning_rate": 4.3353142970386564e-05, "loss": 2.2584, "step": 30 }, { "epoch": 0.030338246987583337, "grad_norm": 0.32451608777046204, "learning_rate": 4.053522406135775e-05, "loss": 2.291, "step": 31 }, { "epoch": 0.03131690011621506, "grad_norm": 0.342134028673172, "learning_rate": 3.777924554357096e-05, "loss": 2.28, "step": 32 }, { "epoch": 0.03229555324484678, "grad_norm": 0.3195323348045349, "learning_rate": 3.509700894014496e-05, "loss": 2.3299, "step": 33 }, { "epoch": 0.0332742063734785, "grad_norm": 0.3132987916469574, "learning_rate": 3.250000000000001e-05, "loss": 2.3096, "step": 34 }, { "epoch": 0.03425285950211022, "grad_norm": 0.37992510199546814, "learning_rate": 2.9999339514117912e-05, "loss": 2.3594, "step": 35 }, { "epoch": 0.035231512630741944, "grad_norm": 0.376974880695343, "learning_rate": 2.760573569460757e-05, "loss": 2.2945, "step": 36 }, { "epoch": 0.036210165759373664, "grad_norm": 0.42907246947288513, "learning_rate": 2.53294383204969e-05, "loss": 2.3236, "step": 37 }, { "epoch": 0.03718881888800538, "grad_norm": 0.1865878850221634, "learning_rate": 2.3180194846605367e-05, "loss": 2.1561, "step": 38 }, { "epoch": 0.0381674720166371, "grad_norm": 0.18192711472511292, "learning_rate": 2.1167208663446025e-05, "loss": 2.2334, "step": 39 }, { "epoch": 0.03914612514526882, "grad_norm": 0.2045622020959854, "learning_rate": 1.9299099686894423e-05, "loss": 2.2695, "step": 40 }, { "epoch": 0.04012477827390054, "grad_norm": 0.21859441697597504, "learning_rate": 1.758386744638546e-05, "loss": 2.2435, "step": 41 }, { "epoch": 0.04110343140253227, "grad_norm": 0.23119929432868958, "learning_rate": 1.602885682970026e-05, "loss": 2.2306, "step": 42 }, { "epoch": 0.04208208453116399, "grad_norm": 0.25025153160095215, "learning_rate": 1.464072663102903e-05, "loss": 2.3045, "step": 43 }, { "epoch": 0.043060737659795706, "grad_norm": 0.26086995005607605, "learning_rate": 1.3425421036992098e-05, "loss": 2.2781, "step": 44 }, { "epoch": 0.044039390788427425, "grad_norm": 0.27447056770324707, "learning_rate": 1.2388144172720251e-05, "loss": 2.2896, "step": 45 }, { "epoch": 0.045018043917059145, "grad_norm": 0.2887076437473297, "learning_rate": 1.1533337816991932e-05, "loss": 2.2735, "step": 46 }, { "epoch": 0.04599669704569087, "grad_norm": 0.3213229179382324, "learning_rate": 1.0864662381854632e-05, "loss": 2.3138, "step": 47 }, { "epoch": 0.04697535017432259, "grad_norm": 0.3773607313632965, "learning_rate": 1.0384981238178534e-05, "loss": 2.3575, "step": 48 }, { "epoch": 0.04795400330295431, "grad_norm": 0.39213332533836365, "learning_rate": 1.0096348454262845e-05, "loss": 2.2916, "step": 49 }, { "epoch": 0.04893265643158603, "grad_norm": 0.47484299540519714, "learning_rate": 1e-05, "loss": 2.2866, "step": 50 }, { "epoch": 0.04893265643158603, "eval_loss": 2.3959906101226807, "eval_runtime": 3.4848, "eval_samples_per_second": 14.348, "eval_steps_per_second": 3.731, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.969948642440643e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }