{ "best_metric": 0.0, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 1.6069287141905395, "eval_steps": 25, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021319120586275817, "grad_norm": 38.527584075927734, "learning_rate": 2e-05, "loss": 9.4748, "step": 1 }, { "epoch": 0.021319120586275817, "eval_loss": 9.035676002502441, "eval_runtime": 1.5697, "eval_samples_per_second": 31.853, "eval_steps_per_second": 8.282, "step": 1 }, { "epoch": 0.04263824117255163, "grad_norm": 42.24845504760742, "learning_rate": 4e-05, "loss": 9.1219, "step": 2 }, { "epoch": 0.06395736175882745, "grad_norm": 37.02716064453125, "learning_rate": 6e-05, "loss": 8.3995, "step": 3 }, { "epoch": 0.08527648234510327, "grad_norm": 42.479209899902344, "learning_rate": 8e-05, "loss": 5.5619, "step": 4 }, { "epoch": 0.10659560293137908, "grad_norm": 60.852420806884766, "learning_rate": 0.0001, "loss": 2.314, "step": 5 }, { "epoch": 0.1279147235176549, "grad_norm": 28.67313575744629, "learning_rate": 9.997539658034168e-05, "loss": 0.3238, "step": 6 }, { "epoch": 0.1492338441039307, "grad_norm": 1.2146679162979126, "learning_rate": 9.990161322484486e-05, "loss": 0.0161, "step": 7 }, { "epoch": 0.17055296469020653, "grad_norm": 0.004744780249893665, "learning_rate": 9.977873061452552e-05, "loss": 0.0, "step": 8 }, { "epoch": 0.19187208527648233, "grad_norm": 0.00017941300757229328, "learning_rate": 9.96068831197139e-05, "loss": 0.0, "step": 9 }, { "epoch": 0.21319120586275817, "grad_norm": 1.0545414625084959e-05, "learning_rate": 9.938625865312251e-05, "loss": 0.0, "step": 10 }, { "epoch": 0.23451032644903397, "grad_norm": 6.76497677432053e-07, "learning_rate": 9.911709846436641e-05, "loss": 0.0, "step": 11 }, { "epoch": 0.2558294470353098, "grad_norm": 5.097327709197998, "learning_rate": 9.879969687616027e-05, "loss": 0.7237, "step": 12 }, { "epoch": 0.27714856762158563, "grad_norm": 2.436632087210455e-07, "learning_rate": 9.84344009624807e-05, "loss": 0.0, "step": 13 }, { "epoch": 0.2984676882078614, "grad_norm": 2.3601765519742912e-07, "learning_rate": 9.80216101690461e-05, "loss": 0.0, "step": 14 }, { "epoch": 0.31978680879413723, "grad_norm": 2.44479565481015e-07, "learning_rate": 9.756177587652856e-05, "loss": 0.0, "step": 15 }, { "epoch": 0.34110592938041306, "grad_norm": 2.484931087565201e-07, "learning_rate": 9.705540090697575e-05, "loss": 0.0, "step": 16 }, { "epoch": 0.3624250499666889, "grad_norm": 2.6767651206682785e-07, "learning_rate": 9.650303897398232e-05, "loss": 0.0, "step": 17 }, { "epoch": 0.38374417055296467, "grad_norm": 4.7151920057331154e-07, "learning_rate": 9.590529407721231e-05, "loss": 0.0, "step": 18 }, { "epoch": 0.4050632911392405, "grad_norm": 1.3120588846504688e-06, "learning_rate": 9.526281984193436e-05, "loss": 0.0, "step": 19 }, { "epoch": 0.42638241172551633, "grad_norm": 3.3715880363160977e-06, "learning_rate": 9.4576318804292e-05, "loss": 0.0, "step": 20 }, { "epoch": 0.44770153231179216, "grad_norm": 1.5351124602602795e-05, "learning_rate": 9.384654164309083e-05, "loss": 0.0, "step": 21 }, { "epoch": 0.46902065289806794, "grad_norm": 4.627073212759569e-05, "learning_rate": 9.30742863589421e-05, "loss": 0.0, "step": 22 }, { "epoch": 0.49033977348434377, "grad_norm": 0.00010404508793726563, "learning_rate": 9.226039740166091e-05, "loss": 0.0, "step": 23 }, { "epoch": 0.5116588940706196, "grad_norm": 2.138131856918335, "learning_rate": 9.140576474687264e-05, "loss": 0.04, "step": 24 }, { "epoch": 0.5329780146568954, "grad_norm": 0.00029895795159973204, "learning_rate": 9.051132292283771e-05, "loss": 0.0, "step": 25 }, { "epoch": 0.5329780146568954, "eval_loss": 4.335143785283435e-06, "eval_runtime": 1.5705, "eval_samples_per_second": 31.838, "eval_steps_per_second": 8.278, "step": 25 }, { "epoch": 0.5542971352431713, "grad_norm": 0.000780619157012552, "learning_rate": 8.957804998855866e-05, "loss": 0.0, "step": 26 }, { "epoch": 0.5756162558294471, "grad_norm": 0.0015821302076801658, "learning_rate": 8.860696646428693e-05, "loss": 0.0, "step": 27 }, { "epoch": 0.5969353764157228, "grad_norm": 0.002287358045578003, "learning_rate": 8.759913421559902e-05, "loss": 0.0, "step": 28 }, { "epoch": 0.6182544970019986, "grad_norm": 0.003032292239367962, "learning_rate": 8.655565529226198e-05, "loss": 0.0, "step": 29 }, { "epoch": 0.6395736175882745, "grad_norm": 0.0019944601226598024, "learning_rate": 8.547767072315835e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.6608927381745503, "grad_norm": 0.0021756617352366447, "learning_rate": 8.436635926858759e-05, "loss": 0.0, "step": 31 }, { "epoch": 0.6822118587608261, "grad_norm": 0.002514640800654888, "learning_rate": 8.322293613130917e-05, "loss": 0.0, "step": 32 }, { "epoch": 0.703530979347102, "grad_norm": 0.002495438326150179, "learning_rate": 8.204865162773613e-05, "loss": 0.0, "step": 33 }, { "epoch": 0.7248500999333778, "grad_norm": 0.001426941016688943, "learning_rate": 8.084478982073247e-05, "loss": 0.0, "step": 34 }, { "epoch": 0.7461692205196535, "grad_norm": 0.2643156945705414, "learning_rate": 7.961266711550922e-05, "loss": 0.0023, "step": 35 }, { "epoch": 0.7674883411059293, "grad_norm": 0.00021771328465547413, "learning_rate": 7.835363082015468e-05, "loss": 0.0, "step": 36 }, { "epoch": 0.7888074616922052, "grad_norm": 0.0002205753407906741, "learning_rate": 7.706905767237288e-05, "loss": 0.0, "step": 37 }, { "epoch": 0.810126582278481, "grad_norm": 0.00031268602469936013, "learning_rate": 7.576035233404096e-05, "loss": 0.0, "step": 38 }, { "epoch": 0.8314457028647568, "grad_norm": 0.000729731866158545, "learning_rate": 7.442894585523218e-05, "loss": 0.0, "step": 39 }, { "epoch": 0.8527648234510327, "grad_norm": 0.0007519057835452259, "learning_rate": 7.307629410938363e-05, "loss": 0.0, "step": 40 }, { "epoch": 0.8740839440373085, "grad_norm": 0.0008386191911995411, "learning_rate": 7.170387620131993e-05, "loss": 0.0, "step": 41 }, { "epoch": 0.8954030646235843, "grad_norm": 0.0006938808946870267, "learning_rate": 7.031319284987394e-05, "loss": 0.0, "step": 42 }, { "epoch": 0.91672218520986, "grad_norm": 0.0003899640869349241, "learning_rate": 6.890576474687263e-05, "loss": 0.0, "step": 43 }, { "epoch": 0.9380413057961359, "grad_norm": 0.0005169313517399132, "learning_rate": 6.7483130894283e-05, "loss": 0.0, "step": 44 }, { "epoch": 0.9593604263824117, "grad_norm": 0.00022478660685010254, "learning_rate": 6.604684692133597e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.9806795469686875, "grad_norm": 0.0003345914592500776, "learning_rate": 6.459848338346861e-05, "loss": 0.0, "step": 46 }, { "epoch": 1.0099933377748167, "grad_norm": 1.6928021907806396, "learning_rate": 6.313962404494496e-05, "loss": 0.0555, "step": 47 }, { "epoch": 1.0313124583610926, "grad_norm": 2.7820638024422806e-06, "learning_rate": 6.167186414703289e-05, "loss": 0.0, "step": 48 }, { "epoch": 1.0526315789473684, "grad_norm": 4.1377119508467786e-09, "learning_rate": 6.019680866363139e-05, "loss": 0.0, "step": 49 }, { "epoch": 1.0739506995336443, "grad_norm": 2.0653538879145117e-09, "learning_rate": 5.8716070546254966e-05, "loss": 0.0, "step": 50 }, { "epoch": 1.0739506995336443, "eval_loss": 0.0, "eval_runtime": 1.5707, "eval_samples_per_second": 31.832, "eval_steps_per_second": 8.276, "step": 50 }, { "epoch": 1.09526982011992, "grad_norm": 1.4509647927596347e-09, "learning_rate": 5.7231268960295e-05, "loss": 0.0, "step": 51 }, { "epoch": 1.116588940706196, "grad_norm": 1.2944647576063062e-09, "learning_rate": 5.574402751448614e-05, "loss": 0.0, "step": 52 }, { "epoch": 1.1379080612924717, "grad_norm": 1.404435456819897e-09, "learning_rate": 5.425597248551387e-05, "loss": 0.0, "step": 53 }, { "epoch": 1.1592271818787476, "grad_norm": 1.8385311051360986e-09, "learning_rate": 5.2768731039705e-05, "loss": 0.0, "step": 54 }, { "epoch": 1.1805463024650233, "grad_norm": 2.188858427842888e-09, "learning_rate": 5.128392945374505e-05, "loss": 0.0, "step": 55 }, { "epoch": 1.201865423051299, "grad_norm": 2.473932170232729e-09, "learning_rate": 4.980319133636863e-05, "loss": 0.0, "step": 56 }, { "epoch": 1.223184543637575, "grad_norm": 2.957512679202523e-09, "learning_rate": 4.83281358529671e-05, "loss": 0.0, "step": 57 }, { "epoch": 1.2445036642238507, "grad_norm": 4.090505267839717e-09, "learning_rate": 4.686037595505507e-05, "loss": 0.0, "step": 58 }, { "epoch": 1.2658227848101267, "grad_norm": 0.028839975595474243, "learning_rate": 4.54015166165314e-05, "loss": 0.0001, "step": 59 }, { "epoch": 1.2871419053964024, "grad_norm": 3.436689599567444e-09, "learning_rate": 4.395315307866405e-05, "loss": 0.0, "step": 60 }, { "epoch": 1.308461025982678, "grad_norm": 3.736273512799926e-09, "learning_rate": 4.2516869105717004e-05, "loss": 0.0, "step": 61 }, { "epoch": 1.329780146568954, "grad_norm": 4.177445500630483e-09, "learning_rate": 4.109423525312738e-05, "loss": 0.0, "step": 62 }, { "epoch": 1.3510992671552298, "grad_norm": 4.131688324804372e-09, "learning_rate": 3.968680715012606e-05, "loss": 0.0, "step": 63 }, { "epoch": 1.3724183877415057, "grad_norm": 4.780627449463282e-09, "learning_rate": 3.829612379868006e-05, "loss": 0.0, "step": 64 }, { "epoch": 1.3937375083277814, "grad_norm": 5.516650247727739e-09, "learning_rate": 3.692370589061639e-05, "loss": 0.0, "step": 65 }, { "epoch": 1.4150566289140574, "grad_norm": 6.456162715551272e-09, "learning_rate": 3.557105414476782e-05, "loss": 0.0, "step": 66 }, { "epoch": 1.436375749500333, "grad_norm": 6.741116553854454e-09, "learning_rate": 3.423964766595906e-05, "loss": 0.0, "step": 67 }, { "epoch": 1.457694870086609, "grad_norm": 7.689346936956554e-09, "learning_rate": 3.293094232762715e-05, "loss": 0.0, "step": 68 }, { "epoch": 1.4790139906728847, "grad_norm": 7.789759060017332e-09, "learning_rate": 3.164636917984534e-05, "loss": 0.0, "step": 69 }, { "epoch": 1.5003331112591605, "grad_norm": 0.00078203045995906, "learning_rate": 3.0387332884490805e-05, "loss": 0.0, "step": 70 }, { "epoch": 1.5216522318454364, "grad_norm": 5.782083700722751e-09, "learning_rate": 2.9155210179267546e-05, "loss": 0.0, "step": 71 }, { "epoch": 1.5429713524317124, "grad_norm": 6.204281532973255e-09, "learning_rate": 2.7951348372263875e-05, "loss": 0.0, "step": 72 }, { "epoch": 1.564290473017988, "grad_norm": 7.148269087764447e-09, "learning_rate": 2.677706386869083e-05, "loss": 0.0, "step": 73 }, { "epoch": 1.5856095936042638, "grad_norm": 6.854582679238774e-09, "learning_rate": 2.5633640731412412e-05, "loss": 0.0, "step": 74 }, { "epoch": 1.6069287141905395, "grad_norm": 7.026794257569691e-09, "learning_rate": 2.4522329276841663e-05, "loss": 0.0, "step": 75 }, { "epoch": 1.6069287141905395, "eval_loss": 0.0, "eval_runtime": 1.5715, "eval_samples_per_second": 31.817, "eval_steps_per_second": 8.272, "step": 75 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.031389058160394e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }