{
  "best_metric": 3.924217939376831,
  "best_model_checkpoint": "miner_id_24/checkpoint-75",
  "epoch": 0.1782001782001782,
  "eval_steps": 25,
  "global_step": 75,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002376002376002376,
      "grad_norm": 4.380615711212158,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 4.9543,
      "step": 1
    },
    {
      "epoch": 0.002376002376002376,
      "eval_loss": 5.8989152908325195,
      "eval_runtime": 0.4626,
      "eval_samples_per_second": 108.088,
      "eval_steps_per_second": 28.103,
      "step": 1
    },
    {
      "epoch": 0.004752004752004752,
      "grad_norm": 5.492649555206299,
      "learning_rate": 6.666666666666667e-05,
      "loss": 5.3281,
      "step": 2
    },
    {
      "epoch": 0.007128007128007128,
      "grad_norm": 5.611356258392334,
      "learning_rate": 0.0001,
      "loss": 5.5476,
      "step": 3
    },
    {
      "epoch": 0.009504009504009503,
      "grad_norm": 5.1926751136779785,
      "learning_rate": 9.99571699711836e-05,
      "loss": 5.4407,
      "step": 4
    },
    {
      "epoch": 0.01188001188001188,
      "grad_norm": 4.4678144454956055,
      "learning_rate": 9.982876141412856e-05,
      "loss": 4.9533,
      "step": 5
    },
    {
      "epoch": 0.014256014256014256,
      "grad_norm": 3.666590929031372,
      "learning_rate": 9.961501876182148e-05,
      "loss": 4.8957,
      "step": 6
    },
    {
      "epoch": 0.016632016632016633,
      "grad_norm": 2.9076924324035645,
      "learning_rate": 9.931634888554937e-05,
      "loss": 4.7412,
      "step": 7
    },
    {
      "epoch": 0.019008019008019007,
      "grad_norm": 3.1712443828582764,
      "learning_rate": 9.893332032039701e-05,
      "loss": 4.7591,
      "step": 8
    },
    {
      "epoch": 0.021384021384021384,
      "grad_norm": 3.3876755237579346,
      "learning_rate": 9.846666218300807e-05,
      "loss": 4.9002,
      "step": 9
    },
    {
      "epoch": 0.02376002376002376,
      "grad_norm": 3.2135019302368164,
      "learning_rate": 9.791726278367022e-05,
      "loss": 4.8693,
      "step": 10
    },
    {
      "epoch": 0.026136026136026135,
      "grad_norm": 3.6509037017822266,
      "learning_rate": 9.728616793536588e-05,
      "loss": 5.2601,
      "step": 11
    },
    {
      "epoch": 0.02851202851202851,
      "grad_norm": 4.232973575592041,
      "learning_rate": 9.657457896300791e-05,
      "loss": 5.1388,
      "step": 12
    },
    {
      "epoch": 0.03088803088803089,
      "grad_norm": 4.702571868896484,
      "learning_rate": 9.578385041664925e-05,
      "loss": 4.6445,
      "step": 13
    },
    {
      "epoch": 0.033264033264033266,
      "grad_norm": 5.038473606109619,
      "learning_rate": 9.491548749301997e-05,
      "loss": 4.1849,
      "step": 14
    },
    {
      "epoch": 0.03564003564003564,
      "grad_norm": 3.6792800426483154,
      "learning_rate": 9.397114317029975e-05,
      "loss": 4.1729,
      "step": 15
    },
    {
      "epoch": 0.03801603801603801,
      "grad_norm": 2.92476224899292,
      "learning_rate": 9.295261506157986e-05,
      "loss": 4.1779,
      "step": 16
    },
    {
      "epoch": 0.04039204039204039,
      "grad_norm": 2.7897653579711914,
      "learning_rate": 9.186184199300464e-05,
      "loss": 4.1821,
      "step": 17
    },
    {
      "epoch": 0.04276804276804277,
      "grad_norm": 3.0643630027770996,
      "learning_rate": 9.070090031310558e-05,
      "loss": 4.2447,
      "step": 18
    },
    {
      "epoch": 0.045144045144045145,
      "grad_norm": 3.3225109577178955,
      "learning_rate": 8.947199994035401e-05,
      "loss": 4.1242,
      "step": 19
    },
    {
      "epoch": 0.04752004752004752,
      "grad_norm": 3.201786994934082,
      "learning_rate": 8.817748015645558e-05,
      "loss": 4.3685,
      "step": 20
    },
    {
      "epoch": 0.0498960498960499,
      "grad_norm": 3.1338696479797363,
      "learning_rate": 8.681980515339464e-05,
      "loss": 4.2878,
      "step": 21
    },
    {
      "epoch": 0.05227205227205227,
      "grad_norm": 3.1590020656585693,
      "learning_rate": 8.540155934270471e-05,
      "loss": 4.497,
      "step": 22
    },
    {
      "epoch": 0.054648054648054646,
      "grad_norm": 2.928370714187622,
      "learning_rate": 8.392544243589427e-05,
      "loss": 4.542,
      "step": 23
    },
    {
      "epoch": 0.05702405702405702,
      "grad_norm": 3.1304128170013428,
      "learning_rate": 8.239426430539243e-05,
      "loss": 4.7395,
      "step": 24
    },
    {
      "epoch": 0.0594000594000594,
      "grad_norm": 3.5850155353546143,
      "learning_rate": 8.081093963579707e-05,
      "loss": 5.2024,
      "step": 25
    },
    {
      "epoch": 0.0594000594000594,
      "eval_loss": 4.197417736053467,
      "eval_runtime": 0.4632,
      "eval_samples_per_second": 107.934,
      "eval_steps_per_second": 28.063,
      "step": 25
    },
    {
      "epoch": 0.06177606177606178,
      "grad_norm": 5.812683582305908,
      "learning_rate": 7.917848237560709e-05,
      "loss": 3.9788,
      "step": 26
    },
    {
      "epoch": 0.06415206415206415,
      "grad_norm": 5.340697288513184,
      "learning_rate": 7.75e-05,
      "loss": 4.1852,
      "step": 27
    },
    {
      "epoch": 0.06652806652806653,
      "grad_norm": 5.233508586883545,
      "learning_rate": 7.577868759557654e-05,
      "loss": 4.0927,
      "step": 28
    },
    {
      "epoch": 0.0689040689040689,
      "grad_norm": 4.724637985229492,
      "learning_rate": 7.401782177833148e-05,
      "loss": 4.184,
      "step": 29
    },
    {
      "epoch": 0.07128007128007129,
      "grad_norm": 3.7798030376434326,
      "learning_rate": 7.222075445642904e-05,
      "loss": 3.9799,
      "step": 30
    },
    {
      "epoch": 0.07365607365607366,
      "grad_norm": 3.1549389362335205,
      "learning_rate": 7.03909064496551e-05,
      "loss": 4.0357,
      "step": 31
    },
    {
      "epoch": 0.07603207603207603,
      "grad_norm": 2.810375213623047,
      "learning_rate": 6.853176097769229e-05,
      "loss": 4.2323,
      "step": 32
    },
    {
      "epoch": 0.07840807840807841,
      "grad_norm": 2.2946324348449707,
      "learning_rate": 6.664685702961344e-05,
      "loss": 4.1228,
      "step": 33
    },
    {
      "epoch": 0.08078408078408078,
      "grad_norm": 2.794386386871338,
      "learning_rate": 6.473978262721463e-05,
      "loss": 4.3795,
      "step": 34
    },
    {
      "epoch": 0.08316008316008316,
      "grad_norm": 2.846055507659912,
      "learning_rate": 6.281416799501188e-05,
      "loss": 4.338,
      "step": 35
    },
    {
      "epoch": 0.08553608553608553,
      "grad_norm": 3.5053131580352783,
      "learning_rate": 6.087367864990233e-05,
      "loss": 4.6534,
      "step": 36
    },
    {
      "epoch": 0.08791208791208792,
      "grad_norm": 4.065090179443359,
      "learning_rate": 5.8922008423644624e-05,
      "loss": 4.7563,
      "step": 37
    },
    {
      "epoch": 0.09028809028809029,
      "grad_norm": 2.289839744567871,
      "learning_rate": 5.696287243144013e-05,
      "loss": 4.1748,
      "step": 38
    },
    {
      "epoch": 0.09266409266409266,
      "grad_norm": 2.2016358375549316,
      "learning_rate": 5.500000000000001e-05,
      "loss": 3.8301,
      "step": 39
    },
    {
      "epoch": 0.09504009504009504,
      "grad_norm": 2.1572742462158203,
      "learning_rate": 5.303712756855988e-05,
      "loss": 3.8959,
      "step": 40
    },
    {
      "epoch": 0.09741609741609741,
      "grad_norm": 2.1548614501953125,
      "learning_rate": 5.107799157635538e-05,
      "loss": 4.0368,
      "step": 41
    },
    {
      "epoch": 0.0997920997920998,
      "grad_norm": 1.97099769115448,
      "learning_rate": 4.912632135009769e-05,
      "loss": 3.8731,
      "step": 42
    },
    {
      "epoch": 0.10216810216810217,
      "grad_norm": 1.972032904624939,
      "learning_rate": 4.718583200498814e-05,
      "loss": 4.0455,
      "step": 43
    },
    {
      "epoch": 0.10454410454410454,
      "grad_norm": 2.277165412902832,
      "learning_rate": 4.526021737278538e-05,
      "loss": 4.0799,
      "step": 44
    },
    {
      "epoch": 0.10692010692010692,
      "grad_norm": 2.229562997817993,
      "learning_rate": 4.3353142970386564e-05,
      "loss": 4.2358,
      "step": 45
    },
    {
      "epoch": 0.10929610929610929,
      "grad_norm": 2.1845741271972656,
      "learning_rate": 4.146823902230772e-05,
      "loss": 4.1261,
      "step": 46
    },
    {
      "epoch": 0.11167211167211168,
      "grad_norm": 2.513690948486328,
      "learning_rate": 3.960909355034491e-05,
      "loss": 4.249,
      "step": 47
    },
    {
      "epoch": 0.11404811404811405,
      "grad_norm": 2.7216529846191406,
      "learning_rate": 3.777924554357096e-05,
      "loss": 4.3592,
      "step": 48
    },
    {
      "epoch": 0.11642411642411643,
      "grad_norm": 2.8171885013580322,
      "learning_rate": 3.598217822166854e-05,
      "loss": 4.59,
      "step": 49
    },
    {
      "epoch": 0.1188001188001188,
      "grad_norm": 4.377580165863037,
      "learning_rate": 3.422131240442349e-05,
      "loss": 5.0176,
      "step": 50
    },
    {
      "epoch": 0.1188001188001188,
      "eval_loss": 3.993185520172119,
      "eval_runtime": 0.463,
      "eval_samples_per_second": 107.981,
      "eval_steps_per_second": 28.075,
      "step": 50
    },
    {
      "epoch": 0.12117612117612117,
      "grad_norm": 2.3711788654327393,
      "learning_rate": 3.250000000000001e-05,
      "loss": 3.7217,
      "step": 51
    },
    {
      "epoch": 0.12355212355212356,
      "grad_norm": 2.324312686920166,
      "learning_rate": 3.082151762439293e-05,
      "loss": 3.7561,
      "step": 52
    },
    {
      "epoch": 0.12592812592812594,
      "grad_norm": 2.2597808837890625,
      "learning_rate": 2.9189060364202943e-05,
      "loss": 3.9246,
      "step": 53
    },
    {
      "epoch": 0.1283041283041283,
      "grad_norm": 2.233111619949341,
      "learning_rate": 2.760573569460757e-05,
      "loss": 3.9591,
      "step": 54
    },
    {
      "epoch": 0.13068013068013068,
      "grad_norm": 2.11525559425354,
      "learning_rate": 2.6074557564105727e-05,
      "loss": 4.0181,
      "step": 55
    },
    {
      "epoch": 0.13305613305613306,
      "grad_norm": 2.1554830074310303,
      "learning_rate": 2.459844065729529e-05,
      "loss": 4.0492,
      "step": 56
    },
    {
      "epoch": 0.13543213543213542,
      "grad_norm": 2.260748863220215,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 4.096,
      "step": 57
    },
    {
      "epoch": 0.1378081378081378,
      "grad_norm": 2.5770089626312256,
      "learning_rate": 2.1822519843544424e-05,
      "loss": 4.1185,
      "step": 58
    },
    {
      "epoch": 0.1401841401841402,
      "grad_norm": 2.5284464359283447,
      "learning_rate": 2.0528000059645997e-05,
      "loss": 4.2158,
      "step": 59
    },
    {
      "epoch": 0.14256014256014257,
      "grad_norm": 2.5339860916137695,
      "learning_rate": 1.9299099686894423e-05,
      "loss": 4.3166,
      "step": 60
    },
    {
      "epoch": 0.14493614493614493,
      "grad_norm": 3.2956223487854004,
      "learning_rate": 1.8138158006995364e-05,
      "loss": 4.51,
      "step": 61
    },
    {
      "epoch": 0.1473121473121473,
      "grad_norm": 3.547050952911377,
      "learning_rate": 1.7047384938420154e-05,
      "loss": 4.6738,
      "step": 62
    },
    {
      "epoch": 0.1496881496881497,
      "grad_norm": 2.2729122638702393,
      "learning_rate": 1.602885682970026e-05,
      "loss": 4.1461,
      "step": 63
    },
    {
      "epoch": 0.15206415206415205,
      "grad_norm": 2.3304686546325684,
      "learning_rate": 1.5084512506980026e-05,
      "loss": 3.7979,
      "step": 64
    },
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 2.1547152996063232,
      "learning_rate": 1.4216149583350754e-05,
      "loss": 3.7298,
      "step": 65
    },
    {
      "epoch": 0.15681615681615682,
      "grad_norm": 2.1138386726379395,
      "learning_rate": 1.3425421036992098e-05,
      "loss": 3.9149,
      "step": 66
    },
    {
      "epoch": 0.1591921591921592,
      "grad_norm": 2.197359561920166,
      "learning_rate": 1.2713832064634126e-05,
      "loss": 3.9229,
      "step": 67
    },
    {
      "epoch": 0.16156816156816156,
      "grad_norm": 2.0438430309295654,
      "learning_rate": 1.2082737216329794e-05,
      "loss": 3.9128,
      "step": 68
    },
    {
      "epoch": 0.16394416394416395,
      "grad_norm": 2.085602045059204,
      "learning_rate": 1.1533337816991932e-05,
      "loss": 4.0383,
      "step": 69
    },
    {
      "epoch": 0.16632016632016633,
      "grad_norm": 2.3776140213012695,
      "learning_rate": 1.1066679679603e-05,
      "loss": 3.9447,
      "step": 70
    },
    {
      "epoch": 0.16869616869616869,
      "grad_norm": 2.2178096771240234,
      "learning_rate": 1.0683651114450641e-05,
      "loss": 4.0903,
      "step": 71
    },
    {
      "epoch": 0.17107217107217107,
      "grad_norm": 2.8414366245269775,
      "learning_rate": 1.0384981238178534e-05,
      "loss": 4.2017,
      "step": 72
    },
    {
      "epoch": 0.17344817344817345,
      "grad_norm": 2.834662914276123,
      "learning_rate": 1.017123858587145e-05,
      "loss": 4.3855,
      "step": 73
    },
    {
      "epoch": 0.17582417582417584,
      "grad_norm": 3.376075267791748,
      "learning_rate": 1.00428300288164e-05,
      "loss": 4.5297,
      "step": 74
    },
    {
      "epoch": 0.1782001782001782,
      "grad_norm": 4.6905436515808105,
      "learning_rate": 1e-05,
      "loss": 5.0961,
      "step": 75
    },
    {
      "epoch": 0.1782001782001782,
      "eval_loss": 3.924217939376831,
      "eval_runtime": 0.4633,
      "eval_samples_per_second": 107.912,
      "eval_steps_per_second": 28.057,
      "step": 75
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.174564302225408e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}