{
  "best_metric": 11.923681259155273,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.37174721189591076,
  "eval_steps": 50,
  "global_step": 75,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004956629491945477,
      "grad_norm": 0.029702121391892433,
      "learning_rate": 5e-06,
      "loss": 11.9338,
      "step": 1
    },
    {
      "epoch": 0.004956629491945477,
      "eval_loss": 11.932662010192871,
      "eval_runtime": 3.4335,
      "eval_samples_per_second": 99.025,
      "eval_steps_per_second": 24.756,
      "step": 1
    },
    {
      "epoch": 0.009913258983890954,
      "grad_norm": 0.029551642015576363,
      "learning_rate": 1e-05,
      "loss": 11.933,
      "step": 2
    },
    {
      "epoch": 0.01486988847583643,
      "grad_norm": 0.02150936797261238,
      "learning_rate": 1.5e-05,
      "loss": 11.9333,
      "step": 3
    },
    {
      "epoch": 0.01982651796778191,
      "grad_norm": 0.031518660485744476,
      "learning_rate": 2e-05,
      "loss": 11.9315,
      "step": 4
    },
    {
      "epoch": 0.024783147459727387,
      "grad_norm": 0.01814820058643818,
      "learning_rate": 2.5e-05,
      "loss": 11.9329,
      "step": 5
    },
    {
      "epoch": 0.02973977695167286,
      "grad_norm": 0.022795800119638443,
      "learning_rate": 3e-05,
      "loss": 11.9295,
      "step": 6
    },
    {
      "epoch": 0.03469640644361834,
      "grad_norm": 0.02691086195409298,
      "learning_rate": 3.5e-05,
      "loss": 11.9328,
      "step": 7
    },
    {
      "epoch": 0.03965303593556382,
      "grad_norm": 0.021017029881477356,
      "learning_rate": 4e-05,
      "loss": 11.9363,
      "step": 8
    },
    {
      "epoch": 0.04460966542750929,
      "grad_norm": 0.02902752347290516,
      "learning_rate": 4.5e-05,
      "loss": 11.9329,
      "step": 9
    },
    {
      "epoch": 0.04956629491945477,
      "grad_norm": 0.025535089895129204,
      "learning_rate": 5e-05,
      "loss": 11.9316,
      "step": 10
    },
    {
      "epoch": 0.05452292441140025,
      "grad_norm": 0.018343951553106308,
      "learning_rate": 5.500000000000001e-05,
      "loss": 11.9298,
      "step": 11
    },
    {
      "epoch": 0.05947955390334572,
      "grad_norm": 0.028216585516929626,
      "learning_rate": 6e-05,
      "loss": 11.9309,
      "step": 12
    },
    {
      "epoch": 0.0644361833952912,
      "grad_norm": 0.04306847229599953,
      "learning_rate": 6.500000000000001e-05,
      "loss": 11.9312,
      "step": 13
    },
    {
      "epoch": 0.06939281288723669,
      "grad_norm": 0.032149795442819595,
      "learning_rate": 7e-05,
      "loss": 11.9318,
      "step": 14
    },
    {
      "epoch": 0.07434944237918216,
      "grad_norm": 0.031103266403079033,
      "learning_rate": 7.500000000000001e-05,
      "loss": 11.9308,
      "step": 15
    },
    {
      "epoch": 0.07930607187112763,
      "grad_norm": 0.023666374385356903,
      "learning_rate": 8e-05,
      "loss": 11.9331,
      "step": 16
    },
    {
      "epoch": 0.08426270136307311,
      "grad_norm": 0.04397472366690636,
      "learning_rate": 8.5e-05,
      "loss": 11.9318,
      "step": 17
    },
    {
      "epoch": 0.08921933085501858,
      "grad_norm": 0.03761634603142738,
      "learning_rate": 9e-05,
      "loss": 11.9327,
      "step": 18
    },
    {
      "epoch": 0.09417596034696406,
      "grad_norm": 0.03000902198255062,
      "learning_rate": 9.5e-05,
      "loss": 11.9312,
      "step": 19
    },
    {
      "epoch": 0.09913258983890955,
      "grad_norm": 0.04588532820343971,
      "learning_rate": 0.0001,
      "loss": 11.9285,
      "step": 20
    },
    {
      "epoch": 0.10408921933085502,
      "grad_norm": 0.05068815499544144,
      "learning_rate": 9.991845519630678e-05,
      "loss": 11.9334,
      "step": 21
    },
    {
      "epoch": 0.1090458488228005,
      "grad_norm": 0.0575312003493309,
      "learning_rate": 9.967408676742751e-05,
      "loss": 11.9303,
      "step": 22
    },
    {
      "epoch": 0.11400247831474597,
      "grad_norm": 0.05008804053068161,
      "learning_rate": 9.926769179238466e-05,
      "loss": 11.9305,
      "step": 23
    },
    {
      "epoch": 0.11895910780669144,
      "grad_norm": 0.04055539891123772,
      "learning_rate": 9.870059584711668e-05,
      "loss": 11.9322,
      "step": 24
    },
    {
      "epoch": 0.12391573729863693,
      "grad_norm": 0.05720638111233711,
      "learning_rate": 9.797464868072488e-05,
      "loss": 11.928,
      "step": 25
    },
    {
      "epoch": 0.1288723667905824,
      "grad_norm": 0.06328973174095154,
      "learning_rate": 9.709221818197624e-05,
      "loss": 11.9308,
      "step": 26
    },
    {
      "epoch": 0.13382899628252787,
      "grad_norm": 0.05939590185880661,
      "learning_rate": 9.60561826557425e-05,
      "loss": 11.9294,
      "step": 27
    },
    {
      "epoch": 0.13878562577447337,
      "grad_norm": 0.0752195417881012,
      "learning_rate": 9.486992143456792e-05,
      "loss": 11.9302,
      "step": 28
    },
    {
      "epoch": 0.14374225526641884,
      "grad_norm": 0.04964336380362511,
      "learning_rate": 9.353730385598887e-05,
      "loss": 11.9312,
      "step": 29
    },
    {
      "epoch": 0.14869888475836432,
      "grad_norm": 0.07646752148866653,
      "learning_rate": 9.206267664155907e-05,
      "loss": 11.9305,
      "step": 30
    },
    {
      "epoch": 0.1536555142503098,
      "grad_norm": 0.08100369572639465,
      "learning_rate": 9.045084971874738e-05,
      "loss": 11.9292,
      "step": 31
    },
    {
      "epoch": 0.15861214374225527,
      "grad_norm": 0.05317220091819763,
      "learning_rate": 8.870708053195413e-05,
      "loss": 11.9305,
      "step": 32
    },
    {
      "epoch": 0.16356877323420074,
      "grad_norm": 0.11869339644908905,
      "learning_rate": 8.683705689382024e-05,
      "loss": 11.9257,
      "step": 33
    },
    {
      "epoch": 0.16852540272614622,
      "grad_norm": 0.07948697358369827,
      "learning_rate": 8.484687843276469e-05,
      "loss": 11.9298,
      "step": 34
    },
    {
      "epoch": 0.1734820322180917,
      "grad_norm": 0.08774244040250778,
      "learning_rate": 8.274303669726426e-05,
      "loss": 11.9288,
      "step": 35
    },
    {
      "epoch": 0.17843866171003717,
      "grad_norm": 0.13008320331573486,
      "learning_rate": 8.053239398177191e-05,
      "loss": 11.9263,
      "step": 36
    },
    {
      "epoch": 0.18339529120198264,
      "grad_norm": 0.0970505028963089,
      "learning_rate": 7.822216094333847e-05,
      "loss": 11.9288,
      "step": 37
    },
    {
      "epoch": 0.18835192069392812,
      "grad_norm": 0.12324848026037216,
      "learning_rate": 7.58198730819481e-05,
      "loss": 11.9239,
      "step": 38
    },
    {
      "epoch": 0.19330855018587362,
      "grad_norm": 0.08435509353876114,
      "learning_rate": 7.333336616128369e-05,
      "loss": 11.9262,
      "step": 39
    },
    {
      "epoch": 0.1982651796778191,
      "grad_norm": 0.07982442528009415,
      "learning_rate": 7.077075065009433e-05,
      "loss": 11.9263,
      "step": 40
    },
    {
      "epoch": 0.20322180916976457,
      "grad_norm": 0.08218646049499512,
      "learning_rate": 6.814038526753205e-05,
      "loss": 11.9271,
      "step": 41
    },
    {
      "epoch": 0.20817843866171004,
      "grad_norm": 0.10249483585357666,
      "learning_rate": 6.545084971874738e-05,
      "loss": 11.9276,
      "step": 42
    },
    {
      "epoch": 0.21313506815365552,
      "grad_norm": 0.09215902537107468,
      "learning_rate": 6.271091670967436e-05,
      "loss": 11.9282,
      "step": 43
    },
    {
      "epoch": 0.218091697645601,
      "grad_norm": 0.09024009853601456,
      "learning_rate": 5.992952333228728e-05,
      "loss": 11.925,
      "step": 44
    },
    {
      "epoch": 0.22304832713754646,
      "grad_norm": 0.06474387645721436,
      "learning_rate": 5.7115741913664264e-05,
      "loss": 11.9266,
      "step": 45
    },
    {
      "epoch": 0.22800495662949194,
      "grad_norm": 0.08036696910858154,
      "learning_rate": 5.427875042394199e-05,
      "loss": 11.9252,
      "step": 46
    },
    {
      "epoch": 0.23296158612143741,
      "grad_norm": 0.07694265991449356,
      "learning_rate": 5.142780253968481e-05,
      "loss": 11.9237,
      "step": 47
    },
    {
      "epoch": 0.2379182156133829,
      "grad_norm": 0.05151360481977463,
      "learning_rate": 4.85721974603152e-05,
      "loss": 11.9229,
      "step": 48
    },
    {
      "epoch": 0.24287484510532836,
      "grad_norm": 0.09331781417131424,
      "learning_rate": 4.5721249576058027e-05,
      "loss": 11.9232,
      "step": 49
    },
    {
      "epoch": 0.24783147459727387,
      "grad_norm": 0.09337204694747925,
      "learning_rate": 4.288425808633575e-05,
      "loss": 11.9223,
      "step": 50
    },
    {
      "epoch": 0.24783147459727387,
      "eval_loss": 11.923681259155273,
      "eval_runtime": 3.4546,
      "eval_samples_per_second": 98.419,
      "eval_steps_per_second": 24.605,
      "step": 50
    },
    {
      "epoch": 0.2527881040892193,
      "grad_norm": 0.06785538047552109,
      "learning_rate": 4.007047666771274e-05,
      "loss": 11.9238,
      "step": 51
    },
    {
      "epoch": 0.2577447335811648,
      "grad_norm": 0.06856627762317657,
      "learning_rate": 3.728908329032567e-05,
      "loss": 11.9219,
      "step": 52
    },
    {
      "epoch": 0.26270136307311026,
      "grad_norm": 0.08525048196315765,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 11.9235,
      "step": 53
    },
    {
      "epoch": 0.26765799256505574,
      "grad_norm": 0.06322482973337173,
      "learning_rate": 3.1859614732467954e-05,
      "loss": 11.9243,
      "step": 54
    },
    {
      "epoch": 0.27261462205700127,
      "grad_norm": 0.07685398310422897,
      "learning_rate": 2.9229249349905684e-05,
      "loss": 11.9228,
      "step": 55
    },
    {
      "epoch": 0.27757125154894674,
      "grad_norm": 0.06677208840847015,
      "learning_rate": 2.6666633838716314e-05,
      "loss": 11.9268,
      "step": 56
    },
    {
      "epoch": 0.2825278810408922,
      "grad_norm": 0.06735136359930038,
      "learning_rate": 2.418012691805191e-05,
      "loss": 11.9235,
      "step": 57
    },
    {
      "epoch": 0.2874845105328377,
      "grad_norm": 0.0659952163696289,
      "learning_rate": 2.1777839056661554e-05,
      "loss": 11.9245,
      "step": 58
    },
    {
      "epoch": 0.29244114002478316,
      "grad_norm": 0.08097010850906372,
      "learning_rate": 1.946760601822809e-05,
      "loss": 11.9218,
      "step": 59
    },
    {
      "epoch": 0.29739776951672864,
      "grad_norm": 0.07043331116437912,
      "learning_rate": 1.725696330273575e-05,
      "loss": 11.9238,
      "step": 60
    },
    {
      "epoch": 0.3023543990086741,
      "grad_norm": 0.07436829805374146,
      "learning_rate": 1.5153121567235335e-05,
      "loss": 11.9221,
      "step": 61
    },
    {
      "epoch": 0.3073110285006196,
      "grad_norm": 0.07238686829805374,
      "learning_rate": 1.3162943106179749e-05,
      "loss": 11.9243,
      "step": 62
    },
    {
      "epoch": 0.31226765799256506,
      "grad_norm": 0.06303081661462784,
      "learning_rate": 1.1292919468045877e-05,
      "loss": 11.924,
      "step": 63
    },
    {
      "epoch": 0.31722428748451054,
      "grad_norm": 0.050584156066179276,
      "learning_rate": 9.549150281252633e-06,
      "loss": 11.9225,
      "step": 64
    },
    {
      "epoch": 0.322180916976456,
      "grad_norm": 0.06821806728839874,
      "learning_rate": 7.937323358440935e-06,
      "loss": 11.9226,
      "step": 65
    },
    {
      "epoch": 0.3271375464684015,
      "grad_norm": 0.07609910517930984,
      "learning_rate": 6.462696144011149e-06,
      "loss": 11.9227,
      "step": 66
    },
    {
      "epoch": 0.33209417596034696,
      "grad_norm": 0.09269632399082184,
      "learning_rate": 5.13007856543209e-06,
      "loss": 11.9239,
      "step": 67
    },
    {
      "epoch": 0.33705080545229243,
      "grad_norm": 0.07459668070077896,
      "learning_rate": 3.9438173442575e-06,
      "loss": 11.9219,
      "step": 68
    },
    {
      "epoch": 0.3420074349442379,
      "grad_norm": 0.054816748946905136,
      "learning_rate": 2.9077818180237693e-06,
      "loss": 11.9238,
      "step": 69
    },
    {
      "epoch": 0.3469640644361834,
      "grad_norm": 0.07526088505983353,
      "learning_rate": 2.0253513192751373e-06,
      "loss": 11.9224,
      "step": 70
    },
    {
      "epoch": 0.35192069392812886,
      "grad_norm": 0.06288407742977142,
      "learning_rate": 1.2994041528833266e-06,
      "loss": 11.9252,
      "step": 71
    },
    {
      "epoch": 0.35687732342007433,
      "grad_norm": 0.07722307741641998,
      "learning_rate": 7.323082076153509e-07,
      "loss": 11.9235,
      "step": 72
    },
    {
      "epoch": 0.3618339529120198,
      "grad_norm": 0.0703897774219513,
      "learning_rate": 3.2591323257248893e-07,
      "loss": 11.9213,
      "step": 73
    },
    {
      "epoch": 0.3667905824039653,
      "grad_norm": 0.06519778072834015,
      "learning_rate": 8.15448036932176e-08,
      "loss": 11.9269,
      "step": 74
    },
    {
      "epoch": 0.37174721189591076,
      "grad_norm": 0.05321093648672104,
      "learning_rate": 0.0,
      "loss": 11.9229,
      "step": 75
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 723124224000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}