|
{ |
|
"best_metric": 0.8565043210983276, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.009915470613023971, |
|
"eval_steps": 50, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0001322062748403196, |
|
"grad_norm": 9.740524291992188, |
|
"learning_rate": 5e-06, |
|
"loss": 28.5912, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0001322062748403196, |
|
"eval_loss": 1.562753677368164, |
|
"eval_runtime": 9999.8963, |
|
"eval_samples_per_second": 2.548, |
|
"eval_steps_per_second": 0.637, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002644125496806392, |
|
"grad_norm": 10.005927085876465, |
|
"learning_rate": 1e-05, |
|
"loss": 25.9699, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00039661882452095884, |
|
"grad_norm": 9.001063346862793, |
|
"learning_rate": 1.5e-05, |
|
"loss": 23.1696, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0005288250993612784, |
|
"grad_norm": 9.349806785583496, |
|
"learning_rate": 2e-05, |
|
"loss": 25.9893, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0006610313742015981, |
|
"grad_norm": 10.730752944946289, |
|
"learning_rate": 2.5e-05, |
|
"loss": 25.5698, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0007932376490419177, |
|
"grad_norm": 9.830964088439941, |
|
"learning_rate": 3e-05, |
|
"loss": 25.1505, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0009254439238822373, |
|
"grad_norm": 10.586780548095703, |
|
"learning_rate": 3.5e-05, |
|
"loss": 23.8969, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.001057650198722557, |
|
"grad_norm": 10.991456985473633, |
|
"learning_rate": 4e-05, |
|
"loss": 21.7006, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0011898564735628764, |
|
"grad_norm": 10.33070182800293, |
|
"learning_rate": 4.5e-05, |
|
"loss": 19.3531, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0013220627484031961, |
|
"grad_norm": 11.994871139526367, |
|
"learning_rate": 5e-05, |
|
"loss": 20.8966, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0014542690232435156, |
|
"grad_norm": 8.332467079162598, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 16.9087, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0015864752980838353, |
|
"grad_norm": 7.184693336486816, |
|
"learning_rate": 6e-05, |
|
"loss": 17.9883, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0017186815729241548, |
|
"grad_norm": 10.19297981262207, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 20.0634, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0018508878477644746, |
|
"grad_norm": 11.294490814208984, |
|
"learning_rate": 7e-05, |
|
"loss": 18.7819, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0019830941226047943, |
|
"grad_norm": 9.1318998336792, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 17.7652, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.002115300397445114, |
|
"grad_norm": 8.196518898010254, |
|
"learning_rate": 8e-05, |
|
"loss": 17.3877, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0022475066722854333, |
|
"grad_norm": 7.292929172515869, |
|
"learning_rate": 8.5e-05, |
|
"loss": 15.1944, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.002379712947125753, |
|
"grad_norm": 8.115635871887207, |
|
"learning_rate": 9e-05, |
|
"loss": 16.4276, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0025119192219660727, |
|
"grad_norm": 5.500359058380127, |
|
"learning_rate": 9.5e-05, |
|
"loss": 16.1685, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0026441254968063922, |
|
"grad_norm": 5.809587478637695, |
|
"learning_rate": 0.0001, |
|
"loss": 13.4936, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0027763317716467117, |
|
"grad_norm": 6.296661376953125, |
|
"learning_rate": 9.991845519630678e-05, |
|
"loss": 14.7314, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0029085380464870312, |
|
"grad_norm": 5.297625541687012, |
|
"learning_rate": 9.967408676742751e-05, |
|
"loss": 15.7083, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.003040744321327351, |
|
"grad_norm": 5.627785682678223, |
|
"learning_rate": 9.926769179238466e-05, |
|
"loss": 13.4142, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0031729505961676707, |
|
"grad_norm": 5.334050178527832, |
|
"learning_rate": 9.870059584711668e-05, |
|
"loss": 13.8942, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00330515687100799, |
|
"grad_norm": 5.117323398590088, |
|
"learning_rate": 9.797464868072488e-05, |
|
"loss": 14.3471, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0034373631458483097, |
|
"grad_norm": 4.972148418426514, |
|
"learning_rate": 9.709221818197624e-05, |
|
"loss": 15.7968, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0035695694206886296, |
|
"grad_norm": 5.201072692871094, |
|
"learning_rate": 9.60561826557425e-05, |
|
"loss": 15.8741, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.003701775695528949, |
|
"grad_norm": 5.539172172546387, |
|
"learning_rate": 9.486992143456792e-05, |
|
"loss": 15.6628, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0038339819703692686, |
|
"grad_norm": 5.0465779304504395, |
|
"learning_rate": 9.353730385598887e-05, |
|
"loss": 16.0894, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.003966188245209589, |
|
"grad_norm": 3.873826503753662, |
|
"learning_rate": 9.206267664155907e-05, |
|
"loss": 13.6148, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004098394520049908, |
|
"grad_norm": 5.14940881729126, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 14.1108, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.004230600794890228, |
|
"grad_norm": 4.079708576202393, |
|
"learning_rate": 8.870708053195413e-05, |
|
"loss": 15.0809, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.004362807069730547, |
|
"grad_norm": 4.550064563751221, |
|
"learning_rate": 8.683705689382024e-05, |
|
"loss": 15.0017, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.004495013344570867, |
|
"grad_norm": 4.309603691101074, |
|
"learning_rate": 8.484687843276469e-05, |
|
"loss": 12.8122, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0046272196194111865, |
|
"grad_norm": 4.0590596199035645, |
|
"learning_rate": 8.274303669726426e-05, |
|
"loss": 16.4249, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.004759425894251506, |
|
"grad_norm": 3.855443239212036, |
|
"learning_rate": 8.053239398177191e-05, |
|
"loss": 13.3902, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0048916321690918255, |
|
"grad_norm": 4.2185845375061035, |
|
"learning_rate": 7.822216094333847e-05, |
|
"loss": 14.7503, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0050238384439321455, |
|
"grad_norm": 3.751422166824341, |
|
"learning_rate": 7.58198730819481e-05, |
|
"loss": 13.5052, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0051560447187724645, |
|
"grad_norm": 4.427120685577393, |
|
"learning_rate": 7.333336616128369e-05, |
|
"loss": 14.5314, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0052882509936127845, |
|
"grad_norm": 3.970076084136963, |
|
"learning_rate": 7.077075065009433e-05, |
|
"loss": 11.716, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0054204572684531035, |
|
"grad_norm": 4.443355083465576, |
|
"learning_rate": 6.814038526753205e-05, |
|
"loss": 13.1136, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0055526635432934235, |
|
"grad_norm": 3.5732617378234863, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 13.5725, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.005684869818133743, |
|
"grad_norm": 4.124619483947754, |
|
"learning_rate": 6.271091670967436e-05, |
|
"loss": 12.9581, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0058170760929740625, |
|
"grad_norm": 4.425747871398926, |
|
"learning_rate": 5.992952333228728e-05, |
|
"loss": 14.8279, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.005949282367814382, |
|
"grad_norm": 3.609567403793335, |
|
"learning_rate": 5.7115741913664264e-05, |
|
"loss": 14.1364, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.006081488642654702, |
|
"grad_norm": 3.978991746902466, |
|
"learning_rate": 5.427875042394199e-05, |
|
"loss": 13.0212, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0062136949174950214, |
|
"grad_norm": 3.3201518058776855, |
|
"learning_rate": 5.142780253968481e-05, |
|
"loss": 13.4931, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.006345901192335341, |
|
"grad_norm": 3.9002010822296143, |
|
"learning_rate": 4.85721974603152e-05, |
|
"loss": 15.1584, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0064781074671756604, |
|
"grad_norm": 5.216058254241943, |
|
"learning_rate": 4.5721249576058027e-05, |
|
"loss": 13.2532, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.00661031374201598, |
|
"grad_norm": 4.110899925231934, |
|
"learning_rate": 4.288425808633575e-05, |
|
"loss": 15.4582, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00661031374201598, |
|
"eval_loss": 0.8565043210983276, |
|
"eval_runtime": 10039.4222, |
|
"eval_samples_per_second": 2.538, |
|
"eval_steps_per_second": 0.634, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0067425200168563, |
|
"grad_norm": 4.067450046539307, |
|
"learning_rate": 4.007047666771274e-05, |
|
"loss": 12.9902, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.006874726291696619, |
|
"grad_norm": 4.162344455718994, |
|
"learning_rate": 3.728908329032567e-05, |
|
"loss": 14.1395, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.007006932566536939, |
|
"grad_norm": 3.732872486114502, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 14.143, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.007139138841377259, |
|
"grad_norm": 3.6808900833129883, |
|
"learning_rate": 3.1859614732467954e-05, |
|
"loss": 12.1621, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.007271345116217578, |
|
"grad_norm": 4.079517841339111, |
|
"learning_rate": 2.9229249349905684e-05, |
|
"loss": 14.2904, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.007403551391057898, |
|
"grad_norm": 3.8573484420776367, |
|
"learning_rate": 2.6666633838716314e-05, |
|
"loss": 11.8412, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.007535757665898217, |
|
"grad_norm": 4.240724086761475, |
|
"learning_rate": 2.418012691805191e-05, |
|
"loss": 15.1161, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.007667963940738537, |
|
"grad_norm": 4.519715785980225, |
|
"learning_rate": 2.1777839056661554e-05, |
|
"loss": 12.7284, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.007800170215578857, |
|
"grad_norm": 3.937958240509033, |
|
"learning_rate": 1.946760601822809e-05, |
|
"loss": 13.8901, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.007932376490419177, |
|
"grad_norm": 4.240633010864258, |
|
"learning_rate": 1.725696330273575e-05, |
|
"loss": 16.1377, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.008064582765259495, |
|
"grad_norm": 3.8049137592315674, |
|
"learning_rate": 1.5153121567235335e-05, |
|
"loss": 12.7364, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.008196789040099815, |
|
"grad_norm": 4.874120235443115, |
|
"learning_rate": 1.3162943106179749e-05, |
|
"loss": 13.4756, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.008328995314940135, |
|
"grad_norm": 3.806867837905884, |
|
"learning_rate": 1.1292919468045877e-05, |
|
"loss": 14.6096, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.008461201589780455, |
|
"grad_norm": 3.8149616718292236, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 13.8567, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.008593407864620775, |
|
"grad_norm": 3.4338436126708984, |
|
"learning_rate": 7.937323358440935e-06, |
|
"loss": 12.3285, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.008725614139461093, |
|
"grad_norm": 3.7605013847351074, |
|
"learning_rate": 6.462696144011149e-06, |
|
"loss": 12.8175, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.008857820414301413, |
|
"grad_norm": 3.6850969791412354, |
|
"learning_rate": 5.13007856543209e-06, |
|
"loss": 14.5401, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.008990026689141733, |
|
"grad_norm": 3.94093656539917, |
|
"learning_rate": 3.9438173442575e-06, |
|
"loss": 14.2592, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.009122232963982053, |
|
"grad_norm": 4.350830554962158, |
|
"learning_rate": 2.9077818180237693e-06, |
|
"loss": 15.4778, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.009254439238822373, |
|
"grad_norm": 4.318081855773926, |
|
"learning_rate": 2.0253513192751373e-06, |
|
"loss": 16.1385, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.009386645513662693, |
|
"grad_norm": 4.209683418273926, |
|
"learning_rate": 1.2994041528833266e-06, |
|
"loss": 13.5816, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.009518851788503011, |
|
"grad_norm": 3.6689677238464355, |
|
"learning_rate": 7.323082076153509e-07, |
|
"loss": 13.6826, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.009651058063343331, |
|
"grad_norm": 3.67459774017334, |
|
"learning_rate": 3.2591323257248893e-07, |
|
"loss": 12.583, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.009783264338183651, |
|
"grad_norm": 4.068629741668701, |
|
"learning_rate": 8.15448036932176e-08, |
|
"loss": 13.2489, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.009915470613023971, |
|
"grad_norm": 4.4557976722717285, |
|
"learning_rate": 0.0, |
|
"loss": 12.7368, |
|
"step": 75 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.72814168653824e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|