{
  "best_metric": 0.6968957781791687,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 3.005658852061439,
  "eval_steps": 25,
  "global_step": 116,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025869037995149554,
      "grad_norm": 348.736083984375,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 274.9102,
      "step": 1
    },
    {
      "epoch": 0.025869037995149554,
      "eval_loss": 8.283905982971191,
      "eval_runtime": 0.6332,
      "eval_samples_per_second": 78.958,
      "eval_steps_per_second": 3.158,
      "step": 1
    },
    {
      "epoch": 0.05173807599029911,
      "grad_norm": 348.5337829589844,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 269.3359,
      "step": 2
    },
    {
      "epoch": 0.07760711398544867,
      "grad_norm": 336.2130126953125,
      "learning_rate": 8.999999999999999e-05,
      "loss": 265.1719,
      "step": 3
    },
    {
      "epoch": 0.10347615198059822,
      "grad_norm": 333.7207946777344,
      "learning_rate": 0.00011999999999999999,
      "loss": 249.0039,
      "step": 4
    },
    {
      "epoch": 0.1293451899757478,
      "grad_norm": 298.2082824707031,
      "learning_rate": 0.00015,
      "loss": 206.7969,
      "step": 5
    },
    {
      "epoch": 0.15521422797089734,
      "grad_norm": 250.3636474609375,
      "learning_rate": 0.00017999999999999998,
      "loss": 161.4229,
      "step": 6
    },
    {
      "epoch": 0.18108326596604687,
      "grad_norm": 226.34564208984375,
      "learning_rate": 0.00020999999999999998,
      "loss": 120.4238,
      "step": 7
    },
    {
      "epoch": 0.20695230396119643,
      "grad_norm": 308.28143310546875,
      "learning_rate": 0.00023999999999999998,
      "loss": 84.0972,
      "step": 8
    },
    {
      "epoch": 0.232821341956346,
      "grad_norm": 549.4783325195312,
      "learning_rate": 0.00027,
      "loss": 52.3253,
      "step": 9
    },
    {
      "epoch": 0.2586903799514956,
      "grad_norm": 572.8822021484375,
      "learning_rate": 0.0003,
      "loss": 46.6969,
      "step": 10
    },
    {
      "epoch": 0.28455941794664513,
      "grad_norm": 67.11377716064453,
      "learning_rate": 0.00029993412547631913,
      "loss": 24.6701,
      "step": 11
    },
    {
      "epoch": 0.3104284559417947,
      "grad_norm": 113.78333282470703,
      "learning_rate": 0.0002997365597646482,
      "loss": 26.2216,
      "step": 12
    },
    {
      "epoch": 0.33629749393694425,
      "grad_norm": 129.92559814453125,
      "learning_rate": 0.0002994074763922825,
      "loss": 24.956,
      "step": 13
    },
    {
      "epoch": 0.36216653193209375,
      "grad_norm": 104.27074432373047,
      "learning_rate": 0.0002989471644020275,
      "loss": 24.2584,
      "step": 14
    },
    {
      "epoch": 0.3880355699272433,
      "grad_norm": 119.985595703125,
      "learning_rate": 0.00029835602809832456,
      "loss": 25.2181,
      "step": 15
    },
    {
      "epoch": 0.41390460792239286,
      "grad_norm": 48.832435607910156,
      "learning_rate": 0.0002976345866921395,
      "loss": 22.767,
      "step": 16
    },
    {
      "epoch": 0.4397736459175424,
      "grad_norm": 81.3912353515625,
      "learning_rate": 0.0002967834738449256,
      "loss": 23.8924,
      "step": 17
    },
    {
      "epoch": 0.465642683912692,
      "grad_norm": 54.908119201660156,
      "learning_rate": 0.0002958034371120616,
      "loss": 24.4696,
      "step": 18
    },
    {
      "epoch": 0.49151172190784154,
      "grad_norm": 51.236083984375,
      "learning_rate": 0.00029469533728625376,
      "loss": 23.9197,
      "step": 19
    },
    {
      "epoch": 0.5173807599029911,
      "grad_norm": 47.18868637084961,
      "learning_rate": 0.00029346014764147836,
      "loss": 23.3075,
      "step": 20
    },
    {
      "epoch": 0.5432497978981407,
      "grad_norm": 19.380447387695312,
      "learning_rate": 0.0002920989530781287,
      "loss": 22.372,
      "step": 21
    },
    {
      "epoch": 0.5691188358932903,
      "grad_norm": 51.02498245239258,
      "learning_rate": 0.00029061294917011814,
      "loss": 23.7828,
      "step": 22
    },
    {
      "epoch": 0.5949878738884398,
      "grad_norm": 61.17385482788086,
      "learning_rate": 0.000289003441114775,
      "loss": 25.444,
      "step": 23
    },
    {
      "epoch": 0.6208569118835894,
      "grad_norm": 34.5933723449707,
      "learning_rate": 0.0002872718425864527,
      "loss": 23.0492,
      "step": 24
    },
    {
      "epoch": 0.6467259498787389,
      "grad_norm": 19.800092697143555,
      "learning_rate": 0.0002854196744948615,
      "loss": 22.5537,
      "step": 25
    },
    {
      "epoch": 0.6467259498787389,
      "eval_loss": 0.741091787815094,
      "eval_runtime": 0.6281,
      "eval_samples_per_second": 79.606,
      "eval_steps_per_second": 3.184,
      "step": 25
    },
    {
      "epoch": 0.6725949878738885,
      "grad_norm": 28.665796279907227,
      "learning_rate": 0.0002834485636492121,
      "loss": 23.0459,
      "step": 26
    },
    {
      "epoch": 0.698464025869038,
      "grad_norm": 54.957481384277344,
      "learning_rate": 0.0002813602413293455,
      "loss": 25.5733,
      "step": 27
    },
    {
      "epoch": 0.7243330638641875,
      "grad_norm": 28.139915466308594,
      "learning_rate": 0.0002791565417651033,
      "loss": 23.6223,
      "step": 28
    },
    {
      "epoch": 0.7502021018593371,
      "grad_norm": 29.63477897644043,
      "learning_rate": 0.0002768394005252739,
      "loss": 23.6755,
      "step": 29
    },
    {
      "epoch": 0.7760711398544866,
      "grad_norm": 24.49903678894043,
      "learning_rate": 0.00027441085281753024,
      "loss": 22.999,
      "step": 30
    },
    {
      "epoch": 0.8019401778496362,
      "grad_norm": 16.341554641723633,
      "learning_rate": 0.0002718730317008522,
      "loss": 22.6584,
      "step": 31
    },
    {
      "epoch": 0.8278092158447857,
      "grad_norm": 19.589542388916016,
      "learning_rate": 0.000269228166212003,
      "loss": 23.1001,
      "step": 32
    },
    {
      "epoch": 0.8536782538399353,
      "grad_norm": 20.623701095581055,
      "learning_rate": 0.00026647857940770634,
      "loss": 23.1374,
      "step": 33
    },
    {
      "epoch": 0.8795472918350848,
      "grad_norm": 24.688899993896484,
      "learning_rate": 0.000263626686324243,
      "loss": 23.4097,
      "step": 34
    },
    {
      "epoch": 0.9054163298302345,
      "grad_norm": 22.096830368041992,
      "learning_rate": 0.0002606749918562591,
      "loss": 23.1316,
      "step": 35
    },
    {
      "epoch": 0.931285367825384,
      "grad_norm": 27.559904098510742,
      "learning_rate": 0.00025762608855664965,
      "loss": 23.2516,
      "step": 36
    },
    {
      "epoch": 0.9571544058205336,
      "grad_norm": 15.820242881774902,
      "learning_rate": 0.00025448265435944954,
      "loss": 22.5069,
      "step": 37
    },
    {
      "epoch": 0.9830234438156831,
      "grad_norm": 18.462657928466797,
      "learning_rate": 0.0002512474502277316,
      "loss": 22.9305,
      "step": 38
    },
    {
      "epoch": 1.0105092966855296,
      "grad_norm": 16.62592887878418,
      "learning_rate": 0.0002479233177285782,
      "loss": 22.217,
      "step": 39
    },
    {
      "epoch": 1.036378334680679,
      "grad_norm": 19.09794044494629,
      "learning_rate": 0.0002445131765372567,
      "loss": 22.9307,
      "step": 40
    },
    {
      "epoch": 1.0622473726758286,
      "grad_norm": 17.65207290649414,
      "learning_rate": 0.000241020021872789,
      "loss": 22.5269,
      "step": 41
    },
    {
      "epoch": 1.0881164106709782,
      "grad_norm": 23.124086380004883,
      "learning_rate": 0.00023744692186717078,
      "loss": 22.7486,
      "step": 42
    },
    {
      "epoch": 1.1139854486661278,
      "grad_norm": 3.4357526302337646,
      "learning_rate": 0.00023379701487054785,
      "loss": 22.0516,
      "step": 43
    },
    {
      "epoch": 1.1398544866612772,
      "grad_norm": 30.857542037963867,
      "learning_rate": 0.00023007350669471862,
      "loss": 23.7532,
      "step": 44
    },
    {
      "epoch": 1.1657235246564268,
      "grad_norm": 40.20075988769531,
      "learning_rate": 0.00022627966779738306,
      "loss": 24.4793,
      "step": 45
    },
    {
      "epoch": 1.1915925626515764,
      "grad_norm": 24.608131408691406,
      "learning_rate": 0.00022241883040961173,
      "loss": 22.423,
      "step": 46
    },
    {
      "epoch": 1.217461600646726,
      "grad_norm": 11.946351051330566,
      "learning_rate": 0.00021849438560905693,
      "loss": 22.8885,
      "step": 47
    },
    {
      "epoch": 1.2433306386418754,
      "grad_norm": 11.997475624084473,
      "learning_rate": 0.00021450978034147806,
      "loss": 22.3267,
      "step": 48
    },
    {
      "epoch": 1.269199676637025,
      "grad_norm": 24.218584060668945,
      "learning_rate": 0.00021046851439319585,
      "loss": 22.9163,
      "step": 49
    },
    {
      "epoch": 1.2950687146321747,
      "grad_norm": 21.68750762939453,
      "learning_rate": 0.0002063741373171357,
      "loss": 22.702,
      "step": 50
    },
    {
      "epoch": 1.2950687146321747,
      "eval_loss": 0.7224195599555969,
      "eval_runtime": 0.6276,
      "eval_samples_per_second": 79.667,
      "eval_steps_per_second": 3.187,
      "step": 50
    },
    {
      "epoch": 1.3209377526273243,
      "grad_norm": 15.50403118133545,
      "learning_rate": 0.0002022302453151598,
      "loss": 22.3089,
      "step": 51
    },
    {
      "epoch": 1.3468067906224737,
      "grad_norm": 24.226417541503906,
      "learning_rate": 0.0001980404780794256,
      "loss": 23.1566,
      "step": 52
    },
    {
      "epoch": 1.3726758286176233,
      "grad_norm": 20.176647186279297,
      "learning_rate": 0.00019380851559554636,
      "loss": 22.929,
      "step": 53
    },
    {
      "epoch": 1.3985448666127729,
      "grad_norm": 15.459396362304688,
      "learning_rate": 0.00018953807491036011,
      "loss": 22.6978,
      "step": 54
    },
    {
      "epoch": 1.4244139046079223,
      "grad_norm": 14.14704418182373,
      "learning_rate": 0.00018523290686714756,
      "loss": 22.7141,
      "step": 55
    },
    {
      "epoch": 1.450282942603072,
      "grad_norm": 7.990035533905029,
      "learning_rate": 0.00018089679281116472,
      "loss": 23.1633,
      "step": 56
    },
    {
      "epoch": 1.4761519805982215,
      "grad_norm": 3.211017608642578,
      "learning_rate": 0.00017653354126838592,
      "loss": 22.3353,
      "step": 57
    },
    {
      "epoch": 1.502021018593371,
      "grad_norm": 18.740083694458008,
      "learning_rate": 0.00017214698460037218,
      "loss": 23.5309,
      "step": 58
    },
    {
      "epoch": 1.5278900565885207,
      "grad_norm": 16.11014175415039,
      "learning_rate": 0.00016774097563820485,
      "loss": 22.8019,
      "step": 59
    },
    {
      "epoch": 1.5537590945836701,
      "grad_norm": 26.232606887817383,
      "learning_rate": 0.00016331938429844022,
      "loss": 23.5109,
      "step": 60
    },
    {
      "epoch": 1.5796281325788197,
      "grad_norm": 16.256412506103516,
      "learning_rate": 0.00015888609418405713,
      "loss": 22.8009,
      "step": 61
    },
    {
      "epoch": 1.6054971705739693,
      "grad_norm": 11.629958152770996,
      "learning_rate": 0.00015444499917338395,
      "loss": 22.3203,
      "step": 62
    },
    {
      "epoch": 1.6313662085691187,
      "grad_norm": 11.147138595581055,
      "learning_rate": 0.00015,
      "loss": 22.4757,
      "step": 63
    },
    {
      "epoch": 1.6572352465642683,
      "grad_norm": 5.99025297164917,
      "learning_rate": 0.00014555500082661602,
      "loss": 22.2444,
      "step": 64
    },
    {
      "epoch": 1.683104284559418,
      "grad_norm": 11.468669891357422,
      "learning_rate": 0.00014111390581594284,
      "loss": 22.2462,
      "step": 65
    },
    {
      "epoch": 1.7089733225545674,
      "grad_norm": 14.979022979736328,
      "learning_rate": 0.00013668061570155978,
      "loss": 21.7589,
      "step": 66
    },
    {
      "epoch": 1.7348423605497172,
      "grad_norm": 12.94080924987793,
      "learning_rate": 0.00013225902436179513,
      "loss": 22.4269,
      "step": 67
    },
    {
      "epoch": 1.7607113985448666,
      "grad_norm": 11.411182403564453,
      "learning_rate": 0.00012785301539962782,
      "loss": 21.7354,
      "step": 68
    },
    {
      "epoch": 1.7865804365400162,
      "grad_norm": 27.090801239013672,
      "learning_rate": 0.00012346645873161408,
      "loss": 23.5318,
      "step": 69
    },
    {
      "epoch": 1.8124494745351658,
      "grad_norm": 17.46219825744629,
      "learning_rate": 0.00011910320718883525,
      "loss": 22.8003,
      "step": 70
    },
    {
      "epoch": 1.8383185125303152,
      "grad_norm": 17.276792526245117,
      "learning_rate": 0.00011476709313285244,
      "loss": 22.7198,
      "step": 71
    },
    {
      "epoch": 1.8641875505254648,
      "grad_norm": 13.101729393005371,
      "learning_rate": 0.00011046192508963989,
      "loss": 22.2413,
      "step": 72
    },
    {
      "epoch": 1.8900565885206144,
      "grad_norm": 10.330924987792969,
      "learning_rate": 0.00010619148440445364,
      "loss": 21.9412,
      "step": 73
    },
    {
      "epoch": 1.9159256265157638,
      "grad_norm": 16.028894424438477,
      "learning_rate": 0.00010195952192057438,
      "loss": 22.5098,
      "step": 74
    },
    {
      "epoch": 1.9417946645109136,
      "grad_norm": 8.1192626953125,
      "learning_rate": 9.776975468484019e-05,
      "loss": 22.1182,
      "step": 75
    },
    {
      "epoch": 1.9417946645109136,
      "eval_loss": 0.7175214886665344,
      "eval_runtime": 0.6276,
      "eval_samples_per_second": 79.669,
      "eval_steps_per_second": 3.187,
      "step": 75
    },
    {
      "epoch": 1.967663702506063,
      "grad_norm": 11.423409461975098,
      "learning_rate": 9.36258626828643e-05,
      "loss": 22.3389,
      "step": 76
    },
    {
      "epoch": 1.9935327405012127,
      "grad_norm": 12.934334754943848,
      "learning_rate": 8.953148560680418e-05,
      "loss": 22.7501,
      "step": 77
    },
    {
      "epoch": 2.021018593371059,
      "grad_norm": 22.10219383239746,
      "learning_rate": 8.549021965852197e-05,
      "loss": 23.1807,
      "step": 78
    },
    {
      "epoch": 2.0468876313662085,
      "grad_norm": 15.90378475189209,
      "learning_rate": 8.150561439094303e-05,
      "loss": 22.5372,
      "step": 79
    },
    {
      "epoch": 2.072756669361358,
      "grad_norm": 10.656487464904785,
      "learning_rate": 7.758116959038828e-05,
      "loss": 22.1827,
      "step": 80
    },
    {
      "epoch": 2.0986257073565078,
      "grad_norm": 22.766876220703125,
      "learning_rate": 7.372033220261696e-05,
      "loss": 22.6163,
      "step": 81
    },
    {
      "epoch": 2.124494745351657,
      "grad_norm": 11.259724617004395,
      "learning_rate": 6.992649330528145e-05,
      "loss": 22.0147,
      "step": 82
    },
    {
      "epoch": 2.1503637833468066,
      "grad_norm": 12.66515827178955,
      "learning_rate": 6.620298512945214e-05,
      "loss": 21.9512,
      "step": 83
    },
    {
      "epoch": 2.1762328213419564,
      "grad_norm": 5.229973793029785,
      "learning_rate": 6.255307813282921e-05,
      "loss": 22.17,
      "step": 84
    },
    {
      "epoch": 2.202101859337106,
      "grad_norm": 6.952908992767334,
      "learning_rate": 5.897997812721103e-05,
      "loss": 22.418,
      "step": 85
    },
    {
      "epoch": 2.2279708973322556,
      "grad_norm": 9.438949584960938,
      "learning_rate": 5.5486823462743344e-05,
      "loss": 22.334,
      "step": 86
    },
    {
      "epoch": 2.253839935327405,
      "grad_norm": 13.546004295349121,
      "learning_rate": 5.2076682271421774e-05,
      "loss": 22.3634,
      "step": 87
    },
    {
      "epoch": 2.2797089733225544,
      "grad_norm": 14.096308708190918,
      "learning_rate": 4.8752549772268444e-05,
      "loss": 22.6631,
      "step": 88
    },
    {
      "epoch": 2.3055780113177042,
      "grad_norm": 18.847871780395508,
      "learning_rate": 4.551734564055049e-05,
      "loss": 22.0801,
      "step": 89
    },
    {
      "epoch": 2.3314470493128536,
      "grad_norm": 7.903066635131836,
      "learning_rate": 4.2373911443350286e-05,
      "loss": 22.043,
      "step": 90
    },
    {
      "epoch": 2.3573160873080035,
      "grad_norm": 16.976978302001953,
      "learning_rate": 3.932500814374089e-05,
      "loss": 22.2002,
      "step": 91
    },
    {
      "epoch": 2.383185125303153,
      "grad_norm": 11.1248140335083,
      "learning_rate": 3.637331367575698e-05,
      "loss": 22.1329,
      "step": 92
    },
    {
      "epoch": 2.4090541632983022,
      "grad_norm": 5.761756896972656,
      "learning_rate": 3.352142059229365e-05,
      "loss": 22.0856,
      "step": 93
    },
    {
      "epoch": 2.434923201293452,
      "grad_norm": 12.847921371459961,
      "learning_rate": 3.077183378799699e-05,
      "loss": 22.0646,
      "step": 94
    },
    {
      "epoch": 2.4607922392886015,
      "grad_norm": 9.289769172668457,
      "learning_rate": 2.81269682991478e-05,
      "loss": 21.8848,
      "step": 95
    },
    {
      "epoch": 2.486661277283751,
      "grad_norm": 13.644316673278809,
      "learning_rate": 2.5589147182469732e-05,
      "loss": 23.1436,
      "step": 96
    },
    {
      "epoch": 2.5125303152789007,
      "grad_norm": 16.434682846069336,
      "learning_rate": 2.316059947472607e-05,
      "loss": 22.212,
      "step": 97
    },
    {
      "epoch": 2.53839935327405,
      "grad_norm": 6.969300270080566,
      "learning_rate": 2.0843458234896666e-05,
      "loss": 22.2793,
      "step": 98
    },
    {
      "epoch": 2.5642683912691995,
      "grad_norm": 21.42749786376953,
      "learning_rate": 1.8639758670654486e-05,
      "loss": 22.3692,
      "step": 99
    },
    {
      "epoch": 2.5901374292643493,
      "grad_norm": 13.674956321716309,
      "learning_rate": 1.6551436350787918e-05,
      "loss": 22.2481,
      "step": 100
    },
    {
      "epoch": 2.5901374292643493,
      "eval_loss": 0.6968957781791687,
      "eval_runtime": 0.6272,
      "eval_samples_per_second": 79.714,
      "eval_steps_per_second": 3.189,
      "step": 100
    },
    {
      "epoch": 2.6160064672594987,
      "grad_norm": 18.59197235107422,
      "learning_rate": 1.4580325505138468e-05,
      "loss": 22.2291,
      "step": 101
    },
    {
      "epoch": 2.6418755052546485,
      "grad_norm": 8.891879081726074,
      "learning_rate": 1.272815741354723e-05,
      "loss": 22.3545,
      "step": 102
    },
    {
      "epoch": 2.667744543249798,
      "grad_norm": 8.696002006530762,
      "learning_rate": 1.0996558885224993e-05,
      "loss": 22.1393,
      "step": 103
    },
    {
      "epoch": 2.6936135812449473,
      "grad_norm": 6.413660049438477,
      "learning_rate": 9.387050829881865e-06,
      "loss": 22.9287,
      "step": 104
    },
    {
      "epoch": 2.719482619240097,
      "grad_norm": 13.523515701293945,
      "learning_rate": 7.90104692187129e-06,
      "loss": 22.8497,
      "step": 105
    },
    {
      "epoch": 2.7453516572352465,
      "grad_norm": 8.049901008605957,
      "learning_rate": 6.539852358521636e-06,
      "loss": 22.0333,
      "step": 106
    },
    {
      "epoch": 2.7712206952303964,
      "grad_norm": 8.899979591369629,
      "learning_rate": 5.304662713746205e-06,
      "loss": 22.1953,
      "step": 107
    },
    {
      "epoch": 2.7970897332255458,
      "grad_norm": 9.008318901062012,
      "learning_rate": 4.1965628879383875e-06,
      "loss": 22.1504,
      "step": 108
    },
    {
      "epoch": 2.822958771220695,
      "grad_norm": 13.858719825744629,
      "learning_rate": 3.2165261550743946e-06,
      "loss": 22.0938,
      "step": 109
    },
    {
      "epoch": 2.8488278092158446,
      "grad_norm": 6.062250137329102,
      "learning_rate": 2.3654133078604753e-06,
      "loss": 22.1504,
      "step": 110
    },
    {
      "epoch": 2.8746968472109944,
      "grad_norm": 5.169662952423096,
      "learning_rate": 1.643971901675395e-06,
      "loss": 22.1182,
      "step": 111
    },
    {
      "epoch": 2.900565885206144,
      "grad_norm": 9.194791793823242,
      "learning_rate": 1.0528355979724624e-06,
      "loss": 22.0225,
      "step": 112
    },
    {
      "epoch": 2.9264349232012936,
      "grad_norm": 5.832217693328857,
      "learning_rate": 5.925236077174655e-07,
      "loss": 22.2256,
      "step": 113
    },
    {
      "epoch": 2.952303961196443,
      "grad_norm": 3.4554474353790283,
      "learning_rate": 2.634402353517973e-07,
      "loss": 22.0733,
      "step": 114
    },
    {
      "epoch": 2.9781729991915924,
      "grad_norm": 10.36470890045166,
      "learning_rate": 6.587452368084779e-08,
      "loss": 22.0811,
      "step": 115
    },
    {
      "epoch": 3.005658852061439,
      "grad_norm": 10.728325843811035,
      "learning_rate": 0.0,
      "loss": 21.647,
      "step": 116
    }
  ],
  "logging_steps": 1,
  "max_steps": 116,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1836675900991078e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}