|
{ |
|
"best_metric": 0.2850205600261688, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 0.02386029795547072, |
|
"eval_steps": 25, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00023860297955470718, |
|
"grad_norm": 3.9051380157470703, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 3.7044, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00023860297955470718, |
|
"eval_loss": 6.847388744354248, |
|
"eval_runtime": 4.8319, |
|
"eval_samples_per_second": 10.348, |
|
"eval_steps_per_second": 10.348, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00047720595910941436, |
|
"grad_norm": 4.679457187652588, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 4.7098, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0007158089386641215, |
|
"grad_norm": 9.108196258544922, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 5.1787, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0009544119182188287, |
|
"grad_norm": 6.725158214569092, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 5.0075, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.001193014897773536, |
|
"grad_norm": 7.477461814880371, |
|
"learning_rate": 0.00015, |
|
"loss": 4.907, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.001431617877328243, |
|
"grad_norm": 6.358967304229736, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 3.436, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0016702208568829503, |
|
"grad_norm": 5.095180034637451, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 2.5721, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0019088238364376574, |
|
"grad_norm": 6.78561544418335, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 2.2853, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.002147426815992365, |
|
"grad_norm": 6.9174957275390625, |
|
"learning_rate": 0.00027, |
|
"loss": 1.0685, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.002386029795547072, |
|
"grad_norm": 22.914854049682617, |
|
"learning_rate": 0.0003, |
|
"loss": 1.4754, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002624632775101779, |
|
"grad_norm": 24.160343170166016, |
|
"learning_rate": 0.0002999794957488703, |
|
"loss": 1.4382, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.002863235754656486, |
|
"grad_norm": 4.804904937744141, |
|
"learning_rate": 0.0002999179886011389, |
|
"loss": 0.5051, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0031018387342111936, |
|
"grad_norm": 4.3781657218933105, |
|
"learning_rate": 0.0002998154953722457, |
|
"loss": 0.4762, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0033404417137659005, |
|
"grad_norm": 4.082606315612793, |
|
"learning_rate": 0.00029967204408281613, |
|
"loss": 0.3869, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.003579044693320608, |
|
"grad_norm": 3.5891997814178467, |
|
"learning_rate": 0.00029948767395100045, |
|
"loss": 0.4191, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.003817647672875315, |
|
"grad_norm": 3.904242515563965, |
|
"learning_rate": 0.0002992624353817517, |
|
"loss": 0.4828, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.004056250652430022, |
|
"grad_norm": 4.260214805603027, |
|
"learning_rate": 0.0002989963899530457, |
|
"loss": 0.5526, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.00429485363198473, |
|
"grad_norm": 3.2408618927001953, |
|
"learning_rate": 0.00029868961039904624, |
|
"loss": 0.4121, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.004533456611539436, |
|
"grad_norm": 2.9760124683380127, |
|
"learning_rate": 0.00029834218059022024, |
|
"loss": 0.3412, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.004772059591094144, |
|
"grad_norm": 5.297372817993164, |
|
"learning_rate": 0.00029795419551040833, |
|
"loss": 0.4715, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.005010662570648851, |
|
"grad_norm": 4.5428338050842285, |
|
"learning_rate": 0.00029752576123085736, |
|
"loss": 0.5035, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.005249265550203558, |
|
"grad_norm": 2.42583966255188, |
|
"learning_rate": 0.0002970569948812214, |
|
"loss": 0.2874, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.005487868529758265, |
|
"grad_norm": 1.9801936149597168, |
|
"learning_rate": 0.0002965480246175399, |
|
"loss": 0.2981, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.005726471509312972, |
|
"grad_norm": 5.431916236877441, |
|
"learning_rate": 0.0002959989895872009, |
|
"loss": 0.4561, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00596507448886768, |
|
"grad_norm": 3.5752675533294678, |
|
"learning_rate": 0.0002954100398908995, |
|
"loss": 0.372, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00596507448886768, |
|
"eval_loss": 0.5305390357971191, |
|
"eval_runtime": 4.9396, |
|
"eval_samples_per_second": 10.122, |
|
"eval_steps_per_second": 10.122, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.006203677468422387, |
|
"grad_norm": 5.811082363128662, |
|
"learning_rate": 0.0002947813365416023, |
|
"loss": 0.5621, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0064422804479770945, |
|
"grad_norm": 3.068842887878418, |
|
"learning_rate": 0.0002941130514205272, |
|
"loss": 0.3989, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.006680883427531801, |
|
"grad_norm": 2.5508522987365723, |
|
"learning_rate": 0.0002934053672301536, |
|
"loss": 0.4297, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0069194864070865085, |
|
"grad_norm": 3.687291145324707, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 0.3715, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.007158089386641216, |
|
"grad_norm": 2.5837249755859375, |
|
"learning_rate": 0.00029187258625509513, |
|
"loss": 0.3211, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.007396692366195923, |
|
"grad_norm": 1.6409099102020264, |
|
"learning_rate": 0.00029104790851742417, |
|
"loss": 0.2163, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.00763529534575063, |
|
"grad_norm": 1.997084617614746, |
|
"learning_rate": 0.0002901846696899191, |
|
"loss": 0.2928, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.007873898325305337, |
|
"grad_norm": 1.8158376216888428, |
|
"learning_rate": 0.00028928310577345606, |
|
"loss": 0.187, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.008112501304860045, |
|
"grad_norm": 2.2966806888580322, |
|
"learning_rate": 0.0002883434632466077, |
|
"loss": 0.3337, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.008351104284414752, |
|
"grad_norm": 2.18530011177063, |
|
"learning_rate": 0.00028736599899825856, |
|
"loss": 0.2818, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.00858970726396946, |
|
"grad_norm": 1.9994254112243652, |
|
"learning_rate": 0.00028635098025737434, |
|
"loss": 0.2846, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.008828310243524167, |
|
"grad_norm": 2.239995241165161, |
|
"learning_rate": 0.00028529868451994384, |
|
"loss": 0.3391, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.009066913223078872, |
|
"grad_norm": 1.9311068058013916, |
|
"learning_rate": 0.0002842093994731145, |
|
"loss": 0.2792, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.00930551620263358, |
|
"grad_norm": 2.596693992614746, |
|
"learning_rate": 0.00028308342291654174, |
|
"loss": 0.265, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.009544119182188287, |
|
"grad_norm": 1.8563891649246216, |
|
"learning_rate": 0.00028192106268097334, |
|
"loss": 0.2592, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.009782722161742995, |
|
"grad_norm": 3.4870285987854004, |
|
"learning_rate": 0.00028072263654409154, |
|
"loss": 0.5375, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.010021325141297702, |
|
"grad_norm": 2.2381041049957275, |
|
"learning_rate": 0.0002794884721436361, |
|
"loss": 0.5, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.01025992812085241, |
|
"grad_norm": 2.5200259685516357, |
|
"learning_rate": 0.00027821890688783083, |
|
"loss": 0.4273, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.010498531100407117, |
|
"grad_norm": 1.4027842283248901, |
|
"learning_rate": 0.0002769142878631403, |
|
"loss": 0.3226, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.010737134079961824, |
|
"grad_norm": 2.0196170806884766, |
|
"learning_rate": 0.00027557497173937923, |
|
"loss": 0.3148, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01097573705951653, |
|
"grad_norm": 1.3912042379379272, |
|
"learning_rate": 0.000274201324672203, |
|
"loss": 0.3065, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.011214340039071237, |
|
"grad_norm": 1.8494808673858643, |
|
"learning_rate": 0.00027279372220300385, |
|
"loss": 0.3245, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.011452943018625945, |
|
"grad_norm": 1.9083304405212402, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 0.3276, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.011691545998180652, |
|
"grad_norm": 1.7160670757293701, |
|
"learning_rate": 0.00026987819953423867, |
|
"loss": 0.3039, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.01193014897773536, |
|
"grad_norm": 1.9495787620544434, |
|
"learning_rate": 0.00026837107640945905, |
|
"loss": 0.1993, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01193014897773536, |
|
"eval_loss": 0.3537612855434418, |
|
"eval_runtime": 4.9424, |
|
"eval_samples_per_second": 10.117, |
|
"eval_steps_per_second": 10.117, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012168751957290067, |
|
"grad_norm": 1.6003544330596924, |
|
"learning_rate": 0.0002668315918143169, |
|
"loss": 0.3157, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.012407354936844774, |
|
"grad_norm": 1.2938302755355835, |
|
"learning_rate": 0.00026526016662852886, |
|
"loss": 0.374, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.012645957916399482, |
|
"grad_norm": 1.600176215171814, |
|
"learning_rate": 0.00026365723046405023, |
|
"loss": 0.3464, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.012884560895954189, |
|
"grad_norm": 1.3842864036560059, |
|
"learning_rate": 0.0002620232215476231, |
|
"loss": 0.1721, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.013123163875508895, |
|
"grad_norm": 1.3391578197479248, |
|
"learning_rate": 0.0002603585866009697, |
|
"loss": 0.2234, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.013361766855063602, |
|
"grad_norm": 1.2759987115859985, |
|
"learning_rate": 0.00025866378071866334, |
|
"loss": 0.1884, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.01360036983461831, |
|
"grad_norm": 1.2416960000991821, |
|
"learning_rate": 0.00025693926724370956, |
|
"loss": 0.2361, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.013838972814173017, |
|
"grad_norm": 1.3021742105484009, |
|
"learning_rate": 0.00025518551764087326, |
|
"loss": 0.1999, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.014077575793727724, |
|
"grad_norm": 3.010028600692749, |
|
"learning_rate": 0.00025340301136778483, |
|
"loss": 0.2434, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.014316178773282432, |
|
"grad_norm": 1.4798487424850464, |
|
"learning_rate": 0.00025159223574386114, |
|
"loss": 0.1968, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.014554781752837139, |
|
"grad_norm": 1.279167652130127, |
|
"learning_rate": 0.0002497536858170772, |
|
"loss": 0.2113, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.014793384732391847, |
|
"grad_norm": 1.1728774309158325, |
|
"learning_rate": 0.00024788786422862526, |
|
"loss": 0.2935, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.015031987711946552, |
|
"grad_norm": 1.197998285293579, |
|
"learning_rate": 0.00024599528107549745, |
|
"loss": 0.2799, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.01527059069150126, |
|
"grad_norm": 0.7270916104316711, |
|
"learning_rate": 0.00024407645377103054, |
|
"loss": 0.1325, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.015509193671055967, |
|
"grad_norm": 1.4482795000076294, |
|
"learning_rate": 0.00024213190690345018, |
|
"loss": 0.095, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.015747796650610674, |
|
"grad_norm": 1.8109567165374756, |
|
"learning_rate": 0.00024016217209245374, |
|
"loss": 0.2474, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.01598639963016538, |
|
"grad_norm": 1.784266710281372, |
|
"learning_rate": 0.00023816778784387094, |
|
"loss": 0.1641, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01622500260972009, |
|
"grad_norm": 1.81675386428833, |
|
"learning_rate": 0.0002361492994024415, |
|
"loss": 0.1561, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.016463605589274795, |
|
"grad_norm": 2.7498233318328857, |
|
"learning_rate": 0.0002341072586027509, |
|
"loss": 0.3468, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.016702208568829504, |
|
"grad_norm": 2.1186411380767822, |
|
"learning_rate": 0.00023204222371836405, |
|
"loss": 0.1536, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01694081154838421, |
|
"grad_norm": 2.016608238220215, |
|
"learning_rate": 0.00022995475930919905, |
|
"loss": 0.2577, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.01717941452793892, |
|
"grad_norm": 2.4243433475494385, |
|
"learning_rate": 0.00022784543606718227, |
|
"loss": 0.3607, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.017418017507493624, |
|
"grad_norm": 2.7519173622131348, |
|
"learning_rate": 0.00022571483066022657, |
|
"loss": 0.4148, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.017656620487048334, |
|
"grad_norm": 2.557842493057251, |
|
"learning_rate": 0.0002235635255745762, |
|
"loss": 0.4217, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.01789522346660304, |
|
"grad_norm": 2.0161449909210205, |
|
"learning_rate": 0.00022139210895556104, |
|
"loss": 0.4113, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01789522346660304, |
|
"eval_loss": 0.33875352144241333, |
|
"eval_runtime": 4.9431, |
|
"eval_samples_per_second": 10.115, |
|
"eval_steps_per_second": 10.115, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.018133826446157745, |
|
"grad_norm": 1.2532881498336792, |
|
"learning_rate": 0.00021920117444680317, |
|
"loss": 0.1656, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.018372429425712454, |
|
"grad_norm": 1.2047579288482666, |
|
"learning_rate": 0.00021699132102792097, |
|
"loss": 0.2783, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.01861103240526716, |
|
"grad_norm": 1.158872365951538, |
|
"learning_rate": 0.0002147631528507739, |
|
"loss": 0.1534, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.01884963538482187, |
|
"grad_norm": 1.1011089086532593, |
|
"learning_rate": 0.00021251727907429355, |
|
"loss": 0.2037, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.019088238364376574, |
|
"grad_norm": 1.56781005859375, |
|
"learning_rate": 0.0002102543136979454, |
|
"loss": 0.3891, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.019326841343931284, |
|
"grad_norm": 1.317081332206726, |
|
"learning_rate": 0.0002079748753938678, |
|
"loss": 0.3069, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.01956544432348599, |
|
"grad_norm": 1.2627768516540527, |
|
"learning_rate": 0.0002056795873377331, |
|
"loss": 0.3146, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0198040473030407, |
|
"grad_norm": 1.6412408351898193, |
|
"learning_rate": 0.00020336907703837748, |
|
"loss": 0.2902, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.020042650282595404, |
|
"grad_norm": 1.0755120515823364, |
|
"learning_rate": 0.00020104397616624645, |
|
"loss": 0.2345, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.02028125326215011, |
|
"grad_norm": 0.9814028143882751, |
|
"learning_rate": 0.00019870492038070252, |
|
"loss": 0.1788, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02051985624170482, |
|
"grad_norm": 1.0559604167938232, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 0.2431, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.020758459221259525, |
|
"grad_norm": 0.9600143432617188, |
|
"learning_rate": 0.0001939875056076697, |
|
"loss": 0.2243, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.020997062200814234, |
|
"grad_norm": 1.8274849653244019, |
|
"learning_rate": 0.00019161043631427666, |
|
"loss": 0.3017, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.02123566518036894, |
|
"grad_norm": 1.2286986112594604, |
|
"learning_rate": 0.00018922199114307294, |
|
"loss": 0.293, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.02147426815992365, |
|
"grad_norm": 0.8336792588233948, |
|
"learning_rate": 0.00018682282307111987, |
|
"loss": 0.165, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.021712871139478354, |
|
"grad_norm": 2.1570754051208496, |
|
"learning_rate": 0.00018441358800701273, |
|
"loss": 0.2711, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.02195147411903306, |
|
"grad_norm": 1.6255114078521729, |
|
"learning_rate": 0.00018199494461156203, |
|
"loss": 0.4418, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.02219007709858777, |
|
"grad_norm": 1.1934351921081543, |
|
"learning_rate": 0.000179567554117722, |
|
"loss": 0.3236, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.022428680078142475, |
|
"grad_norm": 1.2108948230743408, |
|
"learning_rate": 0.00017713208014981648, |
|
"loss": 0.2587, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.022667283057697184, |
|
"grad_norm": 2.282270908355713, |
|
"learning_rate": 0.00017468918854211007, |
|
"loss": 0.3602, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.02290588603725189, |
|
"grad_norm": 1.5604966878890991, |
|
"learning_rate": 0.00017223954715677627, |
|
"loss": 0.2781, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0231444890168066, |
|
"grad_norm": 1.4344300031661987, |
|
"learning_rate": 0.00016978382570131034, |
|
"loss": 0.3214, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.023383091996361304, |
|
"grad_norm": 1.685638666152954, |
|
"learning_rate": 0.00016732269554543794, |
|
"loss": 0.3074, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.023621694975916013, |
|
"grad_norm": 1.0541924238204956, |
|
"learning_rate": 0.00016485682953756942, |
|
"loss": 0.1001, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.02386029795547072, |
|
"grad_norm": 1.225070834159851, |
|
"learning_rate": 0.00016238690182084986, |
|
"loss": 0.1181, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02386029795547072, |
|
"eval_loss": 0.2850205600261688, |
|
"eval_runtime": 4.9375, |
|
"eval_samples_per_second": 10.127, |
|
"eval_steps_per_second": 10.127, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.576510477565952e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|