|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9987565282268093, |
|
"eval_steps": 500, |
|
"global_step": 502, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001989554837105198, |
|
"grad_norm": 2.019071375807948, |
|
"learning_rate": 5.88235294117647e-06, |
|
"loss": 1.4931, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009947774185525988, |
|
"grad_norm": 1.5423217337624162, |
|
"learning_rate": 2.941176470588235e-05, |
|
"loss": 1.4424, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.019895548371051976, |
|
"grad_norm": 0.8308012142463063, |
|
"learning_rate": 5.88235294117647e-05, |
|
"loss": 1.2728, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.029843322556577966, |
|
"grad_norm": 0.5145126949446309, |
|
"learning_rate": 8.823529411764705e-05, |
|
"loss": 1.1614, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03979109674210395, |
|
"grad_norm": 0.36488357130003074, |
|
"learning_rate": 0.0001176470588235294, |
|
"loss": 1.1138, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04973887092762994, |
|
"grad_norm": 0.45152250150726514, |
|
"learning_rate": 0.00014705882352941175, |
|
"loss": 1.07, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05968664511315593, |
|
"grad_norm": 0.3640886200970852, |
|
"learning_rate": 0.0001764705882352941, |
|
"loss": 1.0509, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06963441929868192, |
|
"grad_norm": 1.7718021462555353, |
|
"learning_rate": 0.00020588235294117645, |
|
"loss": 1.0168, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0795821934842079, |
|
"grad_norm": 0.3768307338988579, |
|
"learning_rate": 0.0002352941176470588, |
|
"loss": 1.0086, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0895299676697339, |
|
"grad_norm": 0.33093766782578965, |
|
"learning_rate": 0.00026470588235294115, |
|
"loss": 1.0008, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09947774185525989, |
|
"grad_norm": 0.37314906796958397, |
|
"learning_rate": 0.0002941176470588235, |
|
"loss": 0.9975, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10942551604078588, |
|
"grad_norm": 0.39939561360809905, |
|
"learning_rate": 0.00029994177629874796, |
|
"loss": 0.9884, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11937329022631187, |
|
"grad_norm": 0.3919443406321306, |
|
"learning_rate": 0.00029970531997706437, |
|
"loss": 0.9843, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12932106441183785, |
|
"grad_norm": 0.3915947062044229, |
|
"learning_rate": 0.00029928727864250395, |
|
"loss": 0.9913, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13926883859736383, |
|
"grad_norm": 0.3534055778790222, |
|
"learning_rate": 0.00029868815935814996, |
|
"loss": 0.9893, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14921661278288983, |
|
"grad_norm": 0.36735315351449394, |
|
"learning_rate": 0.0002979086888255182, |
|
"loss": 0.9775, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1591643869684158, |
|
"grad_norm": 0.3088341190569696, |
|
"learning_rate": 0.00029694981250310496, |
|
"loss": 0.981, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16911216115394181, |
|
"grad_norm": 0.325839880040256, |
|
"learning_rate": 0.0002958126934595933, |
|
"loss": 0.9659, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1790599353394678, |
|
"grad_norm": 1.4788476638739554, |
|
"learning_rate": 0.0002944987109631094, |
|
"loss": 0.9681, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1890077095249938, |
|
"grad_norm": 0.3505592753718136, |
|
"learning_rate": 0.00029300945880823956, |
|
"loss": 0.9653, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19895548371051977, |
|
"grad_norm": 0.28451648461527196, |
|
"learning_rate": 0.0002913467433828382, |
|
"loss": 0.9511, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20890325789604575, |
|
"grad_norm": 0.3138636289880952, |
|
"learning_rate": 0.00028951258147696967, |
|
"loss": 0.9572, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21885103208157175, |
|
"grad_norm": 0.2732249403521582, |
|
"learning_rate": 0.00028750919783664407, |
|
"loss": 0.9617, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22879880626709773, |
|
"grad_norm": 0.311679721602736, |
|
"learning_rate": 0.000285339022465312, |
|
"loss": 0.9484, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23874658045262373, |
|
"grad_norm": 0.2676711284506792, |
|
"learning_rate": 0.00028300468767639305, |
|
"loss": 0.9397, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2486943546381497, |
|
"grad_norm": 0.25074023315013144, |
|
"learning_rate": 0.00028050902490041194, |
|
"loss": 0.9457, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2586421288236757, |
|
"grad_norm": 0.27129765694428115, |
|
"learning_rate": 0.00027785506125061604, |
|
"loss": 0.9268, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2685899030092017, |
|
"grad_norm": 2.105526076305897, |
|
"learning_rate": 0.00027504601585123963, |
|
"loss": 0.9459, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27853767719472766, |
|
"grad_norm": 0.27202583577769746, |
|
"learning_rate": 0.00027208529593286804, |
|
"loss": 0.9395, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.28848545138025367, |
|
"grad_norm": 0.26240068306380243, |
|
"learning_rate": 0.00026897649269963866, |
|
"loss": 0.9166, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.29843322556577967, |
|
"grad_norm": 0.2631437322326041, |
|
"learning_rate": 0.00026572337697329144, |
|
"loss": 0.92, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3083809997513056, |
|
"grad_norm": 0.2780992526939389, |
|
"learning_rate": 0.00026232989461935164, |
|
"loss": 0.929, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3183287739368316, |
|
"grad_norm": 0.2644307921832001, |
|
"learning_rate": 0.000258800161760994, |
|
"loss": 0.9119, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3282765481223576, |
|
"grad_norm": 0.27009906970951136, |
|
"learning_rate": 0.0002551384597863925, |
|
"loss": 0.9141, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.33822432230788363, |
|
"grad_norm": 0.24907417369935828, |
|
"learning_rate": 0.0002513492301556124, |
|
"loss": 0.9045, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3481720964934096, |
|
"grad_norm": 0.4637150153647203, |
|
"learning_rate": 0.0002474370690133423, |
|
"loss": 0.9185, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3581198706789356, |
|
"grad_norm": 0.28404376319877433, |
|
"learning_rate": 0.00024340672161400278, |
|
"loss": 0.9224, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3680676448644616, |
|
"grad_norm": 0.2633604383062445, |
|
"learning_rate": 0.00023926307656599145, |
|
"loss": 0.9049, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3780154190499876, |
|
"grad_norm": 0.3089691505145177, |
|
"learning_rate": 0.00023501115990204728, |
|
"loss": 0.906, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.38796319323551354, |
|
"grad_norm": 0.2712113108379062, |
|
"learning_rate": 0.00023065612898292607, |
|
"loss": 0.9033, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.39791096742103954, |
|
"grad_norm": 0.2759532883918752, |
|
"learning_rate": 0.00022620326624178135, |
|
"loss": 0.9047, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40785874160656554, |
|
"grad_norm": 0.25413106263769636, |
|
"learning_rate": 0.0002216579727768394, |
|
"loss": 0.8884, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4178065157920915, |
|
"grad_norm": 0.2679789855086274, |
|
"learning_rate": 0.00021702576180013906, |
|
"loss": 0.892, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4277542899776175, |
|
"grad_norm": 0.2531713028754476, |
|
"learning_rate": 0.00021231225195028297, |
|
"loss": 0.8907, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4377020641631435, |
|
"grad_norm": 0.24842966918028864, |
|
"learning_rate": 0.00020752316047731214, |
|
"loss": 0.882, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4476498383486695, |
|
"grad_norm": 0.23591143252036872, |
|
"learning_rate": 0.00020266429630796956, |
|
"loss": 0.8846, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.45759761253419545, |
|
"grad_norm": 0.23767648270009806, |
|
"learning_rate": 0.00019774155299976477, |
|
"loss": 0.8793, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.46754538671972146, |
|
"grad_norm": 0.2271591056583853, |
|
"learning_rate": 0.00019276090159238524, |
|
"loss": 0.8741, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.47749316090524746, |
|
"grad_norm": 0.22901636532179012, |
|
"learning_rate": 0.000187728383365126, |
|
"loss": 0.8837, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.48744093509077346, |
|
"grad_norm": 0.22668623781094616, |
|
"learning_rate": 0.0001826501025091223, |
|
"loss": 0.8735, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4973887092762994, |
|
"grad_norm": 0.23947671322760095, |
|
"learning_rate": 0.00017753221872327318, |
|
"loss": 0.8692, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5073364834618255, |
|
"grad_norm": 0.26156533719751, |
|
"learning_rate": 0.00017238093974283674, |
|
"loss": 0.8625, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5172842576473514, |
|
"grad_norm": 0.25671509792902836, |
|
"learning_rate": 0.00016720251380976007, |
|
"loss": 0.8604, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5272320318328774, |
|
"grad_norm": 0.24704965941321674, |
|
"learning_rate": 0.00016200322209387663, |
|
"loss": 0.8626, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5371798060184034, |
|
"grad_norm": 0.2514007967545614, |
|
"learning_rate": 0.00015678937107416343, |
|
"loss": 0.8528, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5471275802039294, |
|
"grad_norm": 0.24028475499604857, |
|
"learning_rate": 0.00015156728488929967, |
|
"loss": 0.8574, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5570753543894553, |
|
"grad_norm": 0.23213673166180135, |
|
"learning_rate": 0.0001463432976668051, |
|
"loss": 0.86, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5670231285749814, |
|
"grad_norm": 0.23102662796096035, |
|
"learning_rate": 0.00014112374584006253, |
|
"loss": 0.8617, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5769709027605073, |
|
"grad_norm": 0.23635078987821154, |
|
"learning_rate": 0.00013591496046254278, |
|
"loss": 0.8468, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5869186769460333, |
|
"grad_norm": 0.2473298815208931, |
|
"learning_rate": 0.00013072325952855624, |
|
"loss": 0.8465, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5968664511315593, |
|
"grad_norm": 0.22199589321301555, |
|
"learning_rate": 0.00012555494030984393, |
|
"loss": 0.8474, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6068142253170853, |
|
"grad_norm": 0.224149793992071, |
|
"learning_rate": 0.00012041627171730368, |
|
"loss": 0.8523, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6167619995026112, |
|
"grad_norm": 0.2216854964579631, |
|
"learning_rate": 0.00011531348669711734, |
|
"loss": 0.8296, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6267097736881373, |
|
"grad_norm": 0.25823608221836053, |
|
"learning_rate": 0.00011025277467050076, |
|
"loss": 0.8275, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6366575478736632, |
|
"grad_norm": 0.25511921593962283, |
|
"learning_rate": 0.00010524027402624775, |
|
"loss": 0.8379, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6466053220591893, |
|
"grad_norm": 0.2169176240841302, |
|
"learning_rate": 0.00010028206467517357, |
|
"loss": 0.842, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6565530962447153, |
|
"grad_norm": 0.23684621611339568, |
|
"learning_rate": 9.538416067548939e-05, |
|
"loss": 0.8363, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6665008704302412, |
|
"grad_norm": 0.21588755800082085, |
|
"learning_rate": 9.055250293805247e-05, |
|
"loss": 0.8257, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6764486446157673, |
|
"grad_norm": 0.22987884680681675, |
|
"learning_rate": 8.579295202034084e-05, |
|
"loss": 0.8434, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6863964188012932, |
|
"grad_norm": 0.23044188787166803, |
|
"learning_rate": 8.111128101789177e-05, |
|
"loss": 0.8368, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6963441929868192, |
|
"grad_norm": 0.20871504598447846, |
|
"learning_rate": 7.651316856182797e-05, |
|
"loss": 0.8235, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7062919671723452, |
|
"grad_norm": 0.21764586591268964, |
|
"learning_rate": 7.200419193096416e-05, |
|
"loss": 0.8366, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7162397413578712, |
|
"grad_norm": 0.20093905811705248, |
|
"learning_rate": 6.758982028684842e-05, |
|
"loss": 0.8212, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7261875155433971, |
|
"grad_norm": 0.20201151370456955, |
|
"learning_rate": 6.327540803994507e-05, |
|
"loss": 0.8132, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7361352897289232, |
|
"grad_norm": 0.20205806825962228, |
|
"learning_rate": 5.9066188355004337e-05, |
|
"loss": 0.8115, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7460830639144491, |
|
"grad_norm": 0.4492212407917222, |
|
"learning_rate": 5.4967266803496726e-05, |
|
"loss": 0.8178, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7560308380999752, |
|
"grad_norm": 0.20165879598033634, |
|
"learning_rate": 5.0983615170812656e-05, |
|
"loss": 0.8202, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7659786122855011, |
|
"grad_norm": 0.20731013148125454, |
|
"learning_rate": 4.7120065425736744e-05, |
|
"loss": 0.8224, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7759263864710271, |
|
"grad_norm": 0.19813282037840257, |
|
"learning_rate": 4.3381303859513076e-05, |
|
"loss": 0.8031, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7858741606565531, |
|
"grad_norm": 0.19914236210141723, |
|
"learning_rate": 3.977186540161016e-05, |
|
"loss": 0.8146, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7958219348420791, |
|
"grad_norm": 0.20390757189890973, |
|
"learning_rate": 3.629612811907965e-05, |
|
"loss": 0.8132, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.805769709027605, |
|
"grad_norm": 0.19393901917008016, |
|
"learning_rate": 3.295830790618167e-05, |
|
"loss": 0.8142, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8157174832131311, |
|
"grad_norm": 0.1907323400363912, |
|
"learning_rate": 2.976245337071748e-05, |
|
"loss": 0.8129, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.825665257398657, |
|
"grad_norm": 0.199622342411806, |
|
"learning_rate": 2.671244092327191e-05, |
|
"loss": 0.7951, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.835613031584183, |
|
"grad_norm": 0.20412611674653627, |
|
"learning_rate": 2.38119700753228e-05, |
|
"loss": 0.8143, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.845560805769709, |
|
"grad_norm": 0.19936335181674378, |
|
"learning_rate": 2.106455895191985e-05, |
|
"loss": 0.802, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.855508579955235, |
|
"grad_norm": 0.19669632646100263, |
|
"learning_rate": 1.847354002437588e-05, |
|
"loss": 0.7948, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.865456354140761, |
|
"grad_norm": 0.19474265071015343, |
|
"learning_rate": 1.6042056068147402e-05, |
|
"loss": 0.8078, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.875404128326287, |
|
"grad_norm": 0.19134910562287546, |
|
"learning_rate": 1.3773056350806022e-05, |
|
"loss": 0.8067, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.885351902511813, |
|
"grad_norm": 0.18803806717884775, |
|
"learning_rate": 1.1669293054725392e-05, |
|
"loss": 0.7952, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.895299676697339, |
|
"grad_norm": 0.18932335388504165, |
|
"learning_rate": 9.7333179388228e-06, |
|
"loss": 0.8102, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.905247450882865, |
|
"grad_norm": 0.19773216218571474, |
|
"learning_rate": 7.967479243403913e-06, |
|
"loss": 0.8015, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9151952250683909, |
|
"grad_norm": 0.18837812375636145, |
|
"learning_rate": 6.373918841865727e-06, |
|
"loss": 0.7997, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.925142999253917, |
|
"grad_norm": 0.18803298571004357, |
|
"learning_rate": 4.954569642711964e-06, |
|
"loss": 0.8068, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9350907734394429, |
|
"grad_norm": 0.1810703612957102, |
|
"learning_rate": 3.711153245032361e-06, |
|
"loss": 0.7992, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9450385476249689, |
|
"grad_norm": 0.18601132727527311, |
|
"learning_rate": 2.645177850289787e-06, |
|
"loss": 0.8039, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9549863218104949, |
|
"grad_norm": 0.18789475936900635, |
|
"learning_rate": 1.7579364329477375e-06, |
|
"loss": 0.8024, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9649340959960209, |
|
"grad_norm": 0.7153792725698854, |
|
"learning_rate": 1.0505051721574398e-06, |
|
"loss": 0.8047, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9748818701815469, |
|
"grad_norm": 0.19016384162520752, |
|
"learning_rate": 5.23742146406858e-07, |
|
"loss": 0.8004, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9848296443670729, |
|
"grad_norm": 0.19392554346569527, |
|
"learning_rate": 1.7828629271456894e-07, |
|
"loss": 0.7991, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9947774185525988, |
|
"grad_norm": 0.19065289345162353, |
|
"learning_rate": 1.4556631631429393e-08, |
|
"loss": 0.8032, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9987565282268093, |
|
"eval_loss": 1.1664291620254517, |
|
"eval_runtime": 1405.6452, |
|
"eval_samples_per_second": 16.582, |
|
"eval_steps_per_second": 1.037, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.9987565282268093, |
|
"step": 502, |
|
"total_flos": 234414617395200.0, |
|
"train_loss": 0.8937448220423968, |
|
"train_runtime": 13768.4166, |
|
"train_samples_per_second": 4.672, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 502, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 234414617395200.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|