monet-vd-1.4B-100BT-chat-hf / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9987565282268093,
"eval_steps": 500,
"global_step": 502,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001989554837105198,
"grad_norm": 2.019071375807948,
"learning_rate": 5.88235294117647e-06,
"loss": 1.4931,
"step": 1
},
{
"epoch": 0.009947774185525988,
"grad_norm": 1.5423217337624162,
"learning_rate": 2.941176470588235e-05,
"loss": 1.4424,
"step": 5
},
{
"epoch": 0.019895548371051976,
"grad_norm": 0.8308012142463063,
"learning_rate": 5.88235294117647e-05,
"loss": 1.2728,
"step": 10
},
{
"epoch": 0.029843322556577966,
"grad_norm": 0.5145126949446309,
"learning_rate": 8.823529411764705e-05,
"loss": 1.1614,
"step": 15
},
{
"epoch": 0.03979109674210395,
"grad_norm": 0.36488357130003074,
"learning_rate": 0.0001176470588235294,
"loss": 1.1138,
"step": 20
},
{
"epoch": 0.04973887092762994,
"grad_norm": 0.45152250150726514,
"learning_rate": 0.00014705882352941175,
"loss": 1.07,
"step": 25
},
{
"epoch": 0.05968664511315593,
"grad_norm": 0.3640886200970852,
"learning_rate": 0.0001764705882352941,
"loss": 1.0509,
"step": 30
},
{
"epoch": 0.06963441929868192,
"grad_norm": 1.7718021462555353,
"learning_rate": 0.00020588235294117645,
"loss": 1.0168,
"step": 35
},
{
"epoch": 0.0795821934842079,
"grad_norm": 0.3768307338988579,
"learning_rate": 0.0002352941176470588,
"loss": 1.0086,
"step": 40
},
{
"epoch": 0.0895299676697339,
"grad_norm": 0.33093766782578965,
"learning_rate": 0.00026470588235294115,
"loss": 1.0008,
"step": 45
},
{
"epoch": 0.09947774185525989,
"grad_norm": 0.37314906796958397,
"learning_rate": 0.0002941176470588235,
"loss": 0.9975,
"step": 50
},
{
"epoch": 0.10942551604078588,
"grad_norm": 0.39939561360809905,
"learning_rate": 0.00029994177629874796,
"loss": 0.9884,
"step": 55
},
{
"epoch": 0.11937329022631187,
"grad_norm": 0.3919443406321306,
"learning_rate": 0.00029970531997706437,
"loss": 0.9843,
"step": 60
},
{
"epoch": 0.12932106441183785,
"grad_norm": 0.3915947062044229,
"learning_rate": 0.00029928727864250395,
"loss": 0.9913,
"step": 65
},
{
"epoch": 0.13926883859736383,
"grad_norm": 0.3534055778790222,
"learning_rate": 0.00029868815935814996,
"loss": 0.9893,
"step": 70
},
{
"epoch": 0.14921661278288983,
"grad_norm": 0.36735315351449394,
"learning_rate": 0.0002979086888255182,
"loss": 0.9775,
"step": 75
},
{
"epoch": 0.1591643869684158,
"grad_norm": 0.3088341190569696,
"learning_rate": 0.00029694981250310496,
"loss": 0.981,
"step": 80
},
{
"epoch": 0.16911216115394181,
"grad_norm": 0.325839880040256,
"learning_rate": 0.0002958126934595933,
"loss": 0.9659,
"step": 85
},
{
"epoch": 0.1790599353394678,
"grad_norm": 1.4788476638739554,
"learning_rate": 0.0002944987109631094,
"loss": 0.9681,
"step": 90
},
{
"epoch": 0.1890077095249938,
"grad_norm": 0.3505592753718136,
"learning_rate": 0.00029300945880823956,
"loss": 0.9653,
"step": 95
},
{
"epoch": 0.19895548371051977,
"grad_norm": 0.28451648461527196,
"learning_rate": 0.0002913467433828382,
"loss": 0.9511,
"step": 100
},
{
"epoch": 0.20890325789604575,
"grad_norm": 0.3138636289880952,
"learning_rate": 0.00028951258147696967,
"loss": 0.9572,
"step": 105
},
{
"epoch": 0.21885103208157175,
"grad_norm": 0.2732249403521582,
"learning_rate": 0.00028750919783664407,
"loss": 0.9617,
"step": 110
},
{
"epoch": 0.22879880626709773,
"grad_norm": 0.311679721602736,
"learning_rate": 0.000285339022465312,
"loss": 0.9484,
"step": 115
},
{
"epoch": 0.23874658045262373,
"grad_norm": 0.2676711284506792,
"learning_rate": 0.00028300468767639305,
"loss": 0.9397,
"step": 120
},
{
"epoch": 0.2486943546381497,
"grad_norm": 0.25074023315013144,
"learning_rate": 0.00028050902490041194,
"loss": 0.9457,
"step": 125
},
{
"epoch": 0.2586421288236757,
"grad_norm": 0.27129765694428115,
"learning_rate": 0.00027785506125061604,
"loss": 0.9268,
"step": 130
},
{
"epoch": 0.2685899030092017,
"grad_norm": 2.105526076305897,
"learning_rate": 0.00027504601585123963,
"loss": 0.9459,
"step": 135
},
{
"epoch": 0.27853767719472766,
"grad_norm": 0.27202583577769746,
"learning_rate": 0.00027208529593286804,
"loss": 0.9395,
"step": 140
},
{
"epoch": 0.28848545138025367,
"grad_norm": 0.26240068306380243,
"learning_rate": 0.00026897649269963866,
"loss": 0.9166,
"step": 145
},
{
"epoch": 0.29843322556577967,
"grad_norm": 0.2631437322326041,
"learning_rate": 0.00026572337697329144,
"loss": 0.92,
"step": 150
},
{
"epoch": 0.3083809997513056,
"grad_norm": 0.2780992526939389,
"learning_rate": 0.00026232989461935164,
"loss": 0.929,
"step": 155
},
{
"epoch": 0.3183287739368316,
"grad_norm": 0.2644307921832001,
"learning_rate": 0.000258800161760994,
"loss": 0.9119,
"step": 160
},
{
"epoch": 0.3282765481223576,
"grad_norm": 0.27009906970951136,
"learning_rate": 0.0002551384597863925,
"loss": 0.9141,
"step": 165
},
{
"epoch": 0.33822432230788363,
"grad_norm": 0.24907417369935828,
"learning_rate": 0.0002513492301556124,
"loss": 0.9045,
"step": 170
},
{
"epoch": 0.3481720964934096,
"grad_norm": 0.4637150153647203,
"learning_rate": 0.0002474370690133423,
"loss": 0.9185,
"step": 175
},
{
"epoch": 0.3581198706789356,
"grad_norm": 0.28404376319877433,
"learning_rate": 0.00024340672161400278,
"loss": 0.9224,
"step": 180
},
{
"epoch": 0.3680676448644616,
"grad_norm": 0.2633604383062445,
"learning_rate": 0.00023926307656599145,
"loss": 0.9049,
"step": 185
},
{
"epoch": 0.3780154190499876,
"grad_norm": 0.3089691505145177,
"learning_rate": 0.00023501115990204728,
"loss": 0.906,
"step": 190
},
{
"epoch": 0.38796319323551354,
"grad_norm": 0.2712113108379062,
"learning_rate": 0.00023065612898292607,
"loss": 0.9033,
"step": 195
},
{
"epoch": 0.39791096742103954,
"grad_norm": 0.2759532883918752,
"learning_rate": 0.00022620326624178135,
"loss": 0.9047,
"step": 200
},
{
"epoch": 0.40785874160656554,
"grad_norm": 0.25413106263769636,
"learning_rate": 0.0002216579727768394,
"loss": 0.8884,
"step": 205
},
{
"epoch": 0.4178065157920915,
"grad_norm": 0.2679789855086274,
"learning_rate": 0.00021702576180013906,
"loss": 0.892,
"step": 210
},
{
"epoch": 0.4277542899776175,
"grad_norm": 0.2531713028754476,
"learning_rate": 0.00021231225195028297,
"loss": 0.8907,
"step": 215
},
{
"epoch": 0.4377020641631435,
"grad_norm": 0.24842966918028864,
"learning_rate": 0.00020752316047731214,
"loss": 0.882,
"step": 220
},
{
"epoch": 0.4476498383486695,
"grad_norm": 0.23591143252036872,
"learning_rate": 0.00020266429630796956,
"loss": 0.8846,
"step": 225
},
{
"epoch": 0.45759761253419545,
"grad_norm": 0.23767648270009806,
"learning_rate": 0.00019774155299976477,
"loss": 0.8793,
"step": 230
},
{
"epoch": 0.46754538671972146,
"grad_norm": 0.2271591056583853,
"learning_rate": 0.00019276090159238524,
"loss": 0.8741,
"step": 235
},
{
"epoch": 0.47749316090524746,
"grad_norm": 0.22901636532179012,
"learning_rate": 0.000187728383365126,
"loss": 0.8837,
"step": 240
},
{
"epoch": 0.48744093509077346,
"grad_norm": 0.22668623781094616,
"learning_rate": 0.0001826501025091223,
"loss": 0.8735,
"step": 245
},
{
"epoch": 0.4973887092762994,
"grad_norm": 0.23947671322760095,
"learning_rate": 0.00017753221872327318,
"loss": 0.8692,
"step": 250
},
{
"epoch": 0.5073364834618255,
"grad_norm": 0.26156533719751,
"learning_rate": 0.00017238093974283674,
"loss": 0.8625,
"step": 255
},
{
"epoch": 0.5172842576473514,
"grad_norm": 0.25671509792902836,
"learning_rate": 0.00016720251380976007,
"loss": 0.8604,
"step": 260
},
{
"epoch": 0.5272320318328774,
"grad_norm": 0.24704965941321674,
"learning_rate": 0.00016200322209387663,
"loss": 0.8626,
"step": 265
},
{
"epoch": 0.5371798060184034,
"grad_norm": 0.2514007967545614,
"learning_rate": 0.00015678937107416343,
"loss": 0.8528,
"step": 270
},
{
"epoch": 0.5471275802039294,
"grad_norm": 0.24028475499604857,
"learning_rate": 0.00015156728488929967,
"loss": 0.8574,
"step": 275
},
{
"epoch": 0.5570753543894553,
"grad_norm": 0.23213673166180135,
"learning_rate": 0.0001463432976668051,
"loss": 0.86,
"step": 280
},
{
"epoch": 0.5670231285749814,
"grad_norm": 0.23102662796096035,
"learning_rate": 0.00014112374584006253,
"loss": 0.8617,
"step": 285
},
{
"epoch": 0.5769709027605073,
"grad_norm": 0.23635078987821154,
"learning_rate": 0.00013591496046254278,
"loss": 0.8468,
"step": 290
},
{
"epoch": 0.5869186769460333,
"grad_norm": 0.2473298815208931,
"learning_rate": 0.00013072325952855624,
"loss": 0.8465,
"step": 295
},
{
"epoch": 0.5968664511315593,
"grad_norm": 0.22199589321301555,
"learning_rate": 0.00012555494030984393,
"loss": 0.8474,
"step": 300
},
{
"epoch": 0.6068142253170853,
"grad_norm": 0.224149793992071,
"learning_rate": 0.00012041627171730368,
"loss": 0.8523,
"step": 305
},
{
"epoch": 0.6167619995026112,
"grad_norm": 0.2216854964579631,
"learning_rate": 0.00011531348669711734,
"loss": 0.8296,
"step": 310
},
{
"epoch": 0.6267097736881373,
"grad_norm": 0.25823608221836053,
"learning_rate": 0.00011025277467050076,
"loss": 0.8275,
"step": 315
},
{
"epoch": 0.6366575478736632,
"grad_norm": 0.25511921593962283,
"learning_rate": 0.00010524027402624775,
"loss": 0.8379,
"step": 320
},
{
"epoch": 0.6466053220591893,
"grad_norm": 0.2169176240841302,
"learning_rate": 0.00010028206467517357,
"loss": 0.842,
"step": 325
},
{
"epoch": 0.6565530962447153,
"grad_norm": 0.23684621611339568,
"learning_rate": 9.538416067548939e-05,
"loss": 0.8363,
"step": 330
},
{
"epoch": 0.6665008704302412,
"grad_norm": 0.21588755800082085,
"learning_rate": 9.055250293805247e-05,
"loss": 0.8257,
"step": 335
},
{
"epoch": 0.6764486446157673,
"grad_norm": 0.22987884680681675,
"learning_rate": 8.579295202034084e-05,
"loss": 0.8434,
"step": 340
},
{
"epoch": 0.6863964188012932,
"grad_norm": 0.23044188787166803,
"learning_rate": 8.111128101789177e-05,
"loss": 0.8368,
"step": 345
},
{
"epoch": 0.6963441929868192,
"grad_norm": 0.20871504598447846,
"learning_rate": 7.651316856182797e-05,
"loss": 0.8235,
"step": 350
},
{
"epoch": 0.7062919671723452,
"grad_norm": 0.21764586591268964,
"learning_rate": 7.200419193096416e-05,
"loss": 0.8366,
"step": 355
},
{
"epoch": 0.7162397413578712,
"grad_norm": 0.20093905811705248,
"learning_rate": 6.758982028684842e-05,
"loss": 0.8212,
"step": 360
},
{
"epoch": 0.7261875155433971,
"grad_norm": 0.20201151370456955,
"learning_rate": 6.327540803994507e-05,
"loss": 0.8132,
"step": 365
},
{
"epoch": 0.7361352897289232,
"grad_norm": 0.20205806825962228,
"learning_rate": 5.9066188355004337e-05,
"loss": 0.8115,
"step": 370
},
{
"epoch": 0.7460830639144491,
"grad_norm": 0.4492212407917222,
"learning_rate": 5.4967266803496726e-05,
"loss": 0.8178,
"step": 375
},
{
"epoch": 0.7560308380999752,
"grad_norm": 0.20165879598033634,
"learning_rate": 5.0983615170812656e-05,
"loss": 0.8202,
"step": 380
},
{
"epoch": 0.7659786122855011,
"grad_norm": 0.20731013148125454,
"learning_rate": 4.7120065425736744e-05,
"loss": 0.8224,
"step": 385
},
{
"epoch": 0.7759263864710271,
"grad_norm": 0.19813282037840257,
"learning_rate": 4.3381303859513076e-05,
"loss": 0.8031,
"step": 390
},
{
"epoch": 0.7858741606565531,
"grad_norm": 0.19914236210141723,
"learning_rate": 3.977186540161016e-05,
"loss": 0.8146,
"step": 395
},
{
"epoch": 0.7958219348420791,
"grad_norm": 0.20390757189890973,
"learning_rate": 3.629612811907965e-05,
"loss": 0.8132,
"step": 400
},
{
"epoch": 0.805769709027605,
"grad_norm": 0.19393901917008016,
"learning_rate": 3.295830790618167e-05,
"loss": 0.8142,
"step": 405
},
{
"epoch": 0.8157174832131311,
"grad_norm": 0.1907323400363912,
"learning_rate": 2.976245337071748e-05,
"loss": 0.8129,
"step": 410
},
{
"epoch": 0.825665257398657,
"grad_norm": 0.199622342411806,
"learning_rate": 2.671244092327191e-05,
"loss": 0.7951,
"step": 415
},
{
"epoch": 0.835613031584183,
"grad_norm": 0.20412611674653627,
"learning_rate": 2.38119700753228e-05,
"loss": 0.8143,
"step": 420
},
{
"epoch": 0.845560805769709,
"grad_norm": 0.19936335181674378,
"learning_rate": 2.106455895191985e-05,
"loss": 0.802,
"step": 425
},
{
"epoch": 0.855508579955235,
"grad_norm": 0.19669632646100263,
"learning_rate": 1.847354002437588e-05,
"loss": 0.7948,
"step": 430
},
{
"epoch": 0.865456354140761,
"grad_norm": 0.19474265071015343,
"learning_rate": 1.6042056068147402e-05,
"loss": 0.8078,
"step": 435
},
{
"epoch": 0.875404128326287,
"grad_norm": 0.19134910562287546,
"learning_rate": 1.3773056350806022e-05,
"loss": 0.8067,
"step": 440
},
{
"epoch": 0.885351902511813,
"grad_norm": 0.18803806717884775,
"learning_rate": 1.1669293054725392e-05,
"loss": 0.7952,
"step": 445
},
{
"epoch": 0.895299676697339,
"grad_norm": 0.18932335388504165,
"learning_rate": 9.7333179388228e-06,
"loss": 0.8102,
"step": 450
},
{
"epoch": 0.905247450882865,
"grad_norm": 0.19773216218571474,
"learning_rate": 7.967479243403913e-06,
"loss": 0.8015,
"step": 455
},
{
"epoch": 0.9151952250683909,
"grad_norm": 0.18837812375636145,
"learning_rate": 6.373918841865727e-06,
"loss": 0.7997,
"step": 460
},
{
"epoch": 0.925142999253917,
"grad_norm": 0.18803298571004357,
"learning_rate": 4.954569642711964e-06,
"loss": 0.8068,
"step": 465
},
{
"epoch": 0.9350907734394429,
"grad_norm": 0.1810703612957102,
"learning_rate": 3.711153245032361e-06,
"loss": 0.7992,
"step": 470
},
{
"epoch": 0.9450385476249689,
"grad_norm": 0.18601132727527311,
"learning_rate": 2.645177850289787e-06,
"loss": 0.8039,
"step": 475
},
{
"epoch": 0.9549863218104949,
"grad_norm": 0.18789475936900635,
"learning_rate": 1.7579364329477375e-06,
"loss": 0.8024,
"step": 480
},
{
"epoch": 0.9649340959960209,
"grad_norm": 0.7153792725698854,
"learning_rate": 1.0505051721574398e-06,
"loss": 0.8047,
"step": 485
},
{
"epoch": 0.9748818701815469,
"grad_norm": 0.19016384162520752,
"learning_rate": 5.23742146406858e-07,
"loss": 0.8004,
"step": 490
},
{
"epoch": 0.9848296443670729,
"grad_norm": 0.19392554346569527,
"learning_rate": 1.7828629271456894e-07,
"loss": 0.7991,
"step": 495
},
{
"epoch": 0.9947774185525988,
"grad_norm": 0.19065289345162353,
"learning_rate": 1.4556631631429393e-08,
"loss": 0.8032,
"step": 500
},
{
"epoch": 0.9987565282268093,
"eval_loss": 1.1664291620254517,
"eval_runtime": 1405.6452,
"eval_samples_per_second": 16.582,
"eval_steps_per_second": 1.037,
"step": 502
},
{
"epoch": 0.9987565282268093,
"step": 502,
"total_flos": 234414617395200.0,
"train_loss": 0.8937448220423968,
"train_runtime": 13768.4166,
"train_samples_per_second": 4.672,
"train_steps_per_second": 0.036
}
],
"logging_steps": 5,
"max_steps": 502,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 234414617395200.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
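The file above is the standard trainer_state.json written by the Hugging Face Trainer. As a minimal sketch of how to read it back (assuming it has been downloaded locally as "trainer_state.json"; the filename and the printed fields are illustrative, not part of the original state), the training-loss curve in log_history can be extracted with nothing more than the json module:

import json

# Minimal sketch: load the Trainer state and print the training-loss curve.
# Assumes the file shown above is saved locally as "trainer_state.json".
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes per-step training logs, the evaluation record, and the
# end-of-run summary; only the training logs carry a plain "loss" key.
train_logs = [e for e in state["log_history"] if "loss" in e]

for e in train_logs:
    print(f"step {e['step']:>3}  lr {e['learning_rate']:.3e}  loss {e['loss']:.4f}")

# The evaluation record (step 502) carries "eval_loss" instead of "loss".
eval_log = next(e for e in state["log_history"] if "eval_loss" in e)
print("eval loss:", eval_log["eval_loss"])

Filtering on the "loss" key works because the evaluation record uses "eval_loss" and the end-of-run summary uses "train_loss", so neither is picked up as a per-step training log.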