BioMistral-Clinical-7B / trainer_state.json
ZiweiChen's picture
Upload 5 files
40d82a5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.7346053772766696,
"eval_steps": 200,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03469210754553339,
"eval_loss": 1.8023786544799805,
"eval_runtime": 572.3832,
"eval_samples_per_second": 10.074,
"eval_steps_per_second": 1.26,
"step": 200
},
{
"epoch": 0.06938421509106678,
"eval_loss": 1.7305923700332642,
"eval_runtime": 572.203,
"eval_samples_per_second": 10.077,
"eval_steps_per_second": 1.26,
"step": 400
},
{
"epoch": 0.08673026886383348,
"grad_norm": 6.57196569442749,
"learning_rate": 2.4566348655680836e-05,
"loss": 1.8068,
"step": 500
},
{
"epoch": 0.10407632263660017,
"eval_loss": 1.7001726627349854,
"eval_runtime": 572.0907,
"eval_samples_per_second": 10.079,
"eval_steps_per_second": 1.26,
"step": 600
},
{
"epoch": 0.13876843018213356,
"eval_loss": 1.6665369272232056,
"eval_runtime": 572.5138,
"eval_samples_per_second": 10.071,
"eval_steps_per_second": 1.259,
"step": 800
},
{
"epoch": 0.17346053772766695,
"grad_norm": 6.407934665679932,
"learning_rate": 2.4132697311361666e-05,
"loss": 1.6773,
"step": 1000
},
{
"epoch": 0.17346053772766695,
"eval_loss": 1.645666480064392,
"eval_runtime": 572.4669,
"eval_samples_per_second": 10.072,
"eval_steps_per_second": 1.259,
"step": 1000
},
{
"epoch": 0.20815264527320035,
"eval_loss": 1.6295970678329468,
"eval_runtime": 572.6705,
"eval_samples_per_second": 10.069,
"eval_steps_per_second": 1.259,
"step": 1200
},
{
"epoch": 0.24284475281873374,
"eval_loss": 1.6119849681854248,
"eval_runtime": 572.6602,
"eval_samples_per_second": 10.069,
"eval_steps_per_second": 1.259,
"step": 1400
},
{
"epoch": 0.26019080659150046,
"grad_norm": 6.23416805267334,
"learning_rate": 2.36990459670425e-05,
"loss": 1.6291,
"step": 1500
},
{
"epoch": 0.2775368603642671,
"eval_loss": 1.5977734327316284,
"eval_runtime": 572.802,
"eval_samples_per_second": 10.066,
"eval_steps_per_second": 1.259,
"step": 1600
},
{
"epoch": 0.31222896790980054,
"eval_loss": 1.5906885862350464,
"eval_runtime": 572.7238,
"eval_samples_per_second": 10.068,
"eval_steps_per_second": 1.259,
"step": 1800
},
{
"epoch": 0.3469210754553339,
"grad_norm": 5.846036434173584,
"learning_rate": 2.326539462272333e-05,
"loss": 1.6032,
"step": 2000
},
{
"epoch": 0.3469210754553339,
"eval_loss": 1.5792902708053589,
"eval_runtime": 572.6421,
"eval_samples_per_second": 10.069,
"eval_steps_per_second": 1.259,
"step": 2000
},
{
"epoch": 0.38161318300086733,
"eval_loss": 1.5674443244934082,
"eval_runtime": 572.94,
"eval_samples_per_second": 10.064,
"eval_steps_per_second": 1.258,
"step": 2200
},
{
"epoch": 0.4163052905464007,
"eval_loss": 1.5650794506072998,
"eval_runtime": 573.1561,
"eval_samples_per_second": 10.06,
"eval_steps_per_second": 1.258,
"step": 2400
},
{
"epoch": 0.4336513443191674,
"grad_norm": 6.9578962326049805,
"learning_rate": 2.2831743278404163e-05,
"loss": 1.5699,
"step": 2500
},
{
"epoch": 0.45099739809193407,
"eval_loss": 1.5550028085708618,
"eval_runtime": 572.973,
"eval_samples_per_second": 10.063,
"eval_steps_per_second": 1.258,
"step": 2600
},
{
"epoch": 0.4856895056374675,
"eval_loss": 1.539338231086731,
"eval_runtime": 573.1731,
"eval_samples_per_second": 10.06,
"eval_steps_per_second": 1.258,
"step": 2800
},
{
"epoch": 0.5203816131830009,
"grad_norm": 6.062795639038086,
"learning_rate": 2.2398091934084997e-05,
"loss": 1.5555,
"step": 3000
},
{
"epoch": 0.5203816131830009,
"eval_loss": 1.533992886543274,
"eval_runtime": 573.3108,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 3000
},
{
"epoch": 0.5550737207285342,
"eval_loss": 1.5279603004455566,
"eval_runtime": 573.3234,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 3200
},
{
"epoch": 0.5897658282740676,
"eval_loss": 1.5221937894821167,
"eval_runtime": 573.2462,
"eval_samples_per_second": 10.059,
"eval_steps_per_second": 1.258,
"step": 3400
},
{
"epoch": 0.6071118820468343,
"grad_norm": 5.474059581756592,
"learning_rate": 2.196444058976583e-05,
"loss": 1.5258,
"step": 3500
},
{
"epoch": 0.6244579358196011,
"eval_loss": 1.5145606994628906,
"eval_runtime": 573.1527,
"eval_samples_per_second": 10.06,
"eval_steps_per_second": 1.258,
"step": 3600
},
{
"epoch": 0.6591500433651344,
"eval_loss": 1.5087436437606812,
"eval_runtime": 573.3236,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 3800
},
{
"epoch": 0.6938421509106678,
"grad_norm": 4.400829315185547,
"learning_rate": 2.1530789245446662e-05,
"loss": 1.5145,
"step": 4000
},
{
"epoch": 0.6938421509106678,
"eval_loss": 1.501986026763916,
"eval_runtime": 572.9788,
"eval_samples_per_second": 10.063,
"eval_steps_per_second": 1.258,
"step": 4000
},
{
"epoch": 0.7285342584562012,
"eval_loss": 1.4961259365081787,
"eval_runtime": 572.9318,
"eval_samples_per_second": 10.064,
"eval_steps_per_second": 1.258,
"step": 4200
},
{
"epoch": 0.7632263660017347,
"eval_loss": 1.4921443462371826,
"eval_runtime": 573.2197,
"eval_samples_per_second": 10.059,
"eval_steps_per_second": 1.258,
"step": 4400
},
{
"epoch": 0.7805724197745013,
"grad_norm": 5.124959945678711,
"learning_rate": 2.1097137901127496e-05,
"loss": 1.4981,
"step": 4500
},
{
"epoch": 0.797918473547268,
"eval_loss": 1.48764967918396,
"eval_runtime": 573.3463,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 4600
},
{
"epoch": 0.8326105810928014,
"eval_loss": 1.4827669858932495,
"eval_runtime": 573.3276,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 4800
},
{
"epoch": 0.8673026886383348,
"grad_norm": 5.631836414337158,
"learning_rate": 2.0663486556808327e-05,
"loss": 1.4758,
"step": 5000
},
{
"epoch": 0.8673026886383348,
"eval_loss": 1.4766356945037842,
"eval_runtime": 573.3049,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 5000
},
{
"epoch": 0.9019947961838681,
"eval_loss": 1.4708250761032104,
"eval_runtime": 573.3902,
"eval_samples_per_second": 10.056,
"eval_steps_per_second": 1.257,
"step": 5200
},
{
"epoch": 0.9366869037294016,
"eval_loss": 1.4667783975601196,
"eval_runtime": 573.338,
"eval_samples_per_second": 10.057,
"eval_steps_per_second": 1.258,
"step": 5400
},
{
"epoch": 0.9540329575021682,
"grad_norm": 4.832674980163574,
"learning_rate": 2.0229835212489158e-05,
"loss": 1.4818,
"step": 5500
},
{
"epoch": 0.971379011274935,
"eval_loss": 1.4649358987808228,
"eval_runtime": 573.5907,
"eval_samples_per_second": 10.052,
"eval_steps_per_second": 1.257,
"step": 5600
},
{
"epoch": 1.0060711188204683,
"eval_loss": 1.4911904335021973,
"eval_runtime": 573.8034,
"eval_samples_per_second": 10.049,
"eval_steps_per_second": 1.257,
"step": 5800
},
{
"epoch": 1.0407632263660018,
"grad_norm": 6.181447982788086,
"learning_rate": 1.9796183868169993e-05,
"loss": 1.3108,
"step": 6000
},
{
"epoch": 1.0407632263660018,
"eval_loss": 1.5114498138427734,
"eval_runtime": 573.9439,
"eval_samples_per_second": 10.046,
"eval_steps_per_second": 1.256,
"step": 6000
},
{
"epoch": 1.0754553339115351,
"eval_loss": 1.5078836679458618,
"eval_runtime": 573.7341,
"eval_samples_per_second": 10.05,
"eval_steps_per_second": 1.257,
"step": 6200
},
{
"epoch": 1.1101474414570685,
"eval_loss": 1.512686848640442,
"eval_runtime": 573.5532,
"eval_samples_per_second": 10.053,
"eval_steps_per_second": 1.257,
"step": 6400
},
{
"epoch": 1.1274934952298352,
"grad_norm": 6.276436805725098,
"learning_rate": 1.9362532523850823e-05,
"loss": 1.1338,
"step": 6500
},
{
"epoch": 1.144839549002602,
"eval_loss": 1.5086950063705444,
"eval_runtime": 573.502,
"eval_samples_per_second": 10.054,
"eval_steps_per_second": 1.257,
"step": 6600
},
{
"epoch": 1.1795316565481353,
"eval_loss": 1.5138036012649536,
"eval_runtime": 573.4778,
"eval_samples_per_second": 10.054,
"eval_steps_per_second": 1.257,
"step": 6800
},
{
"epoch": 1.2142237640936686,
"grad_norm": 5.294378280639648,
"learning_rate": 1.8928881179531658e-05,
"loss": 1.1411,
"step": 7000
},
{
"epoch": 1.2142237640936686,
"eval_loss": 1.5119119882583618,
"eval_runtime": 573.2773,
"eval_samples_per_second": 10.058,
"eval_steps_per_second": 1.258,
"step": 7000
},
{
"epoch": 1.2489158716392021,
"eval_loss": 1.5059071779251099,
"eval_runtime": 573.2436,
"eval_samples_per_second": 10.059,
"eval_steps_per_second": 1.258,
"step": 7200
},
{
"epoch": 1.2836079791847355,
"eval_loss": 1.4931423664093018,
"eval_runtime": 573.2431,
"eval_samples_per_second": 10.059,
"eval_steps_per_second": 1.258,
"step": 7400
},
{
"epoch": 1.3009540329575022,
"grad_norm": 5.875624179840088,
"learning_rate": 1.8495229835212492e-05,
"loss": 1.1482,
"step": 7500
},
{
"epoch": 1.318300086730269,
"eval_loss": 1.4929821491241455,
"eval_runtime": 572.8059,
"eval_samples_per_second": 10.066,
"eval_steps_per_second": 1.259,
"step": 7600
},
{
"epoch": 1.3529921942758023,
"eval_loss": 1.490503191947937,
"eval_runtime": 572.7436,
"eval_samples_per_second": 10.067,
"eval_steps_per_second": 1.259,
"step": 7800
},
{
"epoch": 1.3876843018213356,
"grad_norm": 5.962628364562988,
"learning_rate": 1.8061578490893323e-05,
"loss": 1.1534,
"step": 8000
},
{
"epoch": 1.3876843018213356,
"eval_loss": 1.4796279668807983,
"eval_runtime": 572.5741,
"eval_samples_per_second": 10.07,
"eval_steps_per_second": 1.259,
"step": 8000
},
{
"epoch": 1.4223764093668692,
"eval_loss": 1.4942739009857178,
"eval_runtime": 572.7895,
"eval_samples_per_second": 10.067,
"eval_steps_per_second": 1.259,
"step": 8200
},
{
"epoch": 1.4570685169124025,
"eval_loss": 1.478100299835205,
"eval_runtime": 574.02,
"eval_samples_per_second": 10.045,
"eval_steps_per_second": 1.256,
"step": 8400
},
{
"epoch": 1.4744145706851692,
"grad_norm": 5.818081855773926,
"learning_rate": 1.7627927146574154e-05,
"loss": 1.1493,
"step": 8500
},
{
"epoch": 1.4917606244579358,
"eval_loss": 1.4706262350082397,
"eval_runtime": 573.645,
"eval_samples_per_second": 10.052,
"eval_steps_per_second": 1.257,
"step": 8600
},
{
"epoch": 1.5264527320034693,
"eval_loss": 1.4702831506729126,
"eval_runtime": 573.6402,
"eval_samples_per_second": 10.052,
"eval_steps_per_second": 1.257,
"step": 8800
},
{
"epoch": 1.5611448395490026,
"grad_norm": 6.020638465881348,
"learning_rate": 1.7194275802254988e-05,
"loss": 1.1517,
"step": 9000
},
{
"epoch": 1.5611448395490026,
"eval_loss": 1.4639151096343994,
"eval_runtime": 573.5071,
"eval_samples_per_second": 10.054,
"eval_steps_per_second": 1.257,
"step": 9000
},
{
"epoch": 1.595836947094536,
"eval_loss": 1.4722236394882202,
"eval_runtime": 573.4545,
"eval_samples_per_second": 10.055,
"eval_steps_per_second": 1.257,
"step": 9200
},
{
"epoch": 1.6305290546400695,
"eval_loss": 1.4613826274871826,
"eval_runtime": 573.2765,
"eval_samples_per_second": 10.058,
"eval_steps_per_second": 1.258,
"step": 9400
},
{
"epoch": 1.647875108412836,
"grad_norm": 5.535754680633545,
"learning_rate": 1.676062445793582e-05,
"loss": 1.1428,
"step": 9500
},
{
"epoch": 1.6652211621856028,
"eval_loss": 1.4539824724197388,
"eval_runtime": 573.1598,
"eval_samples_per_second": 10.06,
"eval_steps_per_second": 1.258,
"step": 9600
},
{
"epoch": 1.699913269731136,
"eval_loss": 1.457112431526184,
"eval_runtime": 573.1778,
"eval_samples_per_second": 10.06,
"eval_steps_per_second": 1.258,
"step": 9800
},
{
"epoch": 1.7346053772766696,
"grad_norm": 6.019700527191162,
"learning_rate": 1.6326973113616653e-05,
"loss": 1.1466,
"step": 10000
},
{
"epoch": 1.7346053772766696,
"eval_loss": 1.4443352222442627,
"eval_runtime": 573.1133,
"eval_samples_per_second": 10.061,
"eval_steps_per_second": 1.258,
"step": 10000
}
],
"logging_steps": 500,
"max_steps": 28825,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.74751582519296e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}