wav2vec2-xls-r-1b-hy / trainer_state.json
arampacha's picture
model upd
925711f
{
"best_metric": 0.1725786179304123,
"best_model_checkpoint": "./checkpoint-4000",
"epoch": 70.17543859649123,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.75,
"learning_rate": 1.26275e-05,
"loss": 6.0202,
"step": 100
},
{
"epoch": 3.51,
"learning_rate": 2.5002499999999997e-05,
"loss": 2.9203,
"step": 200
},
{
"epoch": 5.26,
"learning_rate": 3.73775e-05,
"loss": 1.7768,
"step": 300
},
{
"epoch": 7.02,
"learning_rate": 4.9752499999999995e-05,
"loss": 1.331,
"step": 400
},
{
"epoch": 7.02,
"eval_cer": 0.08315629742033384,
"eval_loss": 0.32805994153022766,
"eval_runtime": 15.7786,
"eval_samples_per_second": 21.231,
"eval_steps_per_second": 0.38,
"eval_wer": 0.46174863387978143,
"step": 400
},
{
"epoch": 8.77,
"learning_rate": 5e-05,
"loss": 1.165,
"step": 500
},
{
"epoch": 10.53,
"learning_rate": 5e-05,
"loss": 1.0755,
"step": 600
},
{
"epoch": 12.28,
"learning_rate": 5e-05,
"loss": 1.0359,
"step": 700
},
{
"epoch": 14.04,
"learning_rate": 5e-05,
"loss": 0.9968,
"step": 800
},
{
"epoch": 14.04,
"eval_cer": 0.05275670207384927,
"eval_loss": 0.21139617264270782,
"eval_runtime": 15.2468,
"eval_samples_per_second": 21.972,
"eval_steps_per_second": 0.394,
"eval_wer": 0.3114754098360656,
"step": 800
},
{
"epoch": 15.79,
"learning_rate": 5e-05,
"loss": 0.9746,
"step": 900
},
{
"epoch": 17.54,
"learning_rate": 5e-05,
"loss": 0.9629,
"step": 1000
},
{
"epoch": 19.3,
"learning_rate": 5e-05,
"loss": 0.9639,
"step": 1100
},
{
"epoch": 21.05,
"learning_rate": 5e-05,
"loss": 0.9367,
"step": 1200
},
{
"epoch": 21.05,
"eval_cer": 0.048052604957005564,
"eval_loss": 0.19284144043922424,
"eval_runtime": 15.2894,
"eval_samples_per_second": 21.911,
"eval_steps_per_second": 0.392,
"eval_wer": 0.27712724434035907,
"step": 1200
},
{
"epoch": 22.81,
"learning_rate": 5e-05,
"loss": 0.9265,
"step": 1300
},
{
"epoch": 24.56,
"learning_rate": 5e-05,
"loss": 0.9093,
"step": 1400
},
{
"epoch": 26.32,
"learning_rate": 5e-05,
"loss": 0.9126,
"step": 1500
},
{
"epoch": 28.07,
"learning_rate": 5e-05,
"loss": 0.9066,
"step": 1600
},
{
"epoch": 28.07,
"eval_cer": 0.047698533131006575,
"eval_loss": 0.19045932590961456,
"eval_runtime": 15.3651,
"eval_samples_per_second": 21.803,
"eval_steps_per_second": 0.39,
"eval_wer": 0.2728337236533958,
"step": 1600
},
{
"epoch": 29.82,
"learning_rate": 5e-05,
"loss": 0.8954,
"step": 1700
},
{
"epoch": 31.58,
"learning_rate": 5e-05,
"loss": 0.8807,
"step": 1800
},
{
"epoch": 33.33,
"learning_rate": 5e-05,
"loss": 0.8753,
"step": 1900
},
{
"epoch": 35.09,
"learning_rate": 5e-05,
"loss": 0.869,
"step": 2000
},
{
"epoch": 35.09,
"eval_cer": 0.04486595852301467,
"eval_loss": 0.181670144200325,
"eval_runtime": 15.5069,
"eval_samples_per_second": 21.603,
"eval_steps_per_second": 0.387,
"eval_wer": 0.25644028103044497,
"step": 2000
},
{
"epoch": 36.84,
"learning_rate": 4.76725e-05,
"loss": 0.8637,
"step": 2100
},
{
"epoch": 38.6,
"learning_rate": 4.5297500000000005e-05,
"loss": 0.8557,
"step": 2200
},
{
"epoch": 40.35,
"learning_rate": 4.29225e-05,
"loss": 0.8537,
"step": 2300
},
{
"epoch": 42.11,
"learning_rate": 4.0547500000000004e-05,
"loss": 0.8319,
"step": 2400
},
{
"epoch": 42.11,
"eval_cer": 0.04334850784016186,
"eval_loss": 0.18102943897247314,
"eval_runtime": 15.5131,
"eval_samples_per_second": 21.595,
"eval_steps_per_second": 0.387,
"eval_wer": 0.24902419984387197,
"step": 2400
},
{
"epoch": 43.86,
"learning_rate": 3.81725e-05,
"loss": 0.837,
"step": 2500
},
{
"epoch": 45.61,
"learning_rate": 3.57975e-05,
"loss": 0.8098,
"step": 2600
},
{
"epoch": 47.37,
"learning_rate": 3.34225e-05,
"loss": 0.8212,
"step": 2700
},
{
"epoch": 49.12,
"learning_rate": 3.1047500000000004e-05,
"loss": 0.8113,
"step": 2800
},
{
"epoch": 49.12,
"eval_cer": 0.042134547293879616,
"eval_loss": 0.17691758275032043,
"eval_runtime": 15.2685,
"eval_samples_per_second": 21.941,
"eval_steps_per_second": 0.393,
"eval_wer": 0.24043715846994534,
"step": 2800
},
{
"epoch": 50.88,
"learning_rate": 2.8672500000000004e-05,
"loss": 0.7928,
"step": 2900
},
{
"epoch": 52.63,
"learning_rate": 2.6297500000000004e-05,
"loss": 0.7871,
"step": 3000
},
{
"epoch": 54.39,
"learning_rate": 2.3922500000000003e-05,
"loss": 0.777,
"step": 3100
},
{
"epoch": 56.14,
"learning_rate": 2.1571249999999998e-05,
"loss": 0.7624,
"step": 3200
},
{
"epoch": 56.14,
"eval_cer": 0.04319676277187658,
"eval_loss": 0.1837695837020874,
"eval_runtime": 15.5608,
"eval_samples_per_second": 21.528,
"eval_steps_per_second": 0.386,
"eval_wer": 0.24434035909445745,
"step": 3200
},
{
"epoch": 57.89,
"learning_rate": 1.9196249999999998e-05,
"loss": 0.7624,
"step": 3300
},
{
"epoch": 59.65,
"learning_rate": 1.682125e-05,
"loss": 0.7517,
"step": 3400
},
{
"epoch": 61.4,
"learning_rate": 1.444625e-05,
"loss": 0.7417,
"step": 3500
},
{
"epoch": 63.16,
"learning_rate": 1.207125e-05,
"loss": 0.7328,
"step": 3600
},
{
"epoch": 63.16,
"eval_cer": 0.041476985331310064,
"eval_loss": 0.17556767165660858,
"eval_runtime": 15.151,
"eval_samples_per_second": 22.111,
"eval_steps_per_second": 0.396,
"eval_wer": 0.2302888368462139,
"step": 3600
},
{
"epoch": 64.91,
"learning_rate": 9.69625e-06,
"loss": 0.7334,
"step": 3700
},
{
"epoch": 66.67,
"learning_rate": 7.321250000000002e-06,
"loss": 0.7261,
"step": 3800
},
{
"epoch": 68.42,
"learning_rate": 4.946250000000002e-06,
"loss": 0.7268,
"step": 3900
},
{
"epoch": 70.18,
"learning_rate": 2.5712500000000027e-06,
"loss": 0.7209,
"step": 4000
},
{
"epoch": 70.18,
"eval_cer": 0.04046535154274153,
"eval_loss": 0.1725786179304123,
"eval_runtime": 14.417,
"eval_samples_per_second": 23.236,
"eval_steps_per_second": 0.416,
"eval_wer": 0.2259953161592506,
"step": 4000
},
{
"epoch": 70.18,
"step": 4000,
"total_flos": 3.2826266564131357e+20,
"train_loss": 0.07268305778503419,
"train_runtime": 4050.6325,
"train_samples_per_second": 126.4,
"train_steps_per_second": 0.988
}
],
"max_steps": 4000,
"num_train_epochs": 71,
"total_flos": 3.2826266564131357e+20,
"trial_name": null,
"trial_params": null
}