wav2vec2-xlsr-53-ft-cy-en-withlm / trainer_state.json
DewiBrynJones's picture
End of training
09e9fbb verified
raw
history blame
No virus
8.68 kB
{
"best_metric": 0.2764733135700226,
"best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000",
"epoch": 4.524886877828054,
"eval_steps": 500,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 1.6815159320831299,
"learning_rate": 0.00014774999999999999,
"loss": 5.9898,
"step": 400
},
{
"epoch": 0.25,
"eval_loss": 1.3093085289001465,
"eval_runtime": 3176.8808,
"eval_samples_per_second": 8.256,
"eval_steps_per_second": 0.258,
"eval_wer": 0.7970769457237188,
"step": 500
},
{
"epoch": 0.4,
"grad_norm": 2.3292043209075928,
"learning_rate": 0.00029775,
"loss": 1.0749,
"step": 800
},
{
"epoch": 0.5,
"eval_loss": 0.5815957188606262,
"eval_runtime": 1123.0217,
"eval_samples_per_second": 23.355,
"eval_steps_per_second": 0.73,
"eval_wer": 0.4617458414821357,
"step": 1000
},
{
"epoch": 0.6,
"grad_norm": 2.9652466773986816,
"learning_rate": 0.0002855853658536585,
"loss": 0.4332,
"step": 1200
},
{
"epoch": 0.75,
"eval_loss": 0.48338082432746887,
"eval_runtime": 1101.5414,
"eval_samples_per_second": 23.81,
"eval_steps_per_second": 0.744,
"eval_wer": 0.4091476878430383,
"step": 1500
},
{
"epoch": 0.8,
"grad_norm": 3.57124924659729,
"learning_rate": 0.0002709512195121951,
"loss": 0.3655,
"step": 1600
},
{
"epoch": 1.01,
"grad_norm": 1.1706229448318481,
"learning_rate": 0.0002563170731707317,
"loss": 0.3303,
"step": 2000
},
{
"epoch": 1.01,
"eval_loss": 0.42033129930496216,
"eval_runtime": 1101.1368,
"eval_samples_per_second": 23.819,
"eval_steps_per_second": 0.745,
"eval_wer": 0.3419174394885707,
"step": 2000
},
{
"epoch": 1.21,
"grad_norm": 0.8928599953651428,
"learning_rate": 0.0002416829268292683,
"loss": 0.276,
"step": 2400
},
{
"epoch": 1.26,
"eval_loss": 0.3909631669521332,
"eval_runtime": 1098.4606,
"eval_samples_per_second": 23.877,
"eval_steps_per_second": 0.746,
"eval_wer": 0.3186423569490884,
"step": 2500
},
{
"epoch": 1.41,
"grad_norm": 0.6678148508071899,
"learning_rate": 0.00022704878048780485,
"loss": 0.2591,
"step": 2800
},
{
"epoch": 1.51,
"eval_loss": 0.39008986949920654,
"eval_runtime": 1093.6554,
"eval_samples_per_second": 23.982,
"eval_steps_per_second": 0.75,
"eval_wer": 0.3067188190019557,
"step": 3000
},
{
"epoch": 1.61,
"grad_norm": 0.7449674606323242,
"learning_rate": 0.00021241463414634144,
"loss": 0.2501,
"step": 3200
},
{
"epoch": 1.76,
"eval_loss": 0.3645510971546173,
"eval_runtime": 1101.42,
"eval_samples_per_second": 23.813,
"eval_steps_per_second": 0.744,
"eval_wer": 0.2895379891910079,
"step": 3500
},
{
"epoch": 1.81,
"grad_norm": 0.994420051574707,
"learning_rate": 0.00019778048780487803,
"loss": 0.2332,
"step": 3600
},
{
"epoch": 2.01,
"grad_norm": 0.632382333278656,
"learning_rate": 0.00018314634146341462,
"loss": 0.224,
"step": 4000
},
{
"epoch": 2.01,
"eval_loss": 0.35174447298049927,
"eval_runtime": 1113.1837,
"eval_samples_per_second": 23.561,
"eval_steps_per_second": 0.737,
"eval_wer": 0.2805501230206296,
"step": 4000
},
{
"epoch": 2.21,
"grad_norm": 0.5861485600471497,
"learning_rate": 0.00016851219512195123,
"loss": 0.182,
"step": 4400
},
{
"epoch": 2.26,
"eval_loss": 0.33475443720817566,
"eval_runtime": 1111.5845,
"eval_samples_per_second": 23.595,
"eval_steps_per_second": 0.738,
"eval_wer": 0.2655689441255021,
"step": 4500
},
{
"epoch": 2.41,
"grad_norm": 0.585738480091095,
"learning_rate": 0.0001538780487804878,
"loss": 0.1777,
"step": 4800
},
{
"epoch": 2.51,
"eval_loss": 0.32769647240638733,
"eval_runtime": 1109.5932,
"eval_samples_per_second": 23.637,
"eval_steps_per_second": 0.739,
"eval_wer": 0.2611948772948079,
"step": 5000
},
{
"epoch": 2.61,
"grad_norm": 0.6404664516448975,
"learning_rate": 0.00013924390243902438,
"loss": 0.1734,
"step": 5200
},
{
"epoch": 2.77,
"eval_loss": 0.33233708143234253,
"eval_runtime": 1114.1252,
"eval_samples_per_second": 23.541,
"eval_steps_per_second": 0.736,
"eval_wer": 0.2643113999116775,
"step": 5500
},
{
"epoch": 2.82,
"grad_norm": 1.567084550857544,
"learning_rate": 0.00012460975609756097,
"loss": 0.1704,
"step": 5600
},
{
"epoch": 3.02,
"grad_norm": 1.35818612575531,
"learning_rate": 0.00010997560975609755,
"loss": 0.1629,
"step": 6000
},
{
"epoch": 3.02,
"eval_loss": 0.31713536381721497,
"eval_runtime": 1084.7842,
"eval_samples_per_second": 24.178,
"eval_steps_per_second": 0.756,
"eval_wer": 0.24851428931928585,
"step": 6000
},
{
"epoch": 3.22,
"grad_norm": 1.0975894927978516,
"learning_rate": 9.534146341463413e-05,
"loss": 0.1338,
"step": 6400
},
{
"epoch": 3.27,
"eval_loss": 0.310283362865448,
"eval_runtime": 1090.9879,
"eval_samples_per_second": 24.041,
"eval_steps_per_second": 0.752,
"eval_wer": 0.23984186066073643,
"step": 6500
},
{
"epoch": 3.42,
"grad_norm": 1.2747470140457153,
"learning_rate": 8.070731707317072e-05,
"loss": 0.1292,
"step": 6800
},
{
"epoch": 3.52,
"eval_loss": 0.2933865785598755,
"eval_runtime": 1076.7354,
"eval_samples_per_second": 24.359,
"eval_steps_per_second": 0.762,
"eval_wer": 0.22680798267196603,
"step": 7000
},
{
"epoch": 3.62,
"grad_norm": 0.5606548190116882,
"learning_rate": 6.607317073170731e-05,
"loss": 0.1264,
"step": 7200
},
{
"epoch": 3.77,
"eval_loss": 0.29226595163345337,
"eval_runtime": 1074.899,
"eval_samples_per_second": 24.4,
"eval_steps_per_second": 0.763,
"eval_wer": 0.22483965259815364,
"step": 7500
},
{
"epoch": 3.82,
"grad_norm": 1.5185168981552124,
"learning_rate": 5.14390243902439e-05,
"loss": 0.1241,
"step": 7600
},
{
"epoch": 4.02,
"grad_norm": 0.7815582156181335,
"learning_rate": 3.680487804878048e-05,
"loss": 0.118,
"step": 8000
},
{
"epoch": 4.02,
"eval_loss": 0.28800907731056213,
"eval_runtime": 1083.1518,
"eval_samples_per_second": 24.215,
"eval_steps_per_second": 0.757,
"eval_wer": 0.21931739322440225,
"step": 8000
},
{
"epoch": 4.22,
"grad_norm": 1.339690089225769,
"learning_rate": 2.217073170731707e-05,
"loss": 0.0996,
"step": 8400
},
{
"epoch": 4.27,
"eval_loss": 0.2792861759662628,
"eval_runtime": 1078.7477,
"eval_samples_per_second": 24.313,
"eval_steps_per_second": 0.76,
"eval_wer": 0.21242403213256786,
"step": 8500
},
{
"epoch": 4.42,
"grad_norm": 1.0311238765716553,
"learning_rate": 7.536585365853659e-06,
"loss": 0.0969,
"step": 8800
},
{
"epoch": 4.52,
"eval_loss": 0.2764733135700226,
"eval_runtime": 1074.1619,
"eval_samples_per_second": 24.417,
"eval_steps_per_second": 0.763,
"eval_wer": 0.21145248459613483,
"step": 9000
},
{
"epoch": 4.52,
"step": 9000,
"total_flos": 8.933861078537978e+19,
"train_loss": 0.49683192168341744,
"train_runtime": 52483.3074,
"train_samples_per_second": 10.975,
"train_steps_per_second": 0.171
}
],
"logging_steps": 400,
"max_steps": 9000,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 8.933861078537978e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}