wav2vec2-xls-r-300m-tr / trainer_state.json
Sercan's picture
End of training
73932b2
raw
history blame
16.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"global_step": 16950,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.71,
"eval_cer": 0.479669124113056,
"eval_loss": 1.7289525270462036,
"eval_runtime": 352.9433,
"eval_samples_per_second": 28.738,
"eval_steps_per_second": 3.593,
"eval_wer": 0.9804149925270289,
"step": 400
},
{
"epoch": 0.88,
"learning_rate": 0.0002982,
"loss": 4.5435,
"step": 500
},
{
"epoch": 1.42,
"eval_cer": 0.1449701156451013,
"eval_loss": 0.48100030422210693,
"eval_runtime": 355.0295,
"eval_samples_per_second": 28.569,
"eval_steps_per_second": 3.572,
"eval_wer": 0.5774374502610687,
"step": 800
},
{
"epoch": 1.77,
"learning_rate": 0.0002909361702127659,
"loss": 0.523,
"step": 1000
},
{
"epoch": 2.12,
"eval_cer": 0.11564221810245158,
"eval_loss": 0.3859069347381592,
"eval_runtime": 358.2561,
"eval_samples_per_second": 28.312,
"eval_steps_per_second": 3.539,
"eval_wer": 0.48120111026999746,
"step": 1200
},
{
"epoch": 2.65,
"learning_rate": 0.0002818176291793313,
"loss": 0.3449,
"step": 1500
},
{
"epoch": 2.83,
"eval_cer": 0.1094981215960235,
"eval_loss": 0.34923675656318665,
"eval_runtime": 353.6657,
"eval_samples_per_second": 28.68,
"eval_steps_per_second": 3.585,
"eval_wer": 0.44977581086589413,
"step": 1600
},
{
"epoch": 3.54,
"learning_rate": 0.0002726990881458966,
"loss": 0.2814,
"step": 2000
},
{
"epoch": 3.54,
"eval_cer": 0.10994501740010437,
"eval_loss": 0.36604171991348267,
"eval_runtime": 352.2579,
"eval_samples_per_second": 28.794,
"eval_steps_per_second": 3.6,
"eval_wer": 0.44657310895009605,
"step": 2000
},
{
"epoch": 4.25,
"eval_cer": 0.10429683107626926,
"eval_loss": 0.37658488750457764,
"eval_runtime": 352.034,
"eval_samples_per_second": 28.813,
"eval_steps_per_second": 3.602,
"eval_wer": 0.4235330654709913,
"step": 2400
},
{
"epoch": 4.42,
"learning_rate": 0.000263580547112462,
"loss": 0.2463,
"step": 2500
},
{
"epoch": 4.96,
"eval_cer": 0.10097250293365471,
"eval_loss": 0.34164857864379883,
"eval_runtime": 355.8322,
"eval_samples_per_second": 28.505,
"eval_steps_per_second": 3.563,
"eval_wer": 0.4119256973155535,
"step": 2800
},
{
"epoch": 5.31,
"learning_rate": 0.0002544620060790273,
"loss": 0.2296,
"step": 3000
},
{
"epoch": 5.66,
"eval_cer": 0.09793072826716873,
"eval_loss": 0.3322136402130127,
"eval_runtime": 355.102,
"eval_samples_per_second": 28.564,
"eval_steps_per_second": 3.571,
"eval_wer": 0.4012500242628933,
"step": 3200
},
{
"epoch": 6.19,
"learning_rate": 0.00024534346504559266,
"loss": 0.2143,
"step": 3500
},
{
"epoch": 6.37,
"eval_cer": 0.09717821339707125,
"eval_loss": 0.3369796872138977,
"eval_runtime": 355.7454,
"eval_samples_per_second": 28.512,
"eval_steps_per_second": 3.564,
"eval_wer": 0.395601622702304,
"step": 3600
},
{
"epoch": 7.08,
"learning_rate": 0.00023622492401215801,
"loss": 0.1955,
"step": 4000
},
{
"epoch": 7.08,
"eval_cer": 0.09977020906074034,
"eval_loss": 0.3401270806789398,
"eval_runtime": 359.7076,
"eval_samples_per_second": 28.198,
"eval_steps_per_second": 3.525,
"eval_wer": 0.40330751761486056,
"step": 4000
},
{
"epoch": 7.79,
"eval_cer": 0.09622099141671736,
"eval_loss": 0.33754295110702515,
"eval_runtime": 352.1651,
"eval_samples_per_second": 28.802,
"eval_steps_per_second": 3.601,
"eval_wer": 0.3889244744657311,
"step": 4400
},
{
"epoch": 7.96,
"learning_rate": 0.0002271063829787234,
"loss": 0.1845,
"step": 4500
},
{
"epoch": 8.5,
"eval_cer": 0.09233155632184571,
"eval_loss": 0.34551626443862915,
"eval_runtime": 357.5651,
"eval_samples_per_second": 28.367,
"eval_steps_per_second": 3.546,
"eval_wer": 0.37524020264368485,
"step": 4800
},
{
"epoch": 8.85,
"learning_rate": 0.00021798784194528871,
"loss": 0.1752,
"step": 5000
},
{
"epoch": 9.2,
"eval_cer": 0.09245553386749396,
"eval_loss": 0.3335849642753601,
"eval_runtime": 351.3613,
"eval_samples_per_second": 28.868,
"eval_steps_per_second": 3.609,
"eval_wer": 0.37176575632291,
"step": 5200
},
{
"epoch": 9.73,
"learning_rate": 0.0002088693009118541,
"loss": 0.1705,
"step": 5500
},
{
"epoch": 9.91,
"eval_cer": 0.08918598650086351,
"eval_loss": 0.3145359754562378,
"eval_runtime": 355.7051,
"eval_samples_per_second": 28.515,
"eval_steps_per_second": 3.565,
"eval_wer": 0.3653021215473903,
"step": 5600
},
{
"epoch": 10.62,
"learning_rate": 0.00019975075987841941,
"loss": 0.1585,
"step": 6000
},
{
"epoch": 10.62,
"eval_cer": 0.09218451318630942,
"eval_loss": 0.34097233414649963,
"eval_runtime": 352.3797,
"eval_samples_per_second": 28.784,
"eval_steps_per_second": 3.598,
"eval_wer": 0.37370678778703004,
"step": 6000
},
{
"epoch": 11.33,
"eval_cer": 0.08989237019118491,
"eval_loss": 0.3296053409576416,
"eval_runtime": 350.4695,
"eval_samples_per_second": 28.941,
"eval_steps_per_second": 3.618,
"eval_wer": 0.3664279197965799,
"step": 6400
},
{
"epoch": 11.5,
"learning_rate": 0.0001906322188449848,
"loss": 0.1474,
"step": 6500
},
{
"epoch": 12.04,
"eval_cer": 0.08988083739624089,
"eval_loss": 0.34918734431266785,
"eval_runtime": 352.3033,
"eval_samples_per_second": 28.791,
"eval_steps_per_second": 3.599,
"eval_wer": 0.3589937692890002,
"step": 6800
},
{
"epoch": 12.39,
"learning_rate": 0.00018153191489361702,
"loss": 0.1485,
"step": 7000
},
{
"epoch": 12.74,
"eval_cer": 0.08672950117778669,
"eval_loss": 0.31763964891433716,
"eval_runtime": 352.8906,
"eval_samples_per_second": 28.743,
"eval_steps_per_second": 3.593,
"eval_wer": 0.3506085133640016,
"step": 7200
},
{
"epoch": 13.27,
"learning_rate": 0.00017241337386018235,
"loss": 0.137,
"step": 7500
},
{
"epoch": 13.45,
"eval_cer": 0.08901587777543918,
"eval_loss": 0.3532153367996216,
"eval_runtime": 357.5465,
"eval_samples_per_second": 28.368,
"eval_steps_per_second": 3.546,
"eval_wer": 0.360041926279625,
"step": 7600
},
{
"epoch": 14.16,
"learning_rate": 0.00016329483282674772,
"loss": 0.1291,
"step": 8000
},
{
"epoch": 14.16,
"eval_cer": 0.08733497291234787,
"eval_loss": 0.33181944489479065,
"eval_runtime": 352.5023,
"eval_samples_per_second": 28.774,
"eval_steps_per_second": 3.597,
"eval_wer": 0.3570527378248801,
"step": 8000
},
{
"epoch": 14.87,
"eval_cer": 0.08829796129017377,
"eval_loss": 0.33532437682151794,
"eval_runtime": 357.1338,
"eval_samples_per_second": 28.401,
"eval_steps_per_second": 3.55,
"eval_wer": 0.3547623206972185,
"step": 8400
},
{
"epoch": 15.04,
"learning_rate": 0.00015417629179331305,
"loss": 0.1274,
"step": 8500
},
{
"epoch": 15.58,
"eval_cer": 0.08226342633571389,
"eval_loss": 0.32346823811531067,
"eval_runtime": 349.5389,
"eval_samples_per_second": 29.018,
"eval_steps_per_second": 3.628,
"eval_wer": 0.339602864962441,
"step": 8800
},
{
"epoch": 15.93,
"learning_rate": 0.00014505775075987842,
"loss": 0.1198,
"step": 9000
},
{
"epoch": 16.28,
"eval_cer": 0.08322353151480379,
"eval_loss": 0.32590439915657043,
"eval_runtime": 352.9664,
"eval_samples_per_second": 28.736,
"eval_steps_per_second": 3.592,
"eval_wer": 0.33894291426464024,
"step": 9200
},
{
"epoch": 16.81,
"learning_rate": 0.00013595744680851063,
"loss": 0.1164,
"step": 9500
},
{
"epoch": 16.99,
"eval_cer": 0.084353745419318,
"eval_loss": 0.32632604241371155,
"eval_runtime": 355.0498,
"eval_samples_per_second": 28.568,
"eval_steps_per_second": 3.571,
"eval_wer": 0.3411362798190959,
"step": 9600
},
{
"epoch": 17.7,
"learning_rate": 0.00012683890577507598,
"loss": 0.1119,
"step": 10000
},
{
"epoch": 17.7,
"eval_cer": 0.08243353506113824,
"eval_loss": 0.32535773515701294,
"eval_runtime": 352.3077,
"eval_samples_per_second": 28.79,
"eval_steps_per_second": 3.599,
"eval_wer": 0.3377006541276034,
"step": 10000
},
{
"epoch": 18.41,
"eval_cer": 0.0811533948223517,
"eval_loss": 0.3243008255958557,
"eval_runtime": 356.0414,
"eval_samples_per_second": 28.488,
"eval_steps_per_second": 3.561,
"eval_wer": 0.3330615889283565,
"step": 10400
},
{
"epoch": 18.58,
"learning_rate": 0.00011772036474164133,
"loss": 0.1054,
"step": 10500
},
{
"epoch": 19.12,
"eval_cer": 0.07895063098804338,
"eval_loss": 0.32234683632850647,
"eval_runtime": 353.14,
"eval_samples_per_second": 28.722,
"eval_steps_per_second": 3.591,
"eval_wer": 0.3239387410469924,
"step": 10800
},
{
"epoch": 19.47,
"learning_rate": 0.00010860182370820666,
"loss": 0.1017,
"step": 11000
},
{
"epoch": 19.82,
"eval_cer": 0.07741965245922436,
"eval_loss": 0.305361270904541,
"eval_runtime": 348.3343,
"eval_samples_per_second": 29.119,
"eval_steps_per_second": 3.64,
"eval_wer": 0.3189502901842039,
"step": 11200
},
{
"epoch": 20.35,
"learning_rate": 9.948328267477204e-05,
"loss": 0.0964,
"step": 11500
},
{
"epoch": 20.53,
"eval_cer": 0.07850373518396249,
"eval_loss": 0.32777705788612366,
"eval_runtime": 353.3356,
"eval_samples_per_second": 28.706,
"eval_steps_per_second": 3.589,
"eval_wer": 0.3236669966420156,
"step": 11600
},
{
"epoch": 21.24,
"learning_rate": 9.036474164133739e-05,
"loss": 0.0903,
"step": 12000
},
{
"epoch": 21.24,
"eval_cer": 0.07744560124784841,
"eval_loss": 0.3166551887989044,
"eval_runtime": 353.0166,
"eval_samples_per_second": 28.732,
"eval_steps_per_second": 3.592,
"eval_wer": 0.3177274403618083,
"step": 12000
},
{
"epoch": 21.95,
"eval_cer": 0.07655469283842266,
"eval_loss": 0.33310163021087646,
"eval_runtime": 354.4672,
"eval_samples_per_second": 28.615,
"eval_steps_per_second": 3.577,
"eval_wer": 0.3124478347794018,
"step": 12400
},
{
"epoch": 22.12,
"learning_rate": 8.124620060790274e-05,
"loss": 0.0886,
"step": 12500
},
{
"epoch": 22.65,
"eval_cer": 0.07452492092827466,
"eval_loss": 0.3098578155040741,
"eval_runtime": 354.4398,
"eval_samples_per_second": 28.617,
"eval_steps_per_second": 3.577,
"eval_wer": 0.30889574720006213,
"step": 12800
},
{
"epoch": 23.01,
"learning_rate": 7.214589665653494e-05,
"loss": 0.0836,
"step": 13000
},
{
"epoch": 23.36,
"eval_cer": 0.07314963513119996,
"eval_loss": 0.3170570433139801,
"eval_runtime": 351.2874,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.61,
"eval_wer": 0.3047613501814864,
"step": 13200
},
{
"epoch": 23.89,
"learning_rate": 6.30273556231003e-05,
"loss": 0.0796,
"step": 13500
},
{
"epoch": 24.07,
"eval_cer": 0.07325919668316817,
"eval_loss": 0.315768837928772,
"eval_runtime": 354.0965,
"eval_samples_per_second": 28.645,
"eval_steps_per_second": 3.581,
"eval_wer": 0.30410139948368564,
"step": 13600
},
{
"epoch": 24.78,
"learning_rate": 5.390881458966565e-05,
"loss": 0.0739,
"step": 14000
},
{
"epoch": 24.78,
"eval_cer": 0.07206266920772582,
"eval_loss": 0.3202644884586334,
"eval_runtime": 351.0812,
"eval_samples_per_second": 28.891,
"eval_steps_per_second": 3.612,
"eval_wer": 0.3002775674993692,
"step": 14000
},
{
"epoch": 25.49,
"eval_cer": 0.07125249036290822,
"eval_loss": 0.3138331472873688,
"eval_runtime": 353.8044,
"eval_samples_per_second": 28.668,
"eval_steps_per_second": 3.584,
"eval_wer": 0.2973660203031891,
"step": 14400
},
{
"epoch": 25.66,
"learning_rate": 4.4790273556231e-05,
"loss": 0.0742,
"step": 14500
},
{
"epoch": 26.19,
"eval_cer": 0.07109103123369191,
"eval_loss": 0.3196839392185211,
"eval_runtime": 353.996,
"eval_samples_per_second": 28.653,
"eval_steps_per_second": 3.582,
"eval_wer": 0.2958520157611755,
"step": 14800
},
{
"epoch": 26.55,
"learning_rate": 3.568996960486322e-05,
"loss": 0.07,
"step": 15000
},
{
"epoch": 26.9,
"eval_cer": 0.07031256757497037,
"eval_loss": 0.3232352137565613,
"eval_runtime": 355.2159,
"eval_samples_per_second": 28.554,
"eval_steps_per_second": 3.57,
"eval_wer": 0.2951726547487335,
"step": 15200
},
{
"epoch": 27.43,
"learning_rate": 2.6571428571428566e-05,
"loss": 0.0654,
"step": 15500
},
{
"epoch": 27.61,
"eval_cer": 0.07008479487482593,
"eval_loss": 0.3242589831352234,
"eval_runtime": 352.5643,
"eval_samples_per_second": 28.769,
"eval_steps_per_second": 3.597,
"eval_wer": 0.2938527533531319,
"step": 15600
},
{
"epoch": 28.32,
"learning_rate": 1.745288753799392e-05,
"loss": 0.0631,
"step": 16000
},
{
"epoch": 28.32,
"eval_cer": 0.06881907062971944,
"eval_loss": 0.3212898373603821,
"eval_runtime": 352.1058,
"eval_samples_per_second": 28.807,
"eval_steps_per_second": 3.601,
"eval_wer": 0.2875638114093829,
"step": 16000
},
{
"epoch": 29.03,
"eval_cer": 0.06853940035232689,
"eval_loss": 0.3151107728481293,
"eval_runtime": 353.743,
"eval_samples_per_second": 28.673,
"eval_steps_per_second": 3.585,
"eval_wer": 0.28799083833148936,
"step": 16400
},
{
"epoch": 29.2,
"learning_rate": 8.33434650455927e-06,
"loss": 0.0607,
"step": 16500
},
{
"epoch": 29.73,
"eval_cer": 0.06810115414445403,
"eval_loss": 0.31835824251174927,
"eval_runtime": 352.8555,
"eval_samples_per_second": 28.745,
"eval_steps_per_second": 3.594,
"eval_wer": 0.28665152662124654,
"step": 16800
},
{
"epoch": 30.0,
"step": 16950,
"total_flos": 1.1955997003691401e+20,
"train_loss": 0.2775966154790558,
"train_runtime": 58895.0057,
"train_samples_per_second": 18.401,
"train_steps_per_second": 0.288
}
],
"max_steps": 16950,
"num_train_epochs": 30,
"total_flos": 1.1955997003691401e+20,
"trial_name": null,
"trial_params": null
}