xlsr_jako_exp2 / checkpoint-16000 /trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
5528843
{
"best_metric": 0.25546682656055525,
"best_model_checkpoint": "./jako-xlsr/checkpoint-11000",
"epoch": 18.275271273557966,
"eval_steps": 1000,
"global_step": 16000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.38,
"learning_rate": 0.00012563451776649744,
"loss": 24.8869,
"step": 330
},
{
"epoch": 0.75,
"learning_rate": 0.0002512690355329949,
"loss": 4.8854,
"step": 660
},
{
"epoch": 1.13,
"learning_rate": 0.00029761998271934643,
"loss": 3.5667,
"step": 990
},
{
"epoch": 1.14,
"eval_cer": 0.5188284138707948,
"eval_loss": 2.2322683334350586,
"eval_runtime": 333.3102,
"eval_samples_per_second": 10.522,
"eval_steps_per_second": 0.66,
"step": 1000
},
{
"epoch": 1.51,
"learning_rate": 0.00029373183567669466,
"loss": 2.1771,
"step": 1320
},
{
"epoch": 1.88,
"learning_rate": 0.0002898436886340429,
"loss": 1.8324,
"step": 1650
},
{
"epoch": 2.26,
"learning_rate": 0.00028595554159139105,
"loss": 1.5569,
"step": 1980
},
{
"epoch": 2.28,
"eval_cer": 0.3527113824461948,
"eval_loss": 1.3106200695037842,
"eval_runtime": 333.5876,
"eval_samples_per_second": 10.513,
"eval_steps_per_second": 0.659,
"step": 2000
},
{
"epoch": 2.64,
"learning_rate": 0.00028206739454873927,
"loss": 1.4453,
"step": 2310
},
{
"epoch": 3.02,
"learning_rate": 0.0002781792475060875,
"loss": 1.38,
"step": 2640
},
{
"epoch": 3.39,
"learning_rate": 0.0002742911004634357,
"loss": 1.2238,
"step": 2970
},
{
"epoch": 3.43,
"eval_cer": 0.3098580033250661,
"eval_loss": 1.1108620166778564,
"eval_runtime": 331.1347,
"eval_samples_per_second": 10.591,
"eval_steps_per_second": 0.664,
"step": 3000
},
{
"epoch": 3.77,
"learning_rate": 0.0002704029534207839,
"loss": 1.203,
"step": 3300
},
{
"epoch": 4.15,
"learning_rate": 0.0002665148063781321,
"loss": 1.151,
"step": 3630
},
{
"epoch": 4.52,
"learning_rate": 0.0002626266593354803,
"loss": 1.0593,
"step": 3960
},
{
"epoch": 4.57,
"eval_cer": 0.28911722220708075,
"eval_loss": 1.0389584302902222,
"eval_runtime": 331.5451,
"eval_samples_per_second": 10.578,
"eval_steps_per_second": 0.664,
"step": 4000
},
{
"epoch": 4.9,
"learning_rate": 0.0002587385122928285,
"loss": 1.0674,
"step": 4290
},
{
"epoch": 5.28,
"learning_rate": 0.0002548503652501767,
"loss": 0.974,
"step": 4620
},
{
"epoch": 5.65,
"learning_rate": 0.00025096221820752494,
"loss": 0.9658,
"step": 4950
},
{
"epoch": 5.71,
"eval_cer": 0.29180634669719185,
"eval_loss": 0.973089337348938,
"eval_runtime": 331.0672,
"eval_samples_per_second": 10.593,
"eval_steps_per_second": 0.665,
"step": 5000
},
{
"epoch": 6.03,
"learning_rate": 0.0002470740711648731,
"loss": 0.9596,
"step": 5280
},
{
"epoch": 6.41,
"learning_rate": 0.00024318592412222133,
"loss": 0.8626,
"step": 5610
},
{
"epoch": 6.78,
"learning_rate": 0.00023929777707956955,
"loss": 0.8796,
"step": 5940
},
{
"epoch": 6.85,
"eval_cer": 0.26956656037357024,
"eval_loss": 0.9479135870933533,
"eval_runtime": 330.5292,
"eval_samples_per_second": 10.61,
"eval_steps_per_second": 0.666,
"step": 6000
},
{
"epoch": 7.16,
"learning_rate": 0.00023540963003691772,
"loss": 0.8311,
"step": 6270
},
{
"epoch": 7.54,
"learning_rate": 0.00023152148299426594,
"loss": 0.7889,
"step": 6600
},
{
"epoch": 7.92,
"learning_rate": 0.00022763333595161414,
"loss": 0.8022,
"step": 6930
},
{
"epoch": 8.0,
"eval_cer": 0.2710383109391041,
"eval_loss": 0.9330962300300598,
"eval_runtime": 330.2975,
"eval_samples_per_second": 10.618,
"eval_steps_per_second": 0.666,
"step": 7000
},
{
"epoch": 8.29,
"learning_rate": 0.00022374518890896236,
"loss": 0.7418,
"step": 7260
},
{
"epoch": 8.67,
"learning_rate": 0.00021985704186631055,
"loss": 0.739,
"step": 7590
},
{
"epoch": 9.05,
"learning_rate": 0.00021596889482365878,
"loss": 0.7392,
"step": 7920
},
{
"epoch": 9.14,
"eval_cer": 0.2745814141524261,
"eval_loss": 0.925165593624115,
"eval_runtime": 338.4979,
"eval_samples_per_second": 10.36,
"eval_steps_per_second": 0.65,
"step": 8000
},
{
"epoch": 9.42,
"learning_rate": 0.000212080747781007,
"loss": 0.6765,
"step": 8250
},
{
"epoch": 9.8,
"learning_rate": 0.00020819260073835517,
"loss": 0.6778,
"step": 8580
},
{
"epoch": 10.18,
"learning_rate": 0.0002043044536957034,
"loss": 0.6694,
"step": 8910
},
{
"epoch": 10.28,
"eval_cer": 0.2590008448938432,
"eval_loss": 0.9317852854728699,
"eval_runtime": 336.5027,
"eval_samples_per_second": 10.422,
"eval_steps_per_second": 0.654,
"step": 9000
},
{
"epoch": 10.55,
"learning_rate": 0.00020041630665305158,
"loss": 0.621,
"step": 9240
},
{
"epoch": 10.93,
"learning_rate": 0.0001965281596103998,
"loss": 0.6494,
"step": 9570
},
{
"epoch": 11.31,
"learning_rate": 0.000192640012567748,
"loss": 0.5977,
"step": 9900
},
{
"epoch": 11.42,
"eval_cer": 0.2674225286855087,
"eval_loss": 0.9348525404930115,
"eval_runtime": 338.5078,
"eval_samples_per_second": 10.36,
"eval_steps_per_second": 0.65,
"step": 10000
},
{
"epoch": 11.68,
"learning_rate": 0.00018875186552509622,
"loss": 0.5786,
"step": 10230
},
{
"epoch": 12.06,
"learning_rate": 0.0001848637184824444,
"loss": 0.582,
"step": 10560
},
{
"epoch": 12.44,
"learning_rate": 0.00018097557143979261,
"loss": 0.5484,
"step": 10890
},
{
"epoch": 12.56,
"eval_cer": 0.25546682656055525,
"eval_loss": 0.9408797025680542,
"eval_runtime": 334.3327,
"eval_samples_per_second": 10.49,
"eval_steps_per_second": 0.658,
"step": 11000
},
{
"epoch": 12.82,
"learning_rate": 0.0001770874243971408,
"loss": 0.5508,
"step": 11220
},
{
"epoch": 13.19,
"learning_rate": 0.00017319927735448903,
"loss": 0.5314,
"step": 11550
},
{
"epoch": 13.57,
"learning_rate": 0.00016931113031183723,
"loss": 0.5154,
"step": 11880
},
{
"epoch": 13.71,
"eval_cer": 0.27192862918245164,
"eval_loss": 0.9510018229484558,
"eval_runtime": 332.097,
"eval_samples_per_second": 10.56,
"eval_steps_per_second": 0.662,
"step": 12000
},
{
"epoch": 13.95,
"learning_rate": 0.00016542298326918545,
"loss": 0.5096,
"step": 12210
},
{
"epoch": 14.32,
"learning_rate": 0.00016153483622653367,
"loss": 0.477,
"step": 12540
},
{
"epoch": 14.7,
"learning_rate": 0.00015764668918388184,
"loss": 0.4767,
"step": 12870
},
{
"epoch": 14.85,
"eval_cer": 0.2623531656264479,
"eval_loss": 0.9555571675300598,
"eval_runtime": 337.0597,
"eval_samples_per_second": 10.405,
"eval_steps_per_second": 0.653,
"step": 13000
},
{
"epoch": 15.08,
"learning_rate": 0.00015375854214123006,
"loss": 0.4733,
"step": 13200
},
{
"epoch": 15.45,
"learning_rate": 0.00014987039509857826,
"loss": 0.437,
"step": 13530
},
{
"epoch": 15.83,
"learning_rate": 0.00014598224805592645,
"loss": 0.4536,
"step": 13860
},
{
"epoch": 15.99,
"eval_cer": 0.26838552596912957,
"eval_loss": 0.9849810600280762,
"eval_runtime": 331.7906,
"eval_samples_per_second": 10.57,
"eval_steps_per_second": 0.663,
"step": 14000
},
{
"epoch": 16.21,
"learning_rate": 0.00014209410101327467,
"loss": 0.4291,
"step": 14190
},
{
"epoch": 16.58,
"learning_rate": 0.0001382059539706229,
"loss": 0.4195,
"step": 14520
},
{
"epoch": 16.96,
"learning_rate": 0.0001343178069279711,
"loss": 0.4195,
"step": 14850
},
{
"epoch": 17.13,
"eval_cer": 0.2590008448938432,
"eval_loss": 0.9893819093704224,
"eval_runtime": 336.3435,
"eval_samples_per_second": 10.427,
"eval_steps_per_second": 0.654,
"step": 15000
},
{
"epoch": 17.34,
"learning_rate": 0.0001304296598853193,
"loss": 0.3817,
"step": 15180
},
{
"epoch": 17.72,
"learning_rate": 0.0001265415128426675,
"loss": 0.3905,
"step": 15510
},
{
"epoch": 18.09,
"learning_rate": 0.0001226533658000157,
"loss": 0.3937,
"step": 15840
},
{
"epoch": 18.28,
"eval_cer": 0.26977551261435595,
"eval_loss": 1.019676923751831,
"eval_runtime": 335.5807,
"eval_samples_per_second": 10.451,
"eval_steps_per_second": 0.656,
"step": 16000
}
],
"logging_steps": 330,
"max_steps": 26250,
"num_train_epochs": 30,
"save_steps": 1000,
"total_flos": 7.669732886426786e+19,
"trial_name": null,
"trial_params": null
}