{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.080321285140563,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10040160642570281,
"grad_norm": 5.931593418121338,
"learning_rate": 5.000000000000001e-07,
"loss": 1.0153,
"step": 25
},
{
"epoch": 0.20080321285140562,
"grad_norm": 4.555520534515381,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.7001,
"step": 50
},
{
"epoch": 0.30120481927710846,
"grad_norm": 3.582505941390991,
"learning_rate": 1.5e-06,
"loss": 0.4808,
"step": 75
},
{
"epoch": 0.40160642570281124,
"grad_norm": 4.382739067077637,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.4384,
"step": 100
},
{
"epoch": 0.5020080321285141,
"grad_norm": 4.169747352600098,
"learning_rate": 2.5e-06,
"loss": 0.4109,
"step": 125
},
{
"epoch": 0.6024096385542169,
"grad_norm": 3.8426809310913086,
"learning_rate": 3e-06,
"loss": 0.3934,
"step": 150
},
{
"epoch": 0.7028112449799196,
"grad_norm": 3.972766399383545,
"learning_rate": 3.5e-06,
"loss": 0.3788,
"step": 175
},
{
"epoch": 0.8032128514056225,
"grad_norm": 3.828845739364624,
"learning_rate": 4.000000000000001e-06,
"loss": 0.372,
"step": 200
},
{
"epoch": 0.9036144578313253,
"grad_norm": 4.017838954925537,
"learning_rate": 4.5e-06,
"loss": 0.362,
"step": 225
},
{
"epoch": 1.0040160642570282,
"grad_norm": 2.762294054031372,
"learning_rate": 5e-06,
"loss": 0.3486,
"step": 250
},
{
"epoch": 1.104417670682731,
"grad_norm": 3.553866386413574,
"learning_rate": 5.500000000000001e-06,
"loss": 0.2512,
"step": 275
},
{
"epoch": 1.2048192771084336,
"grad_norm": 3.850424289703369,
"learning_rate": 6e-06,
"loss": 0.2581,
"step": 300
},
{
"epoch": 1.3052208835341366,
"grad_norm": 3.749298572540283,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.2579,
"step": 325
},
{
"epoch": 1.4056224899598393,
"grad_norm": 3.389807939529419,
"learning_rate": 7e-06,
"loss": 0.2721,
"step": 350
},
{
"epoch": 1.5060240963855422,
"grad_norm": 3.3439886569976807,
"learning_rate": 7.500000000000001e-06,
"loss": 0.2516,
"step": 375
},
{
"epoch": 1.606425702811245,
"grad_norm": 3.6428940296173096,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2577,
"step": 400
},
{
"epoch": 1.7068273092369477,
"grad_norm": 3.5750339031219482,
"learning_rate": 8.5e-06,
"loss": 0.2484,
"step": 425
},
{
"epoch": 1.8072289156626506,
"grad_norm": 3.3576269149780273,
"learning_rate": 9e-06,
"loss": 0.2418,
"step": 450
},
{
"epoch": 1.9076305220883534,
"grad_norm": 3.252156972885132,
"learning_rate": 9.5e-06,
"loss": 0.247,
"step": 475
},
{
"epoch": 2.0080321285140563,
"grad_norm": 2.6496663093566895,
"learning_rate": 1e-05,
"loss": 0.2256,
"step": 500
},
{
"epoch": 2.108433734939759,
"grad_norm": 3.2420847415924072,
"learning_rate": 9.944444444444445e-06,
"loss": 0.1409,
"step": 525
},
{
"epoch": 2.208835341365462,
"grad_norm": 2.1582603454589844,
"learning_rate": 9.88888888888889e-06,
"loss": 0.1367,
"step": 550
},
{
"epoch": 2.3092369477911645,
"grad_norm": 2.708833932876587,
"learning_rate": 9.833333333333333e-06,
"loss": 0.1284,
"step": 575
},
{
"epoch": 2.4096385542168672,
"grad_norm": 2.957946300506592,
"learning_rate": 9.777777777777779e-06,
"loss": 0.1352,
"step": 600
},
{
"epoch": 2.5100401606425704,
"grad_norm": 2.999298334121704,
"learning_rate": 9.722222222222223e-06,
"loss": 0.1346,
"step": 625
},
{
"epoch": 2.610441767068273,
"grad_norm": 2.4836103916168213,
"learning_rate": 9.666666666666667e-06,
"loss": 0.1257,
"step": 650
},
{
"epoch": 2.710843373493976,
"grad_norm": 2.5539424419403076,
"learning_rate": 9.611111111111112e-06,
"loss": 0.1359,
"step": 675
},
{
"epoch": 2.8112449799196786,
"grad_norm": 2.5347061157226562,
"learning_rate": 9.555555555555556e-06,
"loss": 0.1363,
"step": 700
},
{
"epoch": 2.9116465863453813,
"grad_norm": 2.7126588821411133,
"learning_rate": 9.5e-06,
"loss": 0.137,
"step": 725
},
{
"epoch": 3.0120481927710845,
"grad_norm": 1.696603775024414,
"learning_rate": 9.444444444444445e-06,
"loss": 0.1269,
"step": 750
},
{
"epoch": 3.112449799196787,
"grad_norm": 2.1536343097686768,
"learning_rate": 9.38888888888889e-06,
"loss": 0.0626,
"step": 775
},
{
"epoch": 3.21285140562249,
"grad_norm": 2.163684844970703,
"learning_rate": 9.333333333333334e-06,
"loss": 0.0625,
"step": 800
},
{
"epoch": 3.3132530120481927,
"grad_norm": 1.543696641921997,
"learning_rate": 9.277777777777778e-06,
"loss": 0.0633,
"step": 825
},
{
"epoch": 3.4136546184738954,
"grad_norm": 2.221062660217285,
"learning_rate": 9.222222222222224e-06,
"loss": 0.0619,
"step": 850
},
{
"epoch": 3.5140562248995986,
"grad_norm": 2.847231149673462,
"learning_rate": 9.166666666666666e-06,
"loss": 0.065,
"step": 875
},
{
"epoch": 3.6144578313253013,
"grad_norm": 2.0118203163146973,
"learning_rate": 9.111111111111112e-06,
"loss": 0.0671,
"step": 900
},
{
"epoch": 3.714859437751004,
"grad_norm": 1.9212677478790283,
"learning_rate": 9.055555555555556e-06,
"loss": 0.0656,
"step": 925
},
{
"epoch": 3.8152610441767068,
"grad_norm": 2.1838502883911133,
"learning_rate": 9e-06,
"loss": 0.0693,
"step": 950
},
{
"epoch": 3.9156626506024095,
"grad_norm": 2.106473445892334,
"learning_rate": 8.944444444444446e-06,
"loss": 0.0658,
"step": 975
},
{
"epoch": 4.016064257028113,
"grad_norm": 1.0204880237579346,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0619,
"step": 1000
},
{
"epoch": 4.016064257028113,
"eval_loss": 0.3070617914199829,
"eval_runtime": 2205.2781,
"eval_samples_per_second": 2.44,
"eval_steps_per_second": 0.153,
"eval_wer": 0.21380571792118971,
"step": 1000
},
{
"epoch": 4.116465863453815,
"grad_norm": 1.0052565336227417,
"learning_rate": 8.833333333333334e-06,
"loss": 0.0339,
"step": 1025
},
{
"epoch": 4.216867469879518,
"grad_norm": 1.4552688598632812,
"learning_rate": 8.777777777777778e-06,
"loss": 0.033,
"step": 1050
},
{
"epoch": 4.317269076305221,
"grad_norm": 1.3099792003631592,
"learning_rate": 8.722222222222224e-06,
"loss": 0.0346,
"step": 1075
},
{
"epoch": 4.417670682730924,
"grad_norm": 1.9144798517227173,
"learning_rate": 8.666666666666668e-06,
"loss": 0.0343,
"step": 1100
},
{
"epoch": 4.518072289156627,
"grad_norm": 1.7304869890213013,
"learning_rate": 8.611111111111112e-06,
"loss": 0.0353,
"step": 1125
},
{
"epoch": 4.618473895582329,
"grad_norm": 1.9012507200241089,
"learning_rate": 8.555555555555556e-06,
"loss": 0.0338,
"step": 1150
},
{
"epoch": 4.718875502008032,
"grad_norm": 1.9311727285385132,
"learning_rate": 8.5e-06,
"loss": 0.0377,
"step": 1175
},
{
"epoch": 4.8192771084337345,
"grad_norm": 2.47721266746521,
"learning_rate": 8.444444444444446e-06,
"loss": 0.0376,
"step": 1200
},
{
"epoch": 4.919678714859438,
"grad_norm": 2.12300181388855,
"learning_rate": 8.38888888888889e-06,
"loss": 0.0407,
"step": 1225
},
{
"epoch": 5.020080321285141,
"grad_norm": 1.4431580305099487,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0345,
"step": 1250
},
{
"epoch": 5.120481927710843,
"grad_norm": 1.7874999046325684,
"learning_rate": 8.277777777777778e-06,
"loss": 0.0204,
"step": 1275
},
{
"epoch": 5.220883534136546,
"grad_norm": 0.6316199898719788,
"learning_rate": 8.222222222222222e-06,
"loss": 0.02,
"step": 1300
},
{
"epoch": 5.321285140562249,
"grad_norm": 1.5528149604797363,
"learning_rate": 8.166666666666668e-06,
"loss": 0.019,
"step": 1325
},
{
"epoch": 5.421686746987952,
"grad_norm": 0.8070461750030518,
"learning_rate": 8.111111111111112e-06,
"loss": 0.019,
"step": 1350
},
{
"epoch": 5.522088353413655,
"grad_norm": 1.8495326042175293,
"learning_rate": 8.055555555555557e-06,
"loss": 0.0248,
"step": 1375
},
{
"epoch": 5.622489959839357,
"grad_norm": 0.8098943829536438,
"learning_rate": 8.000000000000001e-06,
"loss": 0.022,
"step": 1400
},
{
"epoch": 5.72289156626506,
"grad_norm": 1.6946097612380981,
"learning_rate": 7.944444444444445e-06,
"loss": 0.0202,
"step": 1425
},
{
"epoch": 5.823293172690763,
"grad_norm": 3.2773447036743164,
"learning_rate": 7.88888888888889e-06,
"loss": 0.0199,
"step": 1450
},
{
"epoch": 5.923694779116466,
"grad_norm": 1.4831568002700806,
"learning_rate": 7.833333333333333e-06,
"loss": 0.0216,
"step": 1475
},
{
"epoch": 6.024096385542169,
"grad_norm": 0.6036717891693115,
"learning_rate": 7.77777777777778e-06,
"loss": 0.0226,
"step": 1500
},
{
"epoch": 6.124497991967871,
"grad_norm": 0.7774052619934082,
"learning_rate": 7.722222222222223e-06,
"loss": 0.0138,
"step": 1525
},
{
"epoch": 6.224899598393574,
"grad_norm": 0.9542578458786011,
"learning_rate": 7.666666666666667e-06,
"loss": 0.0116,
"step": 1550
},
{
"epoch": 6.325301204819277,
"grad_norm": 0.5289311408996582,
"learning_rate": 7.611111111111111e-06,
"loss": 0.0117,
"step": 1575
},
{
"epoch": 6.42570281124498,
"grad_norm": 0.8694401383399963,
"learning_rate": 7.555555555555556e-06,
"loss": 0.0126,
"step": 1600
},
{
"epoch": 6.526104417670683,
"grad_norm": 1.1825799942016602,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0116,
"step": 1625
},
{
"epoch": 6.626506024096385,
"grad_norm": 1.5035139322280884,
"learning_rate": 7.444444444444445e-06,
"loss": 0.0141,
"step": 1650
},
{
"epoch": 6.7269076305220885,
"grad_norm": 0.9342186450958252,
"learning_rate": 7.38888888888889e-06,
"loss": 0.0128,
"step": 1675
},
{
"epoch": 6.827309236947791,
"grad_norm": 1.0788260698318481,
"learning_rate": 7.333333333333333e-06,
"loss": 0.0126,
"step": 1700
},
{
"epoch": 6.927710843373494,
"grad_norm": 1.3436901569366455,
"learning_rate": 7.277777777777778e-06,
"loss": 0.0137,
"step": 1725
},
{
"epoch": 7.028112449799197,
"grad_norm": 0.681151807308197,
"learning_rate": 7.222222222222223e-06,
"loss": 0.0124,
"step": 1750
},
{
"epoch": 7.128514056224899,
"grad_norm": 0.9661208987236023,
"learning_rate": 7.166666666666667e-06,
"loss": 0.008,
"step": 1775
},
{
"epoch": 7.228915662650603,
"grad_norm": 1.0090680122375488,
"learning_rate": 7.111111111111112e-06,
"loss": 0.0074,
"step": 1800
},
{
"epoch": 7.329317269076305,
"grad_norm": 1.4430378675460815,
"learning_rate": 7.055555555555557e-06,
"loss": 0.0079,
"step": 1825
},
{
"epoch": 7.429718875502008,
"grad_norm": 1.111413836479187,
"learning_rate": 7e-06,
"loss": 0.0085,
"step": 1850
},
{
"epoch": 7.530120481927711,
"grad_norm": 0.9819089770317078,
"learning_rate": 6.944444444444445e-06,
"loss": 0.0083,
"step": 1875
},
{
"epoch": 7.6305220883534135,
"grad_norm": 0.962616503238678,
"learning_rate": 6.88888888888889e-06,
"loss": 0.008,
"step": 1900
},
{
"epoch": 7.730923694779117,
"grad_norm": 0.8837612271308899,
"learning_rate": 6.833333333333334e-06,
"loss": 0.01,
"step": 1925
},
{
"epoch": 7.831325301204819,
"grad_norm": 0.9247878789901733,
"learning_rate": 6.777777777777779e-06,
"loss": 0.0095,
"step": 1950
},
{
"epoch": 7.931726907630522,
"grad_norm": 0.6101934909820557,
"learning_rate": 6.7222222222222235e-06,
"loss": 0.0092,
"step": 1975
},
{
"epoch": 8.032128514056225,
"grad_norm": 0.9556750655174255,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0069,
"step": 2000
},
{
"epoch": 8.032128514056225,
"eval_loss": 0.36700183153152466,
"eval_runtime": 2231.9147,
"eval_samples_per_second": 2.411,
"eval_steps_per_second": 0.151,
"eval_wer": 0.20452105451509006,
"step": 2000
},
{
"epoch": 8.132530120481928,
"grad_norm": 0.511352002620697,
"learning_rate": 6.6111111111111115e-06,
"loss": 0.0051,
"step": 2025
},
{
"epoch": 8.23293172690763,
"grad_norm": 0.762175977230072,
"learning_rate": 6.555555555555556e-06,
"loss": 0.0058,
"step": 2050
},
{
"epoch": 8.333333333333334,
"grad_norm": 0.09607477486133575,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0038,
"step": 2075
},
{
"epoch": 8.433734939759036,
"grad_norm": 0.6300207376480103,
"learning_rate": 6.444444444444445e-06,
"loss": 0.0048,
"step": 2100
},
{
"epoch": 8.534136546184738,
"grad_norm": 0.21270623803138733,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.0091,
"step": 2125
},
{
"epoch": 8.634538152610443,
"grad_norm": 0.6666727066040039,
"learning_rate": 6.333333333333333e-06,
"loss": 0.0058,
"step": 2150
},
{
"epoch": 8.734939759036145,
"grad_norm": 1.9148590564727783,
"learning_rate": 6.277777777777778e-06,
"loss": 0.0063,
"step": 2175
},
{
"epoch": 8.835341365461847,
"grad_norm": 0.272132009267807,
"learning_rate": 6.222222222222223e-06,
"loss": 0.0056,
"step": 2200
},
{
"epoch": 8.93574297188755,
"grad_norm": 1.3808581829071045,
"learning_rate": 6.166666666666667e-06,
"loss": 0.0062,
"step": 2225
},
{
"epoch": 9.036144578313253,
"grad_norm": 0.6878290772438049,
"learning_rate": 6.111111111111112e-06,
"loss": 0.0034,
"step": 2250
},
{
"epoch": 9.136546184738956,
"grad_norm": 0.1612684726715088,
"learning_rate": 6.055555555555555e-06,
"loss": 0.0034,
"step": 2275
},
{
"epoch": 9.236947791164658,
"grad_norm": 0.4624500572681427,
"learning_rate": 6e-06,
"loss": 0.003,
"step": 2300
},
{
"epoch": 9.337349397590362,
"grad_norm": 0.5339928865432739,
"learning_rate": 5.944444444444445e-06,
"loss": 0.0024,
"step": 2325
},
{
"epoch": 9.437751004016064,
"grad_norm": 0.4839150905609131,
"learning_rate": 5.88888888888889e-06,
"loss": 0.0033,
"step": 2350
},
{
"epoch": 9.538152610441767,
"grad_norm": 0.11668159067630768,
"learning_rate": 5.833333333333334e-06,
"loss": 0.0033,
"step": 2375
},
{
"epoch": 9.638554216867469,
"grad_norm": 0.2360651195049286,
"learning_rate": 5.777777777777778e-06,
"loss": 0.0031,
"step": 2400
},
{
"epoch": 9.738955823293173,
"grad_norm": 0.4457962214946747,
"learning_rate": 5.722222222222222e-06,
"loss": 0.0025,
"step": 2425
},
{
"epoch": 9.839357429718875,
"grad_norm": 0.41899001598358154,
"learning_rate": 5.666666666666667e-06,
"loss": 0.0031,
"step": 2450
},
{
"epoch": 9.939759036144578,
"grad_norm": 0.4257371425628662,
"learning_rate": 5.611111111111112e-06,
"loss": 0.0052,
"step": 2475
},
{
"epoch": 10.040160642570282,
"grad_norm": 0.4485076367855072,
"learning_rate": 5.555555555555557e-06,
"loss": 0.0025,
"step": 2500
},
{
"epoch": 10.140562248995984,
"grad_norm": 0.30146104097366333,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0021,
"step": 2525
},
{
"epoch": 10.240963855421686,
"grad_norm": 0.13998575508594513,
"learning_rate": 5.444444444444445e-06,
"loss": 0.0025,
"step": 2550
},
{
"epoch": 10.34136546184739,
"grad_norm": 1.0045596361160278,
"learning_rate": 5.388888888888889e-06,
"loss": 0.0023,
"step": 2575
},
{
"epoch": 10.441767068273093,
"grad_norm": 0.07078930735588074,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0017,
"step": 2600
},
{
"epoch": 10.542168674698795,
"grad_norm": 0.042963068932294846,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.0019,
"step": 2625
},
{
"epoch": 10.642570281124499,
"grad_norm": 0.13671617209911346,
"learning_rate": 5.2222222222222226e-06,
"loss": 0.0018,
"step": 2650
},
{
"epoch": 10.742971887550201,
"grad_norm": 0.6172053217887878,
"learning_rate": 5.1666666666666675e-06,
"loss": 0.0048,
"step": 2675
},
{
"epoch": 10.843373493975903,
"grad_norm": 2.069375991821289,
"learning_rate": 5.1111111111111115e-06,
"loss": 0.0022,
"step": 2700
},
{
"epoch": 10.943775100401606,
"grad_norm": 0.2148715853691101,
"learning_rate": 5.0555555555555555e-06,
"loss": 0.0021,
"step": 2725
},
{
"epoch": 11.04417670682731,
"grad_norm": 0.045111846178770065,
"learning_rate": 5e-06,
"loss": 0.0012,
"step": 2750
},
{
"epoch": 11.144578313253012,
"grad_norm": 0.03575390577316284,
"learning_rate": 4.944444444444445e-06,
"loss": 0.0009,
"step": 2775
},
{
"epoch": 11.244979919678714,
"grad_norm": 0.036444906145334244,
"learning_rate": 4.888888888888889e-06,
"loss": 0.0012,
"step": 2800
},
{
"epoch": 11.345381526104418,
"grad_norm": 0.18367384374141693,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0015,
"step": 2825
},
{
"epoch": 11.44578313253012,
"grad_norm": 0.03629644960165024,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0011,
"step": 2850
},
{
"epoch": 11.546184738955823,
"grad_norm": 0.08484747260808945,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0011,
"step": 2875
},
{
"epoch": 11.646586345381525,
"grad_norm": 0.2961013913154602,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0011,
"step": 2900
},
{
"epoch": 11.74698795180723,
"grad_norm": 0.04454226791858673,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0019,
"step": 2925
},
{
"epoch": 11.847389558232932,
"grad_norm": 0.06762082129716873,
"learning_rate": 4.555555555555556e-06,
"loss": 0.001,
"step": 2950
},
{
"epoch": 11.947791164658634,
"grad_norm": 0.051877710968256,
"learning_rate": 4.5e-06,
"loss": 0.0019,
"step": 2975
},
{
"epoch": 12.048192771084338,
"grad_norm": 0.04771376773715019,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0013,
"step": 3000
},
{
"epoch": 12.048192771084338,
"eval_loss": 0.3645249009132385,
"eval_runtime": 2193.5952,
"eval_samples_per_second": 2.453,
"eval_steps_per_second": 0.154,
"eval_wer": 0.19776134239930018,
"step": 3000
},
{
"epoch": 12.14859437751004,
"grad_norm": 0.020401790738105774,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0007,
"step": 3025
},
{
"epoch": 12.248995983935743,
"grad_norm": 0.037684116512537,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0006,
"step": 3050
},
{
"epoch": 12.349397590361447,
"grad_norm": 0.025216449052095413,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0005,
"step": 3075
},
{
"epoch": 12.449799196787149,
"grad_norm": 0.021326890215277672,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0005,
"step": 3100
},
{
"epoch": 12.550200803212851,
"grad_norm": 0.02904532290995121,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0009,
"step": 3125
},
{
"epoch": 12.650602409638553,
"grad_norm": 0.15470072627067566,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0006,
"step": 3150
},
{
"epoch": 12.751004016064257,
"grad_norm": 0.5353085398674011,
"learning_rate": 4.055555555555556e-06,
"loss": 0.0008,
"step": 3175
},
{
"epoch": 12.85140562248996,
"grad_norm": 0.1263090819120407,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0005,
"step": 3200
},
{
"epoch": 12.951807228915662,
"grad_norm": 0.018346522003412247,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0005,
"step": 3225
},
{
"epoch": 13.052208835341366,
"grad_norm": 0.012502867728471756,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0005,
"step": 3250
},
{
"epoch": 13.152610441767068,
"grad_norm": 0.035849809646606445,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0005,
"step": 3275
},
{
"epoch": 13.25301204819277,
"grad_norm": 0.013340278528630733,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0004,
"step": 3300
},
{
"epoch": 13.353413654618475,
"grad_norm": 0.01812613196671009,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0004,
"step": 3325
},
{
"epoch": 13.453815261044177,
"grad_norm": 0.016993574798107147,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0004,
"step": 3350
},
{
"epoch": 13.55421686746988,
"grad_norm": 0.012604492716491222,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0004,
"step": 3375
},
{
"epoch": 13.654618473895582,
"grad_norm": 0.013149112462997437,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0007,
"step": 3400
},
{
"epoch": 13.755020080321286,
"grad_norm": 0.01593812368810177,
"learning_rate": 3.5e-06,
"loss": 0.0004,
"step": 3425
},
{
"epoch": 13.855421686746988,
"grad_norm": 0.01589050143957138,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0004,
"step": 3450
},
{
"epoch": 13.95582329317269,
"grad_norm": 0.014971195720136166,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0004,
"step": 3475
},
{
"epoch": 14.056224899598394,
"grad_norm": 0.01241573691368103,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0003,
"step": 3500
},
{
"epoch": 14.156626506024097,
"grad_norm": 0.011940378695726395,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0003,
"step": 3525
},
{
"epoch": 14.257028112449799,
"grad_norm": 0.01354218740016222,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.0003,
"step": 3550
},
{
"epoch": 14.357429718875501,
"grad_norm": 0.011458562687039375,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0003,
"step": 3575
},
{
"epoch": 14.457831325301205,
"grad_norm": 0.010744108818471432,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0003,
"step": 3600
},
{
"epoch": 14.558232931726907,
"grad_norm": 0.01174489688128233,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0003,
"step": 3625
},
{
"epoch": 14.65863453815261,
"grad_norm": 0.01333660539239645,
"learning_rate": 3e-06,
"loss": 0.0003,
"step": 3650
},
{
"epoch": 14.759036144578314,
"grad_norm": 0.012421938590705395,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0004,
"step": 3675
},
{
"epoch": 14.859437751004016,
"grad_norm": 0.01224998664110899,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0003,
"step": 3700
},
{
"epoch": 14.959839357429718,
"grad_norm": 0.015473966486752033,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0003,
"step": 3725
},
{
"epoch": 15.060240963855422,
"grad_norm": 0.012373683042824268,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0003,
"step": 3750
},
{
"epoch": 15.160642570281125,
"grad_norm": 0.010334338992834091,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.0004,
"step": 3775
},
{
"epoch": 15.261044176706827,
"grad_norm": 0.015428266488015652,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0003,
"step": 3800
},
{
"epoch": 15.36144578313253,
"grad_norm": 0.011229559779167175,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.0003,
"step": 3825
},
{
"epoch": 15.461847389558233,
"grad_norm": 0.009039835073053837,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0003,
"step": 3850
},
{
"epoch": 15.562248995983936,
"grad_norm": 0.0124340346083045,
"learning_rate": 2.5e-06,
"loss": 0.0003,
"step": 3875
},
{
"epoch": 15.662650602409638,
"grad_norm": 0.009659750387072563,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0003,
"step": 3900
},
{
"epoch": 15.763052208835342,
"grad_norm": 0.013714014552533627,
"learning_rate": 2.388888888888889e-06,
"loss": 0.0003,
"step": 3925
},
{
"epoch": 15.863453815261044,
"grad_norm": 0.00784530583769083,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0003,
"step": 3950
},
{
"epoch": 15.963855421686747,
"grad_norm": 0.010499561205506325,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0003,
"step": 3975
},
{
"epoch": 16.06425702811245,
"grad_norm": 0.01295757107436657,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0003,
"step": 4000
},
{
"epoch": 16.06425702811245,
"eval_loss": 0.4106931984424591,
"eval_runtime": 2194.2451,
"eval_samples_per_second": 2.452,
"eval_steps_per_second": 0.154,
"eval_wer": 0.19143902342041433,
"step": 4000
},
{
"epoch": 16.164658634538153,
"grad_norm": 0.010303654707968235,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0003,
"step": 4025
},
{
"epoch": 16.265060240963855,
"grad_norm": 0.008325839415192604,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.0003,
"step": 4050
},
{
"epoch": 16.365461847389557,
"grad_norm": 0.00958819966763258,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0003,
"step": 4075
},
{
"epoch": 16.46586345381526,
"grad_norm": 0.008696039207279682,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0003,
"step": 4100
},
{
"epoch": 16.566265060240966,
"grad_norm": 0.010361140593886375,
"learning_rate": 1.944444444444445e-06,
"loss": 0.0003,
"step": 4125
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.008520281873643398,
"learning_rate": 1.888888888888889e-06,
"loss": 0.0003,
"step": 4150
},
{
"epoch": 16.76706827309237,
"grad_norm": 0.012094419449567795,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0004,
"step": 4175
},
{
"epoch": 16.867469879518072,
"grad_norm": 0.00969509407877922,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0003,
"step": 4200
},
{
"epoch": 16.967871485943775,
"grad_norm": 0.010107293725013733,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0003,
"step": 4225
},
{
"epoch": 17.068273092369477,
"grad_norm": 0.008444724604487419,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0002,
"step": 4250
},
{
"epoch": 17.16867469879518,
"grad_norm": 0.008527095429599285,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.0003,
"step": 4275
},
{
"epoch": 17.269076305220885,
"grad_norm": 0.007767422124743462,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.0003,
"step": 4300
},
{
"epoch": 17.369477911646587,
"grad_norm": 0.008298359811306,
"learning_rate": 1.5e-06,
"loss": 0.0002,
"step": 4325
},
{
"epoch": 17.46987951807229,
"grad_norm": 0.00869645643979311,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0002,
"step": 4350
},
{
"epoch": 17.570281124497992,
"grad_norm": 0.009767497889697552,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0002,
"step": 4375
},
{
"epoch": 17.670682730923694,
"grad_norm": 0.009676006622612476,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0002,
"step": 4400
},
{
"epoch": 17.771084337349397,
"grad_norm": 0.010664808563888073,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.0003,
"step": 4425
},
{
"epoch": 17.8714859437751,
"grad_norm": 0.011917660012841225,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0003,
"step": 4450
},
{
"epoch": 17.971887550200805,
"grad_norm": 0.008082253858447075,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0002,
"step": 4475
},
{
"epoch": 18.072289156626507,
"grad_norm": 0.008449643850326538,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0002,
"step": 4500
},
{
"epoch": 18.17269076305221,
"grad_norm": 0.00893787294626236,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.0002,
"step": 4525
},
{
"epoch": 18.27309236947791,
"grad_norm": 0.007984068244695663,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0003,
"step": 4550
},
{
"epoch": 18.373493975903614,
"grad_norm": 0.008762707002460957,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0002,
"step": 4575
},
{
"epoch": 18.473895582329316,
"grad_norm": 0.008354073390364647,
"learning_rate": 8.88888888888889e-07,
"loss": 0.0002,
"step": 4600
},
{
"epoch": 18.57429718875502,
"grad_norm": 0.010145510546863079,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0002,
"step": 4625
},
{
"epoch": 18.674698795180724,
"grad_norm": 0.009271888993680477,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0002,
"step": 4650
},
{
"epoch": 18.775100401606426,
"grad_norm": 0.007404220290482044,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0002,
"step": 4675
},
{
"epoch": 18.87550200803213,
"grad_norm": 0.00828209612518549,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0002,
"step": 4700
},
{
"epoch": 18.97590361445783,
"grad_norm": 0.008047865703701973,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0002,
"step": 4725
},
{
"epoch": 19.076305220883533,
"grad_norm": 0.00727940583601594,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0002,
"step": 4750
},
{
"epoch": 19.176706827309236,
"grad_norm": 0.008462085388600826,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0002,
"step": 4775
},
{
"epoch": 19.27710843373494,
"grad_norm": 0.008368249051272869,
"learning_rate": 4.444444444444445e-07,
"loss": 0.0002,
"step": 4800
},
{
"epoch": 19.377510040160644,
"grad_norm": 0.007355119101703167,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0002,
"step": 4825
},
{
"epoch": 19.477911646586346,
"grad_norm": 0.00911016296595335,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0002,
"step": 4850
},
{
"epoch": 19.57831325301205,
"grad_norm": 0.00813527312129736,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0002,
"step": 4875
},
{
"epoch": 19.67871485943775,
"grad_norm": 0.007565053179860115,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0002,
"step": 4900
},
{
"epoch": 19.779116465863453,
"grad_norm": 0.008976846002042294,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.0002,
"step": 4925
},
{
"epoch": 19.879518072289155,
"grad_norm": 0.007831977680325508,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.0002,
"step": 4950
},
{
"epoch": 19.97991967871486,
"grad_norm": 0.0071573760360479355,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0002,
"step": 4975
},
{
"epoch": 20.080321285140563,
"grad_norm": 0.008769778534770012,
"learning_rate": 0.0,
"loss": 0.0002,
"step": 5000
},
{
"epoch": 20.080321285140563,
"eval_loss": 0.41853219270706177,
"eval_runtime": 2165.4621,
"eval_samples_per_second": 2.485,
"eval_steps_per_second": 0.156,
"eval_wer": 0.19108115630840192,
"step": 5000
},
{
"epoch": 20.080321285140563,
"step": 5000,
"total_flos": 5.435997290496e+20,
"train_loss": 0.05159526972509921,
"train_runtime": 59448.8268,
"train_samples_per_second": 2.691,
"train_steps_per_second": 0.084
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 21,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.435997290496e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}