wav2vec2-xlsr-53-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
9e6382b verified
raw
history blame
49.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.1788282447247436,
"eval_steps": 200,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007858854964831625,
"eval_loss": 3.1892831325531006,
"eval_runtime": 159.5257,
"eval_samples_per_second": 35.455,
"eval_steps_per_second": 4.432,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.01571770992966325,
"eval_loss": 2.780208110809326,
"eval_runtime": 157.5706,
"eval_samples_per_second": 35.895,
"eval_steps_per_second": 4.487,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 0.01964713741207906,
"grad_norm": 4.9997968673706055,
"learning_rate": 0.0002982,
"loss": 4.719,
"step": 500
},
{
"epoch": 0.023576564894494872,
"eval_loss": 1.4220576286315918,
"eval_runtime": 158.6041,
"eval_samples_per_second": 35.661,
"eval_steps_per_second": 4.458,
"eval_wer": 0.8876923817624497,
"step": 600
},
{
"epoch": 0.0314354198593265,
"eval_loss": 1.227359414100647,
"eval_runtime": 158.5238,
"eval_samples_per_second": 35.679,
"eval_steps_per_second": 4.46,
"eval_wer": 0.8224390557044503,
"step": 800
},
{
"epoch": 0.03929427482415812,
"grad_norm": 2.6001393795013428,
"learning_rate": 0.0002949457627118644,
"loss": 1.0441,
"step": 1000
},
{
"epoch": 0.03929427482415812,
"eval_loss": 1.1094719171524048,
"eval_runtime": 161.2687,
"eval_samples_per_second": 35.072,
"eval_steps_per_second": 4.384,
"eval_wer": 0.7886729469917029,
"step": 1000
},
{
"epoch": 0.047153129788989744,
"eval_loss": 1.091428279876709,
"eval_runtime": 158.6043,
"eval_samples_per_second": 35.661,
"eval_steps_per_second": 4.458,
"eval_wer": 0.7549228868097125,
"step": 1200
},
{
"epoch": 0.055011984753821366,
"eval_loss": 1.0177329778671265,
"eval_runtime": 159.8661,
"eval_samples_per_second": 35.38,
"eval_steps_per_second": 4.422,
"eval_wer": 0.7354881160629745,
"step": 1400
},
{
"epoch": 0.05894141223623718,
"grad_norm": 2.7494542598724365,
"learning_rate": 0.0002898610169491525,
"loss": 0.8033,
"step": 1500
},
{
"epoch": 0.062870839718653,
"eval_loss": 0.9907466769218445,
"eval_runtime": 159.6438,
"eval_samples_per_second": 35.429,
"eval_steps_per_second": 4.429,
"eval_wer": 0.7232912326876475,
"step": 1600
},
{
"epoch": 0.07072969468348461,
"eval_loss": 0.9761303067207336,
"eval_runtime": 159.1571,
"eval_samples_per_second": 35.537,
"eval_steps_per_second": 4.442,
"eval_wer": 0.7145287348943204,
"step": 1800
},
{
"epoch": 0.07858854964831624,
"grad_norm": 2.753251314163208,
"learning_rate": 0.00028477627118644064,
"loss": 0.7227,
"step": 2000
},
{
"epoch": 0.07858854964831624,
"eval_loss": 0.9555273056030273,
"eval_runtime": 159.3414,
"eval_samples_per_second": 35.496,
"eval_steps_per_second": 4.437,
"eval_wer": 0.6902794049204796,
"step": 2000
},
{
"epoch": 0.08644740461314787,
"eval_loss": 0.8994919061660767,
"eval_runtime": 159.0426,
"eval_samples_per_second": 35.563,
"eval_steps_per_second": 4.445,
"eval_wer": 0.6747765242092086,
"step": 2200
},
{
"epoch": 0.09430625957797949,
"eval_loss": 0.8897404670715332,
"eval_runtime": 158.5492,
"eval_samples_per_second": 35.673,
"eval_steps_per_second": 4.459,
"eval_wer": 0.66655967646162,
"step": 2400
},
{
"epoch": 0.0982356870603953,
"grad_norm": 2.404499053955078,
"learning_rate": 0.00027969152542372877,
"loss": 0.6794,
"step": 2500
},
{
"epoch": 0.10216511454281112,
"eval_loss": 0.8826168775558472,
"eval_runtime": 159.8456,
"eval_samples_per_second": 35.384,
"eval_steps_per_second": 4.423,
"eval_wer": 0.6559676461619939,
"step": 2600
},
{
"epoch": 0.11002396950764273,
"eval_loss": 0.8744593858718872,
"eval_runtime": 159.6838,
"eval_samples_per_second": 35.42,
"eval_steps_per_second": 4.427,
"eval_wer": 0.6445571407937604,
"step": 2800
},
{
"epoch": 0.11788282447247436,
"grad_norm": 2.406255006790161,
"learning_rate": 0.00027460677966101695,
"loss": 0.6513,
"step": 3000
},
{
"epoch": 0.11788282447247436,
"eval_loss": 0.8450209498405457,
"eval_runtime": 159.2776,
"eval_samples_per_second": 35.51,
"eval_steps_per_second": 4.439,
"eval_wer": 0.6436905201328819,
"step": 3000
},
{
"epoch": 0.125741679437306,
"eval_loss": 0.8596389293670654,
"eval_runtime": 159.3028,
"eval_samples_per_second": 35.505,
"eval_steps_per_second": 4.438,
"eval_wer": 0.6510888928118631,
"step": 3200
},
{
"epoch": 0.13360053440213762,
"eval_loss": 0.8597909212112427,
"eval_runtime": 159.9064,
"eval_samples_per_second": 35.371,
"eval_steps_per_second": 4.421,
"eval_wer": 0.6376402240374893,
"step": 3400
},
{
"epoch": 0.1375299618845534,
"grad_norm": 2.2046961784362793,
"learning_rate": 0.000269522033898305,
"loss": 0.6147,
"step": 3500
},
{
"epoch": 0.14145938936696922,
"eval_loss": 0.8516111969947815,
"eval_runtime": 160.4338,
"eval_samples_per_second": 35.254,
"eval_steps_per_second": 4.407,
"eval_wer": 0.6375439328529473,
"step": 3600
},
{
"epoch": 0.14931824433180085,
"eval_loss": 0.8251617550849915,
"eval_runtime": 160.6004,
"eval_samples_per_second": 35.218,
"eval_steps_per_second": 4.402,
"eval_wer": 0.6100367511354335,
"step": 3800
},
{
"epoch": 0.15717709929663248,
"grad_norm": 1.520897388458252,
"learning_rate": 0.0002644372881355932,
"loss": 0.6092,
"step": 4000
},
{
"epoch": 0.15717709929663248,
"eval_loss": 0.8579581379890442,
"eval_runtime": 159.0993,
"eval_samples_per_second": 35.55,
"eval_steps_per_second": 4.444,
"eval_wer": 0.6822551395419749,
"step": 4000
},
{
"epoch": 0.1650359542614641,
"eval_loss": 0.8204948306083679,
"eval_runtime": 159.818,
"eval_samples_per_second": 35.39,
"eval_steps_per_second": 4.424,
"eval_wer": 0.6135674279019756,
"step": 4200
},
{
"epoch": 0.17289480922629574,
"eval_loss": 0.8033376336097717,
"eval_runtime": 159.2754,
"eval_samples_per_second": 35.511,
"eval_steps_per_second": 4.439,
"eval_wer": 0.6385068446983678,
"step": 4400
},
{
"epoch": 0.17682423670871153,
"grad_norm": 2.3011837005615234,
"learning_rate": 0.00025936271186440674,
"loss": 0.5928,
"step": 4500
},
{
"epoch": 0.18075366419112734,
"eval_loss": 0.7927771210670471,
"eval_runtime": 160.1079,
"eval_samples_per_second": 35.326,
"eval_steps_per_second": 4.416,
"eval_wer": 0.6005039238657701,
"step": 4600
},
{
"epoch": 0.18861251915595897,
"eval_loss": 0.7911030054092407,
"eval_runtime": 160.2559,
"eval_samples_per_second": 35.294,
"eval_steps_per_second": 4.412,
"eval_wer": 0.5923512702412094,
"step": 4800
},
{
"epoch": 0.1964713741207906,
"grad_norm": 6.133739948272705,
"learning_rate": 0.0002542779661016949,
"loss": 0.5681,
"step": 5000
},
{
"epoch": 0.1964713741207906,
"eval_loss": 0.7968648076057434,
"eval_runtime": 160.0012,
"eval_samples_per_second": 35.35,
"eval_steps_per_second": 4.419,
"eval_wer": 0.5944375792396206,
"step": 5000
},
{
"epoch": 0.20433022908562223,
"eval_loss": 0.7932958602905273,
"eval_runtime": 159.7818,
"eval_samples_per_second": 35.398,
"eval_steps_per_second": 4.425,
"eval_wer": 0.5898958450353871,
"step": 5200
},
{
"epoch": 0.21218908405045384,
"eval_loss": 0.7830468416213989,
"eval_runtime": 160.2841,
"eval_samples_per_second": 35.287,
"eval_steps_per_second": 4.411,
"eval_wer": 0.6012742533421065,
"step": 5400
},
{
"epoch": 0.21611851153286965,
"grad_norm": 2.9641568660736084,
"learning_rate": 0.00024919322033898305,
"loss": 0.5806,
"step": 5500
},
{
"epoch": 0.22004793901528547,
"eval_loss": 0.7702626585960388,
"eval_runtime": 160.806,
"eval_samples_per_second": 35.173,
"eval_steps_per_second": 4.397,
"eval_wer": 0.5789026014668357,
"step": 5600
},
{
"epoch": 0.2279067939801171,
"eval_loss": 0.7665734887123108,
"eval_runtime": 160.6796,
"eval_samples_per_second": 35.2,
"eval_steps_per_second": 4.4,
"eval_wer": 0.589831650912359,
"step": 5800
},
{
"epoch": 0.23576564894494872,
"grad_norm": 2.6571083068847656,
"learning_rate": 0.00024410847457627117,
"loss": 0.5608,
"step": 6000
},
{
"epoch": 0.23576564894494872,
"eval_loss": 0.7580233216285706,
"eval_runtime": 160.371,
"eval_samples_per_second": 35.268,
"eval_steps_per_second": 4.409,
"eval_wer": 0.5694500168509573,
"step": 6000
},
{
"epoch": 0.24362450390978035,
"eval_loss": 0.7478851675987244,
"eval_runtime": 162.164,
"eval_samples_per_second": 34.878,
"eval_steps_per_second": 4.36,
"eval_wer": 0.5650848164850508,
"step": 6200
},
{
"epoch": 0.251483358874612,
"eval_loss": 0.7638738751411438,
"eval_runtime": 160.257,
"eval_samples_per_second": 35.293,
"eval_steps_per_second": 4.412,
"eval_wer": 0.5846640240086021,
"step": 6400
},
{
"epoch": 0.2554127863570278,
"grad_norm": 1.5677289962768555,
"learning_rate": 0.0002390237288135593,
"loss": 0.5333,
"step": 6500
},
{
"epoch": 0.2593422138394436,
"eval_loss": 0.7297228574752808,
"eval_runtime": 160.7223,
"eval_samples_per_second": 35.191,
"eval_steps_per_second": 4.399,
"eval_wer": 0.5676044358139012,
"step": 6600
},
{
"epoch": 0.26720106880427524,
"eval_loss": 0.7441245913505554,
"eval_runtime": 160.37,
"eval_samples_per_second": 35.268,
"eval_steps_per_second": 4.409,
"eval_wer": 0.5590345203896583,
"step": 6800
},
{
"epoch": 0.2750599237691068,
"grad_norm": 3.644160032272339,
"learning_rate": 0.00023393898305084743,
"loss": 0.5406,
"step": 7000
},
{
"epoch": 0.2750599237691068,
"eval_loss": 0.7404661774635315,
"eval_runtime": 160.5995,
"eval_samples_per_second": 35.218,
"eval_steps_per_second": 4.402,
"eval_wer": 0.5491165283818267,
"step": 7000
},
{
"epoch": 0.28291877873393845,
"eval_loss": 0.7237815856933594,
"eval_runtime": 160.4373,
"eval_samples_per_second": 35.254,
"eval_steps_per_second": 4.407,
"eval_wer": 0.5529039816404808,
"step": 7200
},
{
"epoch": 0.2907776336987701,
"eval_loss": 0.7328305840492249,
"eval_runtime": 161.925,
"eval_samples_per_second": 34.93,
"eval_steps_per_second": 4.366,
"eval_wer": 0.5543964950008826,
"step": 7400
},
{
"epoch": 0.2947070611811859,
"grad_norm": 3.6030795574188232,
"learning_rate": 0.00022885423728813558,
"loss": 0.535,
"step": 7500
},
{
"epoch": 0.2986364886636017,
"eval_loss": 0.7263395190238953,
"eval_runtime": 160.6865,
"eval_samples_per_second": 35.199,
"eval_steps_per_second": 4.4,
"eval_wer": 0.5598690439890228,
"step": 7600
},
{
"epoch": 0.30649534362843334,
"eval_loss": 0.7421374320983887,
"eval_runtime": 160.2249,
"eval_samples_per_second": 35.3,
"eval_steps_per_second": 4.413,
"eval_wer": 0.5594357336585836,
"step": 7800
},
{
"epoch": 0.31435419859326497,
"grad_norm": 3.376089096069336,
"learning_rate": 0.0002237694915254237,
"loss": 0.5195,
"step": 8000
},
{
"epoch": 0.31435419859326497,
"eval_loss": 0.7434934377670288,
"eval_runtime": 161.1972,
"eval_samples_per_second": 35.087,
"eval_steps_per_second": 4.386,
"eval_wer": 0.5543804464701256,
"step": 8000
},
{
"epoch": 0.3222130535580966,
"eval_loss": 0.7186952233314514,
"eval_runtime": 162.677,
"eval_samples_per_second": 34.768,
"eval_steps_per_second": 4.346,
"eval_wer": 0.5423921939946398,
"step": 8200
},
{
"epoch": 0.3300719085229282,
"eval_loss": 0.6976691484451294,
"eval_runtime": 159.5716,
"eval_samples_per_second": 35.445,
"eval_steps_per_second": 4.431,
"eval_wer": 0.5353308404615558,
"step": 8400
},
{
"epoch": 0.33400133600534404,
"grad_norm": 1.9758217334747314,
"learning_rate": 0.00021868474576271186,
"loss": 0.5023,
"step": 8500
},
{
"epoch": 0.33793076348775986,
"eval_loss": 0.6949788928031921,
"eval_runtime": 160.6972,
"eval_samples_per_second": 35.197,
"eval_steps_per_second": 4.4,
"eval_wer": 0.5385565951437146,
"step": 8600
},
{
"epoch": 0.3457896184525915,
"eval_loss": 0.7155033946037292,
"eval_runtime": 159.9521,
"eval_samples_per_second": 35.361,
"eval_steps_per_second": 4.42,
"eval_wer": 0.5450883471618173,
"step": 8800
},
{
"epoch": 0.35364847341742306,
"grad_norm": 3.3146464824676514,
"learning_rate": 0.00021361016949152543,
"loss": 0.5106,
"step": 9000
},
{
"epoch": 0.35364847341742306,
"eval_loss": 0.6857195496559143,
"eval_runtime": 160.5474,
"eval_samples_per_second": 35.229,
"eval_steps_per_second": 4.404,
"eval_wer": 0.5379467509749483,
"step": 9000
},
{
"epoch": 0.3615073283822547,
"eval_loss": 0.68482905626297,
"eval_runtime": 161.0662,
"eval_samples_per_second": 35.116,
"eval_steps_per_second": 4.389,
"eval_wer": 0.5329075123172473,
"step": 9200
},
{
"epoch": 0.3693661833470863,
"eval_loss": 0.6732301712036133,
"eval_runtime": 160.6243,
"eval_samples_per_second": 35.213,
"eval_steps_per_second": 4.402,
"eval_wer": 0.5202291730192101,
"step": 9400
},
{
"epoch": 0.37329561082950213,
"grad_norm": 4.61689567565918,
"learning_rate": 0.00020852542372881352,
"loss": 0.4968,
"step": 9500
},
{
"epoch": 0.37722503831191795,
"eval_loss": 0.6839133501052856,
"eval_runtime": 161.2367,
"eval_samples_per_second": 35.079,
"eval_steps_per_second": 4.385,
"eval_wer": 0.5274510118598642,
"step": 9600
},
{
"epoch": 0.3850838932767496,
"eval_loss": 0.6766842603683472,
"eval_runtime": 160.827,
"eval_samples_per_second": 35.168,
"eval_steps_per_second": 4.396,
"eval_wer": 0.5198279597502848,
"step": 9800
},
{
"epoch": 0.3929427482415812,
"grad_norm": 3.5624563694000244,
"learning_rate": 0.0002034508474576271,
"loss": 0.4824,
"step": 10000
},
{
"epoch": 0.3929427482415812,
"eval_loss": 0.6718243956565857,
"eval_runtime": 161.1794,
"eval_samples_per_second": 35.091,
"eval_steps_per_second": 4.386,
"eval_wer": 0.5334531623629857,
"step": 10000
},
{
"epoch": 0.40080160320641284,
"eval_loss": 0.6593254804611206,
"eval_runtime": 160.9535,
"eval_samples_per_second": 35.141,
"eval_steps_per_second": 4.393,
"eval_wer": 0.5175169713212755,
"step": 10200
},
{
"epoch": 0.40866045817124447,
"eval_loss": 0.6799437403678894,
"eval_runtime": 159.6664,
"eval_samples_per_second": 35.424,
"eval_steps_per_second": 4.428,
"eval_wer": 0.5173885830752195,
"step": 10400
},
{
"epoch": 0.4125898856536603,
"grad_norm": 2.189781427383423,
"learning_rate": 0.00019836610169491524,
"loss": 0.48,
"step": 10500
},
{
"epoch": 0.4165193131360761,
"eval_loss": 0.6662308573722839,
"eval_runtime": 160.8779,
"eval_samples_per_second": 35.157,
"eval_steps_per_second": 4.395,
"eval_wer": 0.5128949944632569,
"step": 10600
},
{
"epoch": 0.42437816810090767,
"eval_loss": 0.6619213223457336,
"eval_runtime": 160.6185,
"eval_samples_per_second": 35.214,
"eval_steps_per_second": 4.402,
"eval_wer": 0.5005536743111169,
"step": 10800
},
{
"epoch": 0.4322370230657393,
"grad_norm": 10.41739559173584,
"learning_rate": 0.00019328135593220337,
"loss": 0.4693,
"step": 11000
},
{
"epoch": 0.4322370230657393,
"eval_loss": 0.6576216220855713,
"eval_runtime": 160.9844,
"eval_samples_per_second": 35.134,
"eval_steps_per_second": 4.392,
"eval_wer": 0.519940299465584,
"step": 11000
},
{
"epoch": 0.44009587803057093,
"eval_loss": 0.6406122446060181,
"eval_runtime": 160.4456,
"eval_samples_per_second": 35.252,
"eval_steps_per_second": 4.406,
"eval_wer": 0.5018696538331916,
"step": 11200
},
{
"epoch": 0.44795473299540256,
"eval_loss": 0.6408420205116272,
"eval_runtime": 161.6075,
"eval_samples_per_second": 34.998,
"eval_steps_per_second": 4.375,
"eval_wer": 0.5066039704065093,
"step": 11400
},
{
"epoch": 0.4518841604778184,
"grad_norm": 3.5733156204223633,
"learning_rate": 0.00018819661016949152,
"loss": 0.4691,
"step": 11500
},
{
"epoch": 0.4558135879602342,
"eval_loss": 0.6476473212242126,
"eval_runtime": 161.2518,
"eval_samples_per_second": 35.076,
"eval_steps_per_second": 4.384,
"eval_wer": 0.5019498964869766,
"step": 11600
},
{
"epoch": 0.4636724429250658,
"eval_loss": 0.6423429846763611,
"eval_runtime": 161.3676,
"eval_samples_per_second": 35.05,
"eval_steps_per_second": 4.381,
"eval_wer": 0.4945996694002664,
"step": 11800
},
{
"epoch": 0.47153129788989745,
"grad_norm": 2.3962831497192383,
"learning_rate": 0.00018311186440677962,
"loss": 0.4444,
"step": 12000
},
{
"epoch": 0.47153129788989745,
"eval_loss": 0.6374172568321228,
"eval_runtime": 162.3359,
"eval_samples_per_second": 34.841,
"eval_steps_per_second": 4.355,
"eval_wer": 0.4975846961210701,
"step": 12000
},
{
"epoch": 0.4793901528547291,
"eval_loss": 0.6312358379364014,
"eval_runtime": 162.5747,
"eval_samples_per_second": 34.79,
"eval_steps_per_second": 4.349,
"eval_wer": 0.4961403283529393,
"step": 12200
},
{
"epoch": 0.4872490078195607,
"eval_loss": 0.6170411109924316,
"eval_runtime": 161.58,
"eval_samples_per_second": 35.004,
"eval_steps_per_second": 4.376,
"eval_wer": 0.4818571359792011,
"step": 12400
},
{
"epoch": 0.4911784353019765,
"grad_norm": 2.623764753341675,
"learning_rate": 0.0001780372881355932,
"loss": 0.4474,
"step": 12500
},
{
"epoch": 0.49510786278439234,
"eval_loss": 0.6300910115242004,
"eval_runtime": 164.417,
"eval_samples_per_second": 34.4,
"eval_steps_per_second": 4.3,
"eval_wer": 0.49325159281667763,
"step": 12600
},
{
"epoch": 0.502966717749224,
"eval_loss": 0.6253496408462524,
"eval_runtime": 161.3418,
"eval_samples_per_second": 35.056,
"eval_steps_per_second": 4.382,
"eval_wer": 0.4862383848758646,
"step": 12800
},
{
"epoch": 0.5108255727140556,
"grad_norm": 2.9566869735717773,
"learning_rate": 0.00017295254237288134,
"loss": 0.4471,
"step": 13000
},
{
"epoch": 0.5108255727140556,
"eval_loss": 0.622020959854126,
"eval_runtime": 161.5861,
"eval_samples_per_second": 35.003,
"eval_steps_per_second": 4.375,
"eval_wer": 0.4849224053537899,
"step": 13000
},
{
"epoch": 0.5186844276788872,
"eval_loss": 0.6201028823852539,
"eval_runtime": 160.9515,
"eval_samples_per_second": 35.141,
"eval_steps_per_second": 4.393,
"eval_wer": 0.48527547303044405,
"step": 13200
},
{
"epoch": 0.5265432826437189,
"eval_loss": 0.6168439984321594,
"eval_runtime": 162.0987,
"eval_samples_per_second": 34.892,
"eval_steps_per_second": 4.362,
"eval_wer": 0.4848261141692478,
"step": 13400
},
{
"epoch": 0.5304727101261346,
"grad_norm": 1.5596935749053955,
"learning_rate": 0.0001678677966101695,
"loss": 0.4323,
"step": 13500
},
{
"epoch": 0.5344021376085505,
"eval_loss": 0.6172667741775513,
"eval_runtime": 162.3681,
"eval_samples_per_second": 34.834,
"eval_steps_per_second": 4.354,
"eval_wer": 0.47707467381361235,
"step": 13600
},
{
"epoch": 0.542260992573382,
"eval_loss": 0.603190004825592,
"eval_runtime": 161.2926,
"eval_samples_per_second": 35.067,
"eval_steps_per_second": 4.383,
"eval_wer": 0.4656160228531078,
"step": 13800
},
{
"epoch": 0.5501198475382136,
"grad_norm": 2.978868246078491,
"learning_rate": 0.0001627830508474576,
"loss": 0.4575,
"step": 14000
},
{
"epoch": 0.5501198475382136,
"eval_loss": 0.6097469925880432,
"eval_runtime": 161.1042,
"eval_samples_per_second": 35.108,
"eval_steps_per_second": 4.388,
"eval_wer": 0.4678307200975751,
"step": 14000
},
{
"epoch": 0.5579787025030453,
"eval_loss": 0.5970696806907654,
"eval_runtime": 161.5846,
"eval_samples_per_second": 35.003,
"eval_steps_per_second": 4.375,
"eval_wer": 0.4673653127056218,
"step": 14200
},
{
"epoch": 0.5658375574678769,
"eval_loss": 0.5976916551589966,
"eval_runtime": 161.7136,
"eval_samples_per_second": 34.975,
"eval_steps_per_second": 4.372,
"eval_wer": 0.4697565437884162,
"step": 14400
},
{
"epoch": 0.5697669849502928,
"grad_norm": 3.0501327514648438,
"learning_rate": 0.00015769830508474575,
"loss": 0.4395,
"step": 14500
},
{
"epoch": 0.5736964124327085,
"eval_loss": 0.6056780815124512,
"eval_runtime": 162.5963,
"eval_samples_per_second": 34.786,
"eval_steps_per_second": 4.348,
"eval_wer": 0.4734316573317713,
"step": 14600
},
{
"epoch": 0.5815552673975402,
"eval_loss": 0.582733690738678,
"eval_runtime": 162.9467,
"eval_samples_per_second": 34.711,
"eval_steps_per_second": 4.339,
"eval_wer": 0.4574152236362761,
"step": 14800
},
{
"epoch": 0.5894141223623718,
"grad_norm": 4.3484697341918945,
"learning_rate": 0.00015261355932203388,
"loss": 0.4119,
"step": 15000
},
{
"epoch": 0.5894141223623718,
"eval_loss": 0.5946210622787476,
"eval_runtime": 162.2892,
"eval_samples_per_second": 34.851,
"eval_steps_per_second": 4.356,
"eval_wer": 0.4640432668389209,
"step": 15000
},
{
"epoch": 0.5972729773272034,
"eval_loss": 0.602292001247406,
"eval_runtime": 161.4334,
"eval_samples_per_second": 35.036,
"eval_steps_per_second": 4.38,
"eval_wer": 0.47707467381361235,
"step": 15200
},
{
"epoch": 0.605131832292035,
"eval_loss": 0.6129310727119446,
"eval_runtime": 161.8649,
"eval_samples_per_second": 34.943,
"eval_steps_per_second": 4.368,
"eval_wer": 0.47266132785543485,
"step": 15400
},
{
"epoch": 0.6090612597744509,
"grad_norm": 4.229031085968018,
"learning_rate": 0.00014752881355932203,
"loss": 0.4125,
"step": 15500
},
{
"epoch": 0.6129906872568667,
"eval_loss": 0.590186595916748,
"eval_runtime": 162.4898,
"eval_samples_per_second": 34.808,
"eval_steps_per_second": 4.351,
"eval_wer": 0.45837813548169665,
"step": 15600
},
{
"epoch": 0.6208495422216983,
"eval_loss": 0.5955421328544617,
"eval_runtime": 161.8228,
"eval_samples_per_second": 34.952,
"eval_steps_per_second": 4.369,
"eval_wer": 0.46537529489175267,
"step": 15800
},
{
"epoch": 0.6287083971865299,
"grad_norm": 1.4181621074676514,
"learning_rate": 0.00014244406779661016,
"loss": 0.4039,
"step": 16000
},
{
"epoch": 0.6287083971865299,
"eval_loss": 0.5955237150192261,
"eval_runtime": 161.3699,
"eval_samples_per_second": 35.05,
"eval_steps_per_second": 4.381,
"eval_wer": 0.45946943557317327,
"step": 16000
},
{
"epoch": 0.6365672521513616,
"eval_loss": 0.578912079334259,
"eval_runtime": 163.2091,
"eval_samples_per_second": 34.655,
"eval_steps_per_second": 4.332,
"eval_wer": 0.4497279774036687,
"step": 16200
},
{
"epoch": 0.6444261071161932,
"eval_loss": 0.5779294371604919,
"eval_runtime": 164.0491,
"eval_samples_per_second": 34.477,
"eval_steps_per_second": 4.31,
"eval_wer": 0.4630322094012293,
"step": 16400
},
{
"epoch": 0.648355534598609,
"grad_norm": 2.0229876041412354,
"learning_rate": 0.00013736949152542372,
"loss": 0.3969,
"step": 16500
},
{
"epoch": 0.6522849620810248,
"eval_loss": 0.5677434802055359,
"eval_runtime": 161.201,
"eval_samples_per_second": 35.087,
"eval_steps_per_second": 4.386,
"eval_wer": 0.45507213814575276,
"step": 16600
},
{
"epoch": 0.6601438170458565,
"eval_loss": 0.586939811706543,
"eval_runtime": 161.4539,
"eval_samples_per_second": 35.032,
"eval_steps_per_second": 4.379,
"eval_wer": 0.46062492978767794,
"step": 16800
},
{
"epoch": 0.6680026720106881,
"grad_norm": 4.166793346405029,
"learning_rate": 0.00013229491525423729,
"loss": 0.3923,
"step": 17000
},
{
"epoch": 0.6680026720106881,
"eval_loss": 0.5710186958312988,
"eval_runtime": 160.5637,
"eval_samples_per_second": 35.226,
"eval_steps_per_second": 4.403,
"eval_wer": 0.45017733626486495,
"step": 17000
},
{
"epoch": 0.6758615269755197,
"eval_loss": 0.5639811158180237,
"eval_runtime": 161.7944,
"eval_samples_per_second": 34.958,
"eval_steps_per_second": 4.37,
"eval_wer": 0.44741698897465937,
"step": 17200
},
{
"epoch": 0.6837203819403513,
"eval_loss": 0.5841760039329529,
"eval_runtime": 161.0184,
"eval_samples_per_second": 35.126,
"eval_steps_per_second": 4.391,
"eval_wer": 0.4497921715266967,
"step": 17400
},
{
"epoch": 0.6876498094227671,
"grad_norm": 3.127680778503418,
"learning_rate": 0.0001272101694915254,
"loss": 0.386,
"step": 17500
},
{
"epoch": 0.691579236905183,
"eval_loss": 0.5596618056297302,
"eval_runtime": 160.919,
"eval_samples_per_second": 35.148,
"eval_steps_per_second": 4.394,
"eval_wer": 0.44403074898493045,
"step": 17600
},
{
"epoch": 0.6994380918700145,
"eval_loss": 0.5620830059051514,
"eval_runtime": 160.6614,
"eval_samples_per_second": 35.204,
"eval_steps_per_second": 4.401,
"eval_wer": 0.43812488966635105,
"step": 17800
},
{
"epoch": 0.7072969468348461,
"grad_norm": 17.387800216674805,
"learning_rate": 0.00012213559322033898,
"loss": 0.3851,
"step": 18000
},
{
"epoch": 0.7072969468348461,
"eval_loss": 0.566453218460083,
"eval_runtime": 161.6574,
"eval_samples_per_second": 34.988,
"eval_steps_per_second": 4.373,
"eval_wer": 0.434562115838295,
"step": 18000
},
{
"epoch": 0.7151558017996777,
"eval_loss": 0.5572646260261536,
"eval_runtime": 162.4898,
"eval_samples_per_second": 34.808,
"eval_steps_per_second": 4.351,
"eval_wer": 0.4356213188682576,
"step": 18200
},
{
"epoch": 0.7230146567645094,
"eval_loss": 0.5548349022865295,
"eval_runtime": 161.0153,
"eval_samples_per_second": 35.127,
"eval_steps_per_second": 4.391,
"eval_wer": 0.4344337275922389,
"step": 18400
},
{
"epoch": 0.7269440842469252,
"grad_norm": 9.4507417678833,
"learning_rate": 0.00011705084745762712,
"loss": 0.369,
"step": 18500
},
{
"epoch": 0.730873511729341,
"eval_loss": 0.5616690516471863,
"eval_runtime": 161.4318,
"eval_samples_per_second": 35.036,
"eval_steps_per_second": 4.38,
"eval_wer": 0.43637559981383706,
"step": 18600
},
{
"epoch": 0.7387323666941726,
"eval_loss": 0.5595532655715942,
"eval_runtime": 160.8301,
"eval_samples_per_second": 35.168,
"eval_steps_per_second": 4.396,
"eval_wer": 0.4393927235961548,
"step": 18800
},
{
"epoch": 0.7465912216590043,
"grad_norm": 1.8793506622314453,
"learning_rate": 0.00011196610169491524,
"loss": 0.3738,
"step": 19000
},
{
"epoch": 0.7465912216590043,
"eval_loss": 0.549248218536377,
"eval_runtime": 161.3194,
"eval_samples_per_second": 35.061,
"eval_steps_per_second": 4.383,
"eval_wer": 0.42923400362696795,
"step": 19000
},
{
"epoch": 0.7544500766238359,
"eval_loss": 0.5478147268295288,
"eval_runtime": 162.2231,
"eval_samples_per_second": 34.866,
"eval_steps_per_second": 4.358,
"eval_wer": 0.4372261719439585,
"step": 19200
},
{
"epoch": 0.7623089315886675,
"eval_loss": 0.5375632047653198,
"eval_runtime": 161.0297,
"eval_samples_per_second": 35.124,
"eval_steps_per_second": 4.39,
"eval_wer": 0.42873649917350065,
"step": 19400
},
{
"epoch": 0.7662383590710834,
"grad_norm": 2.159616708755493,
"learning_rate": 0.00010688135593220338,
"loss": 0.368,
"step": 19500
},
{
"epoch": 0.7701677865534992,
"eval_loss": 0.5282244086265564,
"eval_runtime": 163.0357,
"eval_samples_per_second": 34.692,
"eval_steps_per_second": 4.336,
"eval_wer": 0.4193481086806503,
"step": 19600
},
{
"epoch": 0.7780266415183308,
"eval_loss": 0.5348193049430847,
"eval_runtime": 162.5531,
"eval_samples_per_second": 34.795,
"eval_steps_per_second": 4.349,
"eval_wer": 0.42507743416090255,
"step": 19800
},
{
"epoch": 0.7858854964831624,
"grad_norm": 2.2020351886749268,
"learning_rate": 0.00010179661016949151,
"loss": 0.3629,
"step": 20000
},
{
"epoch": 0.7858854964831624,
"eval_loss": 0.5367931723594666,
"eval_runtime": 162.0053,
"eval_samples_per_second": 34.912,
"eval_steps_per_second": 4.364,
"eval_wer": 0.43130426409462214,
"step": 20000
},
{
"epoch": 0.793744351447994,
"eval_loss": 0.5550614595413208,
"eval_runtime": 161.9948,
"eval_samples_per_second": 34.915,
"eval_steps_per_second": 4.364,
"eval_wer": 0.44123830463321084,
"step": 20200
},
{
"epoch": 0.8016032064128257,
"eval_loss": 0.5251778364181519,
"eval_runtime": 162.6214,
"eval_samples_per_second": 34.78,
"eval_steps_per_second": 4.348,
"eval_wer": 0.4105214167642952,
"step": 20400
},
{
"epoch": 0.8055326338952414,
"grad_norm": 2.7725887298583984,
"learning_rate": 9.671186440677966e-05,
"loss": 0.3638,
"step": 20500
},
{
"epoch": 0.8094620613776573,
"eval_loss": 0.5242481827735901,
"eval_runtime": 162.5731,
"eval_samples_per_second": 34.791,
"eval_steps_per_second": 4.349,
"eval_wer": 0.41174110510182793,
"step": 20600
},
{
"epoch": 0.8173209163424889,
"eval_loss": 0.5233432054519653,
"eval_runtime": 161.9438,
"eval_samples_per_second": 34.926,
"eval_steps_per_second": 4.366,
"eval_wer": 0.4165877613904447,
"step": 20800
},
{
"epoch": 0.8251797713073206,
"grad_norm": 2.733196496963501,
"learning_rate": 9.162711864406779e-05,
"loss": 0.3512,
"step": 21000
},
{
"epoch": 0.8251797713073206,
"eval_loss": 0.524342954158783,
"eval_runtime": 161.947,
"eval_samples_per_second": 34.925,
"eval_steps_per_second": 4.366,
"eval_wer": 0.4160581598754634,
"step": 21000
},
{
"epoch": 0.8330386262721522,
"eval_loss": 0.5150259733200073,
"eval_runtime": 162.0793,
"eval_samples_per_second": 34.896,
"eval_steps_per_second": 4.362,
"eval_wer": 0.4123028036783232,
"step": 21200
},
{
"epoch": 0.8408974812369838,
"eval_loss": 0.5088914632797241,
"eval_runtime": 161.2392,
"eval_samples_per_second": 35.078,
"eval_steps_per_second": 4.385,
"eval_wer": 0.4079536518431738,
"step": 21400
},
{
"epoch": 0.8448269087193996,
"grad_norm": 4.562708377838135,
"learning_rate": 8.654237288135593e-05,
"loss": 0.3536,
"step": 21500
},
{
"epoch": 0.8487563362018153,
"eval_loss": 0.515373170375824,
"eval_runtime": 162.8063,
"eval_samples_per_second": 34.741,
"eval_steps_per_second": 4.343,
"eval_wer": 0.40899680634237934,
"step": 21600
},
{
"epoch": 0.856615191166647,
"eval_loss": 0.5161571502685547,
"eval_runtime": 162.7678,
"eval_samples_per_second": 34.749,
"eval_steps_per_second": 4.344,
"eval_wer": 0.4091893887114635,
"step": 21800
},
{
"epoch": 0.8644740461314786,
"grad_norm": 2.272256374359131,
"learning_rate": 8.146779661016948e-05,
"loss": 0.3464,
"step": 22000
},
{
"epoch": 0.8644740461314786,
"eval_loss": 0.5097736716270447,
"eval_runtime": 162.1935,
"eval_samples_per_second": 34.872,
"eval_steps_per_second": 4.359,
"eval_wer": 0.40527354720675324,
"step": 22000
},
{
"epoch": 0.8723329010963102,
"eval_loss": 0.5069981813430786,
"eval_runtime": 162.5966,
"eval_samples_per_second": 34.785,
"eval_steps_per_second": 4.348,
"eval_wer": 0.4022724719551925,
"step": 22200
},
{
"epoch": 0.8801917560611419,
"eval_loss": 0.5070444345474243,
"eval_runtime": 162.5617,
"eval_samples_per_second": 34.793,
"eval_steps_per_second": 4.349,
"eval_wer": 0.40707098265153824,
"step": 22400
},
{
"epoch": 0.8841211835435577,
"grad_norm": 2.9740068912506104,
"learning_rate": 7.638305084745762e-05,
"loss": 0.3377,
"step": 22500
},
{
"epoch": 0.8880506110259735,
"eval_loss": 0.5028176307678223,
"eval_runtime": 162.4451,
"eval_samples_per_second": 34.818,
"eval_steps_per_second": 4.352,
"eval_wer": 0.39670363178251034,
"step": 22600
},
{
"epoch": 0.8959094659908051,
"eval_loss": 0.5036062002182007,
"eval_runtime": 162.5763,
"eval_samples_per_second": 34.79,
"eval_steps_per_second": 4.349,
"eval_wer": 0.39784307746625797,
"step": 22800
},
{
"epoch": 0.9037683209556368,
"grad_norm": 1.9388916492462158,
"learning_rate": 7.129830508474575e-05,
"loss": 0.3272,
"step": 23000
},
{
"epoch": 0.9037683209556368,
"eval_loss": 0.5020586848258972,
"eval_runtime": 161.6894,
"eval_samples_per_second": 34.981,
"eval_steps_per_second": 4.373,
"eval_wer": 0.39538765226043554,
"step": 23000
},
{
"epoch": 0.9116271759204684,
"eval_loss": 0.5032612085342407,
"eval_runtime": 163.6786,
"eval_samples_per_second": 34.556,
"eval_steps_per_second": 4.319,
"eval_wer": 0.3984529216350243,
"step": 23200
},
{
"epoch": 0.9194860308853,
"eval_loss": 0.49842530488967896,
"eval_runtime": 162.0701,
"eval_samples_per_second": 34.898,
"eval_steps_per_second": 4.362,
"eval_wer": 0.3971850877052206,
"step": 23400
},
{
"epoch": 0.9234154583677158,
"grad_norm": 3.9436373710632324,
"learning_rate": 6.621355932203389e-05,
"loss": 0.319,
"step": 23500
},
{
"epoch": 0.9273448858501316,
"eval_loss": 0.4928737282752991,
"eval_runtime": 163.9597,
"eval_samples_per_second": 34.496,
"eval_steps_per_second": 4.312,
"eval_wer": 0.39243472260114587,
"step": 23600
},
{
"epoch": 0.9352037408149633,
"eval_loss": 0.49405232071876526,
"eval_runtime": 161.8803,
"eval_samples_per_second": 34.939,
"eval_steps_per_second": 4.367,
"eval_wer": 0.4013095601097719,
"step": 23800
},
{
"epoch": 0.9430625957797949,
"grad_norm": 3.4186201095581055,
"learning_rate": 6.112881355932203e-05,
"loss": 0.3184,
"step": 24000
},
{
"epoch": 0.9430625957797949,
"eval_loss": 0.4856198728084564,
"eval_runtime": 163.6122,
"eval_samples_per_second": 34.57,
"eval_steps_per_second": 4.321,
"eval_wer": 0.387411532474202,
"step": 24000
},
{
"epoch": 0.9509214507446265,
"eval_loss": 0.48915818333625793,
"eval_runtime": 162.8317,
"eval_samples_per_second": 34.735,
"eval_steps_per_second": 4.342,
"eval_wer": 0.3913755195711833,
"step": 24200
},
{
"epoch": 0.9587803057094582,
"eval_loss": 0.48598504066467285,
"eval_runtime": 160.6269,
"eval_samples_per_second": 35.212,
"eval_steps_per_second": 4.402,
"eval_wer": 0.3813772849095665,
"step": 24400
},
{
"epoch": 0.9627097331918739,
"grad_norm": 2.70164155960083,
"learning_rate": 5.6044067796610164e-05,
"loss": 0.3091,
"step": 24500
},
{
"epoch": 0.9666391606742898,
"eval_loss": 0.4825168251991272,
"eval_runtime": 162.6242,
"eval_samples_per_second": 34.78,
"eval_steps_per_second": 4.347,
"eval_wer": 0.38336730272343567,
"step": 24600
},
{
"epoch": 0.9744980156391214,
"eval_loss": 0.4784228205680847,
"eval_runtime": 162.0189,
"eval_samples_per_second": 34.91,
"eval_steps_per_second": 4.364,
"eval_wer": 0.3866893485901366,
"step": 24800
},
{
"epoch": 0.982356870603953,
"grad_norm": 9.408166885375977,
"learning_rate": 5.096949152542373e-05,
"loss": 0.3154,
"step": 25000
},
{
"epoch": 0.982356870603953,
"eval_loss": 0.47507792711257935,
"eval_runtime": 161.9422,
"eval_samples_per_second": 34.926,
"eval_steps_per_second": 4.366,
"eval_wer": 0.3807834892715572,
"step": 25000
},
{
"epoch": 0.9902157255687847,
"eval_loss": 0.4778765141963959,
"eval_runtime": 162.3405,
"eval_samples_per_second": 34.84,
"eval_steps_per_second": 4.355,
"eval_wer": 0.38492401020686556,
"step": 25200
},
{
"epoch": 0.9980745805336163,
"eval_loss": 0.477267324924469,
"eval_runtime": 161.2107,
"eval_samples_per_second": 35.085,
"eval_steps_per_second": 4.386,
"eval_wer": 0.38084768339458525,
"step": 25400
},
{
"epoch": 1.002004008016032,
"grad_norm": 0.7003775835037231,
"learning_rate": 4.589491525423728e-05,
"loss": 0.312,
"step": 25500
},
{
"epoch": 1.005933435498448,
"eval_loss": 0.47774726152420044,
"eval_runtime": 160.8535,
"eval_samples_per_second": 35.162,
"eval_steps_per_second": 4.395,
"eval_wer": 0.3757923962061273,
"step": 25600
},
{
"epoch": 1.0137922904632795,
"eval_loss": 0.4752050042152405,
"eval_runtime": 159.7765,
"eval_samples_per_second": 35.399,
"eval_steps_per_second": 4.425,
"eval_wer": 0.3820513232013609,
"step": 25800
},
{
"epoch": 1.0216511454281112,
"grad_norm": 0.702942430973053,
"learning_rate": 4.081016949152542e-05,
"loss": 0.2651,
"step": 26000
},
{
"epoch": 1.0216511454281112,
"eval_loss": 0.4700838327407837,
"eval_runtime": 163.2858,
"eval_samples_per_second": 34.639,
"eval_steps_per_second": 4.33,
"eval_wer": 0.37750958899712733,
"step": 26000
},
{
"epoch": 1.0295100003929427,
"eval_loss": 0.47011885046958923,
"eval_runtime": 160.7741,
"eval_samples_per_second": 35.18,
"eval_steps_per_second": 4.397,
"eval_wer": 0.3760652212289965,
"step": 26200
},
{
"epoch": 1.0373688553577745,
"eval_loss": 0.471804678440094,
"eval_runtime": 160.2455,
"eval_samples_per_second": 35.296,
"eval_steps_per_second": 4.412,
"eval_wer": 0.37755773458939834,
"step": 26400
},
{
"epoch": 1.0412982828401902,
"grad_norm": 0.98069828748703,
"learning_rate": 3.572542372881355e-05,
"loss": 0.2627,
"step": 26500
},
{
"epoch": 1.045227710322606,
"eval_loss": 0.4638473391532898,
"eval_runtime": 160.1121,
"eval_samples_per_second": 35.325,
"eval_steps_per_second": 4.416,
"eval_wer": 0.37296785479289374,
"step": 26600
},
{
"epoch": 1.0530865652874377,
"eval_loss": 0.4677112400531769,
"eval_runtime": 159.9389,
"eval_samples_per_second": 35.364,
"eval_steps_per_second": 4.42,
"eval_wer": 0.3720370400089872,
"step": 26800
},
{
"epoch": 1.0609454202522692,
"grad_norm": 0.8780287504196167,
"learning_rate": 3.0640677966101693e-05,
"loss": 0.2427,
"step": 27000
},
{
"epoch": 1.0609454202522692,
"eval_loss": 0.4642546474933624,
"eval_runtime": 160.0541,
"eval_samples_per_second": 35.338,
"eval_steps_per_second": 4.417,
"eval_wer": 0.36985443982603394,
"step": 27000
},
{
"epoch": 1.0688042752171008,
"eval_loss": 0.46017909049987793,
"eval_runtime": 159.9066,
"eval_samples_per_second": 35.371,
"eval_steps_per_second": 4.421,
"eval_wer": 0.3713469531864358,
"step": 27200
},
{
"epoch": 1.0766631301819325,
"eval_loss": 0.46644654870033264,
"eval_runtime": 160.7516,
"eval_samples_per_second": 35.185,
"eval_steps_per_second": 4.398,
"eval_wer": 0.3703037986872302,
"step": 27400
},
{
"epoch": 1.0805925576643483,
"grad_norm": 0.8659859895706177,
"learning_rate": 2.556610169491525e-05,
"loss": 0.2464,
"step": 27500
},
{
"epoch": 1.0845219851467642,
"eval_loss": 0.4609028100967407,
"eval_runtime": 161.4502,
"eval_samples_per_second": 35.032,
"eval_steps_per_second": 4.379,
"eval_wer": 0.36770393670459467,
"step": 27600
},
{
"epoch": 1.0923808401115958,
"eval_loss": 0.4613707363605499,
"eval_runtime": 160.5963,
"eval_samples_per_second": 35.219,
"eval_steps_per_second": 4.402,
"eval_wer": 0.3687310426730433,
"step": 27800
},
{
"epoch": 1.1002396950764273,
"grad_norm": 1.6944918632507324,
"learning_rate": 2.0481355932203388e-05,
"loss": 0.2537,
"step": 28000
},
{
"epoch": 1.1002396950764273,
"eval_loss": 0.45553678274154663,
"eval_runtime": 160.1154,
"eval_samples_per_second": 35.325,
"eval_steps_per_second": 4.416,
"eval_wer": 0.36545714239861343,
"step": 28000
},
{
"epoch": 1.108098550041259,
"eval_loss": 0.456032931804657,
"eval_runtime": 160.97,
"eval_samples_per_second": 35.137,
"eval_steps_per_second": 4.392,
"eval_wer": 0.36447818202243587,
"step": 28200
},
{
"epoch": 1.1159574050060905,
"eval_loss": 0.45427000522613525,
"eval_runtime": 160.1348,
"eval_samples_per_second": 35.32,
"eval_steps_per_second": 4.415,
"eval_wer": 0.36261655245462276,
"step": 28400
},
{
"epoch": 1.1198868324885065,
"grad_norm": 0.8318812251091003,
"learning_rate": 1.5396610169491525e-05,
"loss": 0.2313,
"step": 28500
},
{
"epoch": 1.1238162599709223,
"eval_loss": 0.45402956008911133,
"eval_runtime": 160.7545,
"eval_samples_per_second": 35.184,
"eval_steps_per_second": 4.398,
"eval_wer": 0.3631461539696041,
"step": 28600
},
{
"epoch": 1.1316751149357538,
"eval_loss": 0.4536111354827881,
"eval_runtime": 165.4654,
"eval_samples_per_second": 34.182,
"eval_steps_per_second": 4.273,
"eval_wer": 0.3626326009853798,
"step": 28800
},
{
"epoch": 1.1395339699005855,
"grad_norm": 0.7866860032081604,
"learning_rate": 1.031186440677966e-05,
"loss": 0.2451,
"step": 29000
},
{
"epoch": 1.1395339699005855,
"eval_loss": 0.45293620228767395,
"eval_runtime": 160.3649,
"eval_samples_per_second": 35.27,
"eval_steps_per_second": 4.409,
"eval_wer": 0.3617338832629873,
"step": 29000
},
{
"epoch": 1.147392824865417,
"eval_loss": 0.4530145823955536,
"eval_runtime": 160.576,
"eval_samples_per_second": 35.223,
"eval_steps_per_second": 4.403,
"eval_wer": 0.3598401566336602,
"step": 29200
},
{
"epoch": 1.1552516798302488,
"eval_loss": 0.4515323042869568,
"eval_runtime": 160.1136,
"eval_samples_per_second": 35.325,
"eval_steps_per_second": 4.416,
"eval_wer": 0.3591500698111088,
"step": 29400
},
{
"epoch": 1.1591811073126645,
"grad_norm": 3.2193210124969482,
"learning_rate": 5.227118644067796e-06,
"loss": 0.2445,
"step": 29500
},
{
"epoch": 1.1631105347950803,
"eval_loss": 0.451358437538147,
"eval_runtime": 160.6595,
"eval_samples_per_second": 35.205,
"eval_steps_per_second": 4.401,
"eval_wer": 0.3590056330342957,
"step": 29600
},
{
"epoch": 1.170969389759912,
"eval_loss": 0.4514302611351013,
"eval_runtime": 160.1434,
"eval_samples_per_second": 35.318,
"eval_steps_per_second": 4.415,
"eval_wer": 0.3588772447882396,
"step": 29800
},
{
"epoch": 1.1788282447247436,
"grad_norm": 0.5669330358505249,
"learning_rate": 1.423728813559322e-07,
"loss": 0.2364,
"step": 30000
},
{
"epoch": 1.1788282447247436,
"eval_loss": 0.4510672390460968,
"eval_runtime": 160.6855,
"eval_samples_per_second": 35.199,
"eval_steps_per_second": 4.4,
"eval_wer": 0.3591179727495948,
"step": 30000
},
{
"epoch": 1.1788282447247436,
"step": 30000,
"total_flos": 3.731985674211105e+19,
"train_loss": 0.5082863594055176,
"train_runtime": 37313.8627,
"train_samples_per_second": 6.432,
"train_steps_per_second": 0.804
}
],
"logging_steps": 500,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.731985674211105e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}