{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.662514156285391, "eval_steps": 100, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056625141562853906, "eval_loss": 4.158196449279785, "eval_runtime": 151.6577, "eval_samples_per_second": 37.295, "eval_steps_per_second": 4.662, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.11325028312570781, "eval_loss": 3.127609968185425, "eval_runtime": 148.7785, "eval_samples_per_second": 38.016, "eval_steps_per_second": 4.752, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.16987542468856173, "eval_loss": 3.407195568084717, "eval_runtime": 152.9346, "eval_samples_per_second": 36.983, "eval_steps_per_second": 4.623, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.22650056625141562, "eval_loss": 2.1966774463653564, "eval_runtime": 150.1881, "eval_samples_per_second": 37.659, "eval_steps_per_second": 4.707, "eval_wer": 0.9898733770923271, "step": 400 }, { "epoch": 0.28312570781426954, "grad_norm": 1.805786371231079, "learning_rate": 0.0001491, "loss": 4.6703, "step": 500 }, { "epoch": 0.28312570781426954, "eval_loss": 1.0575733184814453, "eval_runtime": 149.8633, "eval_samples_per_second": 37.741, "eval_steps_per_second": 4.718, "eval_wer": 0.7174335189613391, "step": 500 }, { "epoch": 0.33975084937712347, "eval_loss": 0.8877128958702087, "eval_runtime": 152.3639, "eval_samples_per_second": 37.122, "eval_steps_per_second": 4.64, "eval_wer": 0.640464765450723, "step": 600 }, { "epoch": 0.39637599093997733, "eval_loss": 0.7894217371940613, "eval_runtime": 156.1616, "eval_samples_per_second": 36.219, "eval_steps_per_second": 4.527, "eval_wer": 0.5861244403074899, "step": 700 }, { "epoch": 0.45300113250283125, "eval_loss": 0.7698957324028015, "eval_runtime": 153.3515, "eval_samples_per_second": 36.883, "eval_steps_per_second": 4.61, "eval_wer": 0.5876650992601627, "step": 800 }, { "epoch": 0.5096262740656852, "eval_loss": 0.7604610919952393, "eval_runtime": 153.0775, "eval_samples_per_second": 36.949, "eval_steps_per_second": 4.619, "eval_wer": 0.5402577394039576, "step": 900 }, { "epoch": 0.5662514156285391, "grad_norm": 1.723030686378479, "learning_rate": 0.0002988, "loss": 0.5242, "step": 1000 }, { "epoch": 0.5662514156285391, "eval_loss": 0.742854654788971, "eval_runtime": 152.8719, "eval_samples_per_second": 36.998, "eval_steps_per_second": 4.625, "eval_wer": 0.5494856445892379, "step": 1000 }, { "epoch": 0.622876557191393, "eval_loss": 0.6761639714241028, "eval_runtime": 152.3166, "eval_samples_per_second": 37.133, "eval_steps_per_second": 4.642, "eval_wer": 0.5081125322976682, "step": 1100 }, { "epoch": 0.6795016987542469, "eval_loss": 0.67025226354599, "eval_runtime": 153.9119, "eval_samples_per_second": 36.748, "eval_steps_per_second": 4.594, "eval_wer": 0.5071977660445186, "step": 1200 }, { "epoch": 0.7361268403171007, "eval_loss": 0.6186646223068237, "eval_runtime": 155.1162, "eval_samples_per_second": 36.463, "eval_steps_per_second": 4.558, "eval_wer": 0.4623421225786779, "step": 1300 }, { "epoch": 0.7927519818799547, "eval_loss": 0.6205167174339294, "eval_runtime": 155.3219, "eval_samples_per_second": 36.415, "eval_steps_per_second": 4.552, "eval_wer": 0.4741698897465937, "step": 1400 }, { "epoch": 0.8493771234428086, "grad_norm": 1.6596413850784302, "learning_rate": 0.00028349999999999995, "loss": 0.4093, "step": 1500 }, { "epoch": 0.8493771234428086, "eval_loss": 0.6089454889297485, "eval_runtime": 156.2384, "eval_samples_per_second": 36.201, "eval_steps_per_second": 4.525, "eval_wer": 0.46065702684919196, "step": 1500 }, { "epoch": 0.9060022650056625, "eval_loss": 0.6079407334327698, "eval_runtime": 151.5221, "eval_samples_per_second": 37.328, "eval_steps_per_second": 4.666, "eval_wer": 0.4563720691370705, "step": 1600 }, { "epoch": 0.9626274065685164, "eval_loss": 0.5751839280128479, "eval_runtime": 157.3729, "eval_samples_per_second": 35.94, "eval_steps_per_second": 4.493, "eval_wer": 0.4487169199659771, "step": 1700 }, { "epoch": 1.0192525481313703, "eval_loss": 0.5519313812255859, "eval_runtime": 157.5034, "eval_samples_per_second": 35.91, "eval_steps_per_second": 4.489, "eval_wer": 0.41742228498980916, "step": 1800 }, { "epoch": 1.0758776896942241, "eval_loss": 0.5467554926872253, "eval_runtime": 156.3131, "eval_samples_per_second": 36.184, "eval_steps_per_second": 4.523, "eval_wer": 0.41071399913337936, "step": 1900 }, { "epoch": 1.1325028312570782, "grad_norm": 0.5150347352027893, "learning_rate": 0.0002669, "loss": 0.3366, "step": 2000 }, { "epoch": 1.1325028312570782, "eval_loss": 0.5371935963630676, "eval_runtime": 153.7972, "eval_samples_per_second": 36.776, "eval_steps_per_second": 4.597, "eval_wer": 0.4080338944969588, "step": 2000 }, { "epoch": 1.189127972819932, "eval_loss": 0.53590327501297, "eval_runtime": 153.8993, "eval_samples_per_second": 36.751, "eval_steps_per_second": 4.594, "eval_wer": 0.40715122530532327, "step": 2100 }, { "epoch": 1.245753114382786, "eval_loss": 0.530412495136261, "eval_runtime": 152.2809, "eval_samples_per_second": 37.142, "eval_steps_per_second": 4.643, "eval_wer": 0.4022885204859495, "step": 2200 }, { "epoch": 1.3023782559456398, "eval_loss": 0.5311455130577087, "eval_runtime": 152.7946, "eval_samples_per_second": 37.017, "eval_steps_per_second": 4.627, "eval_wer": 0.40111697774068783, "step": 2300 }, { "epoch": 1.3590033975084936, "eval_loss": 0.5186213254928589, "eval_runtime": 152.2204, "eval_samples_per_second": 37.157, "eval_steps_per_second": 4.645, "eval_wer": 0.38644862062878144, "step": 2400 }, { "epoch": 1.4156285390713477, "grad_norm": 0.7118180394172668, "learning_rate": 0.00025026666666666666, "loss": 0.2939, "step": 2500 }, { "epoch": 1.4156285390713477, "eval_loss": 0.5234143733978271, "eval_runtime": 153.4973, "eval_samples_per_second": 36.848, "eval_steps_per_second": 4.606, "eval_wer": 0.3934297315080804, "step": 2500 }, { "epoch": 1.4722536806342017, "eval_loss": 0.5212889909744263, "eval_runtime": 153.2132, "eval_samples_per_second": 36.916, "eval_steps_per_second": 4.614, "eval_wer": 0.39726533035900563, "step": 2600 }, { "epoch": 1.5288788221970555, "eval_loss": 0.5155624151229858, "eval_runtime": 153.779, "eval_samples_per_second": 36.78, "eval_steps_per_second": 4.598, "eval_wer": 0.3876522604355571, "step": 2700 }, { "epoch": 1.5855039637599093, "eval_loss": 0.5052253007888794, "eval_runtime": 153.3545, "eval_samples_per_second": 36.882, "eval_steps_per_second": 4.61, "eval_wer": 0.3897546179647253, "step": 2800 }, { "epoch": 1.6421291053227633, "eval_loss": 0.49809539318084717, "eval_runtime": 154.1154, "eval_samples_per_second": 36.7, "eval_steps_per_second": 4.587, "eval_wer": 0.38331915713116466, "step": 2900 }, { "epoch": 1.6987542468856174, "grad_norm": 0.7669665217399597, "learning_rate": 0.00023359999999999996, "loss": 0.2838, "step": 3000 }, { "epoch": 1.6987542468856174, "eval_loss": 0.49897971749305725, "eval_runtime": 152.4639, "eval_samples_per_second": 37.097, "eval_steps_per_second": 4.637, "eval_wer": 0.380414373064146, "step": 3000 }, { "epoch": 1.7553793884484712, "eval_loss": 0.49996882677078247, "eval_runtime": 151.3476, "eval_samples_per_second": 37.371, "eval_steps_per_second": 4.671, "eval_wer": 0.38070324661777216, "step": 3100 }, { "epoch": 1.812004530011325, "eval_loss": 0.49606603384017944, "eval_runtime": 151.8767, "eval_samples_per_second": 37.241, "eval_steps_per_second": 4.655, "eval_wer": 0.37513440644508994, "step": 3200 }, { "epoch": 1.8686296715741788, "eval_loss": 0.48593518137931824, "eval_runtime": 150.9613, "eval_samples_per_second": 37.467, "eval_steps_per_second": 4.683, "eval_wer": 0.3730962430389498, "step": 3300 }, { "epoch": 1.9252548131370328, "eval_loss": 0.481240451335907, "eval_runtime": 151.7124, "eval_samples_per_second": 37.281, "eval_steps_per_second": 4.66, "eval_wer": 0.3657139188907256, "step": 3400 }, { "epoch": 1.9818799546998869, "grad_norm": 1.395547866821289, "learning_rate": 0.00021696666666666664, "loss": 0.2694, "step": 3500 }, { "epoch": 1.9818799546998869, "eval_loss": 0.47789159417152405, "eval_runtime": 151.9051, "eval_samples_per_second": 37.234, "eval_steps_per_second": 4.654, "eval_wer": 0.36197461122434244, "step": 3500 }, { "epoch": 2.0385050962627407, "eval_loss": 0.4943128526210785, "eval_runtime": 152.1624, "eval_samples_per_second": 37.171, "eval_steps_per_second": 4.646, "eval_wer": 0.36325849368490315, "step": 3600 }, { "epoch": 2.0951302378255945, "eval_loss": 0.48801928758621216, "eval_runtime": 151.6345, "eval_samples_per_second": 37.3, "eval_steps_per_second": 4.663, "eval_wer": 0.3677360337661087, "step": 3700 }, { "epoch": 2.1517553793884483, "eval_loss": 0.49899762868881226, "eval_runtime": 151.9237, "eval_samples_per_second": 37.229, "eval_steps_per_second": 4.654, "eval_wer": 0.3661632777519218, "step": 3800 }, { "epoch": 2.2083805209513026, "eval_loss": 0.5101335644721985, "eval_runtime": 151.3463, "eval_samples_per_second": 37.371, "eval_steps_per_second": 4.671, "eval_wer": 0.36991863394906194, "step": 3900 }, { "epoch": 2.2650056625141564, "grad_norm": 1.0641744136810303, "learning_rate": 0.00020036666666666664, "loss": 0.2419, "step": 4000 }, { "epoch": 2.2650056625141564, "eval_loss": 0.5392731428146362, "eval_runtime": 151.0047, "eval_samples_per_second": 37.456, "eval_steps_per_second": 4.682, "eval_wer": 0.3901718797644076, "step": 4000 }, { "epoch": 2.32163080407701, "eval_loss": 0.6454418301582336, "eval_runtime": 151.5484, "eval_samples_per_second": 37.321, "eval_steps_per_second": 4.665, "eval_wer": 0.4513328304793696, "step": 4100 }, { "epoch": 2.378255945639864, "eval_loss": 0.989225447177887, "eval_runtime": 151.0915, "eval_samples_per_second": 37.434, "eval_steps_per_second": 4.679, "eval_wer": 0.5936672497632842, "step": 4200 }, { "epoch": 2.434881087202718, "eval_loss": 0.7711612582206726, "eval_runtime": 151.4711, "eval_samples_per_second": 37.34, "eval_steps_per_second": 4.668, "eval_wer": 0.5166984962526681, "step": 4300 }, { "epoch": 2.491506228765572, "eval_loss": 0.6337701678276062, "eval_runtime": 152.602, "eval_samples_per_second": 37.064, "eval_steps_per_second": 4.633, "eval_wer": 0.47877581807385533, "step": 4400 }, { "epoch": 2.548131370328426, "grad_norm": 1.0334250926971436, "learning_rate": 0.00018373333333333332, "loss": 0.46, "step": 4500 }, { "epoch": 2.548131370328426, "eval_loss": 0.5562150478363037, "eval_runtime": 152.8682, "eval_samples_per_second": 36.999, "eval_steps_per_second": 4.625, "eval_wer": 0.4155606554219961, "step": 4500 }, { "epoch": 2.6047565118912797, "eval_loss": 0.5376870036125183, "eval_runtime": 151.9955, "eval_samples_per_second": 37.212, "eval_steps_per_second": 4.651, "eval_wer": 0.3905730930333328, "step": 4600 }, { "epoch": 2.661381653454134, "eval_loss": 0.5686676502227783, "eval_runtime": 151.879, "eval_samples_per_second": 37.24, "eval_steps_per_second": 4.655, "eval_wer": 0.4008120556563047, "step": 4700 }, { "epoch": 2.7180067950169873, "eval_loss": 0.6321017742156982, "eval_runtime": 152.7183, "eval_samples_per_second": 37.036, "eval_steps_per_second": 4.629, "eval_wer": 0.42905746978864084, "step": 4800 }, { "epoch": 2.7746319365798415, "eval_loss": 0.5834416151046753, "eval_runtime": 152.1246, "eval_samples_per_second": 37.18, "eval_steps_per_second": 4.648, "eval_wer": 0.4202789234645568, "step": 4900 }, { "epoch": 2.8312570781426953, "grad_norm": 0.5010664463043213, "learning_rate": 0.00016706666666666664, "loss": 0.299, "step": 5000 }, { "epoch": 2.8312570781426953, "eval_loss": 0.5302273631095886, "eval_runtime": 152.1036, "eval_samples_per_second": 37.185, "eval_steps_per_second": 4.648, "eval_wer": 0.3929643241161272, "step": 5000 }, { "epoch": 2.887882219705549, "eval_loss": 0.5315878987312317, "eval_runtime": 151.9165, "eval_samples_per_second": 37.231, "eval_steps_per_second": 4.654, "eval_wer": 0.3860153102983422, "step": 5100 }, { "epoch": 2.9445073612684034, "eval_loss": 0.5343597531318665, "eval_runtime": 163.0683, "eval_samples_per_second": 34.685, "eval_steps_per_second": 4.336, "eval_wer": 0.38002920832597775, "step": 5200 }, { "epoch": 3.001132502831257, "eval_loss": 0.534857451915741, "eval_runtime": 153.2271, "eval_samples_per_second": 36.913, "eval_steps_per_second": 4.614, "eval_wer": 0.38415368073052913, "step": 5300 }, { "epoch": 3.057757644394111, "eval_loss": 0.5775672793388367, "eval_runtime": 151.3492, "eval_samples_per_second": 37.371, "eval_steps_per_second": 4.671, "eval_wer": 0.4182728571199307, "step": 5400 }, { "epoch": 3.114382785956965, "grad_norm": 1.614545226097107, "learning_rate": 0.00015039999999999997, "loss": 0.2839, "step": 5500 }, { "epoch": 3.114382785956965, "eval_loss": 0.5882839560508728, "eval_runtime": 150.3863, "eval_samples_per_second": 37.61, "eval_steps_per_second": 4.701, "eval_wer": 0.41002391231082796, "step": 5500 }, { "epoch": 3.1710079275198186, "eval_loss": 0.5722731947898865, "eval_runtime": 154.6093, "eval_samples_per_second": 36.583, "eval_steps_per_second": 4.573, "eval_wer": 0.4043587809536037, "step": 5600 }, { "epoch": 3.227633069082673, "eval_loss": 0.5630057454109192, "eval_runtime": 153.3174, "eval_samples_per_second": 36.891, "eval_steps_per_second": 4.611, "eval_wer": 0.40779316653560366, "step": 5700 }, { "epoch": 3.2842582106455267, "eval_loss": 0.5810334086418152, "eval_runtime": 153.0372, "eval_samples_per_second": 36.958, "eval_steps_per_second": 4.62, "eval_wer": 0.4191394777808092, "step": 5800 }, { "epoch": 3.3408833522083805, "eval_loss": 0.5995615720748901, "eval_runtime": 152.3384, "eval_samples_per_second": 37.128, "eval_steps_per_second": 4.641, "eval_wer": 0.4228306398549213, "step": 5900 }, { "epoch": 3.3975084937712343, "grad_norm": 9.246959686279297, "learning_rate": 0.00013373333333333332, "loss": 0.3019, "step": 6000 }, { "epoch": 3.3975084937712343, "eval_loss": 0.5681526064872742, "eval_runtime": 153.1128, "eval_samples_per_second": 36.94, "eval_steps_per_second": 4.618, "eval_wer": 0.4015502880711271, "step": 6000 }, { "epoch": 3.454133635334088, "eval_loss": 0.5560505390167236, "eval_runtime": 152.0848, "eval_samples_per_second": 37.19, "eval_steps_per_second": 4.649, "eval_wer": 0.40569080900643545, "step": 6100 }, { "epoch": 3.5107587768969424, "eval_loss": 0.5905264616012573, "eval_runtime": 152.0038, "eval_samples_per_second": 37.21, "eval_steps_per_second": 4.651, "eval_wer": 0.41458169504581854, "step": 6200 }, { "epoch": 3.567383918459796, "eval_loss": 0.5875168442726135, "eval_runtime": 151.4059, "eval_samples_per_second": 37.357, "eval_steps_per_second": 4.67, "eval_wer": 0.4190271380655101, "step": 6300 }, { "epoch": 3.62400906002265, "eval_loss": 0.5877885818481445, "eval_runtime": 150.4489, "eval_samples_per_second": 37.594, "eval_steps_per_second": 4.699, "eval_wer": 0.44462454462293977, "step": 6400 }, { "epoch": 3.680634201585504, "grad_norm": 1.084346890449524, "learning_rate": 0.00011709999999999999, "loss": 0.2944, "step": 6500 }, { "epoch": 3.680634201585504, "eval_loss": 0.5938708782196045, "eval_runtime": 150.586, "eval_samples_per_second": 37.56, "eval_steps_per_second": 4.695, "eval_wer": 0.4403877325030893, "step": 6500 }, { "epoch": 3.7372593431483576, "eval_loss": 0.590270459651947, "eval_runtime": 150.087, "eval_samples_per_second": 37.685, "eval_steps_per_second": 4.711, "eval_wer": 0.4183049541814447, "step": 6600 }, { "epoch": 3.793884484711212, "eval_loss": 0.5807533264160156, "eval_runtime": 149.6534, "eval_samples_per_second": 37.794, "eval_steps_per_second": 4.724, "eval_wer": 0.4059475854985476, "step": 6700 }, { "epoch": 3.8505096262740657, "eval_loss": 0.6154611706733704, "eval_runtime": 150.8194, "eval_samples_per_second": 37.502, "eval_steps_per_second": 4.688, "eval_wer": 0.410056009372342, "step": 6800 }, { "epoch": 3.9071347678369195, "eval_loss": 0.7987228631973267, "eval_runtime": 151.5296, "eval_samples_per_second": 37.326, "eval_steps_per_second": 4.666, "eval_wer": 0.5822727929258077, "step": 6900 }, { "epoch": 3.9637599093997737, "grad_norm": 9.350592613220215, "learning_rate": 0.0001005, "loss": 0.3918, "step": 7000 }, { "epoch": 3.9637599093997737, "eval_loss": 0.9750258326530457, "eval_runtime": 151.1967, "eval_samples_per_second": 37.408, "eval_steps_per_second": 4.676, "eval_wer": 0.5545409317776957, "step": 7000 }, { "epoch": 4.020385050962627, "eval_loss": 1.0540127754211426, "eval_runtime": 150.2681, "eval_samples_per_second": 37.639, "eval_steps_per_second": 4.705, "eval_wer": 0.5689043668052189, "step": 7100 }, { "epoch": 4.077010192525481, "eval_loss": 0.6850923299789429, "eval_runtime": 150.3993, "eval_samples_per_second": 37.607, "eval_steps_per_second": 4.701, "eval_wer": 0.4396013544959959, "step": 7200 }, { "epoch": 4.133635334088336, "eval_loss": 0.7331786155700684, "eval_runtime": 151.5017, "eval_samples_per_second": 37.333, "eval_steps_per_second": 4.667, "eval_wer": 0.49731187109820096, "step": 7300 }, { "epoch": 4.190260475651189, "eval_loss": 0.9466453194618225, "eval_runtime": 150.2633, "eval_samples_per_second": 37.641, "eval_steps_per_second": 4.705, "eval_wer": 0.6394697565437885, "step": 7400 }, { "epoch": 4.246885617214043, "grad_norm": 4.335416793823242, "learning_rate": 8.386666666666665e-05, "loss": 0.5378, "step": 7500 }, { "epoch": 4.246885617214043, "eval_loss": 0.8257068991661072, "eval_runtime": 151.0198, "eval_samples_per_second": 37.452, "eval_steps_per_second": 4.682, "eval_wer": 0.485147084784388, "step": 7500 }, { "epoch": 4.303510758776897, "eval_loss": 0.8490071296691895, "eval_runtime": 150.818, "eval_samples_per_second": 37.502, "eval_steps_per_second": 4.688, "eval_wer": 0.4867037922678179, "step": 7600 }, { "epoch": 4.360135900339751, "eval_loss": 0.8716742396354675, "eval_runtime": 150.797, "eval_samples_per_second": 37.507, "eval_steps_per_second": 4.688, "eval_wer": 0.47112066890276194, "step": 7700 }, { "epoch": 4.416761041902605, "eval_loss": 0.883883535861969, "eval_runtime": 150.6157, "eval_samples_per_second": 37.553, "eval_steps_per_second": 4.694, "eval_wer": 0.5860281491229478, "step": 7800 }, { "epoch": 4.4733861834654585, "eval_loss": 2.911261558532715, "eval_runtime": 150.8548, "eval_samples_per_second": 37.493, "eval_steps_per_second": 4.687, "eval_wer": 1.0, "step": 7900 }, { "epoch": 4.530011325028313, "grad_norm": 0.2805318534374237, "learning_rate": 6.723333333333333e-05, "loss": 1.3847, "step": 8000 }, { "epoch": 4.530011325028313, "eval_loss": 2.8575596809387207, "eval_runtime": 151.2637, "eval_samples_per_second": 37.392, "eval_steps_per_second": 4.674, "eval_wer": 1.0, "step": 8000 }, { "epoch": 4.586636466591166, "eval_loss": 2.83913516998291, "eval_runtime": 151.4914, "eval_samples_per_second": 37.335, "eval_steps_per_second": 4.667, "eval_wer": 1.0, "step": 8100 }, { "epoch": 4.64326160815402, "eval_loss": 2.840644121170044, "eval_runtime": 150.8361, "eval_samples_per_second": 37.498, "eval_steps_per_second": 4.687, "eval_wer": 1.0, "step": 8200 }, { "epoch": 4.699886749716875, "eval_loss": 2.856635093688965, "eval_runtime": 150.3023, "eval_samples_per_second": 37.631, "eval_steps_per_second": 4.704, "eval_wer": 1.0, "step": 8300 }, { "epoch": 4.756511891279728, "eval_loss": 2.8454244136810303, "eval_runtime": 150.6924, "eval_samples_per_second": 37.533, "eval_steps_per_second": 4.692, "eval_wer": 0.999823466161673, "step": 8400 }, { "epoch": 4.813137032842582, "grad_norm": 0.4468824863433838, "learning_rate": 5.06e-05, "loss": 2.8136, "step": 8500 }, { "epoch": 4.813137032842582, "eval_loss": 2.8339831829071045, "eval_runtime": 151.2056, "eval_samples_per_second": 37.406, "eval_steps_per_second": 4.676, "eval_wer": 0.999919757346215, "step": 8500 }, { "epoch": 4.869762174405436, "eval_loss": 2.836662530899048, "eval_runtime": 152.389, "eval_samples_per_second": 37.116, "eval_steps_per_second": 4.639, "eval_wer": 0.999935805876972, "step": 8600 }, { "epoch": 4.92638731596829, "eval_loss": 2.833369016647339, "eval_runtime": 151.3614, "eval_samples_per_second": 37.368, "eval_steps_per_second": 4.671, "eval_wer": 0.9998876602847009, "step": 8700 }, { "epoch": 4.983012457531144, "eval_loss": 2.8321285247802734, "eval_runtime": 157.5502, "eval_samples_per_second": 35.9, "eval_steps_per_second": 4.487, "eval_wer": 0.999935805876972, "step": 8800 }, { "epoch": 5.0396375990939974, "eval_loss": 2.802509307861328, "eval_runtime": 151.5865, "eval_samples_per_second": 37.312, "eval_steps_per_second": 4.664, "eval_wer": 0.9982186130859719, "step": 8900 }, { "epoch": 5.096262740656852, "grad_norm": 0.3411979377269745, "learning_rate": 3.393333333333333e-05, "loss": 2.8007, "step": 9000 }, { "epoch": 5.096262740656852, "eval_loss": 2.8024423122406006, "eval_runtime": 151.6682, "eval_samples_per_second": 37.292, "eval_steps_per_second": 4.661, "eval_wer": 0.9974964292019066, "step": 9000 }, { "epoch": 5.152887882219706, "eval_loss": 2.8043477535247803, "eval_runtime": 153.1811, "eval_samples_per_second": 36.924, "eval_steps_per_second": 4.615, "eval_wer": 0.9981062733706729, "step": 9100 }, { "epoch": 5.209513023782559, "eval_loss": 2.810584783554077, "eval_runtime": 151.6694, "eval_samples_per_second": 37.292, "eval_steps_per_second": 4.661, "eval_wer": 0.9992457190544206, "step": 9200 }, { "epoch": 5.266138165345414, "eval_loss": 2.8066723346710205, "eval_runtime": 151.6427, "eval_samples_per_second": 37.298, "eval_steps_per_second": 4.662, "eval_wer": 0.9993259617082056, "step": 9300 }, { "epoch": 5.322763306908267, "eval_loss": 2.805284023284912, "eval_runtime": 151.8566, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.656, "eval_wer": 0.9985877292933832, "step": 9400 }, { "epoch": 5.379388448471121, "grad_norm": 0.15991327166557312, "learning_rate": 1.7299999999999997e-05, "loss": 2.7935, "step": 9500 }, { "epoch": 5.379388448471121, "eval_loss": 2.807739734649658, "eval_runtime": 152.8905, "eval_samples_per_second": 36.994, "eval_steps_per_second": 4.624, "eval_wer": 0.9978173998170468, "step": 9500 }, { "epoch": 5.436013590033975, "eval_loss": 2.8082854747772217, "eval_runtime": 151.7494, "eval_samples_per_second": 37.272, "eval_steps_per_second": 4.659, "eval_wer": 0.9987161175394392, "step": 9600 }, { "epoch": 5.492638731596829, "eval_loss": 2.8080484867095947, "eval_runtime": 151.6292, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.663, "eval_wer": 0.9988926513777664, "step": 9700 }, { "epoch": 5.549263873159683, "eval_loss": 2.808607578277588, "eval_runtime": 153.2376, "eval_samples_per_second": 36.91, "eval_steps_per_second": 4.614, "eval_wer": 0.9986037778241402, "step": 9800 }, { "epoch": 5.605889014722536, "eval_loss": 2.8079216480255127, "eval_runtime": 151.6181, "eval_samples_per_second": 37.304, "eval_steps_per_second": 4.663, "eval_wer": 0.998186516024458, "step": 9900 }, { "epoch": 5.662514156285391, "grad_norm": 0.3985973000526428, "learning_rate": 7e-07, "loss": 2.7861, "step": 10000 }, { "epoch": 5.662514156285391, "eval_loss": 2.807446241378784, "eval_runtime": 151.2977, "eval_samples_per_second": 37.383, "eval_steps_per_second": 4.673, "eval_wer": 0.998266758678243, "step": 10000 }, { "epoch": 5.662514156285391, "step": 10000, "total_flos": 9.138710330328565e+19, "train_loss": 1.1088516311645509, "train_runtime": 39423.4073, "train_samples_per_second": 16.234, "train_steps_per_second": 0.254 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 400, "total_flos": 9.138710330328565e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }