{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0726161106939827, "eval_steps": 200, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021452322213879653, "eval_loss": Infinity, "eval_runtime": 109.0854, "eval_samples_per_second": 35.761, "eval_steps_per_second": 0.559, "eval_wer": 0.5591701685746027, "step": 200 }, { "epoch": 0.042904644427759306, "eval_loss": Infinity, "eval_runtime": 105.5961, "eval_samples_per_second": 36.943, "eval_steps_per_second": 0.578, "eval_wer": 0.4289474955320485, "step": 400 }, { "epoch": 0.05363080553469913, "grad_norm": 2.6033225059509277, "learning_rate": 0.0001494, "loss": 2.1964, "step": 500 }, { "epoch": 0.06435696664163895, "eval_loss": Infinity, "eval_runtime": 104.8581, "eval_samples_per_second": 37.203, "eval_steps_per_second": 0.582, "eval_wer": 0.43744867893542, "step": 600 }, { "epoch": 0.08580928885551861, "eval_loss": Infinity, "eval_runtime": 104.5471, "eval_samples_per_second": 37.313, "eval_steps_per_second": 0.583, "eval_wer": 0.4944452494807516, "step": 800 }, { "epoch": 0.10726161106939826, "grad_norm": 4.917770862579346, "learning_rate": 0.0002988, "loss": 0.8327, "step": 1000 }, { "epoch": 0.10726161106939826, "eval_loss": Infinity, "eval_runtime": 104.2475, "eval_samples_per_second": 37.421, "eval_steps_per_second": 0.585, "eval_wer": 0.5149736753127566, "step": 1000 }, { "epoch": 0.1287139332832779, "eval_loss": Infinity, "eval_runtime": 104.0588, "eval_samples_per_second": 37.488, "eval_steps_per_second": 0.586, "eval_wer": 0.5633966091870743, "step": 1200 }, { "epoch": 0.15016625549715756, "eval_loss": Infinity, "eval_runtime": 104.6247, "eval_samples_per_second": 37.286, "eval_steps_per_second": 0.583, "eval_wer": 0.5355021011447616, "step": 1400 }, { "epoch": 0.1608924166040974, "grad_norm": 2.821734666824341, "learning_rate": 0.00028346666666666665, "loss": 0.91, "step": 1500 }, { "epoch": 0.17161857771103722, "eval_loss": Infinity, "eval_runtime": 104.8778, "eval_samples_per_second": 37.196, "eval_steps_per_second": 0.582, "eval_wer": 0.515239337294112, "step": 1600 }, { "epoch": 0.19307089992491688, "eval_loss": Infinity, "eval_runtime": 105.4509, "eval_samples_per_second": 36.994, "eval_steps_per_second": 0.578, "eval_wer": 0.5594599816451722, "step": 1800 }, { "epoch": 0.21452322213879652, "grad_norm": 9.015162467956543, "learning_rate": 0.0002668, "loss": 0.8721, "step": 2000 }, { "epoch": 0.21452322213879652, "eval_loss": Infinity, "eval_runtime": 105.3779, "eval_samples_per_second": 37.019, "eval_steps_per_second": 0.579, "eval_wer": 0.5056513548761049, "step": 2000 }, { "epoch": 0.23597554435267618, "eval_loss": Infinity, "eval_runtime": 105.6302, "eval_samples_per_second": 36.931, "eval_steps_per_second": 0.577, "eval_wer": 0.5041298362556151, "step": 2200 }, { "epoch": 0.2574278665665558, "eval_loss": Infinity, "eval_runtime": 105.5892, "eval_samples_per_second": 36.945, "eval_steps_per_second": 0.578, "eval_wer": 0.5145631067961165, "step": 2400 }, { "epoch": 0.26815402767349567, "grad_norm": 5.016167163848877, "learning_rate": 0.0002501333333333333, "loss": 0.8218, "step": 2500 }, { "epoch": 0.27888018878043547, "eval_loss": Infinity, "eval_runtime": 106.4951, "eval_samples_per_second": 36.631, "eval_steps_per_second": 0.573, "eval_wer": 0.5018113316910593, "step": 2600 }, { "epoch": 0.3003325109943151, "eval_loss": Infinity, "eval_runtime": 106.2902, "eval_samples_per_second": 36.701, "eval_steps_per_second": 0.574, "eval_wer": 0.5090566584552964, "step": 2800 }, { "epoch": 0.3217848332081948, "grad_norm": 2.5943267345428467, "learning_rate": 0.00023346666666666666, "loss": 0.8469, "step": 3000 }, { "epoch": 0.3217848332081948, "eval_loss": Infinity, "eval_runtime": 105.8685, "eval_samples_per_second": 36.848, "eval_steps_per_second": 0.576, "eval_wer": 0.5036709655605468, "step": 3000 }, { "epoch": 0.34323715542207445, "eval_loss": Infinity, "eval_runtime": 105.9311, "eval_samples_per_second": 36.826, "eval_steps_per_second": 0.576, "eval_wer": 0.4703183113558421, "step": 3200 }, { "epoch": 0.3646894776359541, "eval_loss": Infinity, "eval_runtime": 105.4279, "eval_samples_per_second": 37.002, "eval_steps_per_second": 0.579, "eval_wer": 0.47951987634642323, "step": 3400 }, { "epoch": 0.3754156387428939, "grad_norm": 4.555402755737305, "learning_rate": 0.0002168333333333333, "loss": 0.8142, "step": 3500 }, { "epoch": 0.38614179984983377, "eval_loss": Infinity, "eval_runtime": 105.6085, "eval_samples_per_second": 36.938, "eval_steps_per_second": 0.578, "eval_wer": 0.4714051103704777, "step": 3600 }, { "epoch": 0.40759412206371337, "eval_loss": Infinity, "eval_runtime": 105.5848, "eval_samples_per_second": 36.947, "eval_steps_per_second": 0.578, "eval_wer": 0.4553929382215138, "step": 3800 }, { "epoch": 0.42904644427759303, "grad_norm": 15.551188468933105, "learning_rate": 0.0002002, "loss": 0.8085, "step": 4000 }, { "epoch": 0.42904644427759303, "eval_loss": Infinity, "eval_runtime": 105.8874, "eval_samples_per_second": 36.841, "eval_steps_per_second": 0.576, "eval_wer": 0.4505868714679032, "step": 4000 }, { "epoch": 0.4504987664914727, "eval_loss": Infinity, "eval_runtime": 107.6738, "eval_samples_per_second": 36.23, "eval_steps_per_second": 0.567, "eval_wer": 0.4457566536250785, "step": 4200 }, { "epoch": 0.47195108870535235, "eval_loss": Infinity, "eval_runtime": 105.5778, "eval_samples_per_second": 36.949, "eval_steps_per_second": 0.578, "eval_wer": 0.43669999516978214, "step": 4400 }, { "epoch": 0.4826772498122922, "grad_norm": 4.841684818267822, "learning_rate": 0.0001835333333333333, "loss": 0.7802, "step": 4500 }, { "epoch": 0.493403410919232, "eval_loss": Infinity, "eval_runtime": 105.9573, "eval_samples_per_second": 36.817, "eval_steps_per_second": 0.576, "eval_wer": 0.4401052987489736, "step": 4600 }, { "epoch": 0.5148557331331116, "eval_loss": Infinity, "eval_runtime": 105.3523, "eval_samples_per_second": 37.028, "eval_steps_per_second": 0.579, "eval_wer": 0.43336714485823313, "step": 4800 }, { "epoch": 0.5363080553469913, "grad_norm": 7.372885227203369, "learning_rate": 0.0001669, "loss": 0.7493, "step": 5000 }, { "epoch": 0.5363080553469913, "eval_loss": Infinity, "eval_runtime": 107.6356, "eval_samples_per_second": 36.243, "eval_steps_per_second": 0.567, "eval_wer": 0.4224267014442351, "step": 5000 }, { "epoch": 0.5577603775608709, "eval_loss": Infinity, "eval_runtime": 107.4854, "eval_samples_per_second": 36.293, "eval_steps_per_second": 0.568, "eval_wer": 0.43278751871709414, "step": 5200 }, { "epoch": 0.5792126997747507, "eval_loss": Infinity, "eval_runtime": 105.275, "eval_samples_per_second": 37.055, "eval_steps_per_second": 0.579, "eval_wer": 0.41764478577983866, "step": 5400 }, { "epoch": 0.5899388608816905, "grad_norm": 2.7961230278015137, "learning_rate": 0.00015023333333333332, "loss": 0.7668, "step": 5500 }, { "epoch": 0.6006650219886303, "eval_loss": Infinity, "eval_runtime": 105.464, "eval_samples_per_second": 36.989, "eval_steps_per_second": 0.578, "eval_wer": 0.41829686518862, "step": 5600 }, { "epoch": 0.62211734420251, "eval_loss": Infinity, "eval_runtime": 104.96, "eval_samples_per_second": 37.167, "eval_steps_per_second": 0.581, "eval_wer": 0.40296092353765156, "step": 5800 }, { "epoch": 0.6435696664163896, "grad_norm": 6.007960319519043, "learning_rate": 0.0001336, "loss": 0.6999, "step": 6000 }, { "epoch": 0.6435696664163896, "eval_loss": Infinity, "eval_runtime": 104.9116, "eval_samples_per_second": 37.184, "eval_steps_per_second": 0.581, "eval_wer": 0.4124523015988021, "step": 6000 }, { "epoch": 0.6650219886302692, "eval_loss": Infinity, "eval_runtime": 108.5507, "eval_samples_per_second": 35.937, "eval_steps_per_second": 0.562, "eval_wer": 0.40759793266676325, "step": 6200 }, { "epoch": 0.6864743108441489, "eval_loss": Infinity, "eval_runtime": 104.9858, "eval_samples_per_second": 37.157, "eval_steps_per_second": 0.581, "eval_wer": 0.39170651596386996, "step": 6400 }, { "epoch": 0.6972004719510887, "grad_norm": 44.30250549316406, "learning_rate": 0.00011693333333333332, "loss": 0.6918, "step": 6500 }, { "epoch": 0.7079266330580285, "eval_loss": Infinity, "eval_runtime": 106.5414, "eval_samples_per_second": 36.615, "eval_steps_per_second": 0.573, "eval_wer": 0.4004250591701686, "step": 6600 }, { "epoch": 0.7293789552719082, "eval_loss": Infinity, "eval_runtime": 104.9171, "eval_samples_per_second": 37.182, "eval_steps_per_second": 0.581, "eval_wer": 0.38653818287204755, "step": 6800 }, { "epoch": 0.7508312774857878, "grad_norm": 3.788344144821167, "learning_rate": 0.00010029999999999998, "loss": 0.6888, "step": 7000 }, { "epoch": 0.7508312774857878, "eval_loss": Infinity, "eval_runtime": 105.3057, "eval_samples_per_second": 37.045, "eval_steps_per_second": 0.579, "eval_wer": 0.3785200212529585, "step": 7000 }, { "epoch": 0.7722835996996675, "eval_loss": Infinity, "eval_runtime": 104.7325, "eval_samples_per_second": 37.247, "eval_steps_per_second": 0.582, "eval_wer": 0.3824083466164324, "step": 7200 }, { "epoch": 0.7937359219135471, "eval_loss": Infinity, "eval_runtime": 105.0488, "eval_samples_per_second": 37.135, "eval_steps_per_second": 0.581, "eval_wer": 0.37426942955127274, "step": 7400 }, { "epoch": 0.8044620830204869, "grad_norm": 5.486635684967041, "learning_rate": 8.363333333333332e-05, "loss": 0.646, "step": 7500 }, { "epoch": 0.8151882441274267, "eval_loss": Infinity, "eval_runtime": 106.6481, "eval_samples_per_second": 36.578, "eval_steps_per_second": 0.572, "eval_wer": 0.3673139158576052, "step": 7600 }, { "epoch": 0.8366405663413065, "eval_loss": Infinity, "eval_runtime": 104.8533, "eval_samples_per_second": 37.204, "eval_steps_per_second": 0.582, "eval_wer": 0.36668598753803794, "step": 7800 }, { "epoch": 0.8580928885551861, "grad_norm": 3.9184212684631348, "learning_rate": 6.696666666666666e-05, "loss": 0.6324, "step": 8000 }, { "epoch": 0.8580928885551861, "eval_loss": Infinity, "eval_runtime": 105.6572, "eval_samples_per_second": 36.921, "eval_steps_per_second": 0.577, "eval_wer": 0.3661546635753272, "step": 8000 }, { "epoch": 0.8795452107690658, "eval_loss": Infinity, "eval_runtime": 104.6274, "eval_samples_per_second": 37.285, "eval_steps_per_second": 0.583, "eval_wer": 0.36009274018258225, "step": 8200 }, { "epoch": 0.9009975329829454, "eval_loss": Infinity, "eval_runtime": 104.9439, "eval_samples_per_second": 37.172, "eval_steps_per_second": 0.581, "eval_wer": 0.35345119064869823, "step": 8400 }, { "epoch": 0.9117236940898852, "grad_norm": 3.5586395263671875, "learning_rate": 5.0299999999999996e-05, "loss": 0.6221, "step": 8500 }, { "epoch": 0.9224498551968251, "eval_loss": Infinity, "eval_runtime": 105.157, "eval_samples_per_second": 37.097, "eval_steps_per_second": 0.58, "eval_wer": 0.35258175143698983, "step": 8600 }, { "epoch": 0.9439021774107047, "eval_loss": Infinity, "eval_runtime": 105.1867, "eval_samples_per_second": 37.086, "eval_steps_per_second": 0.58, "eval_wer": 0.34874172825194416, "step": 8800 }, { "epoch": 0.9653544996245844, "grad_norm": 3.914166212081909, "learning_rate": 3.363333333333333e-05, "loss": 0.6215, "step": 9000 }, { "epoch": 0.9653544996245844, "eval_loss": Infinity, "eval_runtime": 105.9335, "eval_samples_per_second": 36.825, "eval_steps_per_second": 0.576, "eval_wer": 0.34811379993237695, "step": 9000 }, { "epoch": 0.986806821838464, "eval_loss": Infinity, "eval_runtime": 105.4283, "eval_samples_per_second": 37.001, "eval_steps_per_second": 0.579, "eval_wer": 0.3447084963531855, "step": 9200 }, { "epoch": 1.0082591440523436, "eval_loss": Infinity, "eval_runtime": 105.2731, "eval_samples_per_second": 37.056, "eval_steps_per_second": 0.579, "eval_wer": 0.34103753079263877, "step": 9400 }, { "epoch": 1.0189853051592834, "grad_norm": 3.3699042797088623, "learning_rate": 1.6966666666666665e-05, "loss": 0.5603, "step": 9500 }, { "epoch": 1.0297114662662232, "eval_loss": Infinity, "eval_runtime": 105.4958, "eval_samples_per_second": 36.978, "eval_steps_per_second": 0.578, "eval_wer": 0.34053035791914216, "step": 9600 }, { "epoch": 1.051163788480103, "eval_loss": Infinity, "eval_runtime": 105.0872, "eval_samples_per_second": 37.122, "eval_steps_per_second": 0.58, "eval_wer": 0.34120658841713764, "step": 9800 }, { "epoch": 1.0726161106939827, "grad_norm": 6.575745582580566, "learning_rate": 3.333333333333333e-07, "loss": 0.5284, "step": 10000 }, { "epoch": 1.0726161106939827, "eval_loss": Infinity, "eval_runtime": 108.9867, "eval_samples_per_second": 35.793, "eval_steps_per_second": 0.56, "eval_wer": 0.3401922426701444, "step": 10000 }, { "epoch": 1.0726161106939827, "step": 10000, "total_flos": 4.496412338111517e+18, "train_loss": 0.8045109680175782, "train_runtime": 7510.9356, "train_samples_per_second": 5.326, "train_steps_per_second": 1.331 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.496412338111517e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }