{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.3166023166023164, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07722007722007722, "eval_loss": Infinity, "eval_runtime": 199.2963, "eval_samples_per_second": 35.234, "eval_steps_per_second": 4.406, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.15444015444015444, "eval_loss": Infinity, "eval_runtime": 187.0234, "eval_samples_per_second": 37.546, "eval_steps_per_second": 4.695, "eval_wer": 0.9937672860426162, "step": 400 }, { "epoch": 0.19305019305019305, "grad_norm": 4.210392951965332, "learning_rate": 0.00029699999999999996, "loss": 3.9317, "step": 500 }, { "epoch": 0.23166023166023167, "eval_loss": Infinity, "eval_runtime": 180.1705, "eval_samples_per_second": 38.974, "eval_steps_per_second": 4.873, "eval_wer": 0.7576253359822368, "step": 600 }, { "epoch": 0.3088803088803089, "eval_loss": Infinity, "eval_runtime": 179.0087, "eval_samples_per_second": 39.227, "eval_steps_per_second": 4.905, "eval_wer": 0.6941685170036228, "step": 800 }, { "epoch": 0.3861003861003861, "grad_norm": 3.149083137512207, "learning_rate": 0.0002730545454545454, "loss": 0.9699, "step": 1000 }, { "epoch": 0.3861003861003861, "eval_loss": Infinity, "eval_runtime": 179.6063, "eval_samples_per_second": 39.097, "eval_steps_per_second": 4.888, "eval_wer": 0.5762923142846013, "step": 1000 }, { "epoch": 0.46332046332046334, "eval_loss": Infinity, "eval_runtime": 180.175, "eval_samples_per_second": 38.973, "eval_steps_per_second": 4.873, "eval_wer": 0.5518548816433589, "step": 1200 }, { "epoch": 0.5405405405405406, "eval_loss": Infinity, "eval_runtime": 180.7382, "eval_samples_per_second": 38.852, "eval_steps_per_second": 4.858, "eval_wer": 0.5173931673873242, "step": 1400 }, { "epoch": 0.5791505791505791, "grad_norm": 4.50093412399292, "learning_rate": 0.0002458363636363636, "loss": 0.8031, "step": 1500 }, { "epoch": 0.6177606177606177, "eval_loss": Infinity, "eval_runtime": 180.2438, "eval_samples_per_second": 38.958, "eval_steps_per_second": 4.871, "eval_wer": 0.5338059808084349, "step": 1600 }, { "epoch": 0.694980694980695, "eval_loss": Infinity, "eval_runtime": 180.7932, "eval_samples_per_second": 38.84, "eval_steps_per_second": 4.856, "eval_wer": 0.47772454001272513, "step": 1800 }, { "epoch": 0.7722007722007722, "grad_norm": 3.7705626487731934, "learning_rate": 0.0002186181818181818, "loss": 0.7169, "step": 2000 }, { "epoch": 0.7722007722007722, "eval_loss": Infinity, "eval_runtime": 179.9877, "eval_samples_per_second": 39.014, "eval_steps_per_second": 4.878, "eval_wer": 0.45044343162842637, "step": 2000 }, { "epoch": 0.8494208494208494, "eval_loss": Infinity, "eval_runtime": 182.9186, "eval_samples_per_second": 38.389, "eval_steps_per_second": 4.8, "eval_wer": 0.4499500084401335, "step": 2200 }, { "epoch": 0.9266409266409267, "eval_loss": Infinity, "eval_runtime": 180.2372, "eval_samples_per_second": 38.96, "eval_steps_per_second": 4.871, "eval_wer": 0.4431719320114786, "step": 2400 }, { "epoch": 0.9652509652509652, "grad_norm": 3.5582635402679443, "learning_rate": 0.00019134545454545454, "loss": 0.6687, "step": 2500 }, { "epoch": 1.0038610038610039, "eval_loss": Infinity, "eval_runtime": 180.3998, "eval_samples_per_second": 38.925, "eval_steps_per_second": 4.867, "eval_wer": 0.4175658655032267, "step": 2600 }, { "epoch": 1.0810810810810811, "eval_loss": Infinity, "eval_runtime": 180.1365, "eval_samples_per_second": 38.982, "eval_steps_per_second": 4.874, "eval_wer": 0.40537311882409466, "step": 2800 }, { "epoch": 1.1583011583011582, "grad_norm": 0.5179678201675415, "learning_rate": 0.0001641272727272727, "loss": 0.5609, "step": 3000 }, { "epoch": 1.1583011583011582, "eval_loss": Infinity, "eval_runtime": 180.8398, "eval_samples_per_second": 38.83, "eval_steps_per_second": 4.855, "eval_wer": 0.4009193253087141, "step": 3000 }, { "epoch": 1.2355212355212355, "eval_loss": Infinity, "eval_runtime": 181.6098, "eval_samples_per_second": 38.665, "eval_steps_per_second": 4.835, "eval_wer": 0.4022567618454027, "step": 3200 }, { "epoch": 1.3127413127413128, "eval_loss": Infinity, "eval_runtime": 182.1259, "eval_samples_per_second": 38.556, "eval_steps_per_second": 4.821, "eval_wer": 0.39188189007050755, "step": 3400 }, { "epoch": 1.3513513513513513, "grad_norm": 0.5364826321601868, "learning_rate": 0.00013696363636363636, "loss": 0.5324, "step": 3500 }, { "epoch": 1.3899613899613898, "eval_loss": Infinity, "eval_runtime": 181.3225, "eval_samples_per_second": 38.727, "eval_steps_per_second": 4.842, "eval_wer": 0.3794813862594627, "step": 3600 }, { "epoch": 1.4671814671814671, "eval_loss": Infinity, "eval_runtime": 181.5845, "eval_samples_per_second": 38.671, "eval_steps_per_second": 4.835, "eval_wer": 0.37523534987599494, "step": 3800 }, { "epoch": 1.5444015444015444, "grad_norm": 1.197091817855835, "learning_rate": 0.00010974545454545454, "loss": 0.5196, "step": 4000 }, { "epoch": 1.5444015444015444, "eval_loss": Infinity, "eval_runtime": 181.3235, "eval_samples_per_second": 38.726, "eval_steps_per_second": 4.842, "eval_wer": 0.36617194499629935, "step": 4000 }, { "epoch": 1.6216216216216215, "eval_loss": Infinity, "eval_runtime": 181.9653, "eval_samples_per_second": 38.59, "eval_steps_per_second": 4.825, "eval_wer": 0.37034007245529976, "step": 4200 }, { "epoch": 1.698841698841699, "eval_loss": Infinity, "eval_runtime": 181.5241, "eval_samples_per_second": 38.684, "eval_steps_per_second": 4.837, "eval_wer": 0.3613935309623051, "step": 4400 }, { "epoch": 1.7374517374517375, "grad_norm": 0.7301501631736755, "learning_rate": 8.247272727272728e-05, "loss": 0.4967, "step": 4500 }, { "epoch": 1.776061776061776, "eval_loss": Infinity, "eval_runtime": 181.6896, "eval_samples_per_second": 38.648, "eval_steps_per_second": 4.832, "eval_wer": 0.3530313064028151, "step": 4600 }, { "epoch": 1.8532818532818531, "eval_loss": Infinity, "eval_runtime": 184.0459, "eval_samples_per_second": 38.154, "eval_steps_per_second": 4.771, "eval_wer": 0.34805812005765263, "step": 4800 }, { "epoch": 1.9305019305019306, "grad_norm": 1.2918003797531128, "learning_rate": 5.519999999999999e-05, "loss": 0.4735, "step": 5000 }, { "epoch": 1.9305019305019306, "eval_loss": Infinity, "eval_runtime": 182.1737, "eval_samples_per_second": 38.546, "eval_steps_per_second": 4.82, "eval_wer": 0.35057717528209525, "step": 5000 }, { "epoch": 2.0077220077220077, "eval_loss": Infinity, "eval_runtime": 182.5384, "eval_samples_per_second": 38.469, "eval_steps_per_second": 4.81, "eval_wer": 0.3432277667406801, "step": 5200 }, { "epoch": 2.0849420849420848, "eval_loss": Infinity, "eval_runtime": 182.9254, "eval_samples_per_second": 38.387, "eval_steps_per_second": 4.8, "eval_wer": 0.33689117421733994, "step": 5400 }, { "epoch": 2.1235521235521237, "grad_norm": 0.6086732745170593, "learning_rate": 2.7927272727272724e-05, "loss": 0.4244, "step": 5500 }, { "epoch": 2.1621621621621623, "eval_loss": Infinity, "eval_runtime": 182.4452, "eval_samples_per_second": 38.488, "eval_steps_per_second": 4.812, "eval_wer": 0.32959370495890306, "step": 5600 }, { "epoch": 2.2393822393822393, "eval_loss": Infinity, "eval_runtime": 182.1862, "eval_samples_per_second": 38.543, "eval_steps_per_second": 4.819, "eval_wer": 0.32954176567592486, "step": 5800 }, { "epoch": 2.3166023166023164, "grad_norm": 1.0216798782348633, "learning_rate": 6.545454545454546e-07, "loss": 0.3674, "step": 6000 }, { "epoch": 2.3166023166023164, "eval_loss": Infinity, "eval_runtime": 182.1846, "eval_samples_per_second": 38.543, "eval_steps_per_second": 4.819, "eval_wer": 0.3289055094594419, "step": 6000 }, { "epoch": 2.3166023166023164, "step": 6000, "total_flos": 1.261526897313927e+19, "train_loss": 0.8721037826538086, "train_runtime": 9008.7975, "train_samples_per_second": 10.656, "train_steps_per_second": 0.666 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.261526897313927e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }