{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.722007722007722, "eval_steps": 200, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15444015444015444, "eval_loss": Infinity, "eval_runtime": 214.0401, "eval_samples_per_second": 32.807, "eval_steps_per_second": 4.102, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.3088803088803089, "eval_loss": Infinity, "eval_runtime": 192.4639, "eval_samples_per_second": 36.485, "eval_steps_per_second": 4.562, "eval_wer": 0.8748003583810525, "step": 400 }, { "epoch": 0.3861003861003861, "grad_norm": 3.6337695121765137, "learning_rate": 0.00029699999999999996, "loss": 3.7622, "step": 500 }, { "epoch": 0.46332046332046334, "eval_loss": Infinity, "eval_runtime": 193.7023, "eval_samples_per_second": 36.251, "eval_steps_per_second": 4.533, "eval_wer": 0.6793917909963253, "step": 600 }, { "epoch": 0.6177606177606177, "eval_loss": Infinity, "eval_runtime": 193.7574, "eval_samples_per_second": 36.241, "eval_steps_per_second": 4.531, "eval_wer": 0.5749289081064236, "step": 800 }, { "epoch": 0.7722007722007722, "grad_norm": 2.0483286380767822, "learning_rate": 0.00028443157894736843, "loss": 0.8615, "step": 1000 }, { "epoch": 0.7722007722007722, "eval_loss": Infinity, "eval_runtime": 193.3223, "eval_samples_per_second": 36.323, "eval_steps_per_second": 4.542, "eval_wer": 0.5193149208575175, "step": 1000 }, { "epoch": 0.9266409266409267, "eval_loss": Infinity, "eval_runtime": 193.499, "eval_samples_per_second": 36.29, "eval_steps_per_second": 4.537, "eval_wer": 0.5194317842442185, "step": 1200 }, { "epoch": 1.0810810810810811, "eval_loss": Infinity, "eval_runtime": 194.7994, "eval_samples_per_second": 36.047, "eval_steps_per_second": 4.507, "eval_wer": 0.4780491605313389, "step": 1400 }, { "epoch": 1.1583011583011582, "grad_norm": 0.5093265175819397, "learning_rate": 0.00026864210526315787, "loss": 0.6742, "step": 1500 }, { "epoch": 1.2355212355212355, "eval_loss": Infinity, "eval_runtime": 193.9458, "eval_samples_per_second": 36.206, "eval_steps_per_second": 4.527, "eval_wer": 0.4446911560385909, "step": 1600 }, { "epoch": 1.3899613899613898, "eval_loss": Infinity, "eval_runtime": 194.4956, "eval_samples_per_second": 36.104, "eval_steps_per_second": 4.514, "eval_wer": 0.42839520600418113, "step": 1800 }, { "epoch": 1.5444015444015444, "grad_norm": 0.8163366317749023, "learning_rate": 0.00025288421052631577, "loss": 0.5813, "step": 2000 }, { "epoch": 1.5444015444015444, "eval_loss": Infinity, "eval_runtime": 196.3165, "eval_samples_per_second": 35.769, "eval_steps_per_second": 4.472, "eval_wer": 0.4189552413228935, "step": 2000 }, { "epoch": 1.698841698841699, "eval_loss": Infinity, "eval_runtime": 194.273, "eval_samples_per_second": 36.145, "eval_steps_per_second": 4.519, "eval_wer": 0.4159687325516471, "step": 2200 }, { "epoch": 1.8532818532818531, "eval_loss": Infinity, "eval_runtime": 194.413, "eval_samples_per_second": 36.119, "eval_steps_per_second": 4.516, "eval_wer": 0.41259267915806425, "step": 2400 }, { "epoch": 1.9305019305019306, "grad_norm": 0.7404251098632812, "learning_rate": 0.00023718947368421052, "loss": 0.568, "step": 2500 }, { "epoch": 2.0077220077220077, "eval_loss": Infinity, "eval_runtime": 194.5283, "eval_samples_per_second": 36.098, "eval_steps_per_second": 4.513, "eval_wer": 0.3879085349226754, "step": 2600 }, { "epoch": 2.1621621621621623, "eval_loss": Infinity, "eval_runtime": 194.5649, "eval_samples_per_second": 36.091, "eval_steps_per_second": 4.513, "eval_wer": 0.3813382156259333, "step": 2800 }, { "epoch": 2.3166023166023164, "grad_norm": 0.8203113079071045, "learning_rate": 0.0002214315789473684, "loss": 0.4909, "step": 3000 }, { "epoch": 2.3166023166023164, "eval_loss": Infinity, "eval_runtime": 195.9563, "eval_samples_per_second": 35.835, "eval_steps_per_second": 4.481, "eval_wer": 0.38915507771415214, "step": 3000 }, { "epoch": 2.471042471042471, "eval_loss": Infinity, "eval_runtime": 195.0882, "eval_samples_per_second": 35.994, "eval_steps_per_second": 4.501, "eval_wer": 0.4131899809123135, "step": 3200 }, { "epoch": 2.6254826254826256, "eval_loss": Infinity, "eval_runtime": 195.2925, "eval_samples_per_second": 35.956, "eval_steps_per_second": 4.496, "eval_wer": 0.5341825406100269, "step": 3400 }, { "epoch": 2.7027027027027026, "grad_norm": 13.376907348632812, "learning_rate": 0.0002057052631578947, "loss": 0.5703, "step": 3500 }, { "epoch": 2.7799227799227797, "eval_loss": Infinity, "eval_runtime": 196.3521, "eval_samples_per_second": 35.762, "eval_steps_per_second": 4.472, "eval_wer": 0.5748769688234454, "step": 3600 }, { "epoch": 2.9343629343629343, "eval_loss": Infinity, "eval_runtime": 195.7385, "eval_samples_per_second": 35.874, "eval_steps_per_second": 4.486, "eval_wer": 0.7368366379702128, "step": 3800 }, { "epoch": 3.088803088803089, "grad_norm": 6.712296485900879, "learning_rate": 0.00018994736842105263, "loss": 1.1938, "step": 4000 }, { "epoch": 3.088803088803089, "eval_loss": Infinity, "eval_runtime": 195.4685, "eval_samples_per_second": 35.924, "eval_steps_per_second": 4.492, "eval_wer": 0.83566410865698, "step": 4000 }, { "epoch": 3.2432432432432434, "eval_loss": Infinity, "eval_runtime": 196.3276, "eval_samples_per_second": 35.767, "eval_steps_per_second": 4.472, "eval_wer": 0.81303156609923, "step": 4200 }, { "epoch": 3.3976833976833976, "eval_loss": Infinity, "eval_runtime": 196.3772, "eval_samples_per_second": 35.758, "eval_steps_per_second": 4.471, "eval_wer": 0.6570319296742109, "step": 4400 }, { "epoch": 3.474903474903475, "grad_norm": 1.9403835535049438, "learning_rate": 0.0001741894736842105, "loss": 1.1654, "step": 4500 }, { "epoch": 3.552123552123552, "eval_loss": Infinity, "eval_runtime": 195.2624, "eval_samples_per_second": 35.962, "eval_steps_per_second": 4.497, "eval_wer": 0.5719294145144326, "step": 4600 }, { "epoch": 3.7065637065637067, "eval_loss": Infinity, "eval_runtime": 195.7006, "eval_samples_per_second": 35.881, "eval_steps_per_second": 4.486, "eval_wer": 0.6034176048199654, "step": 4800 }, { "epoch": 3.861003861003861, "grad_norm": 5.047078609466553, "learning_rate": 0.0001584315789473684, "loss": 1.0408, "step": 5000 }, { "epoch": 3.861003861003861, "eval_loss": Infinity, "eval_runtime": 196.2029, "eval_samples_per_second": 35.789, "eval_steps_per_second": 4.475, "eval_wer": 0.6953371508706322, "step": 5000 }, { "epoch": 4.015444015444015, "eval_loss": Infinity, "eval_runtime": 196.4203, "eval_samples_per_second": 35.75, "eval_steps_per_second": 4.47, "eval_wer": 0.998039292067573, "step": 5200 }, { "epoch": 4.1698841698841695, "eval_loss": Infinity, "eval_runtime": 197.5397, "eval_samples_per_second": 35.547, "eval_steps_per_second": 4.445, "eval_wer": 0.9477101268616986, "step": 5400 }, { "epoch": 4.2471042471042475, "grad_norm": 0.5360209941864014, "learning_rate": 0.00014270526315789472, "loss": 1.7263, "step": 5500 }, { "epoch": 4.324324324324325, "eval_loss": Infinity, "eval_runtime": 196.1748, "eval_samples_per_second": 35.795, "eval_steps_per_second": 4.476, "eval_wer": 0.9963123109085479, "step": 5600 }, { "epoch": 4.478764478764479, "eval_loss": Infinity, "eval_runtime": 197.0555, "eval_samples_per_second": 35.635, "eval_steps_per_second": 4.456, "eval_wer": 0.9998311973303209, "step": 5800 }, { "epoch": 4.633204633204633, "grad_norm": 0.9715490341186523, "learning_rate": 0.0001269157894736842, "loss": 2.8212, "step": 6000 }, { "epoch": 4.633204633204633, "eval_loss": Infinity, "eval_runtime": 195.6507, "eval_samples_per_second": 35.89, "eval_steps_per_second": 4.488, "eval_wer": 0.9975848233415138, "step": 6000 }, { "epoch": 4.787644787644788, "eval_loss": Infinity, "eval_runtime": 194.5522, "eval_samples_per_second": 36.093, "eval_steps_per_second": 4.513, "eval_wer": 0.9958578421824886, "step": 6200 }, { "epoch": 4.942084942084942, "eval_loss": Infinity, "eval_runtime": 195.551, "eval_samples_per_second": 35.909, "eval_steps_per_second": 4.49, "eval_wer": 0.9917416540064664, "step": 6400 }, { "epoch": 5.019305019305019, "grad_norm": 1.0234254598617554, "learning_rate": 0.00011125263157894737, "loss": 2.7652, "step": 6500 }, { "epoch": 5.096525096525096, "eval_loss": Infinity, "eval_runtime": 196.4433, "eval_samples_per_second": 35.746, "eval_steps_per_second": 4.469, "eval_wer": 0.9897290067910612, "step": 6600 }, { "epoch": 5.250965250965251, "eval_loss": Infinity, "eval_runtime": 195.3652, "eval_samples_per_second": 35.943, "eval_steps_per_second": 4.494, "eval_wer": 0.9902094451586095, "step": 6800 }, { "epoch": 5.405405405405405, "grad_norm": 0.9109746217727661, "learning_rate": 9.549473684210525e-05, "loss": 2.7358, "step": 7000 }, { "epoch": 5.405405405405405, "eval_loss": Infinity, "eval_runtime": 194.6043, "eval_samples_per_second": 36.083, "eval_steps_per_second": 4.512, "eval_wer": 0.9889499175463883, "step": 7000 }, { "epoch": 5.559845559845559, "eval_loss": Infinity, "eval_runtime": 193.7693, "eval_samples_per_second": 36.239, "eval_steps_per_second": 4.531, "eval_wer": 0.990456156752756, "step": 7200 }, { "epoch": 5.714285714285714, "eval_loss": Infinity, "eval_runtime": 195.6666, "eval_samples_per_second": 35.888, "eval_steps_per_second": 4.487, "eval_wer": 0.9887161907729863, "step": 7400 }, { "epoch": 5.7915057915057915, "grad_norm": 1.3490198850631714, "learning_rate": 7.973684210526315e-05, "loss": 2.7122, "step": 7500 }, { "epoch": 5.8687258687258685, "eval_loss": Infinity, "eval_runtime": 194.1989, "eval_samples_per_second": 36.159, "eval_steps_per_second": 4.521, "eval_wer": 0.9878202381416125, "step": 7600 }, { "epoch": 6.023166023166024, "eval_loss": Infinity, "eval_runtime": 194.0207, "eval_samples_per_second": 36.192, "eval_steps_per_second": 4.525, "eval_wer": 0.9847298508044097, "step": 7800 }, { "epoch": 6.177606177606178, "grad_norm": 2.4652857780456543, "learning_rate": 6.401052631578946e-05, "loss": 2.7345, "step": 8000 }, { "epoch": 6.177606177606178, "eval_loss": Infinity, "eval_runtime": 193.3926, "eval_samples_per_second": 36.31, "eval_steps_per_second": 4.54, "eval_wer": 0.9842494124368613, "step": 8000 }, { "epoch": 6.332046332046332, "eval_loss": Infinity, "eval_runtime": 195.2901, "eval_samples_per_second": 35.957, "eval_steps_per_second": 4.496, "eval_wer": 0.9882227675846935, "step": 8200 }, { "epoch": 6.486486486486487, "eval_loss": Infinity, "eval_runtime": 195.6895, "eval_samples_per_second": 35.883, "eval_steps_per_second": 4.487, "eval_wer": 0.9871580122836404, "step": 8400 }, { "epoch": 6.563706563706564, "grad_norm": 2.1805906295776367, "learning_rate": 4.828421052631579e-05, "loss": 3.035, "step": 8500 }, { "epoch": 6.640926640926641, "eval_loss": Infinity, "eval_runtime": 196.9379, "eval_samples_per_second": 35.656, "eval_steps_per_second": 4.458, "eval_wer": 0.9920532897043356, "step": 8600 }, { "epoch": 6.795366795366795, "eval_loss": Infinity, "eval_runtime": 196.3085, "eval_samples_per_second": 35.77, "eval_steps_per_second": 4.473, "eval_wer": 0.990573020139457, "step": 8800 }, { "epoch": 6.94980694980695, "grad_norm": 0.7723463177680969, "learning_rate": 3.25578947368421e-05, "loss": 3.688, "step": 9000 }, { "epoch": 6.94980694980695, "eval_loss": Infinity, "eval_runtime": 196.6042, "eval_samples_per_second": 35.716, "eval_steps_per_second": 4.466, "eval_wer": 0.9915858361575318, "step": 9000 }, { "epoch": 7.104247104247104, "eval_loss": Infinity, "eval_runtime": 195.2386, "eval_samples_per_second": 35.966, "eval_steps_per_second": 4.497, "eval_wer": 0.990573020139457, "step": 9200 }, { "epoch": 7.258687258687258, "eval_loss": Infinity, "eval_runtime": 196.3464, "eval_samples_per_second": 35.763, "eval_steps_per_second": 4.472, "eval_wer": 0.9908067469128589, "step": 9400 }, { "epoch": 7.335907335907336, "grad_norm": 0.0, "learning_rate": 1.6831578947368418e-05, "loss": 3.7017, "step": 9500 }, { "epoch": 7.413127413127413, "eval_loss": Infinity, "eval_runtime": 196.9124, "eval_samples_per_second": 35.661, "eval_steps_per_second": 4.459, "eval_wer": 0.9911962915351954, "step": 9600 }, { "epoch": 7.5675675675675675, "eval_loss": Infinity, "eval_runtime": 195.3324, "eval_samples_per_second": 35.949, "eval_steps_per_second": 4.495, "eval_wer": 0.9913001701011518, "step": 9800 }, { "epoch": 7.722007722007722, "grad_norm": 0.0, "learning_rate": 1.1052631578947367e-06, "loss": 3.7327, "step": 10000 }, { "epoch": 7.722007722007722, "eval_loss": Infinity, "eval_runtime": 198.1495, "eval_samples_per_second": 35.438, "eval_steps_per_second": 4.431, "eval_wer": 0.9913261397426408, "step": 10000 }, { "epoch": 7.722007722007722, "step": 10000, "total_flos": 4.216639119976582e+19, "train_loss": 2.028042752075195, "train_runtime": 21383.256, "train_samples_per_second": 14.965, "train_steps_per_second": 0.468 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.216639119976582e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }