{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8933417481855475, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06311139160618491, "eval_loss": 3.0427980422973633, "eval_runtime": 195.72, "eval_samples_per_second": 35.878, "eval_steps_per_second": 0.562, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.12622278321236982, "eval_loss": 3.1055634021759033, "eval_runtime": 193.3912, "eval_samples_per_second": 36.31, "eval_steps_per_second": 0.569, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.1577784790154623, "grad_norm": 6.020185947418213, "learning_rate": 0.00024599999999999996, "loss": 4.232, "step": 500 }, { "epoch": 0.18933417481855475, "eval_loss": 1.2093147039413452, "eval_runtime": 194.3659, "eval_samples_per_second": 36.128, "eval_steps_per_second": 0.566, "eval_wer": 0.8286185040991775, "step": 600 }, { "epoch": 0.25244556642473964, "eval_loss": 1.0886054039001465, "eval_runtime": 194.9365, "eval_samples_per_second": 36.022, "eval_steps_per_second": 0.564, "eval_wer": 0.7324838225328533, "step": 800 }, { "epoch": 0.3155569580309246, "grad_norm": 4.922729969024658, "learning_rate": 0.00027822222222222224, "loss": 1.0379, "step": 1000 }, { "epoch": 0.3155569580309246, "eval_loss": 0.9017586708068848, "eval_runtime": 193.3111, "eval_samples_per_second": 36.325, "eval_steps_per_second": 0.569, "eval_wer": 0.673049070543058, "step": 1000 }, { "epoch": 0.3786683496371095, "eval_loss": 0.8131064772605896, "eval_runtime": 192.8235, "eval_samples_per_second": 36.417, "eval_steps_per_second": 0.57, "eval_wer": 0.5803292629452956, "step": 1200 }, { "epoch": 0.4417797412432944, "eval_loss": 0.7567442655563354, "eval_runtime": 192.8955, "eval_samples_per_second": 36.403, "eval_steps_per_second": 0.57, "eval_wer": 0.5543788782736948, "step": 1400 }, { "epoch": 0.47333543704638686, "grad_norm": 25.353076934814453, "learning_rate": 
0.00025049999999999996, "loss": 0.8008, "step": 1500 }, { "epoch": 0.5048911328494793, "eval_loss": 0.704021155834198, "eval_runtime": 193.2182, "eval_samples_per_second": 36.342, "eval_steps_per_second": 0.569, "eval_wer": 0.5137857266240583, "step": 1600 }, { "epoch": 0.5680025244556642, "eval_loss": 0.6949470043182373, "eval_runtime": 192.7198, "eval_samples_per_second": 36.436, "eval_steps_per_second": 0.571, "eval_wer": 0.5236450125566378, "step": 1800 }, { "epoch": 0.6311139160618492, "grad_norm": null, "learning_rate": 0.00022277777777777774, "loss": 0.7212, "step": 2000 }, { "epoch": 0.6311139160618492, "eval_loss": 0.672233521938324, "eval_runtime": 194.6494, "eval_samples_per_second": 36.075, "eval_steps_per_second": 0.565, "eval_wer": 0.4992758341195073, "step": 2000 }, { "epoch": 0.694225307668034, "eval_loss": 0.6403974890708923, "eval_runtime": 192.2183, "eval_samples_per_second": 36.531, "eval_steps_per_second": 0.572, "eval_wer": 0.4761689631804834, "step": 2200 }, { "epoch": 0.757336699274219, "eval_loss": 0.6335896849632263, "eval_runtime": 192.0564, "eval_samples_per_second": 36.562, "eval_steps_per_second": 0.573, "eval_wer": 0.4685552558497987, "step": 2400 }, { "epoch": 0.7888923950773115, "grad_norm": 5.390285968780518, "learning_rate": 0.00019505555555555555, "loss": 0.6639, "step": 2500 }, { "epoch": 0.8204480908804039, "eval_loss": 0.593280553817749, "eval_runtime": 191.8655, "eval_samples_per_second": 36.599, "eval_steps_per_second": 0.573, "eval_wer": 0.45866939502252224, "step": 2600 }, { "epoch": 0.8835594824865888, "eval_loss": 0.5996308326721191, "eval_runtime": 192.1279, "eval_samples_per_second": 36.549, "eval_steps_per_second": 0.573, "eval_wer": 0.45444398676570247, "step": 2800 }, { "epoch": 0.9466708740927737, "grad_norm": 7.231433391571045, "learning_rate": 0.00016733333333333333, "loss": 0.6278, "step": 3000 }, { "epoch": 0.9466708740927737, "eval_loss": 0.5639352202415466, "eval_runtime": 195.2391, "eval_samples_per_second": 
35.966, "eval_steps_per_second": 0.563, "eval_wer": 0.42424161894258494, "step": 3000 }, { "epoch": 1.0097822656989586, "eval_loss": 0.566460371017456, "eval_runtime": 191.9621, "eval_samples_per_second": 36.58, "eval_steps_per_second": 0.573, "eval_wer": 0.4227534248395541, "step": 3200 }, { "epoch": 1.0728936573051435, "eval_loss": 0.5476272106170654, "eval_runtime": 193.6396, "eval_samples_per_second": 36.263, "eval_steps_per_second": 0.568, "eval_wer": 0.41909937681871934, "step": 3400 }, { "epoch": 1.104449353108236, "grad_norm": 4.519629955291748, "learning_rate": 0.00013955555555555555, "loss": 0.5528, "step": 3500 }, { "epoch": 1.1360050489113285, "eval_loss": 0.5440065860748291, "eval_runtime": 191.7996, "eval_samples_per_second": 36.611, "eval_steps_per_second": 0.574, "eval_wer": 0.41887349021379505, "step": 3600 }, { "epoch": 1.1991164405175134, "eval_loss": 0.5297770500183105, "eval_runtime": 191.4083, "eval_samples_per_second": 36.686, "eval_steps_per_second": 0.575, "eval_wer": 0.4070875244156845, "step": 3800 }, { "epoch": 1.2622278321236984, "grad_norm": 1.4278947114944458, "learning_rate": 0.00011183333333333332, "loss": 0.5103, "step": 4000 }, { "epoch": 1.2622278321236984, "eval_loss": 0.5384453535079956, "eval_runtime": 195.1408, "eval_samples_per_second": 35.984, "eval_steps_per_second": 0.564, "eval_wer": 0.4025033550804555, "step": 4000 }, { "epoch": 1.325339223729883, "eval_loss": 0.531086266040802, "eval_runtime": 192.0473, "eval_samples_per_second": 36.564, "eval_steps_per_second": 0.573, "eval_wer": 0.39976614092666657, "step": 4200 }, { "epoch": 1.388450615336068, "eval_loss": 0.5395579934120178, "eval_runtime": 191.3816, "eval_samples_per_second": 36.691, "eval_steps_per_second": 0.575, "eval_wer": 0.4039383993940924, "step": 4400 }, { "epoch": 1.4200063111391605, "grad_norm": 2.4821906089782715, "learning_rate": 8.411111111111111e-05, "loss": 0.5194, "step": 4500 }, { "epoch": 1.451562006942253, "eval_loss": 0.5501742959022522, 
"eval_runtime": 190.6511, "eval_samples_per_second": 36.832, "eval_steps_per_second": 0.577, "eval_wer": 0.4048818081558352, "step": 4600 }, { "epoch": 1.514673398548438, "eval_loss": 0.6632032990455627, "eval_runtime": 193.4271, "eval_samples_per_second": 36.303, "eval_steps_per_second": 0.569, "eval_wer": 0.4365059328452411, "step": 4800 }, { "epoch": 1.577784790154623, "grad_norm": 11.192009925842285, "learning_rate": 5.649999999999999e-05, "loss": 0.6034, "step": 5000 }, { "epoch": 1.577784790154623, "eval_loss": 0.7074605226516724, "eval_runtime": 191.8555, "eval_samples_per_second": 36.6, "eval_steps_per_second": 0.573, "eval_wer": 0.4368115441342564, "step": 5000 }, { "epoch": 1.6408961817608079, "eval_loss": 0.7465850710868835, "eval_runtime": 191.8575, "eval_samples_per_second": 36.6, "eval_steps_per_second": 0.573, "eval_wer": 0.44187406157403103, "step": 5200 }, { "epoch": 1.7040075733669928, "eval_loss": 0.7624653577804565, "eval_runtime": 189.4967, "eval_samples_per_second": 37.056, "eval_steps_per_second": 0.58, "eval_wer": 0.449819955088428, "step": 5400 }, { "epoch": 1.7355632691700853, "grad_norm": 17.51552963256836, "learning_rate": 2.8777777777777776e-05, "loss": 0.74, "step": 5500 }, { "epoch": 1.7671189649731778, "eval_loss": 0.7502115368843079, "eval_runtime": 192.6564, "eval_samples_per_second": 36.448, "eval_steps_per_second": 0.571, "eval_wer": 0.4474813643550937, "step": 5600 }, { "epoch": 1.8302303565793627, "eval_loss": 0.7739897966384888, "eval_runtime": 189.3036, "eval_samples_per_second": 37.094, "eval_steps_per_second": 0.581, "eval_wer": 0.45959951633691654, "step": 5800 }, { "epoch": 1.8933417481855475, "grad_norm": 10.696316719055176, "learning_rate": 1e-06, "loss": 0.7844, "step": 6000 }, { "epoch": 1.8933417481855475, "eval_loss": 0.7821305990219116, "eval_runtime": 189.7595, "eval_samples_per_second": 37.005, "eval_steps_per_second": 0.58, "eval_wer": 0.45755324944524906, "step": 6000 }, { "epoch": 1.8933417481855475, "step": 
6000, "total_flos": 1.3297699695003722e+19, "train_loss": 0.9828314208984374, "train_runtime": 9865.427, "train_samples_per_second": 9.731, "train_steps_per_second": 0.608 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3297699695003722e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }