{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0161290322580645, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06720430107526881, "eval_loss": 3.080493211746216, "eval_runtime": 198.5178, "eval_samples_per_second": 35.372, "eval_steps_per_second": 0.554, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.13440860215053763, "eval_loss": 2.92264461517334, "eval_runtime": 201.3326, "eval_samples_per_second": 34.878, "eval_steps_per_second": 0.546, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.16801075268817203, "grad_norm": 5.131893634796143, "learning_rate": 0.00024599999999999996, "loss": 4.4646, "step": 500 }, { "epoch": 0.20161290322580644, "eval_loss": 1.3392444849014282, "eval_runtime": 198.1589, "eval_samples_per_second": 35.436, "eval_steps_per_second": 0.555, "eval_wer": 0.8646141989662366, "step": 600 }, { "epoch": 0.26881720430107525, "eval_loss": 1.0115001201629639, "eval_runtime": 200.6717, "eval_samples_per_second": 34.992, "eval_steps_per_second": 0.548, "eval_wer": 0.7168179221089837, "step": 800 }, { "epoch": 0.33602150537634407, "grad_norm": 6.313941478729248, "learning_rate": 0.0002782777777777778, "loss": 1.1042, "step": 1000 }, { "epoch": 0.33602150537634407, "eval_loss": 0.9008844494819641, "eval_runtime": 199.6418, "eval_samples_per_second": 35.173, "eval_steps_per_second": 0.551, "eval_wer": 0.6766765436691957, "step": 1000 }, { "epoch": 0.4032258064516129, "eval_loss": 0.8225907683372498, "eval_runtime": 199.7483, "eval_samples_per_second": 35.154, "eval_steps_per_second": 0.551, "eval_wer": 0.5947727182131041, "step": 1200 }, { "epoch": 0.47043010752688175, "eval_loss": 0.8068214058876038, "eval_runtime": 197.9234, "eval_samples_per_second": 35.478, "eval_steps_per_second": 0.556, "eval_wer": 0.5977358189718174, "step": 1400 }, { "epoch": 0.5040322580645161, "grad_norm": 8.026273727416992, "learning_rate": 0.0002505555555555555, "loss": 0.8421, "step": 1500 }, { "epoch": 0.5376344086021505, "eval_loss": 0.7186344861984253, "eval_runtime": 197.8905, "eval_samples_per_second": 35.484, "eval_steps_per_second": 0.556, "eval_wer": 0.5375171075884612, "step": 1600 }, { "epoch": 0.6048387096774194, "eval_loss": 0.7200700640678406, "eval_runtime": 197.0513, "eval_samples_per_second": 35.635, "eval_steps_per_second": 0.558, "eval_wer": 0.5560796715343016, "step": 1800 }, { "epoch": 0.6720430107526881, "grad_norm": 4.496196746826172, "learning_rate": 0.00022283333333333332, "loss": 0.7637, "step": 2000 }, { "epoch": 0.6720430107526881, "eval_loss": 0.6747044920921326, "eval_runtime": 198.0259, "eval_samples_per_second": 35.46, "eval_steps_per_second": 0.555, "eval_wer": 0.5086966342895867, "step": 2000 }, { "epoch": 0.739247311827957, "eval_loss": 0.6709386706352234, "eval_runtime": 199.0402, "eval_samples_per_second": 35.279, "eval_steps_per_second": 0.553, "eval_wer": 0.5050558737161004, "step": 2200 }, { "epoch": 0.8064516129032258, "eval_loss": 0.6484177708625793, "eval_runtime": 193.7002, "eval_samples_per_second": 36.252, "eval_steps_per_second": 0.568, "eval_wer": 0.48504497800927465, "step": 2400 }, { "epoch": 0.8400537634408602, "grad_norm": 4.925416469573975, "learning_rate": 0.00019516666666666665, "loss": 0.7134, "step": 2500 }, { "epoch": 0.8736559139784946, "eval_loss": 0.6387702226638794, "eval_runtime": 191.5475, "eval_samples_per_second": 36.659, "eval_steps_per_second": 0.574, "eval_wer": 0.4915159648679892, "step": 2600 }, { "epoch": 0.9408602150537635, "eval_loss": 0.627938985824585, "eval_runtime": 192.2841, "eval_samples_per_second": 36.519, "eval_steps_per_second": 0.572, "eval_wer": 0.4705217980573752, "step": 2800 }, { "epoch": 1.0080645161290323, "grad_norm": 2.991081714630127, "learning_rate": 0.00016738888888888888, "loss": 0.6968, "step": 3000 }, { "epoch": 1.0080645161290323, "eval_loss": 0.6451985836029053, "eval_runtime": 192.2109, "eval_samples_per_second": 36.533, "eval_steps_per_second": 0.572, "eval_wer": 0.4833441847486679, "step": 3000 }, { "epoch": 1.075268817204301, "eval_loss": 0.6439228057861328, "eval_runtime": 191.7283, "eval_samples_per_second": 36.625, "eval_steps_per_second": 0.574, "eval_wer": 0.4896955845812461, "step": 3200 }, { "epoch": 1.14247311827957, "eval_loss": 0.6749800443649292, "eval_runtime": 191.0444, "eval_samples_per_second": 36.756, "eval_steps_per_second": 0.576, "eval_wer": 0.5010164897221595, "step": 3400 }, { "epoch": 1.1760752688172043, "grad_norm": 17.283771514892578, "learning_rate": 0.00013977777777777776, "loss": 0.667, "step": 3500 }, { "epoch": 1.2096774193548387, "eval_loss": 0.7904173135757446, "eval_runtime": 191.7132, "eval_samples_per_second": 36.628, "eval_steps_per_second": 0.574, "eval_wer": 0.5591756467664997, "step": 3600 }, { "epoch": 1.2768817204301075, "eval_loss": 0.9479840993881226, "eval_runtime": 192.8834, "eval_samples_per_second": 36.405, "eval_steps_per_second": 0.57, "eval_wer": 0.5800502265509773, "step": 3800 }, { "epoch": 1.3440860215053765, "grad_norm": 26.34053611755371, "learning_rate": 0.0001121111111111111, "loss": 0.9326, "step": 4000 }, { "epoch": 1.3440860215053765, "eval_loss": 1.1457170248031616, "eval_runtime": 189.9612, "eval_samples_per_second": 36.965, "eval_steps_per_second": 0.579, "eval_wer": 0.6581804169600978, "step": 4000 }, { "epoch": 1.4112903225806452, "eval_loss": 1.118348479270935, "eval_runtime": 190.0092, "eval_samples_per_second": 36.956, "eval_steps_per_second": 0.579, "eval_wer": 0.6742183659097251, "step": 4200 }, { "epoch": 1.478494623655914, "eval_loss": 1.3061094284057617, "eval_runtime": 190.4579, "eval_samples_per_second": 36.869, "eval_steps_per_second": 0.578, "eval_wer": 0.78073054385522, "step": 4400 }, { "epoch": 1.5120967741935485, "grad_norm": 15.129014015197754, "learning_rate": 8.433333333333331e-05, "loss": 1.2308, "step": 4500 }, { "epoch": 1.5456989247311828, "eval_loss": 1.2298626899719238, "eval_runtime": 190.6991, "eval_samples_per_second": 36.822, "eval_steps_per_second": 0.577, "eval_wer": 0.7672304973491543, "step": 4600 }, { "epoch": 1.6129032258064515, "eval_loss": 1.178423523902893, "eval_runtime": 190.1804, "eval_samples_per_second": 36.923, "eval_steps_per_second": 0.578, "eval_wer": 0.7516974713987696, "step": 4800 }, { "epoch": 1.6801075268817205, "grad_norm": 3.6261146068573, "learning_rate": 5.6555555555555555e-05, "loss": 1.2835, "step": 5000 }, { "epoch": 1.6801075268817205, "eval_loss": 1.339849829673767, "eval_runtime": 189.6315, "eval_samples_per_second": 37.03, "eval_steps_per_second": 0.58, "eval_wer": 0.7625267409877888, "step": 5000 }, { "epoch": 1.7473118279569892, "eval_loss": 1.5754368305206299, "eval_runtime": 190.0111, "eval_samples_per_second": 36.956, "eval_steps_per_second": 0.579, "eval_wer": 0.895573951288218, "step": 5200 }, { "epoch": 1.814516129032258, "eval_loss": 1.728023886680603, "eval_runtime": 190.3066, "eval_samples_per_second": 36.898, "eval_steps_per_second": 0.578, "eval_wer": 0.9705550166757464, "step": 5400 }, { "epoch": 1.8481182795698925, "grad_norm": 4.430452346801758, "learning_rate": 2.8777777777777776e-05, "loss": 1.6466, "step": 5500 }, { "epoch": 1.881720430107527, "eval_loss": 1.7603241205215454, "eval_runtime": 190.0315, "eval_samples_per_second": 36.952, "eval_steps_per_second": 0.579, "eval_wer": 0.9727208705935503, "step": 5600 }, { "epoch": 1.9489247311827957, "eval_loss": 1.8200371265411377, "eval_runtime": 190.0189, "eval_samples_per_second": 36.954, "eval_steps_per_second": 0.579, "eval_wer": 0.9943661223242403, "step": 5800 }, { "epoch": 2.0161290322580645, "grad_norm": 8.303234100341797, "learning_rate": 1.0555555555555555e-06, "loss": 1.8511, "step": 6000 }, { "epoch": 2.0161290322580645, "eval_loss": 1.8412970304489136, "eval_runtime": 189.9189, "eval_samples_per_second": 36.974, "eval_steps_per_second": 0.579, "eval_wer": 0.9956417172696953, "step": 6000 }, { "epoch": 2.0161290322580645, "step": 6000, "total_flos": 1.3379687773166643e+19, "train_loss": 1.3496905517578126, "train_runtime": 9965.7858, "train_samples_per_second": 9.633, "train_steps_per_second": 0.602 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3379687773166643e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }