{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.191950464396285, "eval_steps": 100, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.30959752321981426, "eval_loss": 3.562331438064575, "eval_runtime": 151.987, "eval_samples_per_second": 37.214, "eval_steps_per_second": 4.652, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.6191950464396285, "eval_loss": 3.1175434589385986, "eval_runtime": 149.1497, "eval_samples_per_second": 37.922, "eval_steps_per_second": 4.74, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.9287925696594427, "eval_loss": 3.293273687362671, "eval_runtime": 149.8183, "eval_samples_per_second": 37.752, "eval_steps_per_second": 4.719, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.238390092879257, "eval_loss": 0.9764726161956787, "eval_runtime": 149.5566, "eval_samples_per_second": 37.818, "eval_steps_per_second": 4.727, "eval_wer": 0.6866203399078814, "step": 400 }, { "epoch": 1.5479876160990713, "grad_norm": 0.48750218749046326, "learning_rate": 0.00029519999999999997, "loss": 3.7398, "step": 500 }, { "epoch": 1.5479876160990713, "eval_loss": 0.6787042617797852, "eval_runtime": 150.878, "eval_samples_per_second": 37.487, "eval_steps_per_second": 4.686, "eval_wer": 0.492176341255958, "step": 500 }, { "epoch": 1.8575851393188856, "eval_loss": 0.6015087366104126, "eval_runtime": 150.6376, "eval_samples_per_second": 37.547, "eval_steps_per_second": 4.693, "eval_wer": 0.43465840702283703, "step": 600 }, { "epoch": 2.1671826625387, "eval_loss": 0.5698955059051514, "eval_runtime": 150.4703, "eval_samples_per_second": 37.589, "eval_steps_per_second": 4.699, "eval_wer": 0.42732422846688384, "step": 700 }, { "epoch": 2.476780185758514, "eval_loss": 0.5419376492500305, "eval_runtime": 150.5345, "eval_samples_per_second": 37.573, "eval_steps_per_second": 4.697, "eval_wer": 0.39578886552936077, "step": 800 }, { "epoch": 2.7863777089783284, "eval_loss": 0.49708282947540283, "eval_runtime": 150.3649, "eval_samples_per_second": 37.615, "eval_steps_per_second": 4.702, "eval_wer": 0.3729839033236507, "step": 900 }, { "epoch": 3.0959752321981426, "grad_norm": 0.4975910186767578, "learning_rate": 0.0002024, "loss": 0.5228, "step": 1000 }, { "epoch": 3.0959752321981426, "eval_loss": 0.49601131677627563, "eval_runtime": 150.6812, "eval_samples_per_second": 37.536, "eval_steps_per_second": 4.692, "eval_wer": 0.354303413522492, "step": 1000 }, { "epoch": 3.405572755417957, "eval_loss": 0.6976510286331177, "eval_runtime": 149.5565, "eval_samples_per_second": 37.818, "eval_steps_per_second": 4.727, "eval_wer": 0.4655036831378087, "step": 1100 }, { "epoch": 3.715170278637771, "eval_loss": 0.799861490726471, "eval_runtime": 151.2369, "eval_samples_per_second": 37.398, "eval_steps_per_second": 4.675, "eval_wer": 0.5316396783874436, "step": 1200 }, { "epoch": 4.024767801857585, "eval_loss": 1.0526387691497803, "eval_runtime": 150.3745, "eval_samples_per_second": 37.613, "eval_steps_per_second": 4.702, "eval_wer": 0.6214633050344242, "step": 1300 }, { "epoch": 4.3343653250774, "eval_loss": 1.1285134553909302, "eval_runtime": 150.4438, "eval_samples_per_second": 37.595, "eval_steps_per_second": 4.699, "eval_wer": 0.7571857296464509, "step": 1400 }, { "epoch": 4.643962848297214, "grad_norm": 3.2222559452056885, "learning_rate": 0.00010359999999999998, "loss": 0.9047, "step": 1500 }, { "epoch": 4.643962848297214, "eval_loss": 1.1592659950256348, "eval_runtime": 150.3382, "eval_samples_per_second": 37.622, "eval_steps_per_second": 4.703, "eval_wer": 0.7047070340710307, "step": 1500 }, { "epoch": 4.953560371517028, "eval_loss": 2.0400760173797607, "eval_runtime": 150.5385, "eval_samples_per_second": 37.572, "eval_steps_per_second": 4.696, "eval_wer": 0.966747444271477, "step": 1600 }, { "epoch": 5.2631578947368425, "eval_loss": 1.6264142990112305, "eval_runtime": 151.1274, "eval_samples_per_second": 37.425, "eval_steps_per_second": 4.678, "eval_wer": 0.8680489801158704, "step": 1700 }, { "epoch": 5.572755417956657, "eval_loss": 1.5916314125061035, "eval_runtime": 151.1725, "eval_samples_per_second": 37.414, "eval_steps_per_second": 4.677, "eval_wer": 0.8627369164353004, "step": 1800 }, { "epoch": 5.882352941176471, "eval_loss": 1.5764083862304688, "eval_runtime": 150.5972, "eval_samples_per_second": 37.557, "eval_steps_per_second": 4.695, "eval_wer": 0.8720129672128517, "step": 1900 }, { "epoch": 6.191950464396285, "grad_norm": 0.7321383953094482, "learning_rate": 4.399999999999999e-06, "loss": 1.6715, "step": 2000 }, { "epoch": 6.191950464396285, "eval_loss": 1.5562535524368286, "eval_runtime": 151.1236, "eval_samples_per_second": 37.426, "eval_steps_per_second": 4.678, "eval_wer": 0.8702315802988236, "step": 2000 }, { "epoch": 6.191950464396285, "step": 2000, "total_flos": 3.641759707440775e+19, "train_loss": 1.7097147216796875, "train_runtime": 12239.4176, "train_samples_per_second": 20.916, "train_steps_per_second": 0.163 } ], "logging_steps": 500, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 400, "total_flos": 3.641759707440775e+19, "train_batch_size": 128, "trial_name": null, "trial_params": null }