{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0080645161290323, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06720430107526881, "eval_loss": 2.9829530715942383, "eval_runtime": 176.5397, "eval_samples_per_second": 39.776, "eval_steps_per_second": 2.487, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.13440860215053763, "eval_loss": 1.5613375902175903, "eval_runtime": 174.5758, "eval_samples_per_second": 40.223, "eval_steps_per_second": 2.515, "eval_wer": 0.9657582481829415, "step": 400 }, { "epoch": 0.16801075268817203, "grad_norm": 2.138428211212158, "learning_rate": 0.00027833333333333334, "loss": 3.6118, "step": 500 }, { "epoch": 0.20161290322580644, "eval_loss": 1.0701438188552856, "eval_runtime": 174.6767, "eval_samples_per_second": 40.2, "eval_steps_per_second": 2.513, "eval_wer": 0.7648786191684716, "step": 600 }, { "epoch": 0.26881720430107525, "eval_loss": 0.8867517113685608, "eval_runtime": 174.8944, "eval_samples_per_second": 40.15, "eval_steps_per_second": 2.51, "eval_wer": 0.6947474720631419, "step": 800 }, { "epoch": 0.33602150537634407, "grad_norm": 2.852606773376465, "learning_rate": 0.00022288888888888887, "loss": 0.9333, "step": 1000 }, { "epoch": 0.33602150537634407, "eval_loss": 0.7679557204246521, "eval_runtime": 175.5538, "eval_samples_per_second": 39.999, "eval_steps_per_second": 2.501, "eval_wer": 0.6070503195631087, "step": 1000 }, { "epoch": 0.4032258064516129, "eval_loss": 0.7223904132843018, "eval_runtime": 175.6961, "eval_samples_per_second": 39.967, "eval_steps_per_second": 2.499, "eval_wer": 0.5453965638661157, "step": 1200 }, { "epoch": 0.47043010752688175, "eval_loss": 0.6732765436172485, "eval_runtime": 175.6064, "eval_samples_per_second": 39.987, "eval_steps_per_second": 2.5, "eval_wer": 0.5121646580475425, "step": 1400 }, { "epoch": 0.5040322580645161, "grad_norm": 4.027652740478516, "learning_rate": 0.00016744444444444443, "loss": 0.7446, "step": 1500 }, { "epoch": 0.5376344086021505, "eval_loss": 0.6437448859214783, "eval_runtime": 175.5304, "eval_samples_per_second": 40.004, "eval_steps_per_second": 2.501, "eval_wer": 0.4966449195445063, "step": 1600 }, { "epoch": 0.6048387096774194, "eval_loss": 0.6063565015792847, "eval_runtime": 175.877, "eval_samples_per_second": 39.926, "eval_steps_per_second": 2.496, "eval_wer": 0.4774312706785899, "step": 1800 }, { "epoch": 0.6720430107526881, "grad_norm": 3.676745653152466, "learning_rate": 0.000112, "loss": 0.6579, "step": 2000 }, { "epoch": 0.6720430107526881, "eval_loss": 0.5673760771751404, "eval_runtime": 176.4608, "eval_samples_per_second": 39.794, "eval_steps_per_second": 2.488, "eval_wer": 0.44613933217289625, "step": 2000 }, { "epoch": 0.739247311827957, "eval_loss": 0.5556111931800842, "eval_runtime": 175.9454, "eval_samples_per_second": 39.91, "eval_steps_per_second": 2.495, "eval_wer": 0.4325196986406941, "step": 2200 }, { "epoch": 0.8064516129032258, "eval_loss": 0.5264282822608948, "eval_runtime": 176.4778, "eval_samples_per_second": 39.79, "eval_steps_per_second": 2.488, "eval_wer": 0.4180363810308402, "step": 2400 }, { "epoch": 0.8400537634408602, "grad_norm": 2.7548441886901855, "learning_rate": 5.666666666666666e-05, "loss": 0.5823, "step": 2500 }, { "epoch": 0.8736559139784946, "eval_loss": 0.5129852890968323, "eval_runtime": 176.2054, "eval_samples_per_second": 39.851, "eval_steps_per_second": 2.491, "eval_wer": 0.4022110312387887, "step": 2600 }, { "epoch": 0.9408602150537635, "eval_loss": 0.49821802973747253, "eval_runtime": 176.2887, "eval_samples_per_second": 39.832, "eval_steps_per_second": 2.49, "eval_wer": 0.39360076535696725, "step": 2800 }, { "epoch": 1.0080645161290323, "grad_norm": 0.7031016945838928, "learning_rate": 1.111111111111111e-06, "loss": 0.5426, "step": 3000 }, { "epoch": 1.0080645161290323, "eval_loss": 0.4942198395729065, "eval_runtime": 177.2018, "eval_samples_per_second": 39.627, "eval_steps_per_second": 2.477, "eval_wer": 0.3917272352808302, "step": 3000 }, { "epoch": 1.0080645161290323, "step": 3000, "total_flos": 6.709869296482936e+18, "train_loss": 1.1787635701497396, "train_runtime": 4443.9077, "train_samples_per_second": 10.801, "train_steps_per_second": 0.675 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.709869296482936e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }