{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0080645161290323, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06720430107526881, "eval_loss": 3.1445324420928955, "eval_runtime": 187.2734, "eval_samples_per_second": 37.496, "eval_steps_per_second": 2.344, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.13440860215053763, "eval_loss": 2.7407212257385254, "eval_runtime": 185.5858, "eval_samples_per_second": 37.837, "eval_steps_per_second": 2.365, "eval_wer": 0.9999867125526515, "step": 400 }, { "epoch": 0.16801075268817203, "grad_norm": 8.384466171264648, "learning_rate": 0.00027866666666666665, "loss": 4.0188, "step": 500 }, { "epoch": 0.20161290322580644, "eval_loss": 1.2700377702713013, "eval_runtime": 187.6926, "eval_samples_per_second": 37.412, "eval_steps_per_second": 2.339, "eval_wer": 0.8484433755431244, "step": 600 }, { "epoch": 0.26881720430107525, "eval_loss": 0.9953192472457886, "eval_runtime": 187.642, "eval_samples_per_second": 37.422, "eval_steps_per_second": 2.34, "eval_wer": 0.7435389787267968, "step": 800 }, { "epoch": 0.33602150537634407, "grad_norm": 5.077725410461426, "learning_rate": 0.0002232222222222222, "loss": 1.0707, "step": 1000 }, { "epoch": 0.33602150537634407, "eval_loss": 0.8646696209907532, "eval_runtime": 187.682, "eval_samples_per_second": 37.414, "eval_steps_per_second": 2.339, "eval_wer": 0.6541277455188084, "step": 1000 }, { "epoch": 0.4032258064516129, "eval_loss": 0.7888585925102234, "eval_runtime": 187.1558, "eval_samples_per_second": 37.52, "eval_steps_per_second": 2.346, "eval_wer": 0.5784025830797646, "step": 1200 }, { "epoch": 0.47043010752688175, "eval_loss": 0.7465152740478516, "eval_runtime": 185.9206, "eval_samples_per_second": 37.769, "eval_steps_per_second": 2.361, "eval_wer": 0.5440412442365697, "step": 1400 }, { "epoch": 0.5040322580645161, "grad_norm": 7.621553897857666, "learning_rate": 0.00016777777777777776, "loss": 0.8175, "step": 1500 }, { "epoch": 0.5376344086021505, "eval_loss": 0.68277907371521, "eval_runtime": 187.6331, "eval_samples_per_second": 37.424, "eval_steps_per_second": 2.34, "eval_wer": 0.5042453394278426, "step": 1600 }, { "epoch": 0.6048387096774194, "eval_loss": 0.6549283862113953, "eval_runtime": 186.6705, "eval_samples_per_second": 37.617, "eval_steps_per_second": 2.352, "eval_wer": 0.4952098752308694, "step": 1800 }, { "epoch": 0.6720430107526881, "grad_norm": 6.983826637268066, "learning_rate": 0.00011233333333333333, "loss": 0.7148, "step": 2000 }, { "epoch": 0.6720430107526881, "eval_loss": 0.6289859414100647, "eval_runtime": 188.3084, "eval_samples_per_second": 37.29, "eval_steps_per_second": 2.331, "eval_wer": 0.4905858435535949, "step": 2000 }, { "epoch": 0.739247311827957, "eval_loss": 0.6112708449363708, "eval_runtime": 188.0636, "eval_samples_per_second": 37.338, "eval_steps_per_second": 2.334, "eval_wer": 0.45763297412934, "step": 2200 }, { "epoch": 0.8064516129032258, "eval_loss": 0.5718730688095093, "eval_runtime": 187.8424, "eval_samples_per_second": 37.382, "eval_steps_per_second": 2.337, "eval_wer": 0.4404788796024396, "step": 2400 }, { "epoch": 0.8400537634408602, "grad_norm": 6.760587215423584, "learning_rate": 5.6999999999999996e-05, "loss": 0.6374, "step": 2500 }, { "epoch": 0.8736559139784946, "eval_loss": 0.5643858313560486, "eval_runtime": 188.0058, "eval_samples_per_second": 37.35, "eval_steps_per_second": 2.335, "eval_wer": 0.431376978168724, "step": 2600 }, { "epoch": 0.9408602150537635, "eval_loss": 0.5482733249664307, "eval_runtime": 188.1104, "eval_samples_per_second": 37.329, "eval_steps_per_second": 2.334, "eval_wer": 0.41896650234523447, "step": 2800 }, { "epoch": 1.0080645161290323, "grad_norm": 1.7233390808105469, "learning_rate": 1.4444444444444445e-06, "loss": 0.6013, "step": 3000 }, { "epoch": 1.0080645161290323, "eval_loss": 0.5417820811271667, "eval_runtime": 189.023, "eval_samples_per_second": 37.149, "eval_steps_per_second": 2.322, "eval_wer": 0.4178370693206128, "step": 3000 }, { "epoch": 1.0080645161290323, "step": 3000, "total_flos": 6.709869296482936e+18, "train_loss": 1.3100875549316406, "train_runtime": 4950.9591, "train_samples_per_second": 9.695, "train_steps_per_second": 0.606 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.709869296482936e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }