{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0864197530864197, "eval_steps": 200, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.30864197530864196, "eval_loss": Infinity, "eval_runtime": 198.0112, "eval_samples_per_second": 35.463, "eval_steps_per_second": 4.434, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.6172839506172839, "eval_loss": Infinity, "eval_runtime": 196.4137, "eval_samples_per_second": 35.751, "eval_steps_per_second": 4.47, "eval_wer": 0.9468401438718138, "step": 400 }, { "epoch": 0.7716049382716049, "grad_norm": 2.496718645095825, "learning_rate": 0.0002958, "loss": 3.8579, "step": 500 }, { "epoch": 0.9259259259259259, "eval_loss": Infinity, "eval_runtime": 194.7508, "eval_samples_per_second": 36.056, "eval_steps_per_second": 4.508, "eval_wer": 0.682339345305338, "step": 600 }, { "epoch": 1.2345679012345678, "eval_loss": Infinity, "eval_runtime": 195.8415, "eval_samples_per_second": 35.856, "eval_steps_per_second": 4.483, "eval_wer": 0.5246257125420384, "step": 800 }, { "epoch": 1.5432098765432098, "grad_norm": 0.836155354976654, "learning_rate": 0.0002016, "loss": 0.7662, "step": 1000 }, { "epoch": 1.5432098765432098, "eval_loss": Infinity, "eval_runtime": 194.2625, "eval_samples_per_second": 36.147, "eval_steps_per_second": 4.52, "eval_wer": 0.45603988936932727, "step": 1000 }, { "epoch": 1.8518518518518519, "eval_loss": Infinity, "eval_runtime": 195.3379, "eval_samples_per_second": 35.948, "eval_steps_per_second": 4.495, "eval_wer": 0.4314076844169166, "step": 1200 }, { "epoch": 2.1604938271604937, "eval_loss": Infinity, "eval_runtime": 194.6062, "eval_samples_per_second": 36.083, "eval_steps_per_second": 4.512, "eval_wer": 0.43706906626154024, "step": 1400 }, { "epoch": 2.314814814814815, "grad_norm": 0.4859907329082489, "learning_rate": 0.00010239999999999998, "loss": 0.5916, "step": 1500 }, { "epoch": 2.4691358024691357, "eval_loss": Infinity, "eval_runtime": 194.4662, "eval_samples_per_second": 36.109, "eval_steps_per_second": 4.515, "eval_wer": 0.38610364483918297, "step": 1600 }, { "epoch": 2.7777777777777777, "eval_loss": Infinity, "eval_runtime": 194.3135, "eval_samples_per_second": 36.137, "eval_steps_per_second": 4.518, "eval_wer": 0.36491241738407804, "step": 1800 }, { "epoch": 3.0864197530864197, "grad_norm": 0.7149534821510315, "learning_rate": 2.9999999999999997e-06, "loss": 0.4977, "step": 2000 }, { "epoch": 3.0864197530864197, "eval_loss": Infinity, "eval_runtime": 195.0164, "eval_samples_per_second": 36.007, "eval_steps_per_second": 4.502, "eval_wer": 0.36645761105267943, "step": 2000 }, { "epoch": 3.0864197530864197, "step": 2000, "total_flos": 1.6964840215738495e+19, "train_loss": 1.4283542098999022, "train_runtime": 6370.4271, "train_samples_per_second": 20.093, "train_steps_per_second": 0.314 } ], "logging_steps": 500, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6964840215738495e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }