{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.172839506172839, "eval_steps": 200, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.30864197530864196, "eval_loss": Infinity, "eval_runtime": 176.8693, "eval_samples_per_second": 39.702, "eval_steps_per_second": 4.964, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.6172839506172839, "eval_loss": Infinity, "eval_runtime": 175.9503, "eval_samples_per_second": 39.909, "eval_steps_per_second": 4.99, "eval_wer": 0.8138755794476257, "step": 400 }, { "epoch": 0.7716049382716049, "grad_norm": 2.980449914932251, "learning_rate": 0.0002958, "loss": 3.6854, "step": 500 }, { "epoch": 0.9259259259259259, "eval_loss": Infinity, "eval_runtime": 174.7168, "eval_samples_per_second": 40.191, "eval_steps_per_second": 5.025, "eval_wer": 0.6042746029891057, "step": 600 }, { "epoch": 1.2345679012345678, "eval_loss": Infinity, "eval_runtime": 176.3896, "eval_samples_per_second": 39.81, "eval_steps_per_second": 4.978, "eval_wer": 0.503447469907678, "step": 800 }, { "epoch": 1.5432098765432098, "grad_norm": 0.7878520488739014, "learning_rate": 0.0002578285714285714, "loss": 0.7236, "step": 1000 }, { "epoch": 1.5432098765432098, "eval_loss": Infinity, "eval_runtime": 175.5785, "eval_samples_per_second": 39.994, "eval_steps_per_second": 5.001, "eval_wer": 0.44518457922688376, "step": 1000 }, { "epoch": 1.8518518518518519, "eval_loss": Infinity, "eval_runtime": 175.3263, "eval_samples_per_second": 40.051, "eval_steps_per_second": 5.008, "eval_wer": 0.4584940204900471, "step": 1200 }, { "epoch": 2.1604938271604937, "eval_loss": Infinity, "eval_runtime": 174.0761, "eval_samples_per_second": 40.339, "eval_steps_per_second": 5.044, "eval_wer": 0.4735564125537247, "step": 1400 }, { "epoch": 2.314814814814815, "grad_norm": 1.6092010736465454, "learning_rate": 0.0002153142857142857, "loss": 0.6244, "step": 1500 }, { "epoch": 2.4691358024691357, "eval_loss": Infinity, "eval_runtime": 174.8676, "eval_samples_per_second": 40.156, "eval_steps_per_second": 5.021, "eval_wer": 0.4445742926518899, "step": 1600 }, { "epoch": 2.7777777777777777, "eval_loss": Infinity, "eval_runtime": 174.4183, "eval_samples_per_second": 40.26, "eval_steps_per_second": 5.034, "eval_wer": 0.4992274031656993, "step": 1800 }, { "epoch": 3.0864197530864197, "grad_norm": 13.045821189880371, "learning_rate": 0.0001727142857142857, "loss": 0.8045, "step": 2000 }, { "epoch": 3.0864197530864197, "eval_loss": Infinity, "eval_runtime": 176.0054, "eval_samples_per_second": 39.897, "eval_steps_per_second": 4.988, "eval_wer": 0.699323490839209, "step": 2000 }, { "epoch": 3.3950617283950617, "eval_loss": Infinity, "eval_runtime": 175.9958, "eval_samples_per_second": 39.899, "eval_steps_per_second": 4.989, "eval_wer": 0.9154558321322375, "step": 2200 }, { "epoch": 3.7037037037037037, "eval_loss": Infinity, "eval_runtime": 175.4175, "eval_samples_per_second": 40.03, "eval_steps_per_second": 5.005, "eval_wer": 1.0, "step": 2400 }, { "epoch": 3.8580246913580245, "grad_norm": 1.601144552230835, "learning_rate": 0.0001302, "loss": 2.3067, "step": 2500 }, { "epoch": 4.012345679012346, "eval_loss": Infinity, "eval_runtime": 175.9931, "eval_samples_per_second": 39.899, "eval_steps_per_second": 4.989, "eval_wer": 0.998558684897355, "step": 2600 }, { "epoch": 4.320987654320987, "eval_loss": Infinity, "eval_runtime": 175.0274, "eval_samples_per_second": 40.119, "eval_steps_per_second": 5.016, "eval_wer": 1.0, "step": 2800 }, { "epoch": 4.62962962962963, "grad_norm": 0.7471032738685608, "learning_rate": 8.759999999999999e-05, "loss": 3.008, "step": 3000 }, { "epoch": 4.62962962962963, "eval_loss": Infinity, "eval_runtime": 175.5031, "eval_samples_per_second": 40.011, "eval_steps_per_second": 5.003, "eval_wer": 1.0, "step": 3000 }, { "epoch": 4.938271604938271, "eval_loss": Infinity, "eval_runtime": 175.2017, "eval_samples_per_second": 40.08, "eval_steps_per_second": 5.011, "eval_wer": 1.0, "step": 3200 }, { "epoch": 5.246913580246914, "eval_loss": Infinity, "eval_runtime": 176.0971, "eval_samples_per_second": 39.876, "eval_steps_per_second": 4.986, "eval_wer": 1.0, "step": 3400 }, { "epoch": 5.401234567901234, "grad_norm": 1.3811966180801392, "learning_rate": 4.4999999999999996e-05, "loss": 3.032, "step": 3500 }, { "epoch": 5.555555555555555, "eval_loss": Infinity, "eval_runtime": 176.4078, "eval_samples_per_second": 39.805, "eval_steps_per_second": 4.977, "eval_wer": 1.0, "step": 3600 }, { "epoch": 5.864197530864198, "eval_loss": Infinity, "eval_runtime": 175.4996, "eval_samples_per_second": 40.011, "eval_steps_per_second": 5.003, "eval_wer": 1.0, "step": 3800 }, { "epoch": 6.172839506172839, "grad_norm": 0.011152578517794609, "learning_rate": 2.314285714285714e-06, "loss": 3.0173, "step": 4000 }, { "epoch": 6.172839506172839, "eval_loss": Infinity, "eval_runtime": 175.8629, "eval_samples_per_second": 39.929, "eval_steps_per_second": 4.993, "eval_wer": 1.0, "step": 4000 }, { "epoch": 6.172839506172839, "step": 4000, "total_flos": 3.3889468875697558e+19, "train_loss": 2.1502342529296876, "train_runtime": 11441.7295, "train_samples_per_second": 22.374, "train_steps_per_second": 0.35 } ], "logging_steps": 500, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.3889468875697558e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }