{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.008233910267182, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08401949252226516, "grad_norm": 19.443201065063477, "learning_rate": 0.0002455, "loss": 4.6277, "step": 500 }, { "epoch": 0.08401949252226516, "eval_loss": 1.7780689001083374, "eval_runtime": 184.9372, "eval_samples_per_second": 37.97, "eval_steps_per_second": 2.374, "eval_wer": 0.9507301452317996, "step": 500 }, { "epoch": 0.16803898504453033, "grad_norm": 10.621150970458984, "learning_rate": 0.0002782777777777778, "loss": 1.2514, "step": 1000 }, { "epoch": 0.16803898504453033, "eval_loss": 1.046749472618103, "eval_runtime": 183.9043, "eval_samples_per_second": 38.183, "eval_steps_per_second": 2.387, "eval_wer": 0.7116623925377695, "step": 1000 }, { "epoch": 0.2520584775667955, "grad_norm": 6.651599884033203, "learning_rate": 0.00025049999999999996, "loss": 0.986, "step": 1500 }, { "epoch": 0.2520584775667955, "eval_loss": 0.8940034508705139, "eval_runtime": 185.3076, "eval_samples_per_second": 37.894, "eval_steps_per_second": 2.369, "eval_wer": 0.6577817935396431, "step": 1500 }, { "epoch": 0.33607797008906065, "grad_norm": 16.908123016357422, "learning_rate": 0.00022277777777777774, "loss": 0.8702, "step": 2000 }, { "epoch": 0.33607797008906065, "eval_loss": 0.8157631158828735, "eval_runtime": 185.5186, "eval_samples_per_second": 37.851, "eval_steps_per_second": 2.366, "eval_wer": 0.5836644122297665, "step": 2000 }, { "epoch": 0.4200974626113258, "grad_norm": 10.515896797180176, "learning_rate": 0.00019505555555555555, "loss": 0.8063, "step": 2500 }, { "epoch": 0.4200974626113258, "eval_loss": 0.7131509184837341, "eval_runtime": 185.8391, "eval_samples_per_second": 37.785, "eval_steps_per_second": 2.362, "eval_wer": 0.5281095948657304, "step": 2500 }, { "epoch": 0.504116955133591, "grad_norm": 3.7379024028778076, "learning_rate": 0.00016727777777777778, "loss": 0.7518, "step": 3000 }, { "epoch": 0.504116955133591, "eval_loss": 0.7195897102355957, "eval_runtime": 183.5821, "eval_samples_per_second": 38.25, "eval_steps_per_second": 2.391, "eval_wer": 0.5187950942744389, "step": 3000 }, { "epoch": 0.5881364476558562, "grad_norm": 5.922393321990967, "learning_rate": 0.0001395, "loss": 0.7045, "step": 3500 }, { "epoch": 0.5881364476558562, "eval_loss": 0.6231045722961426, "eval_runtime": 184.2635, "eval_samples_per_second": 38.108, "eval_steps_per_second": 2.382, "eval_wer": 0.4771256593895747, "step": 3500 }, { "epoch": 0.6721559401781213, "grad_norm": 8.91370964050293, "learning_rate": 0.00011177777777777777, "loss": 0.6726, "step": 4000 }, { "epoch": 0.6721559401781213, "eval_loss": 0.6074743270874023, "eval_runtime": 184.3908, "eval_samples_per_second": 38.082, "eval_steps_per_second": 2.381, "eval_wer": 0.4517333475066105, "step": 4000 }, { "epoch": 0.7561754327003865, "grad_norm": 12.337292671203613, "learning_rate": 8.405555555555556e-05, "loss": 0.6355, "step": 4500 }, { "epoch": 0.7561754327003865, "eval_loss": 0.5601897239685059, "eval_runtime": 184.3021, "eval_samples_per_second": 38.1, "eval_steps_per_second": 2.382, "eval_wer": 0.4328784597191034, "step": 4500 }, { "epoch": 0.8401949252226516, "grad_norm": 18.518247604370117, "learning_rate": 5.6333333333333325e-05, "loss": 0.5938, "step": 5000 }, { "epoch": 0.8401949252226516, "eval_loss": 0.5438016653060913, "eval_runtime": 185.1047, "eval_samples_per_second": 37.935, "eval_steps_per_second": 2.372, "eval_wer": 0.4225939754713722, "step": 5000 }, { "epoch": 0.9242144177449169, "grad_norm": 5.622241020202637, "learning_rate": 2.8555555555555556e-05, "loss": 0.5842, "step": 5500 }, { "epoch": 0.9242144177449169, "eval_loss": 0.5245193839073181, "eval_runtime": 184.9427, "eval_samples_per_second": 37.969, "eval_steps_per_second": 2.374, "eval_wer": 0.4090939289653065, "step": 5500 }, { "epoch": 1.008233910267182, "grad_norm": 10.519143104553223, "learning_rate": 7.777777777777777e-07, "loss": 0.557, "step": 6000 }, { "epoch": 1.008233910267182, "eval_loss": 0.5155353546142578, "eval_runtime": 185.543, "eval_samples_per_second": 37.846, "eval_steps_per_second": 2.366, "eval_wer": 0.39834438406037814, "step": 6000 }, { "epoch": 1.008233910267182, "step": 6000, "total_flos": 6.665403914161851e+18, "train_loss": 1.086754165649414, "train_runtime": 4718.794, "train_samples_per_second": 10.172, "train_steps_per_second": 1.272 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.665403914161851e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }