{ "best_metric": null, "best_model_checkpoint": null, "epoch": 37.98470060670008, "eval_steps": 1500, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.17, "learning_rate": 0.0002917131696428571, "loss": 6.3673, "step": 1500 }, { "epoch": 3.17, "eval_cer": 0.16064462437366714, "eval_loss": 0.6103929877281189, "eval_runtime": 143.9739, "eval_samples_per_second": 26.185, "eval_steps_per_second": 3.278, "step": 1500 }, { "epoch": 6.33, "learning_rate": 0.00026660156249999997, "loss": 0.656, "step": 3000 }, { "epoch": 6.33, "eval_cer": 0.112913292993741, "eval_loss": 0.4318251311779022, "eval_runtime": 143.6016, "eval_samples_per_second": 26.253, "eval_steps_per_second": 3.287, "step": 3000 }, { "epoch": 9.5, "learning_rate": 0.00024148995535714285, "loss": 0.4729, "step": 4500 }, { "epoch": 9.5, "eval_cer": 0.10276193283284499, "eval_loss": 0.4009631276130676, "eval_runtime": 144.1302, "eval_samples_per_second": 26.157, "eval_steps_per_second": 3.275, "step": 4500 }, { "epoch": 12.66, "learning_rate": 0.0002163783482142857, "loss": 0.3789, "step": 6000 }, { "epoch": 12.66, "eval_cer": 0.09767324930214817, "eval_loss": 0.3866880536079407, "eval_runtime": 145.6008, "eval_samples_per_second": 25.893, "eval_steps_per_second": 3.242, "step": 6000 }, { "epoch": 15.83, "learning_rate": 0.00019128348214285712, "loss": 0.3166, "step": 7500 }, { "epoch": 15.83, "eval_cer": 0.0936248417913553, "eval_loss": 0.38572949171066284, "eval_runtime": 144.2607, "eval_samples_per_second": 26.133, "eval_steps_per_second": 3.272, "step": 7500 }, { "epoch": 18.99, "learning_rate": 0.00016618861607142855, "loss": 0.267, "step": 9000 }, { "epoch": 18.99, "eval_cer": 0.0911715241777485, "eval_loss": 0.389119952917099, "eval_runtime": 143.5612, "eval_samples_per_second": 26.261, "eval_steps_per_second": 3.288, "step": 9000 }, { "epoch": 22.16, "learning_rate": 0.00014107700892857143, "loss": 0.2286, "step": 10500 }, { "epoch": 22.16, "eval_cer": 0.09095480000693518, "eval_loss": 0.40744006633758545, "eval_runtime": 143.443, "eval_samples_per_second": 26.282, "eval_steps_per_second": 3.291, "step": 10500 }, { "epoch": 25.32, "learning_rate": 0.00011598214285714285, "loss": 0.1967, "step": 12000 }, { "epoch": 25.32, "eval_cer": 0.08783397194722332, "eval_loss": 0.4078831076622009, "eval_runtime": 142.9582, "eval_samples_per_second": 26.371, "eval_steps_per_second": 3.302, "step": 12000 }, { "epoch": 28.49, "learning_rate": 9.08705357142857e-05, "loss": 0.1712, "step": 13500 }, { "epoch": 28.49, "eval_cer": 0.08651628898867833, "eval_loss": 0.42893365025520325, "eval_runtime": 143.8273, "eval_samples_per_second": 26.212, "eval_steps_per_second": 3.282, "step": 13500 }, { "epoch": 31.65, "learning_rate": 6.577566964285713e-05, "loss": 0.1493, "step": 15000 }, { "epoch": 31.65, "eval_cer": 0.08501655772665014, "eval_loss": 0.4456014931201935, "eval_runtime": 144.3831, "eval_samples_per_second": 26.111, "eval_steps_per_second": 3.269, "step": 15000 }, { "epoch": 34.82, "learning_rate": 4.068080357142857e-05, "loss": 0.1333, "step": 16500 }, { "epoch": 34.82, "eval_cer": 0.08429703347954991, "eval_loss": 0.457296758890152, "eval_runtime": 144.9279, "eval_samples_per_second": 26.013, "eval_steps_per_second": 3.257, "step": 16500 }, { "epoch": 37.98, "learning_rate": 1.556919642857143e-05, "loss": 0.1191, "step": 18000 }, { "epoch": 37.98, "eval_cer": 0.08327409539331103, "eval_loss": 0.4633271098136902, "eval_runtime": 146.5025, "eval_samples_per_second": 25.733, "eval_steps_per_second": 3.222, "step": 18000 } ], "logging_steps": 1500, "max_steps": 18920, "num_train_epochs": 40, "save_steps": 1500, "total_flos": 1.471168372274983e+20, "trial_name": null, "trial_params": null }