{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.013162988772745, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07742934572202866, "eval_loss": 3.5325253009796143, "eval_runtime": 173.1277, "eval_samples_per_second": 32.67, "eval_steps_per_second": 4.084, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.1548586914440573, "eval_loss": 2.965233325958252, "eval_runtime": 171.2442, "eval_samples_per_second": 33.029, "eval_steps_per_second": 4.129, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.23228803716608595, "eval_loss": 2.8520941734313965, "eval_runtime": 171.3893, "eval_samples_per_second": 33.001, "eval_steps_per_second": 4.125, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.3097173828881146, "eval_loss": 1.247292160987854, "eval_runtime": 171.2844, "eval_samples_per_second": 33.021, "eval_steps_per_second": 4.128, "eval_wer": 0.8264832854552165, "step": 400 }, { "epoch": 0.38714672861014326, "grad_norm": 1.9864516258239746, "learning_rate": 0.00029699999999999996, "loss": 3.7403, "step": 500 }, { "epoch": 0.38714672861014326, "eval_loss": 0.9730328321456909, "eval_runtime": 172.0915, "eval_samples_per_second": 32.866, "eval_steps_per_second": 4.108, "eval_wer": 0.7234356694644605, "step": 500 }, { "epoch": 0.4645760743321719, "eval_loss": 0.8328432440757751, "eval_runtime": 172.9073, "eval_samples_per_second": 32.711, "eval_steps_per_second": 4.089, "eval_wer": 0.6177560944295549, "step": 600 }, { "epoch": 0.5420054200542005, "eval_loss": 0.7426055073738098, "eval_runtime": 173.8661, "eval_samples_per_second": 32.531, "eval_steps_per_second": 4.066, "eval_wer": 0.5505127505576864, "step": 700 }, { "epoch": 0.6194347657762292, "eval_loss": 0.7127000093460083, "eval_runtime": 172.8776, "eval_samples_per_second": 32.717, "eval_steps_per_second": 4.09, "eval_wer": 0.5540113302627144, "step": 800 }, { "epoch": 0.6968641114982579, "eval_loss": 0.6691900491714478, "eval_runtime": 171.858, "eval_samples_per_second": 32.911, "eval_steps_per_second": 4.114, "eval_wer": 0.5079680955208551, "step": 900 }, { "epoch": 0.7742934572202865, "grad_norm": 3.1098098754882812, "learning_rate": 0.00022928571428571426, "loss": 0.7271, "step": 1000 }, { "epoch": 0.7742934572202865, "eval_loss": 0.6375711560249329, "eval_runtime": 171.5356, "eval_samples_per_second": 32.973, "eval_steps_per_second": 4.122, "eval_wer": 0.5256214793535652, "step": 1000 }, { "epoch": 0.8517228029423152, "eval_loss": 0.6119316816329956, "eval_runtime": 171.7234, "eval_samples_per_second": 32.937, "eval_steps_per_second": 4.117, "eval_wer": 0.47057501885702363, "step": 1100 }, { "epoch": 0.9291521486643438, "eval_loss": 0.5987285375595093, "eval_runtime": 171.8326, "eval_samples_per_second": 32.916, "eval_steps_per_second": 4.114, "eval_wer": 0.4651024698688835, "step": 1200 }, { "epoch": 1.0065814943863725, "eval_loss": 0.56138676404953, "eval_runtime": 173.244, "eval_samples_per_second": 32.648, "eval_steps_per_second": 4.081, "eval_wer": 0.4267304328288745, "step": 1300 }, { "epoch": 1.084010840108401, "eval_loss": 0.5463124513626099, "eval_runtime": 171.8011, "eval_samples_per_second": 32.922, "eval_steps_per_second": 4.115, "eval_wer": 0.4228948339779493, "step": 1400 }, { "epoch": 1.1614401858304297, "grad_norm": 0.41259104013442993, "learning_rate": 0.00015799999999999996, "loss": 0.5511, "step": 1500 }, { "epoch": 1.1614401858304297, "eval_loss": 0.5231888890266418, "eval_runtime": 173.7205, "eval_samples_per_second": 32.558, "eval_steps_per_second": 4.07, "eval_wer": 0.40788945772014573, "step": 1500 }, { "epoch": 1.2388695315524583, "eval_loss": 0.518454909324646, "eval_runtime": 172.0395, "eval_samples_per_second": 32.876, "eval_steps_per_second": 4.11, "eval_wer": 0.4029465102469869, "step": 1600 }, { "epoch": 1.316298877274487, "eval_loss": 0.5089535713195801, "eval_runtime": 171.996, "eval_samples_per_second": 32.884, "eval_steps_per_second": 4.111, "eval_wer": 0.4042303927075476, "step": 1700 }, { "epoch": 1.3937282229965158, "eval_loss": 0.47846707701683044, "eval_runtime": 173.133, "eval_samples_per_second": 32.669, "eval_steps_per_second": 4.084, "eval_wer": 0.38505239845292166, "step": 1800 }, { "epoch": 1.4711575687185443, "eval_loss": 0.47747060656547546, "eval_runtime": 172.0577, "eval_samples_per_second": 32.873, "eval_steps_per_second": 4.109, "eval_wer": 0.3802699362873329, "step": 1900 }, { "epoch": 1.5485869144405728, "grad_norm": 0.49539849162101746, "learning_rate": 8.685714285714285e-05, "loss": 0.4529, "step": 2000 }, { "epoch": 1.5485869144405728, "eval_loss": 0.46770602464675903, "eval_runtime": 172.1877, "eval_samples_per_second": 32.848, "eval_steps_per_second": 4.106, "eval_wer": 0.37218147678580027, "step": 2000 }, { "epoch": 1.6260162601626016, "eval_loss": 0.4573723077774048, "eval_runtime": 173.1529, "eval_samples_per_second": 32.665, "eval_steps_per_second": 4.083, "eval_wer": 0.3543997047070341, "step": 2100 }, { "epoch": 1.7034456058846303, "eval_loss": 0.4472625255584717, "eval_runtime": 173.3523, "eval_samples_per_second": 32.627, "eval_steps_per_second": 4.078, "eval_wer": 0.3561810916210621, "step": 2200 }, { "epoch": 1.7808749516066589, "eval_loss": 0.4436591863632202, "eval_runtime": 173.0139, "eval_samples_per_second": 32.691, "eval_steps_per_second": 4.086, "eval_wer": 0.34703342908956686, "step": 2300 }, { "epoch": 1.8583042973286876, "eval_loss": 0.43528568744659424, "eval_runtime": 173.1967, "eval_samples_per_second": 32.657, "eval_steps_per_second": 4.082, "eval_wer": 0.3450113142141837, "step": 2400 }, { "epoch": 1.9357336430507162, "grad_norm": 0.6141678094863892, "learning_rate": 1.557142857142857e-05, "loss": 0.4149, "step": 2500 }, { "epoch": 1.9357336430507162, "eval_loss": 0.4299843907356262, "eval_runtime": 172.6246, "eval_samples_per_second": 32.765, "eval_steps_per_second": 4.096, "eval_wer": 0.34008441527178185, "step": 2500 }, { "epoch": 2.013162988772745, "eval_loss": 0.4289664328098297, "eval_runtime": 173.2622, "eval_samples_per_second": 32.644, "eval_steps_per_second": 4.081, "eval_wer": 0.33783762096580056, "step": 2600 }, { "epoch": 2.013162988772745, "step": 2600, "total_flos": 1.1633941226063049e+19, "train_loss": 1.1463891924344576, "train_runtime": 8433.2953, "train_samples_per_second": 9.866, "train_steps_per_second": 0.308 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "total_flos": 1.1633941226063049e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }