{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.7084282460136673, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11389521640091116, "eval_loss": 0.7393712997436523, "eval_runtime": 197.0417, "eval_samples_per_second": 35.637, "eval_steps_per_second": 0.558, "eval_wer": 0.4965917697551123, "step": 200 }, { "epoch": 0.22779043280182232, "eval_loss": 0.6405971646308899, "eval_runtime": 190.1593, "eval_samples_per_second": 36.927, "eval_steps_per_second": 0.578, "eval_wer": 0.46169893301797793, "step": 400 }, { "epoch": 0.2847380410022779, "grad_norm": 11.500487327575684, "learning_rate": 0.00027833333333333334, "loss": 2.0443, "step": 500 }, { "epoch": 0.3416856492027335, "eval_loss": 0.6104918718338013, "eval_runtime": 188.7991, "eval_samples_per_second": 37.193, "eval_steps_per_second": 0.583, "eval_wer": 0.4496073559308521, "step": 600 }, { "epoch": 0.45558086560364464, "eval_loss": 0.5864666700363159, "eval_runtime": 188.9909, "eval_samples_per_second": 37.155, "eval_steps_per_second": 0.582, "eval_wer": 0.44139571346948536, "step": 800 }, { "epoch": 0.5694760820045558, "grad_norm": 3.202120542526245, "learning_rate": 0.00022288888888888887, "loss": 0.7145, "step": 1000 }, { "epoch": 0.5694760820045558, "eval_loss": 0.5439139008522034, "eval_runtime": 188.5161, "eval_samples_per_second": 37.249, "eval_steps_per_second": 0.584, "eval_wer": 0.41646846224371836, "step": 1000 }, { "epoch": 0.683371298405467, "eval_loss": 0.5428078174591064, "eval_runtime": 189.9544, "eval_samples_per_second": 36.967, "eval_steps_per_second": 0.579, "eval_wer": 0.41715940950583985, "step": 1200 }, { "epoch": 0.7972665148063781, "eval_loss": 0.5054725408554077, "eval_runtime": 189.6912, "eval_samples_per_second": 37.018, "eval_steps_per_second": 0.58, "eval_wer": 0.38716964084029815, "step": 1400 }, { "epoch": 0.8542141230068337, "grad_norm": 3.453420877456665, "learning_rate": 0.00016777777777777776, "loss": 0.6574, "step": 1500 }, { "epoch": 0.9111617312072893, "eval_loss": 0.49158021807670593, "eval_runtime": 190.7112, "eval_samples_per_second": 36.82, "eval_steps_per_second": 0.577, "eval_wer": 0.3802601682190834, "step": 1600 }, { "epoch": 1.0250569476082005, "eval_loss": 0.4755226671695709, "eval_runtime": 189.3317, "eval_samples_per_second": 37.088, "eval_steps_per_second": 0.581, "eval_wer": 0.3658034255039264, "step": 1800 }, { "epoch": 1.1389521640091116, "grad_norm": 4.045414447784424, "learning_rate": 0.00011244444444444443, "loss": 0.577, "step": 2000 }, { "epoch": 1.1389521640091116, "eval_loss": 0.469484806060791, "eval_runtime": 189.7257, "eval_samples_per_second": 37.011, "eval_steps_per_second": 0.58, "eval_wer": 0.3594254507766513, "step": 2000 }, { "epoch": 1.2528473804100229, "eval_loss": 0.4655653238296509, "eval_runtime": 190.2356, "eval_samples_per_second": 36.912, "eval_steps_per_second": 0.578, "eval_wer": 0.35054943594786003, "step": 2200 }, { "epoch": 1.366742596810934, "eval_loss": 0.5080298185348511, "eval_runtime": 189.8354, "eval_samples_per_second": 36.99, "eval_steps_per_second": 0.579, "eval_wer": 0.3546951195205889, "step": 2400 }, { "epoch": 1.4236902050113895, "grad_norm": 3.1688034534454346, "learning_rate": 5.733333333333333e-05, "loss": 0.534, "step": 2500 }, { "epoch": 1.4806378132118452, "eval_loss": 0.5481207370758057, "eval_runtime": 190.1317, "eval_samples_per_second": 36.932, "eval_steps_per_second": 0.579, "eval_wer": 0.36395647032248635, "step": 2600 }, { "epoch": 1.5945330296127562, "eval_loss": 0.6369026303291321, "eval_runtime": 191.1482, "eval_samples_per_second": 36.736, "eval_steps_per_second": 0.575, "eval_wer": 0.3902921909671933, "step": 2800 }, { "epoch": 1.7084282460136673, "grad_norm": 127.80626678466797, "learning_rate": 1.7777777777777775e-06, "loss": 0.7065, "step": 3000 }, { "epoch": 1.7084282460136673, "eval_loss": 0.6057178378105164, "eval_runtime": 190.1608, "eval_samples_per_second": 36.927, "eval_steps_per_second": 0.578, "eval_wer": 0.37381575625506586, "step": 3000 }, { "epoch": 1.7084282460136673, "step": 3000, "total_flos": 5.580592575460899e+18, "train_loss": 0.8723047383626302, "train_runtime": 4416.1126, "train_samples_per_second": 10.869, "train_steps_per_second": 0.679 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.580592575460899e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }