{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0959752321981426, "eval_steps": 100, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15479876160990713, "eval_loss": 3.5622992515563965, "eval_runtime": 158.441, "eval_samples_per_second": 35.698, "eval_steps_per_second": 4.462, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.30959752321981426, "eval_loss": 3.2967281341552734, "eval_runtime": 153.6671, "eval_samples_per_second": 36.807, "eval_steps_per_second": 4.601, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.46439628482972134, "eval_loss": 2.648355722427368, "eval_runtime": 153.3096, "eval_samples_per_second": 36.893, "eval_steps_per_second": 4.612, "eval_wer": 0.9999518544077289, "step": 300 }, { "epoch": 0.6191950464396285, "eval_loss": 1.060172200202942, "eval_runtime": 153.5019, "eval_samples_per_second": 36.846, "eval_steps_per_second": 4.606, "eval_wer": 0.7315080804352362, "step": 400 }, { "epoch": 0.7739938080495357, "grad_norm": 2.8772997856140137, "learning_rate": 0.00029699999999999996, "loss": 3.6398, "step": 500 }, { "epoch": 0.7739938080495357, "eval_loss": 0.8942204713821411, "eval_runtime": 154.5421, "eval_samples_per_second": 36.598, "eval_steps_per_second": 4.575, "eval_wer": 0.6695607517131806, "step": 500 }, { "epoch": 0.9287925696594427, "eval_loss": 0.7115893959999084, "eval_runtime": 155.1607, "eval_samples_per_second": 36.453, "eval_steps_per_second": 4.557, "eval_wer": 0.5361172184686492, "step": 600 }, { "epoch": 1.08359133126935, "eval_loss": 0.6647589802742004, "eval_runtime": 154.7086, "eval_samples_per_second": 36.559, "eval_steps_per_second": 4.57, "eval_wer": 0.5100704530500233, "step": 700 }, { "epoch": 1.238390092879257, "eval_loss": 0.5869073867797852, "eval_runtime": 154.314, "eval_samples_per_second": 36.653, "eval_steps_per_second": 4.582, "eval_wer": 0.45284139237052845, "step": 800 }, { "epoch": 1.3931888544891642, "eval_loss": 0.5697966814041138, "eval_runtime": 153.6252, "eval_samples_per_second": 36.817, "eval_steps_per_second": 4.602, "eval_wer": 0.43589414389112674, "step": 900 }, { "epoch": 1.5479876160990713, "grad_norm": 0.41244009137153625, "learning_rate": 0.0002016, "loss": 0.5976, "step": 1000 }, { "epoch": 1.5479876160990713, "eval_loss": 0.54075688123703, "eval_runtime": 154.0436, "eval_samples_per_second": 36.717, "eval_steps_per_second": 4.59, "eval_wer": 0.41122755211760365, "step": 1000 }, { "epoch": 1.7027863777089784, "eval_loss": 0.5229234099388123, "eval_runtime": 154.4025, "eval_samples_per_second": 36.632, "eval_steps_per_second": 4.579, "eval_wer": 0.41960488517276245, "step": 1100 }, { "epoch": 1.8575851393188856, "eval_loss": 0.5054619312286377, "eval_runtime": 154.4414, "eval_samples_per_second": 36.622, "eval_steps_per_second": 4.578, "eval_wer": 0.39546789491422063, "step": 1200 }, { "epoch": 2.0123839009287927, "eval_loss": 0.48077496886253357, "eval_runtime": 154.8241, "eval_samples_per_second": 36.532, "eval_steps_per_second": 4.566, "eval_wer": 0.3708815457944825, "step": 1300 }, { "epoch": 2.1671826625387, "eval_loss": 0.46668991446495056, "eval_runtime": 154.4804, "eval_samples_per_second": 36.613, "eval_steps_per_second": 4.577, "eval_wer": 0.3579785270658471, "step": 1400 }, { "epoch": 2.321981424148607, "grad_norm": 0.45199868083000183, "learning_rate": 0.00010179999999999998, "loss": 0.443, "step": 1500 }, { "epoch": 2.321981424148607, "eval_loss": 0.45730867981910706, "eval_runtime": 154.0073, "eval_samples_per_second": 36.726, "eval_steps_per_second": 4.591, "eval_wer": 0.35817110943493125, "step": 1500 }, { "epoch": 2.476780185758514, "eval_loss": 0.44747394323349, "eval_runtime": 156.828, "eval_samples_per_second": 36.065, "eval_steps_per_second": 4.508, "eval_wer": 0.3452199451140248, "step": 1600 }, { "epoch": 2.6315789473684212, "eval_loss": 0.43689531087875366, "eval_runtime": 155.5732, "eval_samples_per_second": 36.356, "eval_steps_per_second": 4.544, "eval_wer": 0.3477877100351463, "step": 1700 }, { "epoch": 2.7863777089783284, "eval_loss": 0.4227137863636017, "eval_runtime": 156.702, "eval_samples_per_second": 36.094, "eval_steps_per_second": 4.512, "eval_wer": 0.32978125852578194, "step": 1800 }, { "epoch": 2.9411764705882355, "eval_loss": 0.4168964922428131, "eval_runtime": 153.7827, "eval_samples_per_second": 36.779, "eval_steps_per_second": 4.597, "eval_wer": 0.3270690568278474, "step": 1900 }, { "epoch": 3.0959752321981426, "grad_norm": 0.4056684672832489, "learning_rate": 2.4e-06, "loss": 0.3475, "step": 2000 }, { "epoch": 3.0959752321981426, "eval_loss": 0.42000359296798706, "eval_runtime": 154.6879, "eval_samples_per_second": 36.564, "eval_steps_per_second": 4.57, "eval_wer": 0.3226557108696699, "step": 2000 }, { "epoch": 3.0959752321981426, "step": 2000, "total_flos": 1.8144283373471887e+19, "train_loss": 1.2569797668457032, "train_runtime": 7798.4407, "train_samples_per_second": 16.414, "train_steps_per_second": 0.256 } ], "logging_steps": 500, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 1.8144283373471887e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }