{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6412722842118763, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03206361421059382, "grad_norm": 8.037480354309082, "learning_rate": 0.0002465, "loss": 4.7051, "step": 500 }, { "epoch": 0.03206361421059382, "eval_loss": 1.7504417896270752, "eval_runtime": 184.6482, "eval_samples_per_second": 38.029, "eval_steps_per_second": 0.596, "eval_wer": 0.9570416827223323, "step": 500 }, { "epoch": 0.06412722842118763, "grad_norm": 7.763198375701904, "learning_rate": 0.0002874574468085106, "loss": 1.0409, "step": 1000 }, { "epoch": 0.06412722842118763, "eval_loss": 1.1511156558990479, "eval_runtime": 184.729, "eval_samples_per_second": 38.012, "eval_steps_per_second": 0.595, "eval_wer": 0.776093224730597, "step": 1000 }, { "epoch": 0.09619084263178146, "grad_norm": 3.2267072200775146, "learning_rate": 0.0002715, "loss": 0.8183, "step": 1500 }, { "epoch": 0.09619084263178146, "eval_loss": 1.0506497621536255, "eval_runtime": 186.2316, "eval_samples_per_second": 37.706, "eval_steps_per_second": 0.591, "eval_wer": 0.70972242522489, "step": 1500 }, { "epoch": 0.12825445684237527, "grad_norm": 6.372620582580566, "learning_rate": 0.00025554255319148935, "loss": 0.7091, "step": 2000 }, { "epoch": 0.12825445684237527, "eval_loss": 0.9421387314796448, "eval_runtime": 186.5946, "eval_samples_per_second": 37.632, "eval_steps_per_second": 0.59, "eval_wer": 0.6609707809032807, "step": 2000 }, { "epoch": 0.16031807105296908, "grad_norm": 5.675894260406494, "learning_rate": 0.0002395851063829787, "loss": 0.6547, "step": 2500 }, { "epoch": 0.16031807105296908, "eval_loss": 0.8725515007972717, "eval_runtime": 187.2013, "eval_samples_per_second": 37.51, "eval_steps_per_second": 0.588, "eval_wer": 0.6127639219229594, "step": 2500 }, { "epoch": 0.19238168526356292, "grad_norm": 6.913870334625244, "learning_rate": 0.00022362765957446805, "loss": 0.6088, "step": 3000 }, { "epoch": 0.19238168526356292, "eval_loss": 0.8246235847473145, "eval_runtime": 188.3497, "eval_samples_per_second": 37.282, "eval_steps_per_second": 0.584, "eval_wer": 0.5989582641278784, "step": 3000 }, { "epoch": 0.22444529947415673, "grad_norm": 4.30249547958374, "learning_rate": 0.00020767021276595744, "loss": 0.5781, "step": 3500 }, { "epoch": 0.22444529947415673, "eval_loss": 0.802536129951477, "eval_runtime": 187.0791, "eval_samples_per_second": 37.535, "eval_steps_per_second": 0.588, "eval_wer": 0.5747352476115813, "step": 3500 }, { "epoch": 0.25650891368475054, "grad_norm": 3.4820008277893066, "learning_rate": 0.0001917446808510638, "loss": 0.5429, "step": 4000 }, { "epoch": 0.25650891368475054, "eval_loss": 0.7359501123428345, "eval_runtime": 186.7747, "eval_samples_per_second": 37.596, "eval_steps_per_second": 0.589, "eval_wer": 0.53048804794111, "step": 4000 }, { "epoch": 0.2885725278953444, "grad_norm": 11.696717262268066, "learning_rate": 0.00017585106382978722, "loss": 0.5104, "step": 4500 }, { "epoch": 0.2885725278953444, "eval_loss": 0.7335178852081299, "eval_runtime": 187.3685, "eval_samples_per_second": 37.477, "eval_steps_per_second": 0.587, "eval_wer": 0.5394039251119468, "step": 4500 }, { "epoch": 0.32063614210593816, "grad_norm": 7.053103446960449, "learning_rate": 0.00015989361702127658, "loss": 0.501, "step": 5000 }, { "epoch": 0.32063614210593816, "eval_loss": 0.6932825446128845, "eval_runtime": 186.2726, "eval_samples_per_second": 37.697, "eval_steps_per_second": 0.591, "eval_wer": 0.5087763589736776, "step": 5000 }, { "epoch": 0.352699756316532, "grad_norm": 6.128586769104004, "learning_rate": 0.00014393617021276595, "loss": 0.4708, "step": 5500 }, { "epoch": 0.352699756316532, "eval_loss": 0.6770374774932861, "eval_runtime": 188.2655, "eval_samples_per_second": 37.298, "eval_steps_per_second": 0.584, "eval_wer": 0.5112743990751937, "step": 5500 }, { "epoch": 0.38476337052712584, "grad_norm": 7.154539108276367, "learning_rate": 0.00012801063829787234, "loss": 0.4526, "step": 6000 }, { "epoch": 0.38476337052712584, "eval_loss": 0.6608560681343079, "eval_runtime": 187.3283, "eval_samples_per_second": 37.485, "eval_steps_per_second": 0.587, "eval_wer": 0.48059368314753054, "step": 6000 }, { "epoch": 0.4168269847377196, "grad_norm": 5.313536643981934, "learning_rate": 0.0001120531914893617, "loss": 0.4235, "step": 6500 }, { "epoch": 0.4168269847377196, "eval_loss": 0.637322187423706, "eval_runtime": 186.315, "eval_samples_per_second": 37.689, "eval_steps_per_second": 0.59, "eval_wer": 0.485842224850184, "step": 6500 }, { "epoch": 0.44889059894831346, "grad_norm": 6.399425983428955, "learning_rate": 9.612765957446806e-05, "loss": 0.4032, "step": 7000 }, { "epoch": 0.44889059894831346, "eval_loss": 0.6047533750534058, "eval_runtime": 186.8155, "eval_samples_per_second": 37.588, "eval_steps_per_second": 0.589, "eval_wer": 0.4466176802774419, "step": 7000 }, { "epoch": 0.4809542131589073, "grad_norm": 11.48141098022461, "learning_rate": 8.017021276595744e-05, "loss": 0.3863, "step": 7500 }, { "epoch": 0.4809542131589073, "eval_loss": 0.5946004390716553, "eval_runtime": 186.2938, "eval_samples_per_second": 37.693, "eval_steps_per_second": 0.59, "eval_wer": 0.4432160937562285, "step": 7500 }, { "epoch": 0.5130178273695011, "grad_norm": 32.89252471923828, "learning_rate": 6.424468085106383e-05, "loss": 0.3766, "step": 8000 }, { "epoch": 0.5130178273695011, "eval_loss": 0.5737225413322449, "eval_runtime": 186.9085, "eval_samples_per_second": 37.569, "eval_steps_per_second": 0.589, "eval_wer": 0.4298489217236477, "step": 8000 }, { "epoch": 0.5450814415800949, "grad_norm": 4.741519451141357, "learning_rate": 4.8287234042553194e-05, "loss": 0.3746, "step": 8500 }, { "epoch": 0.5450814415800949, "eval_loss": 0.5668203234672546, "eval_runtime": 186.8619, "eval_samples_per_second": 37.579, "eval_steps_per_second": 0.589, "eval_wer": 0.4247731168365245, "step": 8500 }, { "epoch": 0.5771450557906888, "grad_norm": 13.890504837036133, "learning_rate": 3.232978723404255e-05, "loss": 0.3586, "step": 9000 }, { "epoch": 0.5771450557906888, "eval_loss": 0.5485312342643738, "eval_runtime": 187.9252, "eval_samples_per_second": 37.366, "eval_steps_per_second": 0.585, "eval_wer": 0.4100772000690947, "step": 9000 }, { "epoch": 0.6092086700012825, "grad_norm": 8.073569297790527, "learning_rate": 1.6372340425531912e-05, "loss": 0.3552, "step": 9500 }, { "epoch": 0.6092086700012825, "eval_loss": 0.5377594828605652, "eval_runtime": 187.0305, "eval_samples_per_second": 37.545, "eval_steps_per_second": 0.588, "eval_wer": 0.40320758978992544, "step": 9500 }, { "epoch": 0.6412722842118763, "grad_norm": 6.519000053405762, "learning_rate": 4.1489361702127654e-07, "loss": 0.3326, "step": 10000 }, { "epoch": 0.6412722842118763, "eval_loss": 0.5324302911758423, "eval_runtime": 186.1276, "eval_samples_per_second": 37.727, "eval_steps_per_second": 0.591, "eval_wer": 0.40138720950318235, "step": 10000 }, { "epoch": 0.6412722842118763, "step": 10000, "total_flos": 1.1393778193380235e+19, "train_loss": 0.73015986328125, "train_runtime": 7697.7754, "train_samples_per_second": 10.393, "train_steps_per_second": 1.299 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1393778193380235e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }