{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9619084263178146, "eval_steps": 500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03206361421059382, "grad_norm": 9.123078346252441, "learning_rate": 0.0002465, "loss": 4.6156, "step": 500 }, { "epoch": 0.03206361421059382, "eval_loss": 1.5867419242858887, "eval_runtime": 188.7881, "eval_samples_per_second": 37.195, "eval_steps_per_second": 0.583, "eval_wer": 0.9176576887814082, "step": 500 }, { "epoch": 0.06412722842118763, "grad_norm": 6.654547691345215, "learning_rate": 0.00029181249999999997, "loss": 1.0315, "step": 1000 }, { "epoch": 0.06412722842118763, "eval_loss": 1.1748294830322266, "eval_runtime": 190.3404, "eval_samples_per_second": 36.892, "eval_steps_per_second": 0.578, "eval_wer": 0.7888358867377988, "step": 1000 }, { "epoch": 0.09619084263178146, "grad_norm": 6.171149253845215, "learning_rate": 0.0002813958333333333, "loss": 0.834, "step": 1500 }, { "epoch": 0.09619084263178146, "eval_loss": 1.0392996072769165, "eval_runtime": 189.2832, "eval_samples_per_second": 37.098, "eval_steps_per_second": 0.581, "eval_wer": 0.7219867391275462, "step": 1500 }, { "epoch": 0.12825445684237527, "grad_norm": 6.896900177001953, "learning_rate": 0.00027097916666666666, "loss": 0.7184, "step": 2000 }, { "epoch": 0.12825445684237527, "eval_loss": 0.9616143703460693, "eval_runtime": 190.9944, "eval_samples_per_second": 36.765, "eval_steps_per_second": 0.576, "eval_wer": 0.663747857399115, "step": 2000 }, { "epoch": 0.16031807105296908, "grad_norm": 9.408955574035645, "learning_rate": 0.0002605625, "loss": 0.6655, "step": 2500 }, { "epoch": 0.16031807105296908, "eval_loss": 0.9033711552619934, "eval_runtime": 190.9851, "eval_samples_per_second": 36.767, "eval_steps_per_second": 0.576, "eval_wer": 0.6331335787081944, "step": 2500 }, { "epoch": 0.19238168526356292, "grad_norm": 6.4334211349487305, "learning_rate": 0.0002501458333333333, "loss": 0.6193, "step": 3000 }, { "epoch": 0.19238168526356292, "eval_loss": 0.8614802956581116, "eval_runtime": 191.2463, "eval_samples_per_second": 36.717, "eval_steps_per_second": 0.575, "eval_wer": 0.6238988028009939, "step": 3000 }, { "epoch": 0.22444529947415673, "grad_norm": 3.711681365966797, "learning_rate": 0.00023972916666666665, "loss": 0.5952, "step": 3500 }, { "epoch": 0.22444529947415673, "eval_loss": 0.8161324858665466, "eval_runtime": 191.2031, "eval_samples_per_second": 36.725, "eval_steps_per_second": 0.575, "eval_wer": 0.5866275129884798, "step": 3500 }, { "epoch": 0.25650891368475054, "grad_norm": 7.527787208557129, "learning_rate": 0.00022933333333333332, "loss": 0.5622, "step": 4000 }, { "epoch": 0.25650891368475054, "eval_loss": 0.811023473739624, "eval_runtime": 190.6985, "eval_samples_per_second": 36.823, "eval_steps_per_second": 0.577, "eval_wer": 0.5850728816487065, "step": 4000 }, { "epoch": 0.2885725278953444, "grad_norm": 11.801218032836914, "learning_rate": 0.0002189583333333333, "loss": 0.5341, "step": 4500 }, { "epoch": 0.2885725278953444, "eval_loss": 0.757978618144989, "eval_runtime": 192.268, "eval_samples_per_second": 36.522, "eval_steps_per_second": 0.572, "eval_wer": 0.5546579146680132, "step": 4500 }, { "epoch": 0.32063614210593816, "grad_norm": 9.381750106811523, "learning_rate": 0.00020854166666666664, "loss": 0.522, "step": 5000 }, { "epoch": 0.32063614210593816, "eval_loss": 0.7397128343582153, "eval_runtime": 191.0373, "eval_samples_per_second": 36.757, "eval_steps_per_second": 0.576, "eval_wer": 0.5411711556092959, "step": 5000 }, { "epoch": 0.352699756316532, "grad_norm": 6.341240882873535, "learning_rate": 0.00019812499999999998, "loss": 0.5123, "step": 5500 }, { "epoch": 0.352699756316532, "eval_loss": 0.7228623628616333, "eval_runtime": 191.6536, "eval_samples_per_second": 36.639, "eval_steps_per_second": 0.574, "eval_wer": 0.531737067991868, "step": 5500 }, { "epoch": 0.38476337052712584, "grad_norm": 6.53903341293335, "learning_rate": 0.00018772916666666666, "loss": 0.4884, "step": 6000 }, { "epoch": 0.38476337052712584, "eval_loss": 0.72346431016922, "eval_runtime": 191.4082, "eval_samples_per_second": 36.686, "eval_steps_per_second": 0.575, "eval_wer": 0.5164830784358017, "step": 6000 }, { "epoch": 0.4168269847377196, "grad_norm": 10.402660369873047, "learning_rate": 0.00017731249999999998, "loss": 0.4658, "step": 6500 }, { "epoch": 0.4168269847377196, "eval_loss": 0.681357204914093, "eval_runtime": 191.0697, "eval_samples_per_second": 36.751, "eval_steps_per_second": 0.576, "eval_wer": 0.5116995973903453, "step": 6500 }, { "epoch": 0.44889059894831346, "grad_norm": 11.663326263427734, "learning_rate": 0.00016691666666666667, "loss": 0.4471, "step": 7000 }, { "epoch": 0.44889059894831346, "eval_loss": 0.662290632724762, "eval_runtime": 191.4867, "eval_samples_per_second": 36.671, "eval_steps_per_second": 0.574, "eval_wer": 0.4890577871085186, "step": 7000 }, { "epoch": 0.4809542131589073, "grad_norm": 7.363061428070068, "learning_rate": 0.00015649999999999998, "loss": 0.4338, "step": 7500 }, { "epoch": 0.4809542131589073, "eval_loss": 0.6449915170669556, "eval_runtime": 190.9868, "eval_samples_per_second": 36.767, "eval_steps_per_second": 0.576, "eval_wer": 0.4913830903945043, "step": 7500 }, { "epoch": 0.5130178273695011, "grad_norm": 14.478469848632812, "learning_rate": 0.00014610416666666667, "loss": 0.4267, "step": 8000 }, { "epoch": 0.5130178273695011, "eval_loss": 0.6256160736083984, "eval_runtime": 190.8261, "eval_samples_per_second": 36.798, "eval_steps_per_second": 0.576, "eval_wer": 0.4685419684024502, "step": 8000 }, { "epoch": 0.5450814415800949, "grad_norm": 10.456161499023438, "learning_rate": 0.00013568749999999998, "loss": 0.4283, "step": 8500 }, { "epoch": 0.5450814415800949, "eval_loss": 0.6342806816101074, "eval_runtime": 190.609, "eval_samples_per_second": 36.84, "eval_steps_per_second": 0.577, "eval_wer": 0.4710665833986633, "step": 8500 }, { "epoch": 0.5771450557906888, "grad_norm": 9.847672462463379, "learning_rate": 0.00012527083333333333, "loss": 0.4131, "step": 9000 }, { "epoch": 0.5771450557906888, "eval_loss": 0.5988845229148865, "eval_runtime": 189.2404, "eval_samples_per_second": 37.106, "eval_steps_per_second": 0.581, "eval_wer": 0.4486506597217608, "step": 9000 }, { "epoch": 0.6092086700012825, "grad_norm": 7.610143661499023, "learning_rate": 0.00011485416666666666, "loss": 0.4317, "step": 9500 }, { "epoch": 0.6092086700012825, "eval_loss": 0.7167520523071289, "eval_runtime": 189.8256, "eval_samples_per_second": 36.992, "eval_steps_per_second": 0.579, "eval_wer": 0.4919677380778379, "step": 9500 }, { "epoch": 0.6412722842118763, "grad_norm": NaN, "learning_rate": 0.00010691666666666665, "loss": 0.5904, "step": 10000 }, { "epoch": 0.6412722842118763, "eval_loss": NaN, "eval_runtime": 190.1563, "eval_samples_per_second": 36.928, "eval_steps_per_second": 0.578, "eval_wer": 0.7309956284298224, "step": 10000 }, { "epoch": 0.6733358984224702, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0513, "step": 10500 }, { "epoch": 0.6733358984224702, "eval_loss": NaN, "eval_runtime": 185.5416, "eval_samples_per_second": 37.846, "eval_steps_per_second": 0.593, "eval_wer": 1.0, "step": 10500 }, { "epoch": 0.705399512633064, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 11000 }, { "epoch": 0.705399512633064, "eval_loss": NaN, "eval_runtime": 185.2695, "eval_samples_per_second": 37.902, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 11000 }, { "epoch": 0.7374631268436578, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 11500 }, { "epoch": 0.7374631268436578, "eval_loss": NaN, "eval_runtime": 185.1794, "eval_samples_per_second": 37.92, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 11500 }, { "epoch": 0.7695267410542517, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 12000 }, { "epoch": 0.7695267410542517, "eval_loss": NaN, "eval_runtime": 184.8489, "eval_samples_per_second": 37.988, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 12000 }, { "epoch": 0.8015903552648455, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 12500 }, { "epoch": 0.8015903552648455, "eval_loss": NaN, "eval_runtime": 184.8249, "eval_samples_per_second": 37.993, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 12500 }, { "epoch": 0.8336539694754392, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 13000 }, { "epoch": 0.8336539694754392, "eval_loss": NaN, "eval_runtime": 185.2964, "eval_samples_per_second": 37.896, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 13000 }, { "epoch": 0.8657175836860331, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 13500 }, { "epoch": 0.8657175836860331, "eval_loss": NaN, "eval_runtime": 184.7613, "eval_samples_per_second": 38.006, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 13500 }, { "epoch": 0.8977811978966269, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 14000 }, { "epoch": 0.8977811978966269, "eval_loss": NaN, "eval_runtime": 184.7837, "eval_samples_per_second": 38.001, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 14000 }, { "epoch": 0.9298448121072207, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 14500 }, { "epoch": 0.9298448121072207, "eval_loss": NaN, "eval_runtime": 184.6054, "eval_samples_per_second": 38.038, "eval_steps_per_second": 0.596, "eval_wer": 1.0, "step": 14500 }, { "epoch": 0.9619084263178146, "grad_norm": NaN, "learning_rate": 0.000106875, "loss": 0.0, "step": 15000 }, { "epoch": 0.9619084263178146, "eval_loss": NaN, "eval_runtime": 184.8182, "eval_samples_per_second": 37.994, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 15000 }, { "epoch": 0.9619084263178146, "step": 15000, "total_flos": 1.7109669148845115e+19, "train_loss": 0.5128920831044514, "train_runtime": 11433.8652, "train_samples_per_second": 10.495, "train_steps_per_second": 1.312 } ], "logging_steps": 500, "max_steps": 15000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7109669148845115e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }