|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9619084263178146, |
|
"eval_steps": 500, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"grad_norm": 9.123078346252441, |
|
"learning_rate": 0.0002465, |
|
"loss": 4.6156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"eval_loss": 1.5867419242858887, |
|
"eval_runtime": 188.7881, |
|
"eval_samples_per_second": 37.195, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.9176576887814082, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"grad_norm": 6.654547691345215, |
|
"learning_rate": 0.00029181249999999997, |
|
"loss": 1.0315, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"eval_loss": 1.1748294830322266, |
|
"eval_runtime": 190.3404, |
|
"eval_samples_per_second": 36.892, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.7888358867377988, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"grad_norm": 6.171149253845215, |
|
"learning_rate": 0.0002813958333333333, |
|
"loss": 0.834, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"eval_loss": 1.0392996072769165, |
|
"eval_runtime": 189.2832, |
|
"eval_samples_per_second": 37.098, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.7219867391275462, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"grad_norm": 6.896900177001953, |
|
"learning_rate": 0.00027097916666666666, |
|
"loss": 0.7184, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"eval_loss": 0.9616143703460693, |
|
"eval_runtime": 190.9944, |
|
"eval_samples_per_second": 36.765, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.663747857399115, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"grad_norm": 9.408955574035645, |
|
"learning_rate": 0.0002605625, |
|
"loss": 0.6655, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"eval_loss": 0.9033711552619934, |
|
"eval_runtime": 190.9851, |
|
"eval_samples_per_second": 36.767, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.6331335787081944, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"grad_norm": 6.4334211349487305, |
|
"learning_rate": 0.0002501458333333333, |
|
"loss": 0.6193, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"eval_loss": 0.8614802956581116, |
|
"eval_runtime": 191.2463, |
|
"eval_samples_per_second": 36.717, |
|
"eval_steps_per_second": 0.575, |
|
"eval_wer": 0.6238988028009939, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"grad_norm": 3.711681365966797, |
|
"learning_rate": 0.00023972916666666665, |
|
"loss": 0.5952, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"eval_loss": 0.8161324858665466, |
|
"eval_runtime": 191.2031, |
|
"eval_samples_per_second": 36.725, |
|
"eval_steps_per_second": 0.575, |
|
"eval_wer": 0.5866275129884798, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"grad_norm": 7.527787208557129, |
|
"learning_rate": 0.00022933333333333332, |
|
"loss": 0.5622, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"eval_loss": 0.811023473739624, |
|
"eval_runtime": 190.6985, |
|
"eval_samples_per_second": 36.823, |
|
"eval_steps_per_second": 0.577, |
|
"eval_wer": 0.5850728816487065, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"grad_norm": 11.801218032836914, |
|
"learning_rate": 0.0002189583333333333, |
|
"loss": 0.5341, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"eval_loss": 0.757978618144989, |
|
"eval_runtime": 192.268, |
|
"eval_samples_per_second": 36.522, |
|
"eval_steps_per_second": 0.572, |
|
"eval_wer": 0.5546579146680132, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"grad_norm": 9.381750106811523, |
|
"learning_rate": 0.00020854166666666664, |
|
"loss": 0.522, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"eval_loss": 0.7397128343582153, |
|
"eval_runtime": 191.0373, |
|
"eval_samples_per_second": 36.757, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.5411711556092959, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"grad_norm": 6.341240882873535, |
|
"learning_rate": 0.00019812499999999998, |
|
"loss": 0.5123, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"eval_loss": 0.7228623628616333, |
|
"eval_runtime": 191.6536, |
|
"eval_samples_per_second": 36.639, |
|
"eval_steps_per_second": 0.574, |
|
"eval_wer": 0.531737067991868, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"grad_norm": 6.53903341293335, |
|
"learning_rate": 0.00018772916666666666, |
|
"loss": 0.4884, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"eval_loss": 0.72346431016922, |
|
"eval_runtime": 191.4082, |
|
"eval_samples_per_second": 36.686, |
|
"eval_steps_per_second": 0.575, |
|
"eval_wer": 0.5164830784358017, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"grad_norm": 10.402660369873047, |
|
"learning_rate": 0.00017731249999999998, |
|
"loss": 0.4658, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"eval_loss": 0.681357204914093, |
|
"eval_runtime": 191.0697, |
|
"eval_samples_per_second": 36.751, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.5116995973903453, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"grad_norm": 11.663326263427734, |
|
"learning_rate": 0.00016691666666666667, |
|
"loss": 0.4471, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"eval_loss": 0.662290632724762, |
|
"eval_runtime": 191.4867, |
|
"eval_samples_per_second": 36.671, |
|
"eval_steps_per_second": 0.574, |
|
"eval_wer": 0.4890577871085186, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"grad_norm": 7.363061428070068, |
|
"learning_rate": 0.00015649999999999998, |
|
"loss": 0.4338, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"eval_loss": 0.6449915170669556, |
|
"eval_runtime": 190.9868, |
|
"eval_samples_per_second": 36.767, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.4913830903945043, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"grad_norm": 14.478469848632812, |
|
"learning_rate": 0.00014610416666666667, |
|
"loss": 0.4267, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"eval_loss": 0.6256160736083984, |
|
"eval_runtime": 190.8261, |
|
"eval_samples_per_second": 36.798, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.4685419684024502, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"grad_norm": 10.456161499023438, |
|
"learning_rate": 0.00013568749999999998, |
|
"loss": 0.4283, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"eval_loss": 0.6342806816101074, |
|
"eval_runtime": 190.609, |
|
"eval_samples_per_second": 36.84, |
|
"eval_steps_per_second": 0.577, |
|
"eval_wer": 0.4710665833986633, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"grad_norm": 9.847672462463379, |
|
"learning_rate": 0.00012527083333333333, |
|
"loss": 0.4131, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"eval_loss": 0.5988845229148865, |
|
"eval_runtime": 189.2404, |
|
"eval_samples_per_second": 37.106, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.4486506597217608, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"grad_norm": 7.610143661499023, |
|
"learning_rate": 0.00011485416666666666, |
|
"loss": 0.4317, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"eval_loss": 0.7167520523071289, |
|
"eval_runtime": 189.8256, |
|
"eval_samples_per_second": 36.992, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.4919677380778379, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.00010691666666666665, |
|
"loss": 0.5904, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"eval_loss": NaN, |
|
"eval_runtime": 190.1563, |
|
"eval_samples_per_second": 36.928, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.7309956284298224, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6733358984224702, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0513, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6733358984224702, |
|
"eval_loss": NaN, |
|
"eval_runtime": 185.5416, |
|
"eval_samples_per_second": 37.846, |
|
"eval_steps_per_second": 0.593, |
|
"eval_wer": 1.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.705399512633064, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.705399512633064, |
|
"eval_loss": NaN, |
|
"eval_runtime": 185.2695, |
|
"eval_samples_per_second": 37.902, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.7374631268436578, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7374631268436578, |
|
"eval_loss": NaN, |
|
"eval_runtime": 185.1794, |
|
"eval_samples_per_second": 37.92, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7695267410542517, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7695267410542517, |
|
"eval_loss": NaN, |
|
"eval_runtime": 184.8489, |
|
"eval_samples_per_second": 37.988, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8015903552648455, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8015903552648455, |
|
"eval_loss": NaN, |
|
"eval_runtime": 184.8249, |
|
"eval_samples_per_second": 37.993, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8336539694754392, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8336539694754392, |
|
"eval_loss": NaN, |
|
"eval_runtime": 185.2964, |
|
"eval_samples_per_second": 37.896, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8657175836860331, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8657175836860331, |
|
"eval_loss": NaN, |
|
"eval_runtime": 184.7613, |
|
"eval_samples_per_second": 38.006, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8977811978966269, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.8977811978966269, |
|
"eval_loss": NaN, |
|
"eval_runtime": 184.7837, |
|
"eval_samples_per_second": 38.001, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9298448121072207, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.9298448121072207, |
|
"eval_loss": NaN, |
|
"eval_runtime": 184.6054, |
|
"eval_samples_per_second": 38.038, |
|
"eval_steps_per_second": 0.596, |
|
"eval_wer": 1.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.9619084263178146, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.000106875, |
|
"loss": 0.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9619084263178146, |
|
"eval_loss": NaN, |
|
"eval_runtime": 184.8182, |
|
"eval_samples_per_second": 37.994, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 1.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9619084263178146, |
|
"step": 15000, |
|
"total_flos": 1.7109669148845115e+19, |
|
"train_loss": 0.5128920831044514, |
|
"train_runtime": 11433.8652, |
|
"train_samples_per_second": 10.495, |
|
"train_steps_per_second": 1.312 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7109669148845115e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|