|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 2550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 7.361999999999999e-05, |
|
"loss": 10.1224, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 3.5429043769836426, |
|
"eval_runtime": 12.8681, |
|
"eval_samples_per_second": 26.5, |
|
"eval_steps_per_second": 3.342, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 7.062e-05, |
|
"loss": 3.2411, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 3.1785788536071777, |
|
"eval_runtime": 12.8876, |
|
"eval_samples_per_second": 26.46, |
|
"eval_steps_per_second": 3.337, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 6.761999999999999e-05, |
|
"loss": 3.1283, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 3.057115316390991, |
|
"eval_runtime": 12.9515, |
|
"eval_samples_per_second": 26.329, |
|
"eval_steps_per_second": 3.32, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 6.462e-05, |
|
"loss": 3.0044, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_loss": 2.956029176712036, |
|
"eval_runtime": 13.0792, |
|
"eval_samples_per_second": 26.072, |
|
"eval_steps_per_second": 3.288, |
|
"eval_wer": 0.9996309963099631, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 6.162e-05, |
|
"loss": 2.9388, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 2.897726535797119, |
|
"eval_runtime": 13.249, |
|
"eval_samples_per_second": 25.738, |
|
"eval_steps_per_second": 3.246, |
|
"eval_wer": 1.0011070110701108, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 5.861999999999999e-05, |
|
"loss": 2.86, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"eval_loss": 2.694431781768799, |
|
"eval_runtime": 12.8219, |
|
"eval_samples_per_second": 26.595, |
|
"eval_steps_per_second": 3.354, |
|
"eval_wer": 0.9952029520295202, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 5.562e-05, |
|
"loss": 2.5538, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"eval_loss": 2.0967445373535156, |
|
"eval_runtime": 12.8896, |
|
"eval_samples_per_second": 26.456, |
|
"eval_steps_per_second": 3.336, |
|
"eval_wer": 0.9435424354243542, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 5.2619999999999994e-05, |
|
"loss": 2.1214, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"eval_loss": 1.4815592765808105, |
|
"eval_runtime": 12.9225, |
|
"eval_samples_per_second": 26.388, |
|
"eval_steps_per_second": 3.328, |
|
"eval_wer": 0.8428044280442805, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 4.9619999999999996e-05, |
|
"loss": 1.8136, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"eval_loss": 1.245869517326355, |
|
"eval_runtime": 12.7527, |
|
"eval_samples_per_second": 26.739, |
|
"eval_steps_per_second": 3.372, |
|
"eval_wer": 0.8047970479704797, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 4.662e-05, |
|
"loss": 1.6795, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"eval_loss": 1.1232149600982666, |
|
"eval_runtime": 12.9316, |
|
"eval_samples_per_second": 26.37, |
|
"eval_steps_per_second": 3.325, |
|
"eval_wer": 0.7649446494464944, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"learning_rate": 4.362e-05, |
|
"loss": 1.5571, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"eval_loss": 1.0510376691818237, |
|
"eval_runtime": 12.7516, |
|
"eval_samples_per_second": 26.742, |
|
"eval_steps_per_second": 3.372, |
|
"eval_wer": 0.7431734317343174, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 4.0619999999999994e-05, |
|
"loss": 1.4975, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"eval_loss": 1.0297844409942627, |
|
"eval_runtime": 12.7119, |
|
"eval_samples_per_second": 26.825, |
|
"eval_steps_per_second": 3.383, |
|
"eval_wer": 0.696309963099631, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"learning_rate": 3.762e-05, |
|
"loss": 1.4485, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"eval_loss": 0.9775241017341614, |
|
"eval_runtime": 12.651, |
|
"eval_samples_per_second": 26.954, |
|
"eval_steps_per_second": 3.399, |
|
"eval_wer": 0.707380073800738, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 3.462e-05, |
|
"loss": 1.3924, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"eval_loss": 0.9797706007957458, |
|
"eval_runtime": 12.6253, |
|
"eval_samples_per_second": 27.009, |
|
"eval_steps_per_second": 3.406, |
|
"eval_wer": 0.6955719557195572, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"learning_rate": 3.161999999999999e-05, |
|
"loss": 1.3604, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"eval_loss": 0.9344653487205505, |
|
"eval_runtime": 12.7334, |
|
"eval_samples_per_second": 26.78, |
|
"eval_steps_per_second": 3.377, |
|
"eval_wer": 0.7092250922509226, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"learning_rate": 2.8619999999999997e-05, |
|
"loss": 1.3224, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"eval_loss": 0.9535229802131653, |
|
"eval_runtime": 12.7291, |
|
"eval_samples_per_second": 26.789, |
|
"eval_steps_per_second": 3.378, |
|
"eval_wer": 0.6830258302583025, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 2.562e-05, |
|
"loss": 1.2816, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.9177776575088501, |
|
"eval_runtime": 12.8107, |
|
"eval_samples_per_second": 26.618, |
|
"eval_steps_per_second": 3.357, |
|
"eval_wer": 0.6678966789667896, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 2.2619999999999997e-05, |
|
"loss": 1.2623, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"eval_loss": 0.924865186214447, |
|
"eval_runtime": 12.5463, |
|
"eval_samples_per_second": 27.179, |
|
"eval_steps_per_second": 3.427, |
|
"eval_wer": 0.6678966789667896, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"learning_rate": 1.962e-05, |
|
"loss": 1.2421, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"eval_loss": 0.9123861193656921, |
|
"eval_runtime": 12.7025, |
|
"eval_samples_per_second": 26.845, |
|
"eval_steps_per_second": 3.385, |
|
"eval_wer": 0.6734317343173432, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"learning_rate": 1.6619999999999997e-05, |
|
"loss": 1.2208, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"eval_loss": 0.8961586356163025, |
|
"eval_runtime": 12.5233, |
|
"eval_samples_per_second": 27.229, |
|
"eval_steps_per_second": 3.434, |
|
"eval_wer": 0.666420664206642, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"learning_rate": 1.362e-05, |
|
"loss": 1.2145, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"eval_loss": 0.8903014063835144, |
|
"eval_runtime": 12.6636, |
|
"eval_samples_per_second": 26.928, |
|
"eval_steps_per_second": 3.396, |
|
"eval_wer": 0.6734317343173432, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"learning_rate": 1.062e-05, |
|
"loss": 1.1888, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"eval_loss": 0.8883377313613892, |
|
"eval_runtime": 12.7667, |
|
"eval_samples_per_second": 26.71, |
|
"eval_steps_per_second": 3.368, |
|
"eval_wer": 0.6708487084870849, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"learning_rate": 7.619999999999999e-06, |
|
"loss": 1.1933, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"eval_loss": 0.8928494453430176, |
|
"eval_runtime": 12.6877, |
|
"eval_samples_per_second": 26.876, |
|
"eval_steps_per_second": 3.389, |
|
"eval_wer": 0.6723247232472325, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"learning_rate": 4.62e-06, |
|
"loss": 1.1838, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"eval_loss": 0.8868067860603333, |
|
"eval_runtime": 12.5901, |
|
"eval_samples_per_second": 27.085, |
|
"eval_steps_per_second": 3.415, |
|
"eval_wer": 0.6678966789667896, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 1.62e-06, |
|
"loss": 1.1634, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_loss": 0.888584554195404, |
|
"eval_runtime": 12.7157, |
|
"eval_samples_per_second": 26.817, |
|
"eval_steps_per_second": 3.382, |
|
"eval_wer": 0.6656826568265682, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 2550, |
|
"total_flos": 4.920553620909019e+18, |
|
"train_loss": 2.1006999116785385, |
|
"train_runtime": 2771.7539, |
|
"train_samples_per_second": 14.612, |
|
"train_steps_per_second": 0.92 |
|
} |
|
], |
|
"max_steps": 2550, |
|
"num_train_epochs": 50, |
|
"total_flos": 4.920553620909019e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|