|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.26829268292683, |
|
"global_step": 7200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 5.7155, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 3.039255142211914, |
|
"eval_runtime": 56.5805, |
|
"eval_samples_per_second": 8.271, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.0002869186046511628, |
|
"loss": 1.6883, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_loss": 0.7244638800621033, |
|
"eval_runtime": 58.2783, |
|
"eval_samples_per_second": 8.03, |
|
"eval_wer": 0.5629911280101394, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0002694767441860465, |
|
"loss": 0.3776, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 0.6811308264732361, |
|
"eval_runtime": 57.5618, |
|
"eval_samples_per_second": 8.13, |
|
"eval_wer": 0.49150823827629914, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00025203488372093025, |
|
"loss": 0.2425, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 0.629974365234375, |
|
"eval_runtime": 57.7361, |
|
"eval_samples_per_second": 8.106, |
|
"eval_wer": 0.45449936628643856, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 0.00023459302325581392, |
|
"loss": 0.1872, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"eval_loss": 0.7140640616416931, |
|
"eval_runtime": 58.4584, |
|
"eval_samples_per_second": 8.006, |
|
"eval_wer": 0.44283903675538655, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.00021715116279069765, |
|
"loss": 0.1565, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"eval_loss": 0.7015160322189331, |
|
"eval_runtime": 57.3174, |
|
"eval_samples_per_second": 8.165, |
|
"eval_wer": 0.4385297845373891, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 0.00019970930232558135, |
|
"loss": 0.1232, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"eval_loss": 0.7360302805900574, |
|
"eval_runtime": 57.5205, |
|
"eval_samples_per_second": 8.136, |
|
"eval_wer": 0.4326996197718631, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 0.00018226744186046508, |
|
"loss": 0.1131, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"eval_loss": 0.921540379524231, |
|
"eval_runtime": 57.6005, |
|
"eval_samples_per_second": 8.125, |
|
"eval_wer": 0.44714828897338404, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 0.0001648255813953488, |
|
"loss": 0.1004, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"eval_loss": 0.8807307481765747, |
|
"eval_runtime": 57.8571, |
|
"eval_samples_per_second": 8.089, |
|
"eval_wer": 0.4187579214195184, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 0.00014738372093023254, |
|
"loss": 0.0851, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"eval_loss": 0.7986534237861633, |
|
"eval_runtime": 57.6046, |
|
"eval_samples_per_second": 8.124, |
|
"eval_wer": 0.43067173637515843, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 0.00012994186046511627, |
|
"loss": 0.0763, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"eval_loss": 0.8154810667037964, |
|
"eval_runtime": 57.7278, |
|
"eval_samples_per_second": 8.107, |
|
"eval_wer": 0.41698352344740175, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 0.0001125, |
|
"loss": 0.0664, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"eval_loss": 0.8099461197853088, |
|
"eval_runtime": 57.6128, |
|
"eval_samples_per_second": 8.123, |
|
"eval_wer": 0.4124207858048162, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"learning_rate": 9.505813953488371e-05, |
|
"loss": 0.0611, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"eval_loss": 0.8145996928215027, |
|
"eval_runtime": 57.1919, |
|
"eval_samples_per_second": 8.183, |
|
"eval_wer": 0.4114068441064639, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 7.761627906976744e-05, |
|
"loss": 0.0573, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"eval_loss": 0.8513393402099609, |
|
"eval_runtime": 58.2107, |
|
"eval_samples_per_second": 8.04, |
|
"eval_wer": 0.4126742712294043, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 24.39, |
|
"learning_rate": 6.0174418604651156e-05, |
|
"loss": 0.0531, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.39, |
|
"eval_loss": 0.8830446600914001, |
|
"eval_runtime": 57.6531, |
|
"eval_samples_per_second": 8.118, |
|
"eval_wer": 0.4050697084917617, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 4.273255813953488e-05, |
|
"loss": 0.0498, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"eval_loss": 0.8851639628410339, |
|
"eval_runtime": 57.3676, |
|
"eval_samples_per_second": 8.158, |
|
"eval_wer": 0.40481622306717363, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"learning_rate": 2.52906976744186e-05, |
|
"loss": 0.0444, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"eval_loss": 0.936349093914032, |
|
"eval_runtime": 57.896, |
|
"eval_samples_per_second": 8.083, |
|
"eval_wer": 0.40887198986058304, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 29.27, |
|
"learning_rate": 7.848837209302325e-06, |
|
"loss": 0.0392, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 29.27, |
|
"eval_loss": 0.9341886043548584, |
|
"eval_runtime": 57.5969, |
|
"eval_samples_per_second": 8.125, |
|
"eval_wer": 0.40126742712294045, |
|
"step": 7200 |
|
} |
|
], |
|
"max_steps": 7380, |
|
"num_train_epochs": 30, |
|
"total_flos": 4.018481177702621e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|