{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 27.11864406779661,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.69,
      "learning_rate": 4.94e-05,
      "loss": 6.7713,
      "step": 500
    },
    {
      "epoch": 1.69,
      "eval_loss": 2.9682114124298096,
      "eval_runtime": 24.5806,
      "eval_samples_per_second": 15.663,
      "eval_steps_per_second": 1.993,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 3.39,
      "learning_rate": 9.94e-05,
      "loss": 1.7717,
      "step": 1000
    },
    {
      "epoch": 3.39,
      "eval_loss": 1.053725004196167,
      "eval_runtime": 25.0326,
      "eval_samples_per_second": 15.38,
      "eval_steps_per_second": 1.957,
      "eval_wer": 0.7996899224806202,
      "step": 1000
    },
    {
      "epoch": 5.08,
      "learning_rate": 9.370700636942676e-05,
      "loss": 0.8188,
      "step": 1500
    },
    {
      "epoch": 5.08,
      "eval_loss": 0.723295271396637,
      "eval_runtime": 24.8888,
      "eval_samples_per_second": 15.469,
      "eval_steps_per_second": 1.969,
      "eval_wer": 0.6505426356589147,
      "step": 1500
    },
    {
      "epoch": 6.78,
      "learning_rate": 8.73375796178344e-05,
      "loss": 0.5633,
      "step": 2000
    },
    {
      "epoch": 6.78,
      "eval_loss": 0.6577075719833374,
      "eval_runtime": 24.4626,
      "eval_samples_per_second": 15.738,
      "eval_steps_per_second": 2.003,
      "eval_wer": 0.6083720930232558,
      "step": 2000
    },
    {
      "epoch": 8.47,
      "learning_rate": 8.096815286624205e-05,
      "loss": 0.4201,
      "step": 2500
    },
    {
      "epoch": 8.47,
      "eval_loss": 0.6801705360412598,
      "eval_runtime": 23.9758,
      "eval_samples_per_second": 16.058,
      "eval_steps_per_second": 2.044,
      "eval_wer": 0.5937984496124031,
      "step": 2500
    },
    {
      "epoch": 10.17,
      "learning_rate": 7.459872611464968e-05,
      "loss": 0.3465,
      "step": 3000
    },
    {
      "epoch": 10.17,
      "eval_loss": 0.734240710735321,
      "eval_runtime": 24.3607,
      "eval_samples_per_second": 15.804,
      "eval_steps_per_second": 2.011,
      "eval_wer": 0.5792248062015504,
      "step": 3000
    },
    {
      "epoch": 11.86,
      "learning_rate": 6.822929936305733e-05,
      "loss": 0.2812,
      "step": 3500
    },
    {
      "epoch": 11.86,
      "eval_loss": 0.654410183429718,
      "eval_runtime": 25.1886,
      "eval_samples_per_second": 15.285,
      "eval_steps_per_second": 1.945,
      "eval_wer": 0.56,
      "step": 3500
    },
    {
      "epoch": 13.56,
      "learning_rate": 6.185987261146497e-05,
      "loss": 0.2362,
      "step": 4000
    },
    {
      "epoch": 13.56,
      "eval_loss": 0.6740626692771912,
      "eval_runtime": 24.8977,
      "eval_samples_per_second": 15.463,
      "eval_steps_per_second": 1.968,
      "eval_wer": 0.5311627906976745,
      "step": 4000
    },
    {
      "epoch": 15.25,
      "learning_rate": 5.549044585987262e-05,
      "loss": 0.2042,
      "step": 4500
    },
    {
      "epoch": 15.25,
      "eval_loss": 0.7329800724983215,
      "eval_runtime": 24.5766,
      "eval_samples_per_second": 15.665,
      "eval_steps_per_second": 1.994,
      "eval_wer": 0.5221705426356589,
      "step": 4500
    },
    {
      "epoch": 16.95,
      "learning_rate": 4.912101910828026e-05,
      "loss": 0.1881,
      "step": 5000
    },
    {
      "epoch": 16.95,
      "eval_loss": 0.7085126042366028,
      "eval_runtime": 25.0265,
      "eval_samples_per_second": 15.384,
      "eval_steps_per_second": 1.958,
      "eval_wer": 0.5184496124031007,
      "step": 5000
    },
    {
      "epoch": 18.64,
      "learning_rate": 4.27515923566879e-05,
      "loss": 0.1632,
      "step": 5500
    },
    {
      "epoch": 18.64,
      "eval_loss": 0.6873669624328613,
      "eval_runtime": 24.3859,
      "eval_samples_per_second": 15.788,
      "eval_steps_per_second": 2.009,
      "eval_wer": 0.5255813953488372,
      "step": 5500
    },
    {
      "epoch": 20.34,
      "learning_rate": 3.638216560509554e-05,
      "loss": 0.1502,
      "step": 6000
    },
    {
      "epoch": 20.34,
      "eval_loss": 0.7731661200523376,
      "eval_runtime": 25.086,
      "eval_samples_per_second": 15.347,
      "eval_steps_per_second": 1.953,
      "eval_wer": 0.5193798449612403,
      "step": 6000
    },
    {
      "epoch": 22.03,
      "learning_rate": 3.0012738853503187e-05,
      "loss": 0.1338,
      "step": 6500
    },
    {
      "epoch": 22.03,
      "eval_loss": 0.7070124745368958,
      "eval_runtime": 24.9391,
      "eval_samples_per_second": 15.438,
      "eval_steps_per_second": 1.965,
      "eval_wer": 0.5041860465116279,
      "step": 6500
    },
    {
      "epoch": 23.73,
      "learning_rate": 2.3656050955414013e-05,
      "loss": 0.1295,
      "step": 7000
    },
    {
      "epoch": 23.73,
      "eval_loss": 0.7655993103981018,
      "eval_runtime": 24.8534,
      "eval_samples_per_second": 15.491,
      "eval_steps_per_second": 1.972,
      "eval_wer": 0.4951937984496124,
      "step": 7000
    },
    {
      "epoch": 25.42,
      "learning_rate": 1.7286624203821657e-05,
      "loss": 0.1143,
      "step": 7500
    },
    {
      "epoch": 25.42,
      "eval_loss": 0.7407109141349792,
      "eval_runtime": 24.926,
      "eval_samples_per_second": 15.446,
      "eval_steps_per_second": 1.966,
      "eval_wer": 0.4951937984496124,
      "step": 7500
    },
    {
      "epoch": 27.12,
      "learning_rate": 1.09171974522293e-05,
      "loss": 0.104,
      "step": 8000
    },
    {
      "epoch": 27.12,
      "eval_loss": 0.7474448680877686,
      "eval_runtime": 25.1716,
      "eval_samples_per_second": 15.295,
      "eval_steps_per_second": 1.947,
      "eval_wer": 0.4846511627906977,
      "step": 8000
    }
  ],
  "max_steps": 8850,
  "num_train_epochs": 30,
  "total_flos": 2.5455023655397975e+18,
  "trial_name": null,
  "trial_params": null
}