|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.936708860759493, |
|
"eval_steps": 100, |
|
"global_step": 1770, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 4.394069194793701, |
|
"eval_runtime": 118.8089, |
|
"eval_samples_per_second": 14.99, |
|
"eval_steps_per_second": 1.877, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 3.200528383255005, |
|
"eval_runtime": 116.974, |
|
"eval_samples_per_second": 15.226, |
|
"eval_steps_per_second": 1.906, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.7843868732452393, |
|
"eval_runtime": 118.9835, |
|
"eval_samples_per_second": 14.968, |
|
"eval_steps_per_second": 1.874, |
|
"eval_wer": 1.0144723900519883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.8691409230232239, |
|
"eval_runtime": 118.7268, |
|
"eval_samples_per_second": 15.001, |
|
"eval_steps_per_second": 1.878, |
|
"eval_wer": 1.000281017282563, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002988, |
|
"loss": 4.317, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 0.684583306312561, |
|
"eval_runtime": 119.9053, |
|
"eval_samples_per_second": 14.853, |
|
"eval_steps_per_second": 1.86, |
|
"eval_wer": 0.8393986230153154, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_loss": 0.6270079612731934, |
|
"eval_runtime": 120.1998, |
|
"eval_samples_per_second": 14.817, |
|
"eval_steps_per_second": 1.855, |
|
"eval_wer": 0.7789799072642968, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_loss": 0.593498945236206, |
|
"eval_runtime": 120.001, |
|
"eval_samples_per_second": 14.842, |
|
"eval_steps_per_second": 1.858, |
|
"eval_wer": 0.7802444850358297, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_loss": 0.570054829120636, |
|
"eval_runtime": 120.3712, |
|
"eval_samples_per_second": 14.796, |
|
"eval_steps_per_second": 1.853, |
|
"eval_wer": 0.7812280455247997, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_loss": 0.564914345741272, |
|
"eval_runtime": 122.0077, |
|
"eval_samples_per_second": 14.597, |
|
"eval_steps_per_second": 1.828, |
|
"eval_wer": 0.7890965294365604, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.00018236220472440942, |
|
"loss": 0.3656, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"eval_loss": 0.6092184782028198, |
|
"eval_runtime": 120.3224, |
|
"eval_samples_per_second": 14.802, |
|
"eval_steps_per_second": 1.853, |
|
"eval_wer": 0.8177602922579739, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"eval_loss": 0.6092739701271057, |
|
"eval_runtime": 120.225, |
|
"eval_samples_per_second": 14.814, |
|
"eval_steps_per_second": 1.855, |
|
"eval_wer": 0.7720949838415062, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"eval_loss": 0.6154232621192932, |
|
"eval_runtime": 120.1186, |
|
"eval_samples_per_second": 14.827, |
|
"eval_steps_per_second": 1.856, |
|
"eval_wer": 0.7286778136855416, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_loss": 0.6283690333366394, |
|
"eval_runtime": 122.0867, |
|
"eval_samples_per_second": 14.588, |
|
"eval_steps_per_second": 1.827, |
|
"eval_wer": 0.7407615568357454, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"eval_loss": 0.6343453526496887, |
|
"eval_runtime": 120.4977, |
|
"eval_samples_per_second": 14.78, |
|
"eval_steps_per_second": 1.851, |
|
"eval_wer": 0.7143459322748349, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 6.425196850393701e-05, |
|
"loss": 0.1681, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"eval_loss": 0.6523498296737671, |
|
"eval_runtime": 119.5948, |
|
"eval_samples_per_second": 14.892, |
|
"eval_steps_per_second": 1.865, |
|
"eval_wer": 0.7362652803147394, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"eval_loss": 0.6542894244194031, |
|
"eval_runtime": 120.5721, |
|
"eval_samples_per_second": 14.771, |
|
"eval_steps_per_second": 1.85, |
|
"eval_wer": 0.7139244063509906, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 0.6598775386810303, |
|
"eval_runtime": 124.634, |
|
"eval_samples_per_second": 14.29, |
|
"eval_steps_per_second": 1.789, |
|
"eval_wer": 0.7094281298299846, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"step": 1770, |
|
"total_flos": 7.546311277696558e+18, |
|
"train_loss": 1.3876537419981876, |
|
"train_runtime": 7307.918, |
|
"train_samples_per_second": 7.775, |
|
"train_steps_per_second": 0.242 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1770, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 400, |
|
"total_flos": 7.546311277696558e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|