|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.191950464396285, |
|
"eval_steps": 100, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.30959752321981426, |
|
"eval_loss": 3.562331438064575, |
|
"eval_runtime": 151.987, |
|
"eval_samples_per_second": 37.214, |
|
"eval_steps_per_second": 4.652, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6191950464396285, |
|
"eval_loss": 3.1175434589385986, |
|
"eval_runtime": 149.1497, |
|
"eval_samples_per_second": 37.922, |
|
"eval_steps_per_second": 4.74, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9287925696594427, |
|
"eval_loss": 3.293273687362671, |
|
"eval_runtime": 149.8183, |
|
"eval_samples_per_second": 37.752, |
|
"eval_steps_per_second": 4.719, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.238390092879257, |
|
"eval_loss": 0.9764726161956787, |
|
"eval_runtime": 149.5566, |
|
"eval_samples_per_second": 37.818, |
|
"eval_steps_per_second": 4.727, |
|
"eval_wer": 0.6866203399078814, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5479876160990713, |
|
"grad_norm": 0.48750218749046326, |
|
"learning_rate": 0.00029519999999999997, |
|
"loss": 3.7398, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5479876160990713, |
|
"eval_loss": 0.6787042617797852, |
|
"eval_runtime": 150.878, |
|
"eval_samples_per_second": 37.487, |
|
"eval_steps_per_second": 4.686, |
|
"eval_wer": 0.492176341255958, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8575851393188856, |
|
"eval_loss": 0.6015087366104126, |
|
"eval_runtime": 150.6376, |
|
"eval_samples_per_second": 37.547, |
|
"eval_steps_per_second": 4.693, |
|
"eval_wer": 0.43465840702283703, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.1671826625387, |
|
"eval_loss": 0.5698955059051514, |
|
"eval_runtime": 150.4703, |
|
"eval_samples_per_second": 37.589, |
|
"eval_steps_per_second": 4.699, |
|
"eval_wer": 0.42732422846688384, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.476780185758514, |
|
"eval_loss": 0.5419376492500305, |
|
"eval_runtime": 150.5345, |
|
"eval_samples_per_second": 37.573, |
|
"eval_steps_per_second": 4.697, |
|
"eval_wer": 0.39578886552936077, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.7863777089783284, |
|
"eval_loss": 0.49708282947540283, |
|
"eval_runtime": 150.3649, |
|
"eval_samples_per_second": 37.615, |
|
"eval_steps_per_second": 4.702, |
|
"eval_wer": 0.3729839033236507, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.0959752321981426, |
|
"grad_norm": 0.4975910186767578, |
|
"learning_rate": 0.0002024, |
|
"loss": 0.5228, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0959752321981426, |
|
"eval_loss": 0.49601131677627563, |
|
"eval_runtime": 150.6812, |
|
"eval_samples_per_second": 37.536, |
|
"eval_steps_per_second": 4.692, |
|
"eval_wer": 0.354303413522492, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.405572755417957, |
|
"eval_loss": 0.6976510286331177, |
|
"eval_runtime": 149.5565, |
|
"eval_samples_per_second": 37.818, |
|
"eval_steps_per_second": 4.727, |
|
"eval_wer": 0.4655036831378087, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.715170278637771, |
|
"eval_loss": 0.799861490726471, |
|
"eval_runtime": 151.2369, |
|
"eval_samples_per_second": 37.398, |
|
"eval_steps_per_second": 4.675, |
|
"eval_wer": 0.5316396783874436, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.024767801857585, |
|
"eval_loss": 1.0526387691497803, |
|
"eval_runtime": 150.3745, |
|
"eval_samples_per_second": 37.613, |
|
"eval_steps_per_second": 4.702, |
|
"eval_wer": 0.6214633050344242, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.3343653250774, |
|
"eval_loss": 1.1285134553909302, |
|
"eval_runtime": 150.4438, |
|
"eval_samples_per_second": 37.595, |
|
"eval_steps_per_second": 4.699, |
|
"eval_wer": 0.7571857296464509, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.643962848297214, |
|
"grad_norm": 3.2222559452056885, |
|
"learning_rate": 0.00010359999999999998, |
|
"loss": 0.9047, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.643962848297214, |
|
"eval_loss": 1.1592659950256348, |
|
"eval_runtime": 150.3382, |
|
"eval_samples_per_second": 37.622, |
|
"eval_steps_per_second": 4.703, |
|
"eval_wer": 0.7047070340710307, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.953560371517028, |
|
"eval_loss": 2.0400760173797607, |
|
"eval_runtime": 150.5385, |
|
"eval_samples_per_second": 37.572, |
|
"eval_steps_per_second": 4.696, |
|
"eval_wer": 0.966747444271477, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"eval_loss": 1.6264142990112305, |
|
"eval_runtime": 151.1274, |
|
"eval_samples_per_second": 37.425, |
|
"eval_steps_per_second": 4.678, |
|
"eval_wer": 0.8680489801158704, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.572755417956657, |
|
"eval_loss": 1.5916314125061035, |
|
"eval_runtime": 151.1725, |
|
"eval_samples_per_second": 37.414, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.8627369164353004, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"eval_loss": 1.5764083862304688, |
|
"eval_runtime": 150.5972, |
|
"eval_samples_per_second": 37.557, |
|
"eval_steps_per_second": 4.695, |
|
"eval_wer": 0.8720129672128517, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.191950464396285, |
|
"grad_norm": 0.7321383953094482, |
|
"learning_rate": 4.399999999999999e-06, |
|
"loss": 1.6715, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.191950464396285, |
|
"eval_loss": 1.5562535524368286, |
|
"eval_runtime": 151.1236, |
|
"eval_samples_per_second": 37.426, |
|
"eval_steps_per_second": 4.678, |
|
"eval_wer": 0.8702315802988236, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.191950464396285, |
|
"step": 2000, |
|
"total_flos": 3.641759707440775e+19, |
|
"train_loss": 1.7097147216796875, |
|
"train_runtime": 12239.4176, |
|
"train_samples_per_second": 20.916, |
|
"train_steps_per_second": 0.163 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 400, |
|
"total_flos": 3.641759707440775e+19, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|