{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.719665271966527,
  "eval_steps": 100,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10460251046025104,
      "eval_loss": 3.505904197692871,
      "eval_runtime": 155.3669,
      "eval_samples_per_second": 36.404,
      "eval_steps_per_second": 4.551,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 0.20920502092050208,
      "eval_loss": 3.2686526775360107,
      "eval_runtime": 151.6355,
      "eval_samples_per_second": 37.3,
      "eval_steps_per_second": 4.662,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.3138075313807531,
      "eval_loss": 2.7369139194488525,
      "eval_runtime": 151.6605,
      "eval_samples_per_second": 37.294,
      "eval_steps_per_second": 4.662,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 0.41841004184100417,
      "eval_loss": 1.303921103477478,
      "eval_runtime": 151.8516,
      "eval_samples_per_second": 37.247,
      "eval_steps_per_second": 4.656,
      "eval_wer": 0.8544558745646836,
      "step": 400
    },
    {
      "epoch": 0.5230125523012552,
      "grad_norm": 2.7574479579925537,
      "learning_rate": 0.0002958,
      "loss": 3.5426,
      "step": 500
    },
    {
      "epoch": 0.5230125523012552,
      "eval_loss": 1.051817536354065,
      "eval_runtime": 152.6837,
      "eval_samples_per_second": 37.044,
      "eval_steps_per_second": 4.63,
      "eval_wer": 0.7738120075107124,
      "step": 500
    },
    {
      "epoch": 0.6276150627615062,
      "eval_loss": 0.839580774307251,
      "eval_runtime": 153.5079,
      "eval_samples_per_second": 36.845,
      "eval_steps_per_second": 4.606,
      "eval_wer": 0.6148994559548073,
      "step": 600
    },
    {
      "epoch": 0.7322175732217573,
      "eval_loss": 0.7548021674156189,
      "eval_runtime": 152.4899,
      "eval_samples_per_second": 37.091,
      "eval_steps_per_second": 4.636,
      "eval_wer": 0.5686636388438638,
      "step": 700
    },
    {
      "epoch": 0.8368200836820083,
      "eval_loss": 0.6868751645088196,
      "eval_runtime": 153.2403,
      "eval_samples_per_second": 36.909,
      "eval_steps_per_second": 4.614,
      "eval_wer": 0.5185119802282101,
      "step": 800
    },
    {
      "epoch": 0.9414225941422594,
      "eval_loss": 0.6501098871231079,
      "eval_runtime": 152.6877,
      "eval_samples_per_second": 37.043,
      "eval_steps_per_second": 4.63,
      "eval_wer": 0.4895283336810515,
      "step": 900
    },
    {
      "epoch": 1.0460251046025104,
      "grad_norm": 1.2487365007400513,
      "learning_rate": 0.00022957142857142856,
      "loss": 0.7785,
      "step": 1000
    },
    {
      "epoch": 1.0460251046025104,
      "eval_loss": 0.5817079544067383,
      "eval_runtime": 153.4094,
      "eval_samples_per_second": 36.869,
      "eval_steps_per_second": 4.609,
      "eval_wer": 0.4500649965495659,
      "step": 1000
    },
    {
      "epoch": 1.1506276150627615,
      "eval_loss": 0.5646732449531555,
      "eval_runtime": 152.9118,
      "eval_samples_per_second": 36.989,
      "eval_steps_per_second": 4.624,
      "eval_wer": 0.4288006932965287,
      "step": 1100
    },
    {
      "epoch": 1.2552301255230125,
      "eval_loss": 0.5424243807792664,
      "eval_runtime": 152.6245,
      "eval_samples_per_second": 37.058,
      "eval_steps_per_second": 4.632,
      "eval_wer": 0.42786987851262215,
      "step": 1200
    },
    {
      "epoch": 1.3598326359832635,
      "eval_loss": 0.5264406800270081,
      "eval_runtime": 153.2158,
      "eval_samples_per_second": 36.915,
      "eval_steps_per_second": 4.614,
      "eval_wer": 0.4050809648376691,
      "step": 1300
    },
    {
      "epoch": 1.4644351464435146,
      "eval_loss": 0.5098912715911865,
      "eval_runtime": 153.9853,
      "eval_samples_per_second": 36.731,
      "eval_steps_per_second": 4.591,
      "eval_wer": 0.3977467862817159,
      "step": 1400
    },
    {
      "epoch": 1.5690376569037658,
      "grad_norm": 0.9644107222557068,
      "learning_rate": 0.0001582857142857143,
      "loss": 0.5795,
      "step": 1500
    },
    {
      "epoch": 1.5690376569037658,
      "eval_loss": 0.5057937502861023,
      "eval_runtime": 153.7283,
      "eval_samples_per_second": 36.792,
      "eval_steps_per_second": 4.599,
      "eval_wer": 0.3952913610758935,
      "step": 1500
    },
    {
      "epoch": 1.6736401673640167,
      "eval_loss": 0.48043006658554077,
      "eval_runtime": 153.7864,
      "eval_samples_per_second": 36.778,
      "eval_steps_per_second": 4.597,
      "eval_wer": 0.37892185970374415,
      "step": 1600
    },
    {
      "epoch": 1.778242677824268,
      "eval_loss": 0.46723026037216187,
      "eval_runtime": 153.9064,
      "eval_samples_per_second": 36.75,
      "eval_steps_per_second": 4.594,
      "eval_wer": 0.3698062942337629,
      "step": 1700
    },
    {
      "epoch": 1.8828451882845187,
      "eval_loss": 0.46052873134613037,
      "eval_runtime": 153.9609,
      "eval_samples_per_second": 36.737,
      "eval_steps_per_second": 4.592,
      "eval_wer": 0.3712185649403797,
      "step": 1800
    },
    {
      "epoch": 1.98744769874477,
      "eval_loss": 0.4490562677383423,
      "eval_runtime": 153.6441,
      "eval_samples_per_second": 36.812,
      "eval_steps_per_second": 4.602,
      "eval_wer": 0.35561939304456675,
      "step": 1900
    },
    {
      "epoch": 2.092050209205021,
      "grad_norm": 1.1409815549850464,
      "learning_rate": 8.714285714285714e-05,
      "loss": 0.5057,
      "step": 2000
    },
    {
      "epoch": 2.092050209205021,
      "eval_loss": 0.44942042231559753,
      "eval_runtime": 154.6428,
      "eval_samples_per_second": 36.575,
      "eval_steps_per_second": 4.572,
      "eval_wer": 0.34534833336008086,
      "step": 2000
    },
    {
      "epoch": 2.196652719665272,
      "eval_loss": 0.4431760907173157,
      "eval_runtime": 154.0218,
      "eval_samples_per_second": 36.722,
      "eval_steps_per_second": 4.59,
      "eval_wer": 0.3396832020028566,
      "step": 2100
    },
    {
      "epoch": 2.301255230125523,
      "eval_loss": 0.43776625394821167,
      "eval_runtime": 154.8082,
      "eval_samples_per_second": 36.536,
      "eval_steps_per_second": 4.567,
      "eval_wer": 0.335061225144838,
      "step": 2200
    },
    {
      "epoch": 2.405857740585774,
      "eval_loss": 0.42906874418258667,
      "eval_runtime": 154.6416,
      "eval_samples_per_second": 36.575,
      "eval_steps_per_second": 4.572,
      "eval_wer": 0.33104909245558567,
      "step": 2300
    },
    {
      "epoch": 2.510460251046025,
      "eval_loss": 0.4279017746448517,
      "eval_runtime": 154.5726,
      "eval_samples_per_second": 36.591,
      "eval_steps_per_second": 4.574,
      "eval_wer": 0.3294281908491278,
      "step": 2400
    },
    {
      "epoch": 2.6150627615062763,
      "grad_norm": 0.6568267345428467,
      "learning_rate": 1.6e-05,
      "loss": 0.3986,
      "step": 2500
    },
    {
      "epoch": 2.6150627615062763,
      "eval_loss": 0.4233950078487396,
      "eval_runtime": 154.8004,
      "eval_samples_per_second": 36.537,
      "eval_steps_per_second": 4.567,
      "eval_wer": 0.3259456596748568,
      "step": 2500
    },
    {
      "epoch": 2.719665271966527,
      "eval_loss": 0.41978269815444946,
      "eval_runtime": 153.7416,
      "eval_samples_per_second": 36.789,
      "eval_steps_per_second": 4.599,
      "eval_wer": 0.32485435958338016,
      "step": 2600
    },
    {
      "epoch": 2.719665271966527,
      "step": 2600,
      "total_flos": 1.0779565758702488e+19,
      "train_loss": 1.1314451643136831,
      "train_runtime": 7270.6329,
      "train_samples_per_second": 11.443,
      "train_steps_per_second": 0.358
    }
  ],
  "logging_steps": 500,
  "max_steps": 2600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 400,
  "total_flos": 1.0779565758702488e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}