|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0080645161290323, |
|
"eval_steps": 200, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06720430107526881, |
|
"eval_loss": 2.9829530715942383, |
|
"eval_runtime": 176.5397, |
|
"eval_samples_per_second": 39.776, |
|
"eval_steps_per_second": 2.487, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13440860215053763, |
|
"eval_loss": 1.5613375902175903, |
|
"eval_runtime": 174.5758, |
|
"eval_samples_per_second": 40.223, |
|
"eval_steps_per_second": 2.515, |
|
"eval_wer": 0.9657582481829415, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16801075268817203, |
|
"grad_norm": 2.138428211212158, |
|
"learning_rate": 0.00027833333333333334, |
|
"loss": 3.6118, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.20161290322580644, |
|
"eval_loss": 1.0701438188552856, |
|
"eval_runtime": 174.6767, |
|
"eval_samples_per_second": 40.2, |
|
"eval_steps_per_second": 2.513, |
|
"eval_wer": 0.7648786191684716, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26881720430107525, |
|
"eval_loss": 0.8867517113685608, |
|
"eval_runtime": 174.8944, |
|
"eval_samples_per_second": 40.15, |
|
"eval_steps_per_second": 2.51, |
|
"eval_wer": 0.6947474720631419, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.33602150537634407, |
|
"grad_norm": 2.852606773376465, |
|
"learning_rate": 0.00022288888888888887, |
|
"loss": 0.9333, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33602150537634407, |
|
"eval_loss": 0.7679557204246521, |
|
"eval_runtime": 175.5538, |
|
"eval_samples_per_second": 39.999, |
|
"eval_steps_per_second": 2.501, |
|
"eval_wer": 0.6070503195631087, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4032258064516129, |
|
"eval_loss": 0.7223904132843018, |
|
"eval_runtime": 175.6961, |
|
"eval_samples_per_second": 39.967, |
|
"eval_steps_per_second": 2.499, |
|
"eval_wer": 0.5453965638661157, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.47043010752688175, |
|
"eval_loss": 0.6732765436172485, |
|
"eval_runtime": 175.6064, |
|
"eval_samples_per_second": 39.987, |
|
"eval_steps_per_second": 2.5, |
|
"eval_wer": 0.5121646580475425, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5040322580645161, |
|
"grad_norm": 4.027652740478516, |
|
"learning_rate": 0.00016744444444444443, |
|
"loss": 0.7446, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5376344086021505, |
|
"eval_loss": 0.6437448859214783, |
|
"eval_runtime": 175.5304, |
|
"eval_samples_per_second": 40.004, |
|
"eval_steps_per_second": 2.501, |
|
"eval_wer": 0.4966449195445063, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6048387096774194, |
|
"eval_loss": 0.6063565015792847, |
|
"eval_runtime": 175.877, |
|
"eval_samples_per_second": 39.926, |
|
"eval_steps_per_second": 2.496, |
|
"eval_wer": 0.4774312706785899, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6720430107526881, |
|
"grad_norm": 3.676745653152466, |
|
"learning_rate": 0.000112, |
|
"loss": 0.6579, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6720430107526881, |
|
"eval_loss": 0.5673760771751404, |
|
"eval_runtime": 176.4608, |
|
"eval_samples_per_second": 39.794, |
|
"eval_steps_per_second": 2.488, |
|
"eval_wer": 0.44613933217289625, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.739247311827957, |
|
"eval_loss": 0.5556111931800842, |
|
"eval_runtime": 175.9454, |
|
"eval_samples_per_second": 39.91, |
|
"eval_steps_per_second": 2.495, |
|
"eval_wer": 0.4325196986406941, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"eval_loss": 0.5264282822608948, |
|
"eval_runtime": 176.4778, |
|
"eval_samples_per_second": 39.79, |
|
"eval_steps_per_second": 2.488, |
|
"eval_wer": 0.4180363810308402, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8400537634408602, |
|
"grad_norm": 2.7548441886901855, |
|
"learning_rate": 5.666666666666666e-05, |
|
"loss": 0.5823, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8736559139784946, |
|
"eval_loss": 0.5129852890968323, |
|
"eval_runtime": 176.2054, |
|
"eval_samples_per_second": 39.851, |
|
"eval_steps_per_second": 2.491, |
|
"eval_wer": 0.4022110312387887, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9408602150537635, |
|
"eval_loss": 0.49821802973747253, |
|
"eval_runtime": 176.2887, |
|
"eval_samples_per_second": 39.832, |
|
"eval_steps_per_second": 2.49, |
|
"eval_wer": 0.39360076535696725, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0080645161290323, |
|
"grad_norm": 0.7031016945838928, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.5426, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0080645161290323, |
|
"eval_loss": 0.4942198395729065, |
|
"eval_runtime": 177.2018, |
|
"eval_samples_per_second": 39.627, |
|
"eval_steps_per_second": 2.477, |
|
"eval_wer": 0.3917272352808302, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0080645161290323, |
|
"step": 3000, |
|
"total_flos": 6.709869296482936e+18, |
|
"train_loss": 1.1787635701497396, |
|
"train_runtime": 4443.9077, |
|
"train_samples_per_second": 10.801, |
|
"train_steps_per_second": 0.675 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.709869296482936e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|