|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.7084282460136673, |
|
"eval_steps": 200, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11389521640091116, |
|
"eval_loss": 0.7393712997436523, |
|
"eval_runtime": 197.0417, |
|
"eval_samples_per_second": 35.637, |
|
"eval_steps_per_second": 0.558, |
|
"eval_wer": 0.4965917697551123, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22779043280182232, |
|
"eval_loss": 0.6405971646308899, |
|
"eval_runtime": 190.1593, |
|
"eval_samples_per_second": 36.927, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.46169893301797793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2847380410022779, |
|
"grad_norm": 11.500487327575684, |
|
"learning_rate": 0.00027833333333333334, |
|
"loss": 2.0443, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3416856492027335, |
|
"eval_loss": 0.6104918718338013, |
|
"eval_runtime": 188.7991, |
|
"eval_samples_per_second": 37.193, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.4496073559308521, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45558086560364464, |
|
"eval_loss": 0.5864666700363159, |
|
"eval_runtime": 188.9909, |
|
"eval_samples_per_second": 37.155, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.44139571346948536, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"grad_norm": 3.202120542526245, |
|
"learning_rate": 0.00022288888888888887, |
|
"loss": 0.7145, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"eval_loss": 0.5439139008522034, |
|
"eval_runtime": 188.5161, |
|
"eval_samples_per_second": 37.249, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.41646846224371836, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.683371298405467, |
|
"eval_loss": 0.5428078174591064, |
|
"eval_runtime": 189.9544, |
|
"eval_samples_per_second": 36.967, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.41715940950583985, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7972665148063781, |
|
"eval_loss": 0.5054725408554077, |
|
"eval_runtime": 189.6912, |
|
"eval_samples_per_second": 37.018, |
|
"eval_steps_per_second": 0.58, |
|
"eval_wer": 0.38716964084029815, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8542141230068337, |
|
"grad_norm": 3.453420877456665, |
|
"learning_rate": 0.00016777777777777776, |
|
"loss": 0.6574, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9111617312072893, |
|
"eval_loss": 0.49158021807670593, |
|
"eval_runtime": 190.7112, |
|
"eval_samples_per_second": 36.82, |
|
"eval_steps_per_second": 0.577, |
|
"eval_wer": 0.3802601682190834, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0250569476082005, |
|
"eval_loss": 0.4755226671695709, |
|
"eval_runtime": 189.3317, |
|
"eval_samples_per_second": 37.088, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 0.3658034255039264, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"grad_norm": 4.045414447784424, |
|
"learning_rate": 0.00011244444444444443, |
|
"loss": 0.577, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"eval_loss": 0.469484806060791, |
|
"eval_runtime": 189.7257, |
|
"eval_samples_per_second": 37.011, |
|
"eval_steps_per_second": 0.58, |
|
"eval_wer": 0.3594254507766513, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2528473804100229, |
|
"eval_loss": 0.4655653238296509, |
|
"eval_runtime": 190.2356, |
|
"eval_samples_per_second": 36.912, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.35054943594786003, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.366742596810934, |
|
"eval_loss": 0.5080298185348511, |
|
"eval_runtime": 189.8354, |
|
"eval_samples_per_second": 36.99, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.3546951195205889, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4236902050113895, |
|
"grad_norm": 3.1688034534454346, |
|
"learning_rate": 5.733333333333333e-05, |
|
"loss": 0.534, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4806378132118452, |
|
"eval_loss": 0.5481207370758057, |
|
"eval_runtime": 190.1317, |
|
"eval_samples_per_second": 36.932, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 0.36395647032248635, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5945330296127562, |
|
"eval_loss": 0.6369026303291321, |
|
"eval_runtime": 191.1482, |
|
"eval_samples_per_second": 36.736, |
|
"eval_steps_per_second": 0.575, |
|
"eval_wer": 0.3902921909671933, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"grad_norm": 127.80626678466797, |
|
"learning_rate": 1.7777777777777775e-06, |
|
"loss": 0.7065, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"eval_loss": 0.6057178378105164, |
|
"eval_runtime": 190.1608, |
|
"eval_samples_per_second": 36.927, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.37381575625506586, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"step": 3000, |
|
"total_flos": 5.580592575460899e+18, |
|
"train_loss": 0.8723047383626302, |
|
"train_runtime": 4416.1126, |
|
"train_samples_per_second": 10.869, |
|
"train_steps_per_second": 0.679 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.580592575460899e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|