|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.3166023166023164, |
|
"eval_steps": 200, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07722007722007722, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 223.773, |
|
"eval_samples_per_second": 31.38, |
|
"eval_steps_per_second": 0.492, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15444015444015444, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 201.0458, |
|
"eval_samples_per_second": 34.927, |
|
"eval_steps_per_second": 0.547, |
|
"eval_wer": 0.8963291911755158, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19305019305019305, |
|
"grad_norm": 4.3846588134765625, |
|
"learning_rate": 0.00024799999999999996, |
|
"loss": 3.9177, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23166023166023167, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.7647, |
|
"eval_samples_per_second": 36.054, |
|
"eval_steps_per_second": 0.565, |
|
"eval_wer": 0.7594821653487074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3088803088803089, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.859, |
|
"eval_samples_per_second": 36.036, |
|
"eval_steps_per_second": 0.565, |
|
"eval_wer": 0.7512108345344293, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3861003861003861, |
|
"grad_norm": 3.36423921585083, |
|
"learning_rate": 0.00027805555555555553, |
|
"loss": 0.9791, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3861003861003861, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 195.6019, |
|
"eval_samples_per_second": 35.899, |
|
"eval_steps_per_second": 0.562, |
|
"eval_wer": 0.598444418474803, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46332046332046334, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 197.8836, |
|
"eval_samples_per_second": 35.486, |
|
"eval_steps_per_second": 0.556, |
|
"eval_wer": 0.5867710646254528, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5405405405405406, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 203.8782, |
|
"eval_samples_per_second": 34.442, |
|
"eval_steps_per_second": 0.54, |
|
"eval_wer": 0.5255476348149014, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5791505791505791, |
|
"grad_norm": 2.6850786209106445, |
|
"learning_rate": 0.0002503333333333333, |
|
"loss": 0.805, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6177606177606177, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 199.2388, |
|
"eval_samples_per_second": 35.244, |
|
"eval_steps_per_second": 0.552, |
|
"eval_wer": 0.5281575837845559, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.694980694980695, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.9574, |
|
"eval_samples_per_second": 36.018, |
|
"eval_steps_per_second": 0.564, |
|
"eval_wer": 0.4768805266643294, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7722007722007722, |
|
"grad_norm": 2.9242658615112305, |
|
"learning_rate": 0.0002226111111111111, |
|
"loss": 0.7184, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7722007722007722, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 198.941, |
|
"eval_samples_per_second": 35.297, |
|
"eval_steps_per_second": 0.553, |
|
"eval_wer": 0.4743095321569086, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8494208494208494, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 207.8762, |
|
"eval_samples_per_second": 33.78, |
|
"eval_steps_per_second": 0.529, |
|
"eval_wer": 0.46802487891654654, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9266409266409267, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 215.534, |
|
"eval_samples_per_second": 32.58, |
|
"eval_steps_per_second": 0.51, |
|
"eval_wer": 0.457026735745913, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9652509652509652, |
|
"grad_norm": 3.865280866622925, |
|
"learning_rate": 0.00019483333333333332, |
|
"loss": 0.6704, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0038610038610039, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 212.4212, |
|
"eval_samples_per_second": 33.057, |
|
"eval_steps_per_second": 0.518, |
|
"eval_wer": 0.4252528793840001, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 211.2545, |
|
"eval_samples_per_second": 33.24, |
|
"eval_steps_per_second": 0.521, |
|
"eval_wer": 0.4163972316362173, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.1583011583011582, |
|
"grad_norm": 0.6646206974983215, |
|
"learning_rate": 0.00016716666666666665, |
|
"loss": 0.5664, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.1583011583011582, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 202.1583, |
|
"eval_samples_per_second": 34.735, |
|
"eval_steps_per_second": 0.544, |
|
"eval_wer": 0.41592977808941345, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2355212355212355, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 198.8952, |
|
"eval_samples_per_second": 35.305, |
|
"eval_steps_per_second": 0.553, |
|
"eval_wer": 0.3995039798475582, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.3127413127413128, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 197.172, |
|
"eval_samples_per_second": 35.614, |
|
"eval_steps_per_second": 0.558, |
|
"eval_wer": 0.3940633399555919, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 1.06748366355896, |
|
"learning_rate": 0.00013944444444444442, |
|
"loss": 0.5359, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3899613899613898, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.4658, |
|
"eval_samples_per_second": 36.109, |
|
"eval_steps_per_second": 0.566, |
|
"eval_wer": 0.38185760845571526, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.4671814671814671, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 201.7117, |
|
"eval_samples_per_second": 34.812, |
|
"eval_steps_per_second": 0.545, |
|
"eval_wer": 0.3810785192110423, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"grad_norm": 0.8601678013801575, |
|
"learning_rate": 0.00011166666666666667, |
|
"loss": 0.5172, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 196.5164, |
|
"eval_samples_per_second": 35.732, |
|
"eval_steps_per_second": 0.56, |
|
"eval_wer": 0.36905457520158935, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 193.7969, |
|
"eval_samples_per_second": 36.234, |
|
"eval_steps_per_second": 0.568, |
|
"eval_wer": 0.36086115331177854, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.698841698841699, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 196.3411, |
|
"eval_samples_per_second": 35.764, |
|
"eval_steps_per_second": 0.56, |
|
"eval_wer": 0.3599652006804046, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.7374517374517375, |
|
"grad_norm": 0.6527121663093567, |
|
"learning_rate": 8.394444444444443e-05, |
|
"loss": 0.4817, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.776061776061776, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.384, |
|
"eval_samples_per_second": 36.124, |
|
"eval_steps_per_second": 0.566, |
|
"eval_wer": 0.35086284133847534, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.8532818532818531, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 196.7828, |
|
"eval_samples_per_second": 35.684, |
|
"eval_steps_per_second": 0.559, |
|
"eval_wer": 0.3529663822990924, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.9305019305019306, |
|
"grad_norm": 0.7631692886352539, |
|
"learning_rate": 5.6166666666666665e-05, |
|
"loss": 0.4818, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.9305019305019306, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.6791, |
|
"eval_samples_per_second": 36.07, |
|
"eval_steps_per_second": 0.565, |
|
"eval_wer": 0.34340955423110386, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0077220077220077, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 200.454, |
|
"eval_samples_per_second": 35.03, |
|
"eval_steps_per_second": 0.549, |
|
"eval_wer": 0.336254918000857, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.0849420849420848, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 193.7233, |
|
"eval_samples_per_second": 36.248, |
|
"eval_steps_per_second": 0.568, |
|
"eval_wer": 0.33718982509446455, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.1235521235521237, |
|
"grad_norm": 1.005771279335022, |
|
"learning_rate": 2.844444444444444e-05, |
|
"loss": 0.4196, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 198.8274, |
|
"eval_samples_per_second": 35.317, |
|
"eval_steps_per_second": 0.553, |
|
"eval_wer": 0.3320348512588784, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.2393822393822393, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 197.3434, |
|
"eval_samples_per_second": 35.583, |
|
"eval_steps_per_second": 0.557, |
|
"eval_wer": 0.3292690844402893, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.3166023166023164, |
|
"grad_norm": 1.702697515487671, |
|
"learning_rate": 7.222222222222222e-07, |
|
"loss": 0.3743, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.3166023166023164, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 192.6595, |
|
"eval_samples_per_second": 36.448, |
|
"eval_steps_per_second": 0.571, |
|
"eval_wer": 0.3263604845935102, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.3166023166023164, |
|
"step": 6000, |
|
"total_flos": 1.2607274134194512e+19, |
|
"train_loss": 0.8722912038167318, |
|
"train_runtime": 9671.7075, |
|
"train_samples_per_second": 9.926, |
|
"train_steps_per_second": 0.62 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2607274134194512e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|