|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0864197530864197, |
|
"eval_steps": 200, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.30864197530864196, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 198.0112, |
|
"eval_samples_per_second": 35.463, |
|
"eval_steps_per_second": 4.434, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 196.4137, |
|
"eval_samples_per_second": 35.751, |
|
"eval_steps_per_second": 4.47, |
|
"eval_wer": 0.9468401438718138, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7716049382716049, |
|
"grad_norm": 2.496718645095825, |
|
"learning_rate": 0.0002958, |
|
"loss": 3.8579, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.7508, |
|
"eval_samples_per_second": 36.056, |
|
"eval_steps_per_second": 4.508, |
|
"eval_wer": 0.682339345305338, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 195.8415, |
|
"eval_samples_per_second": 35.856, |
|
"eval_steps_per_second": 4.483, |
|
"eval_wer": 0.5246257125420384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"grad_norm": 0.836155354976654, |
|
"learning_rate": 0.0002016, |
|
"loss": 0.7662, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.2625, |
|
"eval_samples_per_second": 36.147, |
|
"eval_steps_per_second": 4.52, |
|
"eval_wer": 0.45603988936932727, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 195.3379, |
|
"eval_samples_per_second": 35.948, |
|
"eval_steps_per_second": 4.495, |
|
"eval_wer": 0.4314076844169166, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.1604938271604937, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.6062, |
|
"eval_samples_per_second": 36.083, |
|
"eval_steps_per_second": 4.512, |
|
"eval_wer": 0.43706906626154024, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.314814814814815, |
|
"grad_norm": 0.4859907329082489, |
|
"learning_rate": 0.00010239999999999998, |
|
"loss": 0.5916, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.4662, |
|
"eval_samples_per_second": 36.109, |
|
"eval_steps_per_second": 4.515, |
|
"eval_wer": 0.38610364483918297, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 194.3135, |
|
"eval_samples_per_second": 36.137, |
|
"eval_steps_per_second": 4.518, |
|
"eval_wer": 0.36491241738407804, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"grad_norm": 0.7149534821510315, |
|
"learning_rate": 2.9999999999999997e-06, |
|
"loss": 0.4977, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 195.0164, |
|
"eval_samples_per_second": 36.007, |
|
"eval_steps_per_second": 4.502, |
|
"eval_wer": 0.36645761105267943, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"step": 2000, |
|
"total_flos": 1.6964840215738495e+19, |
|
"train_loss": 1.4283542098999022, |
|
"train_runtime": 6370.4271, |
|
"train_samples_per_second": 20.093, |
|
"train_steps_per_second": 0.314 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6964840215738495e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|