|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5444015444015444, |
|
"eval_steps": 200, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07722007722007722, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.8025, |
|
"eval_samples_per_second": 39.272, |
|
"eval_steps_per_second": 4.91, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15444015444015444, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.9522, |
|
"eval_samples_per_second": 39.683, |
|
"eval_steps_per_second": 4.962, |
|
"eval_wer": 0.9261812940672354, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19305019305019305, |
|
"grad_norm": 3.265190362930298, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.9094, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23166023166023167, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.1009, |
|
"eval_samples_per_second": 39.65, |
|
"eval_steps_per_second": 4.958, |
|
"eval_wer": 0.7767000376559802, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3088803088803089, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.4675, |
|
"eval_samples_per_second": 39.568, |
|
"eval_steps_per_second": 4.947, |
|
"eval_wer": 0.7365379870930882, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3861003861003861, |
|
"grad_norm": 3.0774288177490234, |
|
"learning_rate": 0.00025765714285714284, |
|
"loss": 0.9885, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3861003861003861, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9576, |
|
"eval_samples_per_second": 39.459, |
|
"eval_steps_per_second": 4.934, |
|
"eval_wer": 0.6104813473050005, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46332046332046334, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.7839, |
|
"eval_samples_per_second": 39.721, |
|
"eval_steps_per_second": 4.967, |
|
"eval_wer": 0.5430511731785542, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5405405405405406, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.8714, |
|
"eval_samples_per_second": 39.478, |
|
"eval_steps_per_second": 4.936, |
|
"eval_wer": 0.5144845675405451, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5791505791505791, |
|
"grad_norm": 3.033381700515747, |
|
"learning_rate": 0.00021488571428571426, |
|
"loss": 0.7974, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6177606177606177, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.2105, |
|
"eval_samples_per_second": 39.403, |
|
"eval_steps_per_second": 4.927, |
|
"eval_wer": 0.5230675340526924, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.694980694980695, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.6698, |
|
"eval_samples_per_second": 39.302, |
|
"eval_steps_per_second": 4.914, |
|
"eval_wer": 0.46214275511926556, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7722007722007722, |
|
"grad_norm": 3.2283124923706055, |
|
"learning_rate": 0.0001721142857142857, |
|
"loss": 0.7068, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7722007722007722, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.2886, |
|
"eval_samples_per_second": 39.166, |
|
"eval_steps_per_second": 4.897, |
|
"eval_wer": 0.4532481529092491, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8494208494208494, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.5146, |
|
"eval_samples_per_second": 39.117, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.4404061651928895, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9266409266409267, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.4125, |
|
"eval_samples_per_second": 39.358, |
|
"eval_steps_per_second": 4.921, |
|
"eval_wer": 0.4195006037941646, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9652509652509652, |
|
"grad_norm": 1.9969470500946045, |
|
"learning_rate": 0.00012925714285714286, |
|
"loss": 0.6447, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0038610038610039, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.4629, |
|
"eval_samples_per_second": 39.128, |
|
"eval_steps_per_second": 4.892, |
|
"eval_wer": 0.4115928479607339, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.2946, |
|
"eval_samples_per_second": 38.52, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.38897329022372845, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.1583011583011582, |
|
"grad_norm": 0.5247506499290466, |
|
"learning_rate": 8.648571428571429e-05, |
|
"loss": 0.5318, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.1583011583011582, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.6608, |
|
"eval_samples_per_second": 38.868, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.38040330853232573, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2355212355212355, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.267, |
|
"eval_samples_per_second": 39.39, |
|
"eval_steps_per_second": 4.925, |
|
"eval_wer": 0.37341747497175803, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.3127413127413128, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.0534, |
|
"eval_samples_per_second": 39.217, |
|
"eval_steps_per_second": 4.904, |
|
"eval_wer": 0.36753535117447705, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.6272704005241394, |
|
"learning_rate": 4.3799999999999994e-05, |
|
"loss": 0.494, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3899613899613898, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.0451, |
|
"eval_samples_per_second": 39.001, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.3590173087660525, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.4671814671814671, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.7417, |
|
"eval_samples_per_second": 39.067, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.35345980548738526, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"grad_norm": 0.7731852531433105, |
|
"learning_rate": 1.0285714285714284e-06, |
|
"loss": 0.4794, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.8748, |
|
"eval_samples_per_second": 39.038, |
|
"eval_steps_per_second": 4.881, |
|
"eval_wer": 0.3500967369145469, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"step": 4000, |
|
"total_flos": 8.411238439350073e+18, |
|
"train_loss": 1.0689988899230958, |
|
"train_runtime": 5961.6242, |
|
"train_samples_per_second": 10.735, |
|
"train_steps_per_second": 0.671 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.411238439350073e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|