{ "best_metric": 0.2764733135700226, "best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000", "epoch": 4.524886877828054, "eval_steps": 500, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 1.6815159320831299, "learning_rate": 0.00014774999999999999, "loss": 5.9898, "step": 400 }, { "epoch": 0.25, "eval_loss": 1.3093085289001465, "eval_runtime": 3176.8808, "eval_samples_per_second": 8.256, "eval_steps_per_second": 0.258, "eval_wer": 0.7970769457237188, "step": 500 }, { "epoch": 0.4, "grad_norm": 2.3292043209075928, "learning_rate": 0.00029775, "loss": 1.0749, "step": 800 }, { "epoch": 0.5, "eval_loss": 0.5815957188606262, "eval_runtime": 1123.0217, "eval_samples_per_second": 23.355, "eval_steps_per_second": 0.73, "eval_wer": 0.4617458414821357, "step": 1000 }, { "epoch": 0.6, "grad_norm": 2.9652466773986816, "learning_rate": 0.0002855853658536585, "loss": 0.4332, "step": 1200 }, { "epoch": 0.75, "eval_loss": 0.48338082432746887, "eval_runtime": 1101.5414, "eval_samples_per_second": 23.81, "eval_steps_per_second": 0.744, "eval_wer": 0.4091476878430383, "step": 1500 }, { "epoch": 0.8, "grad_norm": 3.57124924659729, "learning_rate": 0.0002709512195121951, "loss": 0.3655, "step": 1600 }, { "epoch": 1.01, "grad_norm": 1.1706229448318481, "learning_rate": 0.0002563170731707317, "loss": 0.3303, "step": 2000 }, { "epoch": 1.01, "eval_loss": 0.42033129930496216, "eval_runtime": 1101.1368, "eval_samples_per_second": 23.819, "eval_steps_per_second": 0.745, "eval_wer": 0.3419174394885707, "step": 2000 }, { "epoch": 1.21, "grad_norm": 0.8928599953651428, "learning_rate": 0.0002416829268292683, "loss": 0.276, "step": 2400 }, { "epoch": 1.26, "eval_loss": 0.3909631669521332, "eval_runtime": 1098.4606, "eval_samples_per_second": 23.877, "eval_steps_per_second": 0.746, "eval_wer": 0.3186423569490884, "step": 2500 }, { "epoch": 1.41, "grad_norm": 0.6678148508071899, "learning_rate": 0.00022704878048780485, "loss": 0.2591, "step": 2800 }, { "epoch": 1.51, "eval_loss": 0.39008986949920654, "eval_runtime": 1093.6554, "eval_samples_per_second": 23.982, "eval_steps_per_second": 0.75, "eval_wer": 0.3067188190019557, "step": 3000 }, { "epoch": 1.61, "grad_norm": 0.7449674606323242, "learning_rate": 0.00021241463414634144, "loss": 0.2501, "step": 3200 }, { "epoch": 1.76, "eval_loss": 0.3645510971546173, "eval_runtime": 1101.42, "eval_samples_per_second": 23.813, "eval_steps_per_second": 0.744, "eval_wer": 0.2895379891910079, "step": 3500 }, { "epoch": 1.81, "grad_norm": 0.994420051574707, "learning_rate": 0.00019778048780487803, "loss": 0.2332, "step": 3600 }, { "epoch": 2.01, "grad_norm": 0.632382333278656, "learning_rate": 0.00018314634146341462, "loss": 0.224, "step": 4000 }, { "epoch": 2.01, "eval_loss": 0.35174447298049927, "eval_runtime": 1113.1837, "eval_samples_per_second": 23.561, "eval_steps_per_second": 0.737, "eval_wer": 0.2805501230206296, "step": 4000 }, { "epoch": 2.21, "grad_norm": 0.5861485600471497, "learning_rate": 0.00016851219512195123, "loss": 0.182, "step": 4400 }, { "epoch": 2.26, "eval_loss": 0.33475443720817566, "eval_runtime": 1111.5845, "eval_samples_per_second": 23.595, "eval_steps_per_second": 0.738, "eval_wer": 0.2655689441255021, "step": 4500 }, { "epoch": 2.41, "grad_norm": 0.585738480091095, "learning_rate": 0.0001538780487804878, "loss": 0.1777, "step": 4800 }, { "epoch": 2.51, "eval_loss": 0.32769647240638733, "eval_runtime": 1109.5932, "eval_samples_per_second": 23.637, "eval_steps_per_second": 0.739, "eval_wer": 0.2611948772948079, "step": 5000 }, { "epoch": 2.61, "grad_norm": 0.6404664516448975, "learning_rate": 0.00013924390243902438, "loss": 0.1734, "step": 5200 }, { "epoch": 2.77, "eval_loss": 0.33233708143234253, "eval_runtime": 1114.1252, "eval_samples_per_second": 23.541, "eval_steps_per_second": 0.736, "eval_wer": 0.2643113999116775, "step": 5500 }, { "epoch": 2.82, "grad_norm": 1.567084550857544, "learning_rate": 0.00012460975609756097, "loss": 0.1704, "step": 5600 }, { "epoch": 3.02, "grad_norm": 1.35818612575531, "learning_rate": 0.00010997560975609755, "loss": 0.1629, "step": 6000 }, { "epoch": 3.02, "eval_loss": 0.31713536381721497, "eval_runtime": 1084.7842, "eval_samples_per_second": 24.178, "eval_steps_per_second": 0.756, "eval_wer": 0.24851428931928585, "step": 6000 }, { "epoch": 3.22, "grad_norm": 1.0975894927978516, "learning_rate": 9.534146341463413e-05, "loss": 0.1338, "step": 6400 }, { "epoch": 3.27, "eval_loss": 0.310283362865448, "eval_runtime": 1090.9879, "eval_samples_per_second": 24.041, "eval_steps_per_second": 0.752, "eval_wer": 0.23984186066073643, "step": 6500 }, { "epoch": 3.42, "grad_norm": 1.2747470140457153, "learning_rate": 8.070731707317072e-05, "loss": 0.1292, "step": 6800 }, { "epoch": 3.52, "eval_loss": 0.2933865785598755, "eval_runtime": 1076.7354, "eval_samples_per_second": 24.359, "eval_steps_per_second": 0.762, "eval_wer": 0.22680798267196603, "step": 7000 }, { "epoch": 3.62, "grad_norm": 0.5606548190116882, "learning_rate": 6.607317073170731e-05, "loss": 0.1264, "step": 7200 }, { "epoch": 3.77, "eval_loss": 0.29226595163345337, "eval_runtime": 1074.899, "eval_samples_per_second": 24.4, "eval_steps_per_second": 0.763, "eval_wer": 0.22483965259815364, "step": 7500 }, { "epoch": 3.82, "grad_norm": 1.5185168981552124, "learning_rate": 5.14390243902439e-05, "loss": 0.1241, "step": 7600 }, { "epoch": 4.02, "grad_norm": 0.7815582156181335, "learning_rate": 3.680487804878048e-05, "loss": 0.118, "step": 8000 }, { "epoch": 4.02, "eval_loss": 0.28800907731056213, "eval_runtime": 1083.1518, "eval_samples_per_second": 24.215, "eval_steps_per_second": 0.757, "eval_wer": 0.21931739322440225, "step": 8000 }, { "epoch": 4.22, "grad_norm": 1.339690089225769, "learning_rate": 2.217073170731707e-05, "loss": 0.0996, "step": 8400 }, { "epoch": 4.27, "eval_loss": 0.2792861759662628, "eval_runtime": 1078.7477, "eval_samples_per_second": 24.313, "eval_steps_per_second": 0.76, "eval_wer": 0.21242403213256786, "step": 8500 }, { "epoch": 4.42, "grad_norm": 1.0311238765716553, "learning_rate": 7.536585365853659e-06, "loss": 0.0969, "step": 8800 }, { "epoch": 4.52, "eval_loss": 0.2764733135700226, "eval_runtime": 1074.1619, "eval_samples_per_second": 24.417, "eval_steps_per_second": 0.763, "eval_wer": 0.21145248459613483, "step": 9000 }, { "epoch": 4.52, "step": 9000, "total_flos": 8.933861078537978e+19, "train_loss": 0.49683192168341744, "train_runtime": 52483.3074, "train_samples_per_second": 10.975, "train_steps_per_second": 0.171 } ], "logging_steps": 400, "max_steps": 9000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 8.933861078537978e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }