{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.013162988772745, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07742934572202866, "eval_loss": 3.5345706939697266, "eval_runtime": 152.1587, "eval_samples_per_second": 37.172, "eval_steps_per_second": 4.646, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.1548586914440573, "eval_loss": 2.982929229736328, "eval_runtime": 150.2931, "eval_samples_per_second": 37.633, "eval_steps_per_second": 4.704, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.23228803716608595, "eval_loss": 2.770493268966675, "eval_runtime": 150.0834, "eval_samples_per_second": 37.686, "eval_steps_per_second": 4.711, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.3097173828881146, "eval_loss": 1.3696156740188599, "eval_runtime": 150.2358, "eval_samples_per_second": 37.647, "eval_steps_per_second": 4.706, "eval_wer": 0.8535090112500201, "step": 400 }, { "epoch": 0.38714672861014326, "grad_norm": 3.0665018558502197, "learning_rate": 0.00029699999999999996, "loss": 3.7305, "step": 500 }, { "epoch": 0.38714672861014326, "eval_loss": 1.0936249494552612, "eval_runtime": 153.8749, "eval_samples_per_second": 36.757, "eval_steps_per_second": 4.595, "eval_wer": 0.7465295052237968, "step": 500 }, { "epoch": 0.4645760743321719, "eval_loss": 0.8456823825836182, "eval_runtime": 153.7133, "eval_samples_per_second": 36.796, "eval_steps_per_second": 4.599, "eval_wer": 0.6413313861116015, "step": 600 }, { "epoch": 0.5420054200542005, "eval_loss": 0.7860042452812195, "eval_runtime": 152.7402, "eval_samples_per_second": 37.03, "eval_steps_per_second": 4.629, "eval_wer": 0.5835566753863684, "step": 700 }, { "epoch": 0.6194347657762292, "eval_loss": 0.7366299629211426, "eval_runtime": 151.7301, "eval_samples_per_second": 37.277, "eval_steps_per_second": 4.66, "eval_wer": 0.563736739901462, "step": 800 }, { "epoch": 0.6968641114982579, "eval_loss": 0.7318999171257019, "eval_runtime": 152.5414, "eval_samples_per_second": 37.078, "eval_steps_per_second": 4.635, "eval_wer": 0.5493572563431818, "step": 900 }, { "epoch": 0.7742934572202865, "grad_norm": 2.5090723037719727, "learning_rate": 0.00022928571428571426, "loss": 0.7504, "step": 1000 }, { "epoch": 0.7742934572202865, "eval_loss": 0.6438552737236023, "eval_runtime": 151.1771, "eval_samples_per_second": 37.413, "eval_steps_per_second": 4.677, "eval_wer": 0.5104074721959204, "step": 1000 }, { "epoch": 0.8517228029423152, "eval_loss": 0.6213911175727844, "eval_runtime": 151.6033, "eval_samples_per_second": 37.308, "eval_steps_per_second": 4.663, "eval_wer": 0.4759191795991077, "step": 1100 }, { "epoch": 0.9291521486643438, "eval_loss": 0.5957211852073669, "eval_runtime": 152.1457, "eval_samples_per_second": 37.175, "eval_steps_per_second": 4.647, "eval_wer": 0.4628396270321452, "step": 1200 }, { "epoch": 1.0065814943863725, "eval_loss": 0.5717456340789795, "eval_runtime": 152.0809, "eval_samples_per_second": 37.191, "eval_steps_per_second": 4.649, "eval_wer": 0.43531639678387446, "step": 1300 }, { "epoch": 1.084010840108401, "eval_loss": 0.549981951713562, "eval_runtime": 151.522, "eval_samples_per_second": 37.328, "eval_steps_per_second": 4.666, "eval_wer": 0.4192197204345942, "step": 1400 }, { "epoch": 1.1614401858304297, "grad_norm": 0.4617447853088379, "learning_rate": 0.00015799999999999996, "loss": 0.5571, "step": 1500 }, { "epoch": 1.1614401858304297, "eval_loss": 0.5342110395431519, "eval_runtime": 152.7132, "eval_samples_per_second": 37.037, "eval_steps_per_second": 4.63, "eval_wer": 0.4073277591436504, "step": 1500 }, { "epoch": 1.2388695315524583, "eval_loss": 0.5206533670425415, "eval_runtime": 151.157, "eval_samples_per_second": 37.418, "eval_steps_per_second": 4.677, "eval_wer": 0.4023687631397346, "step": 1600 }, { "epoch": 1.316298877274487, "eval_loss": 0.5142083168029785, "eval_runtime": 151.9132, "eval_samples_per_second": 37.232, "eval_steps_per_second": 4.654, "eval_wer": 0.3968641170900804, "step": 1700 }, { "epoch": 1.3937282229965158, "eval_loss": 0.5083270072937012, "eval_runtime": 152.7068, "eval_samples_per_second": 37.038, "eval_steps_per_second": 4.63, "eval_wer": 0.39583701112163183, "step": 1800 }, { "epoch": 1.4711575687185443, "eval_loss": 0.4886321723461151, "eval_runtime": 152.3343, "eval_samples_per_second": 37.129, "eval_steps_per_second": 4.641, "eval_wer": 0.3825488276548282, "step": 1900 }, { "epoch": 1.5485869144405728, "grad_norm": 0.441532164812088, "learning_rate": 8.685714285714285e-05, "loss": 0.4603, "step": 2000 }, { "epoch": 1.5485869144405728, "eval_loss": 0.4732557237148285, "eval_runtime": 155.1846, "eval_samples_per_second": 36.447, "eval_steps_per_second": 4.556, "eval_wer": 0.3743480284379965, "step": 2000 }, { "epoch": 1.6260162601626016, "eval_loss": 0.4615860879421234, "eval_runtime": 155.1621, "eval_samples_per_second": 36.452, "eval_steps_per_second": 4.557, "eval_wer": 0.3618622715090433, "step": 2100 }, { "epoch": 1.7034456058846303, "eval_loss": 0.4535791277885437, "eval_runtime": 152.4739, "eval_samples_per_second": 37.095, "eval_steps_per_second": 4.637, "eval_wer": 0.3627288921699218, "step": 2200 }, { "epoch": 1.7808749516066589, "eval_loss": 0.44882732629776, "eval_runtime": 153.2132, "eval_samples_per_second": 36.916, "eval_steps_per_second": 4.614, "eval_wer": 0.3487185248190528, "step": 2300 }, { "epoch": 1.8583042973286876, "eval_loss": 0.4429319202899933, "eval_runtime": 154.0199, "eval_samples_per_second": 36.723, "eval_steps_per_second": 4.59, "eval_wer": 0.34810868065028644, "step": 2400 }, { "epoch": 1.9357336430507162, "grad_norm": 0.48590919375419617, "learning_rate": 1.557142857142857e-05, "loss": 0.4163, "step": 2500 }, { "epoch": 1.9357336430507162, "eval_loss": 0.4377308487892151, "eval_runtime": 154.6322, "eval_samples_per_second": 36.577, "eval_steps_per_second": 4.572, "eval_wer": 0.3418978992473239, "step": 2500 }, { "epoch": 2.013162988772745, "eval_loss": 0.4348967373371124, "eval_runtime": 154.2249, "eval_samples_per_second": 36.674, "eval_steps_per_second": 4.584, "eval_wer": 0.3390733578340903, "step": 2600 }, { "epoch": 2.013162988772745, "step": 2600, "total_flos": 1.1633941226063049e+19, "train_loss": 1.1519982103201059, "train_runtime": 7259.2244, "train_samples_per_second": 11.461, "train_steps_per_second": 0.358 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "total_flos": 1.1633941226063049e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }