{ "best_metric": 0.13566730916500092, "best_model_checkpoint": "wav2vec2-large-xlsr-mecita-coraa-portuguese-aug-random-all-03/checkpoint-3601", "epoch": 26.99902818270165, "global_step": 13891, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3e-05, "loss": 26.0165, "step": 1 }, { "epoch": 0.19, "learning_rate": 2.9943968871595333e-05, "loss": 7.0565, "step": 100 }, { "epoch": 0.39, "learning_rate": 2.9885603112840468e-05, "loss": 3.041, "step": 200 }, { "epoch": 0.58, "learning_rate": 2.9827237354085602e-05, "loss": 2.9395, "step": 300 }, { "epoch": 0.78, "learning_rate": 2.976887159533074e-05, "loss": 2.9123, "step": 400 }, { "epoch": 0.97, "learning_rate": 2.9710505836575875e-05, "loss": 2.883, "step": 500 }, { "epoch": 1.0, "eval_cer": 0.9943331399759047, "eval_loss": 2.588413715362549, "eval_runtime": 6.3319, "eval_samples_per_second": 53.855, "eval_steps_per_second": 6.791, "eval_wer": 0.9954837176135013, "step": 514 }, { "epoch": 1.17, "learning_rate": 2.9652140077821013e-05, "loss": 2.4083, "step": 600 }, { "epoch": 1.36, "learning_rate": 2.9593774319066147e-05, "loss": 1.4564, "step": 700 }, { "epoch": 1.55, "learning_rate": 2.9535408560311285e-05, "loss": 1.2033, "step": 800 }, { "epoch": 1.75, "learning_rate": 2.947704280155642e-05, "loss": 1.0371, "step": 900 }, { "epoch": 1.94, "learning_rate": 2.9418677042801558e-05, "loss": 0.9481, "step": 1000 }, { "epoch": 2.0, "eval_cer": 0.04359466333496943, "eval_loss": 0.2559501826763153, "eval_runtime": 6.3303, "eval_samples_per_second": 53.868, "eval_steps_per_second": 6.793, "eval_wer": 0.14737342524364155, "step": 1029 }, { "epoch": 2.14, "learning_rate": 2.9360311284046692e-05, "loss": 0.8883, "step": 1100 }, { "epoch": 2.33, "learning_rate": 2.930194552529183e-05, "loss": 0.8307, "step": 1200 }, { "epoch": 2.53, "learning_rate": 2.9243579766536965e-05, "loss": 0.7553, "step": 1300 }, { "epoch": 2.72, "learning_rate": 2.9185214007782103e-05, "loss": 0.7774, "step": 1400 }, { "epoch": 2.92, "learning_rate": 2.9126848249027238e-05, "loss": 0.742, "step": 1500 }, { "epoch": 3.0, "eval_cer": 0.03404578108964348, "eval_loss": 0.18016588687896729, "eval_runtime": 7.4035, "eval_samples_per_second": 46.059, "eval_steps_per_second": 5.808, "eval_wer": 0.10981697171381032, "step": 1543 }, { "epoch": 3.11, "learning_rate": 2.9068482490272376e-05, "loss": 0.684, "step": 1600 }, { "epoch": 3.3, "learning_rate": 2.9010116731517514e-05, "loss": 0.6992, "step": 1700 }, { "epoch": 3.5, "learning_rate": 2.8951750972762645e-05, "loss": 0.6371, "step": 1800 }, { "epoch": 3.69, "learning_rate": 2.8893385214007783e-05, "loss": 0.6504, "step": 1900 }, { "epoch": 3.89, "learning_rate": 2.8835019455252917e-05, "loss": 0.625, "step": 2000 }, { "epoch": 4.0, "eval_cer": 0.030833073044487083, "eval_loss": 0.1589926928281784, "eval_runtime": 7.3132, "eval_samples_per_second": 46.628, "eval_steps_per_second": 5.88, "eval_wer": 0.09745661991918232, "step": 2058 }, { "epoch": 4.08, "learning_rate": 2.8776653696498055e-05, "loss": 0.641, "step": 2100 }, { "epoch": 4.28, "learning_rate": 2.871828793774319e-05, "loss": 0.6105, "step": 2200 }, { "epoch": 4.47, "learning_rate": 2.8659922178988328e-05, "loss": 0.5456, "step": 2300 }, { "epoch": 4.66, "learning_rate": 2.8601556420233462e-05, "loss": 0.611, "step": 2400 }, { "epoch": 4.86, "learning_rate": 2.85431906614786e-05, "loss": 0.6001, "step": 2500 }, { "epoch": 5.0, "eval_cer": 0.029226719021908883, "eval_loss": 0.14863738417625427, "eval_runtime": 6.6768, "eval_samples_per_second": 51.072, "eval_steps_per_second": 6.44, "eval_wer": 0.08866175421915855, "step": 2572 }, { "epoch": 5.05, "learning_rate": 2.8484824902723735e-05, "loss": 0.5914, "step": 2600 }, { "epoch": 5.25, "learning_rate": 2.8426459143968873e-05, "loss": 0.5734, "step": 2700 }, { "epoch": 5.44, "learning_rate": 2.8368093385214007e-05, "loss": 0.5532, "step": 2800 }, { "epoch": 5.64, "learning_rate": 2.8309727626459145e-05, "loss": 0.5305, "step": 2900 }, { "epoch": 5.83, "learning_rate": 2.8251361867704283e-05, "loss": 0.5208, "step": 3000 }, { "epoch": 6.0, "eval_cer": 0.028423542010619786, "eval_loss": 0.1423884481191635, "eval_runtime": 7.8913, "eval_samples_per_second": 43.212, "eval_steps_per_second": 5.449, "eval_wer": 0.09175184216781554, "step": 3087 }, { "epoch": 6.03, "learning_rate": 2.8192996108949418e-05, "loss": 0.5267, "step": 3100 }, { "epoch": 6.22, "learning_rate": 2.8134630350194556e-05, "loss": 0.526, "step": 3200 }, { "epoch": 6.41, "learning_rate": 2.807626459143969e-05, "loss": 0.5118, "step": 3300 }, { "epoch": 6.61, "learning_rate": 2.8017898832684825e-05, "loss": 0.5081, "step": 3400 }, { "epoch": 6.8, "learning_rate": 2.795953307392996e-05, "loss": 0.4917, "step": 3500 }, { "epoch": 7.0, "learning_rate": 2.7901167315175098e-05, "loss": 0.4857, "step": 3600 }, { "epoch": 7.0, "eval_cer": 0.026817187988041585, "eval_loss": 0.13566730916500092, "eval_runtime": 6.2287, "eval_samples_per_second": 54.747, "eval_steps_per_second": 6.904, "eval_wer": 0.08438317090563346, "step": 3601 }, { "epoch": 7.19, "learning_rate": 2.7842801556420232e-05, "loss": 0.4508, "step": 3700 }, { "epoch": 7.39, "learning_rate": 2.778443579766537e-05, "loss": 0.462, "step": 3800 }, { "epoch": 7.58, "learning_rate": 2.7726070038910505e-05, "loss": 0.4779, "step": 3900 }, { "epoch": 7.77, "learning_rate": 2.7667704280155643e-05, "loss": 0.4848, "step": 4000 }, { "epoch": 7.97, "learning_rate": 2.7609338521400777e-05, "loss": 0.4458, "step": 4100 }, { "epoch": 8.0, "eval_cer": 0.0316808710008478, "eval_loss": 0.13754041492938995, "eval_runtime": 8.163, "eval_samples_per_second": 41.774, "eval_steps_per_second": 5.268, "eval_wer": 0.08818635607321132, "step": 4116 }, { "epoch": 8.16, "learning_rate": 2.7550972762645915e-05, "loss": 0.431, "step": 4200 }, { "epoch": 8.36, "learning_rate": 2.749260700389105e-05, "loss": 0.4272, "step": 4300 }, { "epoch": 8.55, "learning_rate": 2.7434241245136188e-05, "loss": 0.4333, "step": 4400 }, { "epoch": 8.75, "learning_rate": 2.7375875486381326e-05, "loss": 0.4415, "step": 4500 }, { "epoch": 8.94, "learning_rate": 2.731750972762646e-05, "loss": 0.4158, "step": 4600 }, { "epoch": 9.0, "eval_cer": 0.0303422426486993, "eval_loss": 0.1411372721195221, "eval_runtime": 7.463, "eval_samples_per_second": 45.692, "eval_steps_per_second": 5.762, "eval_wer": 0.08390777275968624, "step": 4630 }, { "epoch": 9.14, "learning_rate": 2.72591439688716e-05, "loss": 0.4085, "step": 4700 }, { "epoch": 9.33, "learning_rate": 2.7200778210116733e-05, "loss": 0.4255, "step": 4800 }, { "epoch": 9.52, "learning_rate": 2.714241245136187e-05, "loss": 0.4186, "step": 4900 }, { "epoch": 9.72, "learning_rate": 2.7084046692607002e-05, "loss": 0.4326, "step": 5000 }, { "epoch": 9.91, "learning_rate": 2.702568093385214e-05, "loss": 0.3915, "step": 5100 }, { "epoch": 10.0, "eval_cer": 0.03190397572620588, "eval_loss": 0.14571979641914368, "eval_runtime": 6.3221, "eval_samples_per_second": 53.938, "eval_steps_per_second": 6.802, "eval_wer": 0.09151414309484193, "step": 5145 }, { "epoch": 10.11, "learning_rate": 2.6967315175097275e-05, "loss": 0.3886, "step": 5200 }, { "epoch": 10.3, "learning_rate": 2.6908949416342413e-05, "loss": 0.3821, "step": 5300 }, { "epoch": 10.5, "learning_rate": 2.6850583657587547e-05, "loss": 0.373, "step": 5400 }, { "epoch": 10.69, "learning_rate": 2.6792217898832685e-05, "loss": 0.385, "step": 5500 }, { "epoch": 10.88, "learning_rate": 2.673385214007782e-05, "loss": 0.3898, "step": 5600 }, { "epoch": 11.0, "eval_cer": 0.031011556824773547, "eval_loss": 0.14640487730503082, "eval_runtime": 7.7134, "eval_samples_per_second": 44.209, "eval_steps_per_second": 5.575, "eval_wer": 0.08699786070834324, "step": 5659 }, { "epoch": 11.08, "learning_rate": 2.6675486381322958e-05, "loss": 0.3711, "step": 5700 }, { "epoch": 11.27, "learning_rate": 2.6617120622568092e-05, "loss": 0.3698, "step": 5800 }, { "epoch": 11.47, "learning_rate": 2.655875486381323e-05, "loss": 0.3609, "step": 5900 }, { "epoch": 11.66, "learning_rate": 2.650038910505837e-05, "loss": 0.3577, "step": 6000 }, { "epoch": 11.86, "learning_rate": 2.6442023346303503e-05, "loss": 0.3562, "step": 6100 }, { "epoch": 12.0, "eval_cer": 0.0314131453304181, "eval_loss": 0.15003812313079834, "eval_runtime": 8.0687, "eval_samples_per_second": 42.262, "eval_steps_per_second": 5.329, "eval_wer": 0.08747325885429047, "step": 6174 }, { "epoch": 12.05, "learning_rate": 2.638365758754864e-05, "loss": 0.3992, "step": 6200 }, { "epoch": 12.24, "learning_rate": 2.6325291828793776e-05, "loss": 0.3677, "step": 6300 }, { "epoch": 12.44, "learning_rate": 2.6266926070038913e-05, "loss": 0.345, "step": 6400 }, { "epoch": 12.63, "learning_rate": 2.6208560311284048e-05, "loss": 0.3319, "step": 6500 }, { "epoch": 12.83, "learning_rate": 2.6150194552529183e-05, "loss": 0.3619, "step": 6600 }, { "epoch": 13.0, "eval_cer": 0.031279282495203245, "eval_loss": 0.15227940678596497, "eval_runtime": 6.2463, "eval_samples_per_second": 54.592, "eval_steps_per_second": 6.884, "eval_wer": 0.08771095792726408, "step": 6688 }, { "epoch": 13.02, "learning_rate": 2.6091828793774317e-05, "loss": 0.3291, "step": 6700 }, { "epoch": 13.22, "learning_rate": 2.6033463035019455e-05, "loss": 0.3535, "step": 6800 }, { "epoch": 13.41, "learning_rate": 2.597509727626459e-05, "loss": 0.3331, "step": 6900 }, { "epoch": 13.61, "learning_rate": 2.5916731517509728e-05, "loss": 0.334, "step": 7000 }, { "epoch": 13.8, "learning_rate": 2.5858365758754862e-05, "loss": 0.3435, "step": 7100 }, { "epoch": 13.99, "learning_rate": 2.58e-05, "loss": 0.3283, "step": 7200 }, { "epoch": 14.0, "eval_cer": 0.028958993351479185, "eval_loss": 0.1473035216331482, "eval_runtime": 6.4288, "eval_samples_per_second": 53.043, "eval_steps_per_second": 6.689, "eval_wer": 0.08557166627050154, "step": 7203 }, { "epoch": 14.19, "learning_rate": 2.5741634241245138e-05, "loss": 0.3169, "step": 7300 }, { "epoch": 14.38, "learning_rate": 2.5683268482490273e-05, "loss": 0.3146, "step": 7400 }, { "epoch": 14.58, "learning_rate": 2.562490272373541e-05, "loss": 0.3201, "step": 7500 }, { "epoch": 14.77, "learning_rate": 2.5566536964980545e-05, "loss": 0.3297, "step": 7600 }, { "epoch": 14.97, "learning_rate": 2.5508171206225683e-05, "loss": 0.3196, "step": 7700 }, { "epoch": 15.0, "eval_cer": 0.029896033197983132, "eval_loss": 0.14426732063293457, "eval_runtime": 6.3433, "eval_samples_per_second": 53.757, "eval_steps_per_second": 6.779, "eval_wer": 0.08438317090563346, "step": 7717 }, { "epoch": 15.16, "learning_rate": 2.5449805447470818e-05, "loss": 0.3124, "step": 7800 }, { "epoch": 15.35, "learning_rate": 2.5391439688715956e-05, "loss": 0.312, "step": 7900 }, { "epoch": 15.55, "learning_rate": 2.533307392996109e-05, "loss": 0.3089, "step": 8000 }, { "epoch": 15.74, "learning_rate": 2.527470817120623e-05, "loss": 0.2985, "step": 8100 }, { "epoch": 15.94, "learning_rate": 2.521634241245136e-05, "loss": 0.3165, "step": 8200 }, { "epoch": 16.0, "eval_cer": 0.028334300120476552, "eval_loss": 0.1413327008485794, "eval_runtime": 6.6839, "eval_samples_per_second": 51.018, "eval_steps_per_second": 6.433, "eval_wer": 0.08129308295697647, "step": 8232 }, { "epoch": 16.13, "learning_rate": 2.5157976653696498e-05, "loss": 0.2891, "step": 8300 }, { "epoch": 16.33, "learning_rate": 2.5100194552529184e-05, "loss": 0.3053, "step": 8400 }, { "epoch": 16.52, "learning_rate": 2.5041828793774322e-05, "loss": 0.3003, "step": 8500 }, { "epoch": 16.72, "learning_rate": 2.4983463035019457e-05, "loss": 0.311, "step": 8600 }, { "epoch": 16.91, "learning_rate": 2.4925097276264595e-05, "loss": 0.2954, "step": 8700 }, { "epoch": 17.0, "eval_cer": 0.028289679175404935, "eval_loss": 0.14512528479099274, "eval_runtime": 6.2782, "eval_samples_per_second": 54.315, "eval_steps_per_second": 6.849, "eval_wer": 0.08248157832184455, "step": 8746 }, { "epoch": 17.1, "learning_rate": 2.486673151750973e-05, "loss": 0.2687, "step": 8800 }, { "epoch": 17.3, "learning_rate": 2.4808365758754864e-05, "loss": 0.2746, "step": 8900 }, { "epoch": 17.49, "learning_rate": 2.475e-05, "loss": 0.2857, "step": 9000 }, { "epoch": 17.69, "learning_rate": 2.4691634241245136e-05, "loss": 0.2819, "step": 9100 }, { "epoch": 17.88, "learning_rate": 2.463326848249027e-05, "loss": 0.293, "step": 9200 }, { "epoch": 18.0, "eval_cer": 0.02860202579090625, "eval_loss": 0.15394243597984314, "eval_runtime": 6.29, "eval_samples_per_second": 54.213, "eval_steps_per_second": 6.836, "eval_wer": 0.08224387924887093, "step": 9261 }, { "epoch": 18.08, "learning_rate": 2.457490272373541e-05, "loss": 0.295, "step": 9300 }, { "epoch": 18.27, "learning_rate": 2.4516536964980544e-05, "loss": 0.3022, "step": 9400 }, { "epoch": 18.46, "learning_rate": 2.445817120622568e-05, "loss": 0.2714, "step": 9500 }, { "epoch": 18.66, "learning_rate": 2.4399805447470816e-05, "loss": 0.2881, "step": 9600 }, { "epoch": 18.85, "learning_rate": 2.4341439688715954e-05, "loss": 0.2821, "step": 9700 }, { "epoch": 19.0, "eval_cer": 0.029628307527553435, "eval_loss": 0.15518580377101898, "eval_runtime": 8.2771, "eval_samples_per_second": 41.198, "eval_steps_per_second": 5.195, "eval_wer": 0.08438317090563346, "step": 9775 }, { "epoch": 19.05, "learning_rate": 2.4283073929961092e-05, "loss": 0.271, "step": 9800 }, { "epoch": 19.24, "learning_rate": 2.4224708171206227e-05, "loss": 0.2696, "step": 9900 }, { "epoch": 19.44, "learning_rate": 2.4166342412451365e-05, "loss": 0.2715, "step": 10000 }, { "epoch": 19.63, "learning_rate": 2.41079766536965e-05, "loss": 0.2847, "step": 10100 }, { "epoch": 19.83, "learning_rate": 2.4049610894941637e-05, "loss": 0.2893, "step": 10200 }, { "epoch": 20.0, "eval_cer": 0.028468162955691403, "eval_loss": 0.1484398990869522, "eval_runtime": 6.2768, "eval_samples_per_second": 54.327, "eval_steps_per_second": 6.851, "eval_wer": 0.08200618017589731, "step": 10290 }, { "epoch": 20.02, "learning_rate": 2.3991245136186772e-05, "loss": 0.2666, "step": 10300 }, { "epoch": 20.21, "learning_rate": 2.393287937743191e-05, "loss": 0.2589, "step": 10400 }, { "epoch": 20.41, "learning_rate": 2.387451361867704e-05, "loss": 0.2579, "step": 10500 }, { "epoch": 20.6, "learning_rate": 2.381614785992218e-05, "loss": 0.2573, "step": 10600 }, { "epoch": 20.8, "learning_rate": 2.3757782101167314e-05, "loss": 0.2632, "step": 10700 }, { "epoch": 20.99, "learning_rate": 2.369941634241245e-05, "loss": 0.2609, "step": 10800 }, { "epoch": 21.0, "eval_cer": 0.03074383115434385, "eval_loss": 0.16356056928634644, "eval_runtime": 6.6619, "eval_samples_per_second": 51.187, "eval_steps_per_second": 6.455, "eval_wer": 0.08509626812455431, "step": 10804 }, { "epoch": 21.19, "learning_rate": 2.3641050583657586e-05, "loss": 0.2692, "step": 10900 }, { "epoch": 21.38, "learning_rate": 2.3583268482490276e-05, "loss": 0.2576, "step": 11000 }, { "epoch": 21.57, "learning_rate": 2.3524902723735407e-05, "loss": 0.2663, "step": 11100 }, { "epoch": 21.77, "learning_rate": 2.3466536964980545e-05, "loss": 0.2515, "step": 11200 }, { "epoch": 21.96, "learning_rate": 2.340817120622568e-05, "loss": 0.2526, "step": 11300 }, { "epoch": 22.0, "eval_cer": 0.029182098076837266, "eval_loss": 0.15198495984077454, "eval_runtime": 6.3434, "eval_samples_per_second": 53.756, "eval_steps_per_second": 6.779, "eval_wer": 0.08557166627050154, "step": 11319 }, { "epoch": 22.16, "learning_rate": 2.3349805447470818e-05, "loss": 0.2732, "step": 11400 }, { "epoch": 22.35, "learning_rate": 2.3291439688715952e-05, "loss": 0.2424, "step": 11500 }, { "epoch": 22.55, "learning_rate": 2.323307392996109e-05, "loss": 0.2586, "step": 11600 }, { "epoch": 22.74, "learning_rate": 2.3174708171206225e-05, "loss": 0.274, "step": 11700 }, { "epoch": 22.93, "learning_rate": 2.3116342412451363e-05, "loss": 0.2571, "step": 11800 }, { "epoch": 23.0, "eval_cer": 0.029092856186694036, "eval_loss": 0.14488764107227325, "eval_runtime": 6.7698, "eval_samples_per_second": 50.37, "eval_steps_per_second": 6.352, "eval_wer": 0.08509626812455431, "step": 11833 }, { "epoch": 23.13, "learning_rate": 2.3057976653696497e-05, "loss": 0.2513, "step": 11900 }, { "epoch": 23.32, "learning_rate": 2.2999610894941635e-05, "loss": 0.2577, "step": 12000 }, { "epoch": 23.52, "learning_rate": 2.294124513618677e-05, "loss": 0.2335, "step": 12100 }, { "epoch": 23.71, "learning_rate": 2.2882879377431908e-05, "loss": 0.2573, "step": 12200 }, { "epoch": 23.91, "learning_rate": 2.2824513618677043e-05, "loss": 0.2486, "step": 12300 }, { "epoch": 24.0, "eval_cer": 0.030699210209272233, "eval_loss": 0.15738651156425476, "eval_runtime": 7.9129, "eval_samples_per_second": 43.094, "eval_steps_per_second": 5.434, "eval_wer": 0.08652246256239601, "step": 12348 }, { "epoch": 24.1, "learning_rate": 2.276614785992218e-05, "loss": 0.2439, "step": 12400 }, { "epoch": 24.3, "learning_rate": 2.270778210116732e-05, "loss": 0.253, "step": 12500 }, { "epoch": 24.49, "learning_rate": 2.2649416342412453e-05, "loss": 0.2435, "step": 12600 }, { "epoch": 24.68, "learning_rate": 2.2591050583657588e-05, "loss": 0.2378, "step": 12700 }, { "epoch": 24.88, "learning_rate": 2.2532684824902722e-05, "loss": 0.2501, "step": 12800 }, { "epoch": 25.0, "eval_cer": 0.0295390656374102, "eval_loss": 0.1490197330713272, "eval_runtime": 6.0948, "eval_samples_per_second": 55.949, "eval_steps_per_second": 7.055, "eval_wer": 0.08557166627050154, "step": 12862 }, { "epoch": 25.07, "learning_rate": 2.247431906614786e-05, "loss": 0.2399, "step": 12900 }, { "epoch": 25.27, "learning_rate": 2.2415953307392995e-05, "loss": 0.2368, "step": 13000 }, { "epoch": 25.46, "learning_rate": 2.2357587548638133e-05, "loss": 0.2396, "step": 13100 }, { "epoch": 25.66, "learning_rate": 2.2299221789883267e-05, "loss": 0.2117, "step": 13200 }, { "epoch": 25.85, "learning_rate": 2.2240856031128405e-05, "loss": 0.2525, "step": 13300 }, { "epoch": 26.0, "eval_cer": 0.02940520280219535, "eval_loss": 0.1508348286151886, "eval_runtime": 6.2168, "eval_samples_per_second": 54.851, "eval_steps_per_second": 6.917, "eval_wer": 0.08271927739481816, "step": 13377 }, { "epoch": 26.04, "learning_rate": 2.218249027237354e-05, "loss": 0.2312, "step": 13400 }, { "epoch": 26.24, "learning_rate": 2.2124708171206226e-05, "loss": 0.2363, "step": 13500 }, { "epoch": 26.43, "learning_rate": 2.206634241245136e-05, "loss": 0.2262, "step": 13600 }, { "epoch": 26.63, "learning_rate": 2.20079766536965e-05, "loss": 0.2415, "step": 13700 }, { "epoch": 26.82, "learning_rate": 2.1949610894941634e-05, "loss": 0.2452, "step": 13800 }, { "epoch": 27.0, "eval_cer": 0.029003614296550802, "eval_loss": 0.15108107030391693, "eval_runtime": 6.0859, "eval_samples_per_second": 56.031, "eval_steps_per_second": 7.065, "eval_wer": 0.08081768481102923, "step": 13891 }, { "epoch": 27.0, "step": 13891, "total_flos": 1.1392249463344112e+20, "train_loss": 0.5354501710375033, "train_runtime": 10852.3147, "train_samples_per_second": 151.654, "train_steps_per_second": 4.736 } ], "max_steps": 51400, "num_train_epochs": 100, "total_flos": 1.1392249463344112e+20, "trial_name": null, "trial_params": null }