diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,36889 @@ +{ + "best_metric": 1.0, + "best_model_checkpoint": "./wavbert_fongbe/checkpoint-5200", + "epoch": 1.8220042046250877, + "eval_steps": 100, + "global_step": 5200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": Infinity, + "learning_rate": 0.0, + "loss": 9.8297, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": Infinity, + "learning_rate": 0.0, + "loss": 9.9301, + "step": 2 + }, + { + "epoch": 0.0, + "grad_norm": 34.74740982055664, + "learning_rate": 6e-07, + "loss": 10.394, + "step": 3 + }, + { + "epoch": 0.0, + "grad_norm": 25.30831527709961, + "learning_rate": 1.2e-06, + "loss": 8.5656, + "step": 4 + }, + { + "epoch": 0.0, + "grad_norm": 38.40359115600586, + "learning_rate": 1.8e-06, + "loss": 11.2407, + "step": 5 + }, + { + "epoch": 0.0, + "grad_norm": 28.476762771606445, + "learning_rate": 2.4e-06, + "loss": 8.109, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": Infinity, + "learning_rate": 2.4e-06, + "loss": 11.0636, + "step": 7 + }, + { + "epoch": 0.0, + "grad_norm": 34.81722640991211, + "learning_rate": 2.9999999999999997e-06, + "loss": 9.1192, + "step": 8 + }, + { + "epoch": 0.0, + "grad_norm": 53.64858627319336, + "learning_rate": 3.6e-06, + "loss": 11.2103, + "step": 9 + }, + { + "epoch": 0.0, + "grad_norm": 34.31063461303711, + "learning_rate": 4.2e-06, + "loss": 7.7964, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 40.947654724121094, + "learning_rate": 4.8e-06, + "loss": 8.9611, + "step": 11 + }, + { + "epoch": 0.0, + "grad_norm": 54.53398513793945, + "learning_rate": 5.399999999999999e-06, + "loss": 8.7025, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 74.66500091552734, + "learning_rate": 5.999999999999999e-06, + "loss": 9.4326, + "step": 13 + }, + { + "epoch": 0.0, + "grad_norm": 44.239418029785156, + "learning_rate": 6.599999999999999e-06, + "loss": 6.5448, + "step": 14 + }, + { + "epoch": 0.01, + "grad_norm": 43.60188293457031, + "learning_rate": 7.2e-06, + "loss": 5.5535, + "step": 15 + }, + { + "epoch": 0.01, + "grad_norm": 49.674495697021484, + "learning_rate": 7.799999999999998e-06, + "loss": 5.7704, + "step": 16 + }, + { + "epoch": 0.01, + "grad_norm": 70.16291046142578, + "learning_rate": 8.4e-06, + "loss": 5.6929, + "step": 17 + }, + { + "epoch": 0.01, + "grad_norm": 37.02568817138672, + "learning_rate": 8.999999999999999e-06, + "loss": 3.9512, + "step": 18 + }, + { + "epoch": 0.01, + "grad_norm": 8.776915550231934, + "learning_rate": 9.6e-06, + "loss": 3.7522, + "step": 19 + }, + { + "epoch": 0.01, + "grad_norm": 10.268403053283691, + "learning_rate": 1.02e-05, + "loss": 3.557, + "step": 20 + }, + { + "epoch": 0.01, + "grad_norm": 21.770320892333984, + "learning_rate": 1.0799999999999998e-05, + "loss": 3.8904, + "step": 21 + }, + { + "epoch": 0.01, + "grad_norm": 12.886055946350098, + "learning_rate": 1.14e-05, + "loss": 3.4933, + "step": 22 + }, + { + "epoch": 0.01, + "grad_norm": 20.950607299804688, + "learning_rate": 1.1999999999999999e-05, + "loss": 3.4173, + "step": 23 + }, + { + "epoch": 0.01, + "grad_norm": 22.54509925842285, + "learning_rate": 1.26e-05, + "loss": 3.5335, + "step": 24 + }, + { + "epoch": 0.01, + "grad_norm": 13.497519493103027, + "learning_rate": 1.3199999999999997e-05, + "loss": 2.9166, + "step": 25 + }, + { + "epoch": 0.01, + "grad_norm": 11.36432933807373, + "learning_rate": 1.3799999999999998e-05, + "loss": 3.6862, + "step": 26 + }, + { + "epoch": 0.01, + "grad_norm": 21.900163650512695, + "learning_rate": 1.44e-05, + "loss": 3.3244, + "step": 27 + }, + { + "epoch": 0.01, + "grad_norm": NaN, + "learning_rate": 1.44e-05, + "loss": 3.1803, + "step": 28 + }, + { + "epoch": 0.01, + "grad_norm": 8.995499610900879, + "learning_rate": 1.4999999999999999e-05, + "loss": 3.4045, + "step": 29 + }, + { + "epoch": 0.01, + "grad_norm": 11.834275245666504, + "learning_rate": 1.5599999999999996e-05, + "loss": 3.1981, + "step": 30 + }, + { + "epoch": 0.01, + "grad_norm": 6.6730780601501465, + "learning_rate": 1.6199999999999997e-05, + "loss": 3.2896, + "step": 31 + }, + { + "epoch": 0.01, + "grad_norm": 14.79286003112793, + "learning_rate": 1.68e-05, + "loss": 3.1652, + "step": 32 + }, + { + "epoch": 0.01, + "grad_norm": 5.442844867706299, + "learning_rate": 1.74e-05, + "loss": 3.102, + "step": 33 + }, + { + "epoch": 0.01, + "grad_norm": 6.1123127937316895, + "learning_rate": 1.7999999999999997e-05, + "loss": 3.1152, + "step": 34 + }, + { + "epoch": 0.01, + "grad_norm": 5.981231212615967, + "learning_rate": 1.8599999999999998e-05, + "loss": 3.1492, + "step": 35 + }, + { + "epoch": 0.01, + "grad_norm": 6.901692867279053, + "learning_rate": 1.92e-05, + "loss": 3.1973, + "step": 36 + }, + { + "epoch": 0.01, + "grad_norm": 17.36743927001953, + "learning_rate": 1.98e-05, + "loss": 3.0872, + "step": 37 + }, + { + "epoch": 0.01, + "grad_norm": 11.805011749267578, + "learning_rate": 2.04e-05, + "loss": 2.8694, + "step": 38 + }, + { + "epoch": 0.01, + "grad_norm": 8.926337242126465, + "learning_rate": 2.1e-05, + "loss": 3.5693, + "step": 39 + }, + { + "epoch": 0.01, + "grad_norm": 7.315157890319824, + "learning_rate": 2.1599999999999996e-05, + "loss": 2.776, + "step": 40 + }, + { + "epoch": 0.01, + "grad_norm": 8.084774017333984, + "learning_rate": 2.2199999999999998e-05, + "loss": 2.8658, + "step": 41 + }, + { + "epoch": 0.01, + "grad_norm": 10.755606651306152, + "learning_rate": 2.28e-05, + "loss": 2.7053, + "step": 42 + }, + { + "epoch": 0.02, + "grad_norm": 7.2747626304626465, + "learning_rate": 2.34e-05, + "loss": 2.6037, + "step": 43 + }, + { + "epoch": 0.02, + "grad_norm": 9.70805835723877, + "learning_rate": 2.3999999999999997e-05, + "loss": 3.1741, + "step": 44 + }, + { + "epoch": 0.02, + "grad_norm": 18.0622615814209, + "learning_rate": 2.4599999999999998e-05, + "loss": 3.0041, + "step": 45 + }, + { + "epoch": 0.02, + "grad_norm": 14.248910903930664, + "learning_rate": 2.52e-05, + "loss": 2.6704, + "step": 46 + }, + { + "epoch": 0.02, + "grad_norm": 7.843019485473633, + "learning_rate": 2.5799999999999997e-05, + "loss": 2.5436, + "step": 47 + }, + { + "epoch": 0.02, + "grad_norm": 10.0279541015625, + "learning_rate": 2.6399999999999995e-05, + "loss": 2.5453, + "step": 48 + }, + { + "epoch": 0.02, + "grad_norm": 47.676048278808594, + "learning_rate": 2.6999999999999996e-05, + "loss": 2.4351, + "step": 49 + }, + { + "epoch": 0.02, + "grad_norm": 10.947311401367188, + "learning_rate": 2.7599999999999997e-05, + "loss": 2.8235, + "step": 50 + }, + { + "epoch": 0.02, + "grad_norm": 5.891237735748291, + "learning_rate": 2.8199999999999998e-05, + "loss": 3.0562, + "step": 51 + }, + { + "epoch": 0.02, + "grad_norm": 7.135880947113037, + "learning_rate": 2.88e-05, + "loss": 3.0568, + "step": 52 + }, + { + "epoch": 0.02, + "grad_norm": 6.591911315917969, + "learning_rate": 2.94e-05, + "loss": 2.8868, + "step": 53 + }, + { + "epoch": 0.02, + "grad_norm": 7.592108726501465, + "learning_rate": 2.9999999999999997e-05, + "loss": 2.8324, + "step": 54 + }, + { + "epoch": 0.02, + "grad_norm": 7.499967098236084, + "learning_rate": 3.06e-05, + "loss": 2.9309, + "step": 55 + }, + { + "epoch": 0.02, + "grad_norm": 13.009428977966309, + "learning_rate": 3.119999999999999e-05, + "loss": 2.9151, + "step": 56 + }, + { + "epoch": 0.02, + "grad_norm": 10.796736717224121, + "learning_rate": 3.1799999999999994e-05, + "loss": 2.9887, + "step": 57 + }, + { + "epoch": 0.02, + "grad_norm": 6.898942947387695, + "learning_rate": 3.2399999999999995e-05, + "loss": 2.7634, + "step": 58 + }, + { + "epoch": 0.02, + "grad_norm": 4.915682792663574, + "learning_rate": 3.2999999999999996e-05, + "loss": 2.3548, + "step": 59 + }, + { + "epoch": 0.02, + "grad_norm": 6.942079067230225, + "learning_rate": 3.36e-05, + "loss": 2.8065, + "step": 60 + }, + { + "epoch": 0.02, + "grad_norm": 9.648048400878906, + "learning_rate": 3.42e-05, + "loss": 2.7106, + "step": 61 + }, + { + "epoch": 0.02, + "grad_norm": 11.939984321594238, + "learning_rate": 3.48e-05, + "loss": 2.6683, + "step": 62 + }, + { + "epoch": 0.02, + "grad_norm": 8.364795684814453, + "learning_rate": 3.539999999999999e-05, + "loss": 3.1089, + "step": 63 + }, + { + "epoch": 0.02, + "grad_norm": 6.097488880157471, + "learning_rate": 3.5999999999999994e-05, + "loss": 2.5706, + "step": 64 + }, + { + "epoch": 0.02, + "grad_norm": 6.490532398223877, + "learning_rate": 3.6599999999999995e-05, + "loss": 2.7647, + "step": 65 + }, + { + "epoch": 0.02, + "grad_norm": 7.321822166442871, + "learning_rate": 3.7199999999999996e-05, + "loss": 2.9021, + "step": 66 + }, + { + "epoch": 0.02, + "grad_norm": 8.508000373840332, + "learning_rate": 3.78e-05, + "loss": 2.4467, + "step": 67 + }, + { + "epoch": 0.02, + "grad_norm": 8.116604804992676, + "learning_rate": 3.84e-05, + "loss": 2.2756, + "step": 68 + }, + { + "epoch": 0.02, + "grad_norm": 11.946988105773926, + "learning_rate": 3.9e-05, + "loss": 2.9973, + "step": 69 + }, + { + "epoch": 0.02, + "grad_norm": 11.239936828613281, + "learning_rate": 3.96e-05, + "loss": 2.6493, + "step": 70 + }, + { + "epoch": 0.02, + "grad_norm": 9.772220611572266, + "learning_rate": 4.02e-05, + "loss": 2.5128, + "step": 71 + }, + { + "epoch": 0.03, + "grad_norm": 12.785487174987793, + "learning_rate": 4.08e-05, + "loss": 1.8853, + "step": 72 + }, + { + "epoch": 0.03, + "grad_norm": 8.324564933776855, + "learning_rate": 4.14e-05, + "loss": 2.7886, + "step": 73 + }, + { + "epoch": 0.03, + "grad_norm": 11.687192916870117, + "learning_rate": 4.2e-05, + "loss": 1.838, + "step": 74 + }, + { + "epoch": 0.03, + "grad_norm": 9.597622871398926, + "learning_rate": 4.259999999999999e-05, + "loss": 1.8491, + "step": 75 + }, + { + "epoch": 0.03, + "grad_norm": 7.367631912231445, + "learning_rate": 4.319999999999999e-05, + "loss": 2.6484, + "step": 76 + }, + { + "epoch": 0.03, + "grad_norm": 5.516895771026611, + "learning_rate": 4.3799999999999994e-05, + "loss": 2.1669, + "step": 77 + }, + { + "epoch": 0.03, + "grad_norm": 7.077077388763428, + "learning_rate": 4.4399999999999995e-05, + "loss": 2.3611, + "step": 78 + }, + { + "epoch": 0.03, + "grad_norm": 6.426537990570068, + "learning_rate": 4.4999999999999996e-05, + "loss": 1.8982, + "step": 79 + }, + { + "epoch": 0.03, + "grad_norm": 5.794361114501953, + "learning_rate": 4.56e-05, + "loss": 1.7709, + "step": 80 + }, + { + "epoch": 0.03, + "grad_norm": 6.885998725891113, + "learning_rate": 4.62e-05, + "loss": 1.7069, + "step": 81 + }, + { + "epoch": 0.03, + "grad_norm": 7.63657283782959, + "learning_rate": 4.68e-05, + "loss": 1.6549, + "step": 82 + }, + { + "epoch": 0.03, + "grad_norm": 6.502758979797363, + "learning_rate": 4.7399999999999993e-05, + "loss": 1.5249, + "step": 83 + }, + { + "epoch": 0.03, + "grad_norm": 7.67425537109375, + "learning_rate": 4.7999999999999994e-05, + "loss": 1.6269, + "step": 84 + }, + { + "epoch": 0.03, + "grad_norm": 5.933468818664551, + "learning_rate": 4.8599999999999995e-05, + "loss": 1.535, + "step": 85 + }, + { + "epoch": 0.03, + "grad_norm": 6.353681564331055, + "learning_rate": 4.9199999999999997e-05, + "loss": 1.2357, + "step": 86 + }, + { + "epoch": 0.03, + "grad_norm": 8.016544342041016, + "learning_rate": 4.98e-05, + "loss": 1.2392, + "step": 87 + }, + { + "epoch": 0.03, + "grad_norm": 6.43305778503418, + "learning_rate": 5.04e-05, + "loss": 1.5339, + "step": 88 + }, + { + "epoch": 0.03, + "grad_norm": 8.496820449829102, + "learning_rate": 5.1e-05, + "loss": 1.4414, + "step": 89 + }, + { + "epoch": 0.03, + "grad_norm": 12.498025894165039, + "learning_rate": 5.1599999999999994e-05, + "loss": 1.0707, + "step": 90 + }, + { + "epoch": 0.03, + "grad_norm": 6.985462188720703, + "learning_rate": 5.2199999999999995e-05, + "loss": 1.5268, + "step": 91 + }, + { + "epoch": 0.03, + "grad_norm": 10.915468215942383, + "learning_rate": 5.279999999999999e-05, + "loss": 1.4841, + "step": 92 + }, + { + "epoch": 0.03, + "grad_norm": 12.632097244262695, + "learning_rate": 5.339999999999999e-05, + "loss": 2.197, + "step": 93 + }, + { + "epoch": 0.03, + "grad_norm": 11.778656959533691, + "learning_rate": 5.399999999999999e-05, + "loss": 1.1379, + "step": 94 + }, + { + "epoch": 0.03, + "grad_norm": 11.089698791503906, + "learning_rate": 5.459999999999999e-05, + "loss": 1.3323, + "step": 95 + }, + { + "epoch": 0.03, + "grad_norm": 8.403290748596191, + "learning_rate": 5.519999999999999e-05, + "loss": 1.0058, + "step": 96 + }, + { + "epoch": 0.03, + "grad_norm": 12.170100212097168, + "learning_rate": 5.5799999999999994e-05, + "loss": 1.3709, + "step": 97 + }, + { + "epoch": 0.03, + "grad_norm": 13.481610298156738, + "learning_rate": 5.6399999999999995e-05, + "loss": 1.5149, + "step": 98 + }, + { + "epoch": 0.03, + "grad_norm": 8.465336799621582, + "learning_rate": 5.6999999999999996e-05, + "loss": 1.1589, + "step": 99 + }, + { + "epoch": 0.04, + "grad_norm": 8.697178840637207, + "learning_rate": 5.76e-05, + "loss": 1.338, + "step": 100 + }, + { + "epoch": 0.04, + "eval_loss": 1.2086584568023682, + "eval_runtime": 51.6602, + "eval_samples_per_second": 41.967, + "eval_steps_per_second": 10.492, + "eval_wer": 0.8778758000345961, + "step": 100 + }, + { + "epoch": 0.04, + "grad_norm": 9.466093063354492, + "learning_rate": 5.82e-05, + "loss": 1.9736, + "step": 101 + }, + { + "epoch": 0.04, + "grad_norm": 6.502716064453125, + "learning_rate": 5.88e-05, + "loss": 1.2357, + "step": 102 + }, + { + "epoch": 0.04, + "grad_norm": 6.822862148284912, + "learning_rate": 5.94e-05, + "loss": 0.969, + "step": 103 + }, + { + "epoch": 0.04, + "grad_norm": 4.4939680099487305, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.8472, + "step": 104 + }, + { + "epoch": 0.04, + "grad_norm": 5.335927963256836, + "learning_rate": 6.0599999999999996e-05, + "loss": 1.232, + "step": 105 + }, + { + "epoch": 0.04, + "grad_norm": 4.711673736572266, + "learning_rate": 6.12e-05, + "loss": 1.0268, + "step": 106 + }, + { + "epoch": 0.04, + "grad_norm": 6.31601619720459, + "learning_rate": 6.18e-05, + "loss": 0.9878, + "step": 107 + }, + { + "epoch": 0.04, + "grad_norm": 7.08448600769043, + "learning_rate": 6.239999999999999e-05, + "loss": 1.2664, + "step": 108 + }, + { + "epoch": 0.04, + "grad_norm": 4.570302486419678, + "learning_rate": 6.299999999999999e-05, + "loss": 0.5733, + "step": 109 + }, + { + "epoch": 0.04, + "grad_norm": 44.946353912353516, + "learning_rate": 6.359999999999999e-05, + "loss": 1.6091, + "step": 110 + }, + { + "epoch": 0.04, + "grad_norm": 14.037195205688477, + "learning_rate": 6.419999999999999e-05, + "loss": 1.5667, + "step": 111 + }, + { + "epoch": 0.04, + "grad_norm": 9.980315208435059, + "learning_rate": 6.479999999999999e-05, + "loss": 1.8256, + "step": 112 + }, + { + "epoch": 0.04, + "grad_norm": 13.4607572555542, + "learning_rate": 6.539999999999999e-05, + "loss": 1.2149, + "step": 113 + }, + { + "epoch": 0.04, + "grad_norm": 10.761940002441406, + "learning_rate": 6.599999999999999e-05, + "loss": 1.4465, + "step": 114 + }, + { + "epoch": 0.04, + "grad_norm": 8.355905532836914, + "learning_rate": 6.659999999999999e-05, + "loss": 0.873, + "step": 115 + }, + { + "epoch": 0.04, + "grad_norm": 8.082406997680664, + "learning_rate": 6.72e-05, + "loss": 0.8309, + "step": 116 + }, + { + "epoch": 0.04, + "grad_norm": 7.375432968139648, + "learning_rate": 6.78e-05, + "loss": 1.1447, + "step": 117 + }, + { + "epoch": 0.04, + "grad_norm": 13.326135635375977, + "learning_rate": 6.84e-05, + "loss": 0.9311, + "step": 118 + }, + { + "epoch": 0.04, + "grad_norm": 8.355464935302734, + "learning_rate": 6.9e-05, + "loss": 0.9014, + "step": 119 + }, + { + "epoch": 0.04, + "grad_norm": 16.993934631347656, + "learning_rate": 6.96e-05, + "loss": 1.152, + "step": 120 + }, + { + "epoch": 0.04, + "grad_norm": 7.030590057373047, + "learning_rate": 7.02e-05, + "loss": 0.6491, + "step": 121 + }, + { + "epoch": 0.04, + "grad_norm": 10.766969680786133, + "learning_rate": 7.079999999999999e-05, + "loss": 1.738, + "step": 122 + }, + { + "epoch": 0.04, + "grad_norm": 16.502492904663086, + "learning_rate": 7.139999999999999e-05, + "loss": 1.0539, + "step": 123 + }, + { + "epoch": 0.04, + "grad_norm": 10.595362663269043, + "learning_rate": 7.199999999999999e-05, + "loss": 1.0724, + "step": 124 + }, + { + "epoch": 0.04, + "grad_norm": 7.935666084289551, + "learning_rate": 7.259999999999999e-05, + "loss": 0.795, + "step": 125 + }, + { + "epoch": 0.04, + "grad_norm": 7.293322563171387, + "learning_rate": 7.319999999999999e-05, + "loss": 1.279, + "step": 126 + }, + { + "epoch": 0.04, + "grad_norm": 14.84443187713623, + "learning_rate": 7.379999999999999e-05, + "loss": 1.993, + "step": 127 + }, + { + "epoch": 0.04, + "grad_norm": 8.164588928222656, + "learning_rate": 7.439999999999999e-05, + "loss": 1.0304, + "step": 128 + }, + { + "epoch": 0.05, + "grad_norm": 7.177072048187256, + "learning_rate": 7.5e-05, + "loss": 1.1268, + "step": 129 + }, + { + "epoch": 0.05, + "grad_norm": 8.665708541870117, + "learning_rate": 7.56e-05, + "loss": 0.911, + "step": 130 + }, + { + "epoch": 0.05, + "grad_norm": 7.278268337249756, + "learning_rate": 7.62e-05, + "loss": 0.8871, + "step": 131 + }, + { + "epoch": 0.05, + "grad_norm": 4.9616780281066895, + "learning_rate": 7.68e-05, + "loss": 0.7107, + "step": 132 + }, + { + "epoch": 0.05, + "grad_norm": 10.167497634887695, + "learning_rate": 7.74e-05, + "loss": 1.962, + "step": 133 + }, + { + "epoch": 0.05, + "grad_norm": 17.286006927490234, + "learning_rate": 7.8e-05, + "loss": 2.3033, + "step": 134 + }, + { + "epoch": 0.05, + "grad_norm": 5.155051231384277, + "learning_rate": 7.86e-05, + "loss": 0.7892, + "step": 135 + }, + { + "epoch": 0.05, + "grad_norm": 13.249787330627441, + "learning_rate": 7.92e-05, + "loss": 0.6655, + "step": 136 + }, + { + "epoch": 0.05, + "grad_norm": 13.192652702331543, + "learning_rate": 7.98e-05, + "loss": 0.9772, + "step": 137 + }, + { + "epoch": 0.05, + "grad_norm": 6.216400623321533, + "learning_rate": 8.04e-05, + "loss": 1.4718, + "step": 138 + }, + { + "epoch": 0.05, + "grad_norm": 9.961997985839844, + "learning_rate": 8.1e-05, + "loss": 1.1065, + "step": 139 + }, + { + "epoch": 0.05, + "grad_norm": 9.072415351867676, + "learning_rate": 8.16e-05, + "loss": 1.6112, + "step": 140 + }, + { + "epoch": 0.05, + "grad_norm": 8.392301559448242, + "learning_rate": 8.22e-05, + "loss": 1.284, + "step": 141 + }, + { + "epoch": 0.05, + "grad_norm": 7.51064920425415, + "learning_rate": 8.28e-05, + "loss": 1.2114, + "step": 142 + }, + { + "epoch": 0.05, + "grad_norm": 19.81893539428711, + "learning_rate": 8.34e-05, + "loss": 0.909, + "step": 143 + }, + { + "epoch": 0.05, + "grad_norm": 7.77775239944458, + "learning_rate": 8.4e-05, + "loss": 1.4994, + "step": 144 + }, + { + "epoch": 0.05, + "grad_norm": 6.78767204284668, + "learning_rate": 8.459999999999998e-05, + "loss": 0.6961, + "step": 145 + }, + { + "epoch": 0.05, + "grad_norm": 11.081228256225586, + "learning_rate": 8.519999999999998e-05, + "loss": 1.8639, + "step": 146 + }, + { + "epoch": 0.05, + "grad_norm": 11.193565368652344, + "learning_rate": 8.579999999999998e-05, + "loss": 1.6451, + "step": 147 + }, + { + "epoch": 0.05, + "grad_norm": 8.930937767028809, + "learning_rate": 8.639999999999999e-05, + "loss": 0.9557, + "step": 148 + }, + { + "epoch": 0.05, + "grad_norm": 20.329538345336914, + "learning_rate": 8.699999999999999e-05, + "loss": 0.9914, + "step": 149 + }, + { + "epoch": 0.05, + "grad_norm": NaN, + "learning_rate": 8.699999999999999e-05, + "loss": 0.2003, + "step": 150 + }, + { + "epoch": 0.05, + "grad_norm": 8.920183181762695, + "learning_rate": 8.759999999999999e-05, + "loss": 2.1297, + "step": 151 + }, + { + "epoch": 0.05, + "grad_norm": 4.7198567390441895, + "learning_rate": 8.819999999999999e-05, + "loss": 1.0552, + "step": 152 + }, + { + "epoch": 0.05, + "grad_norm": 5.2832512855529785, + "learning_rate": 8.879999999999999e-05, + "loss": 1.0677, + "step": 153 + }, + { + "epoch": 0.05, + "grad_norm": 7.0598530769348145, + "learning_rate": 8.939999999999999e-05, + "loss": 0.9668, + "step": 154 + }, + { + "epoch": 0.05, + "grad_norm": 7.0460052490234375, + "learning_rate": 8.999999999999999e-05, + "loss": 0.917, + "step": 155 + }, + { + "epoch": 0.05, + "grad_norm": 4.30267333984375, + "learning_rate": 9.059999999999999e-05, + "loss": 0.8791, + "step": 156 + }, + { + "epoch": 0.06, + "grad_norm": 15.747892379760742, + "learning_rate": 9.12e-05, + "loss": 1.0952, + "step": 157 + }, + { + "epoch": 0.06, + "grad_norm": 4.930239200592041, + "learning_rate": 9.18e-05, + "loss": 0.7907, + "step": 158 + }, + { + "epoch": 0.06, + "grad_norm": 9.977339744567871, + "learning_rate": 9.24e-05, + "loss": 1.6181, + "step": 159 + }, + { + "epoch": 0.06, + "grad_norm": 5.140440464019775, + "learning_rate": 9.3e-05, + "loss": 0.8012, + "step": 160 + }, + { + "epoch": 0.06, + "grad_norm": 8.982709884643555, + "learning_rate": 9.36e-05, + "loss": 0.8466, + "step": 161 + }, + { + "epoch": 0.06, + "grad_norm": 5.337733268737793, + "learning_rate": 9.419999999999999e-05, + "loss": 0.9134, + "step": 162 + }, + { + "epoch": 0.06, + "grad_norm": 5.364322662353516, + "learning_rate": 9.479999999999999e-05, + "loss": 0.4464, + "step": 163 + }, + { + "epoch": 0.06, + "grad_norm": 13.642416954040527, + "learning_rate": 9.539999999999999e-05, + "loss": 1.9781, + "step": 164 + }, + { + "epoch": 0.06, + "grad_norm": 6.412764549255371, + "learning_rate": 9.599999999999999e-05, + "loss": 0.7324, + "step": 165 + }, + { + "epoch": 0.06, + "grad_norm": 7.758469581604004, + "learning_rate": 9.659999999999999e-05, + "loss": 1.5962, + "step": 166 + }, + { + "epoch": 0.06, + "grad_norm": 6.5638041496276855, + "learning_rate": 9.719999999999999e-05, + "loss": 1.0662, + "step": 167 + }, + { + "epoch": 0.06, + "grad_norm": 5.04262638092041, + "learning_rate": 9.779999999999999e-05, + "loss": 1.106, + "step": 168 + }, + { + "epoch": 0.06, + "grad_norm": 6.401612758636475, + "learning_rate": 9.839999999999999e-05, + "loss": 1.0118, + "step": 169 + }, + { + "epoch": 0.06, + "grad_norm": 7.277603626251221, + "learning_rate": 9.9e-05, + "loss": 1.5725, + "step": 170 + }, + { + "epoch": 0.06, + "grad_norm": 7.579121112823486, + "learning_rate": 9.96e-05, + "loss": 1.0403, + "step": 171 + }, + { + "epoch": 0.06, + "grad_norm": 16.09749412536621, + "learning_rate": 0.0001002, + "loss": 1.1833, + "step": 172 + }, + { + "epoch": 0.06, + "grad_norm": 10.291646957397461, + "learning_rate": 0.0001008, + "loss": 2.9008, + "step": 173 + }, + { + "epoch": 0.06, + "grad_norm": 7.372079372406006, + "learning_rate": 0.0001014, + "loss": 0.779, + "step": 174 + }, + { + "epoch": 0.06, + "grad_norm": 15.55083179473877, + "learning_rate": 0.000102, + "loss": 1.0366, + "step": 175 + }, + { + "epoch": 0.06, + "grad_norm": 10.298460006713867, + "learning_rate": 0.0001026, + "loss": 1.7753, + "step": 176 + }, + { + "epoch": 0.06, + "grad_norm": 7.792703151702881, + "learning_rate": 0.00010319999999999999, + "loss": 1.3867, + "step": 177 + }, + { + "epoch": 0.06, + "grad_norm": 5.284561634063721, + "learning_rate": 0.00010379999999999999, + "loss": 1.2554, + "step": 178 + }, + { + "epoch": 0.06, + "grad_norm": 8.849788665771484, + "learning_rate": 0.00010439999999999999, + "loss": 1.087, + "step": 179 + }, + { + "epoch": 0.06, + "grad_norm": 5.7863664627075195, + "learning_rate": 0.00010499999999999999, + "loss": 1.003, + "step": 180 + }, + { + "epoch": 0.06, + "grad_norm": 6.806325912475586, + "learning_rate": 0.00010559999999999998, + "loss": 0.7003, + "step": 181 + }, + { + "epoch": 0.06, + "grad_norm": 8.477960586547852, + "learning_rate": 0.00010619999999999998, + "loss": 0.9081, + "step": 182 + }, + { + "epoch": 0.06, + "grad_norm": 7.785075664520264, + "learning_rate": 0.00010679999999999998, + "loss": 1.1227, + "step": 183 + }, + { + "epoch": 0.06, + "grad_norm": 6.257397174835205, + "learning_rate": 0.00010739999999999998, + "loss": 0.8996, + "step": 184 + }, + { + "epoch": 0.06, + "grad_norm": 8.024823188781738, + "learning_rate": 0.00010799999999999998, + "loss": 1.2054, + "step": 185 + }, + { + "epoch": 0.07, + "grad_norm": 7.594734191894531, + "learning_rate": 0.00010859999999999998, + "loss": 1.2464, + "step": 186 + }, + { + "epoch": 0.07, + "grad_norm": 9.327756881713867, + "learning_rate": 0.00010919999999999998, + "loss": 1.1473, + "step": 187 + }, + { + "epoch": 0.07, + "grad_norm": 15.395828247070312, + "learning_rate": 0.00010979999999999999, + "loss": 1.7228, + "step": 188 + }, + { + "epoch": 0.07, + "grad_norm": 8.292728424072266, + "learning_rate": 0.00011039999999999999, + "loss": 1.3052, + "step": 189 + }, + { + "epoch": 0.07, + "grad_norm": 6.423582077026367, + "learning_rate": 0.00011099999999999999, + "loss": 0.7601, + "step": 190 + }, + { + "epoch": 0.07, + "grad_norm": 8.61807918548584, + "learning_rate": 0.00011159999999999999, + "loss": 0.6694, + "step": 191 + }, + { + "epoch": 0.07, + "grad_norm": 10.768646240234375, + "learning_rate": 0.00011219999999999999, + "loss": 1.2744, + "step": 192 + }, + { + "epoch": 0.07, + "grad_norm": 6.7909836769104, + "learning_rate": 0.00011279999999999999, + "loss": 0.8817, + "step": 193 + }, + { + "epoch": 0.07, + "grad_norm": 9.817007064819336, + "learning_rate": 0.00011339999999999999, + "loss": 1.3541, + "step": 194 + }, + { + "epoch": 0.07, + "grad_norm": 6.27230978012085, + "learning_rate": 0.00011399999999999999, + "loss": 0.5326, + "step": 195 + }, + { + "epoch": 0.07, + "grad_norm": 9.24206256866455, + "learning_rate": 0.0001146, + "loss": 0.8079, + "step": 196 + }, + { + "epoch": 0.07, + "grad_norm": 7.788369655609131, + "learning_rate": 0.0001152, + "loss": 0.6113, + "step": 197 + }, + { + "epoch": 0.07, + "grad_norm": 16.040983200073242, + "learning_rate": 0.0001158, + "loss": 1.1879, + "step": 198 + }, + { + "epoch": 0.07, + "grad_norm": 8.447440147399902, + "learning_rate": 0.0001164, + "loss": 1.5101, + "step": 199 + }, + { + "epoch": 0.07, + "grad_norm": 12.108735084533691, + "learning_rate": 0.000117, + "loss": 1.9649, + "step": 200 + }, + { + "epoch": 0.07, + "eval_loss": 1.0153309106826782, + "eval_runtime": 50.5715, + "eval_samples_per_second": 42.87, + "eval_steps_per_second": 10.718, + "eval_wer": 0.805137519460301, + "step": 200 + }, + { + "epoch": 0.07, + "grad_norm": 8.759153366088867, + "learning_rate": 0.0001176, + "loss": 1.3547, + "step": 201 + }, + { + "epoch": 0.07, + "grad_norm": 6.493929386138916, + "learning_rate": 0.0001182, + "loss": 2.0605, + "step": 202 + }, + { + "epoch": 0.07, + "grad_norm": 6.16959285736084, + "learning_rate": 0.0001188, + "loss": 1.6697, + "step": 203 + }, + { + "epoch": 0.07, + "grad_norm": 6.1440348625183105, + "learning_rate": 0.0001194, + "loss": 1.4012, + "step": 204 + }, + { + "epoch": 0.07, + "grad_norm": 8.175420761108398, + "learning_rate": 0.00011999999999999999, + "loss": 1.164, + "step": 205 + }, + { + "epoch": 0.07, + "grad_norm": 7.481700420379639, + "learning_rate": 0.00012059999999999999, + "loss": 1.0755, + "step": 206 + }, + { + "epoch": 0.07, + "grad_norm": 5.793044567108154, + "learning_rate": 0.00012119999999999999, + "loss": 0.8223, + "step": 207 + }, + { + "epoch": 0.07, + "grad_norm": 7.168550968170166, + "learning_rate": 0.00012179999999999999, + "loss": 1.309, + "step": 208 + }, + { + "epoch": 0.07, + "grad_norm": 9.77776050567627, + "learning_rate": 0.0001224, + "loss": 1.1608, + "step": 209 + }, + { + "epoch": 0.07, + "grad_norm": 6.302064418792725, + "learning_rate": 0.00012299999999999998, + "loss": 0.7877, + "step": 210 + }, + { + "epoch": 0.07, + "grad_norm": 5.7911152839660645, + "learning_rate": 0.0001236, + "loss": 0.7164, + "step": 211 + }, + { + "epoch": 0.07, + "grad_norm": 6.755216121673584, + "learning_rate": 0.00012419999999999998, + "loss": 1.3267, + "step": 212 + }, + { + "epoch": 0.07, + "grad_norm": 5.61589241027832, + "learning_rate": 0.00012479999999999997, + "loss": 1.1473, + "step": 213 + }, + { + "epoch": 0.07, + "grad_norm": 6.926543712615967, + "learning_rate": 0.00012539999999999999, + "loss": 1.4776, + "step": 214 + }, + { + "epoch": 0.08, + "grad_norm": 30.6265869140625, + "learning_rate": 0.00012599999999999997, + "loss": 1.4803, + "step": 215 + }, + { + "epoch": 0.08, + "grad_norm": 10.334158897399902, + "learning_rate": 0.0001266, + "loss": 1.579, + "step": 216 + }, + { + "epoch": 0.08, + "grad_norm": 8.379430770874023, + "learning_rate": 0.00012719999999999997, + "loss": 1.0805, + "step": 217 + }, + { + "epoch": 0.08, + "grad_norm": 5.460320472717285, + "learning_rate": 0.0001278, + "loss": 0.7705, + "step": 218 + }, + { + "epoch": 0.08, + "grad_norm": 7.094217777252197, + "learning_rate": 0.00012839999999999998, + "loss": 1.061, + "step": 219 + }, + { + "epoch": 0.08, + "grad_norm": 7.368185043334961, + "learning_rate": 0.000129, + "loss": 1.1021, + "step": 220 + }, + { + "epoch": 0.08, + "grad_norm": 8.67043685913086, + "learning_rate": 0.00012959999999999998, + "loss": 1.84, + "step": 221 + }, + { + "epoch": 0.08, + "grad_norm": 9.372278213500977, + "learning_rate": 0.0001302, + "loss": 1.2919, + "step": 222 + }, + { + "epoch": 0.08, + "grad_norm": 10.148419380187988, + "learning_rate": 0.00013079999999999998, + "loss": 1.3467, + "step": 223 + }, + { + "epoch": 0.08, + "grad_norm": 9.848054885864258, + "learning_rate": 0.0001314, + "loss": 1.018, + "step": 224 + }, + { + "epoch": 0.08, + "grad_norm": 13.819363594055176, + "learning_rate": 0.00013199999999999998, + "loss": 1.8879, + "step": 225 + }, + { + "epoch": 0.08, + "grad_norm": 8.070960998535156, + "learning_rate": 0.0001326, + "loss": 2.0939, + "step": 226 + }, + { + "epoch": 0.08, + "grad_norm": 7.248965263366699, + "learning_rate": 0.00013319999999999999, + "loss": 1.4703, + "step": 227 + }, + { + "epoch": 0.08, + "grad_norm": 6.5253472328186035, + "learning_rate": 0.0001338, + "loss": 1.6318, + "step": 228 + }, + { + "epoch": 0.08, + "grad_norm": 8.237483024597168, + "learning_rate": 0.0001344, + "loss": 1.053, + "step": 229 + }, + { + "epoch": 0.08, + "grad_norm": 6.78982400894165, + "learning_rate": 0.000135, + "loss": 1.1394, + "step": 230 + }, + { + "epoch": 0.08, + "grad_norm": 7.947813510894775, + "learning_rate": 0.0001356, + "loss": 1.0707, + "step": 231 + }, + { + "epoch": 0.08, + "grad_norm": 6.178483486175537, + "learning_rate": 0.0001362, + "loss": 1.246, + "step": 232 + }, + { + "epoch": 0.08, + "grad_norm": 6.992203712463379, + "learning_rate": 0.0001368, + "loss": 0.8734, + "step": 233 + }, + { + "epoch": 0.08, + "grad_norm": 6.598153114318848, + "learning_rate": 0.0001374, + "loss": 1.5, + "step": 234 + }, + { + "epoch": 0.08, + "grad_norm": 6.893948078155518, + "learning_rate": 0.000138, + "loss": 0.902, + "step": 235 + }, + { + "epoch": 0.08, + "grad_norm": NaN, + "learning_rate": 0.000138, + "loss": 2.4242, + "step": 236 + }, + { + "epoch": 0.08, + "grad_norm": 10.769316673278809, + "learning_rate": 0.0001386, + "loss": 1.7384, + "step": 237 + }, + { + "epoch": 0.08, + "grad_norm": 8.3101167678833, + "learning_rate": 0.0001392, + "loss": 0.9501, + "step": 238 + }, + { + "epoch": 0.08, + "grad_norm": 7.548043251037598, + "learning_rate": 0.00013979999999999998, + "loss": 0.9354, + "step": 239 + }, + { + "epoch": 0.08, + "grad_norm": 7.125143527984619, + "learning_rate": 0.0001404, + "loss": 0.5763, + "step": 240 + }, + { + "epoch": 0.08, + "grad_norm": 6.7880330085754395, + "learning_rate": 0.00014099999999999998, + "loss": 0.7926, + "step": 241 + }, + { + "epoch": 0.08, + "grad_norm": 5.474691867828369, + "learning_rate": 0.00014159999999999997, + "loss": 0.6569, + "step": 242 + }, + { + "epoch": 0.09, + "grad_norm": 6.943902492523193, + "learning_rate": 0.0001422, + "loss": 0.5578, + "step": 243 + }, + { + "epoch": 0.09, + "grad_norm": 6.652454853057861, + "learning_rate": 0.00014279999999999997, + "loss": 1.0422, + "step": 244 + }, + { + "epoch": 0.09, + "grad_norm": 5.662099838256836, + "learning_rate": 0.0001434, + "loss": 0.9684, + "step": 245 + }, + { + "epoch": 0.09, + "grad_norm": 8.081048965454102, + "learning_rate": 0.00014399999999999998, + "loss": 1.0536, + "step": 246 + }, + { + "epoch": 0.09, + "grad_norm": 6.202698230743408, + "learning_rate": 0.0001446, + "loss": 0.9664, + "step": 247 + }, + { + "epoch": 0.09, + "grad_norm": 6.779542922973633, + "learning_rate": 0.00014519999999999998, + "loss": 1.0268, + "step": 248 + }, + { + "epoch": 0.09, + "grad_norm": 11.181925773620605, + "learning_rate": 0.0001458, + "loss": 1.184, + "step": 249 + }, + { + "epoch": 0.09, + "grad_norm": 11.750948905944824, + "learning_rate": 0.00014639999999999998, + "loss": 1.8029, + "step": 250 + }, + { + "epoch": 0.09, + "grad_norm": 14.2088041305542, + "learning_rate": 0.000147, + "loss": 2.2772, + "step": 251 + }, + { + "epoch": 0.09, + "grad_norm": 7.257857322692871, + "learning_rate": 0.00014759999999999998, + "loss": 1.2177, + "step": 252 + }, + { + "epoch": 0.09, + "grad_norm": 5.847878456115723, + "learning_rate": 0.0001482, + "loss": 1.4445, + "step": 253 + }, + { + "epoch": 0.09, + "grad_norm": 5.302583694458008, + "learning_rate": 0.00014879999999999998, + "loss": 1.1606, + "step": 254 + }, + { + "epoch": 0.09, + "grad_norm": 5.7541680335998535, + "learning_rate": 0.0001494, + "loss": 1.2038, + "step": 255 + }, + { + "epoch": 0.09, + "grad_norm": 4.664234638214111, + "learning_rate": 0.00015, + "loss": 1.3001, + "step": 256 + }, + { + "epoch": 0.09, + "grad_norm": 5.489469051361084, + "learning_rate": 0.00015059999999999997, + "loss": 0.766, + "step": 257 + }, + { + "epoch": 0.09, + "grad_norm": 9.820629119873047, + "learning_rate": 0.0001512, + "loss": 0.6067, + "step": 258 + }, + { + "epoch": 0.09, + "grad_norm": 6.636170864105225, + "learning_rate": 0.00015179999999999998, + "loss": 0.9712, + "step": 259 + }, + { + "epoch": 0.09, + "grad_norm": 4.902698993682861, + "learning_rate": 0.0001524, + "loss": 0.8656, + "step": 260 + }, + { + "epoch": 0.09, + "grad_norm": 12.261871337890625, + "learning_rate": 0.00015299999999999998, + "loss": 1.3325, + "step": 261 + }, + { + "epoch": 0.09, + "grad_norm": 10.92815113067627, + "learning_rate": 0.0001536, + "loss": 1.4591, + "step": 262 + }, + { + "epoch": 0.09, + "grad_norm": 5.674909591674805, + "learning_rate": 0.00015419999999999998, + "loss": 0.5967, + "step": 263 + }, + { + "epoch": 0.09, + "grad_norm": 7.12188196182251, + "learning_rate": 0.0001548, + "loss": 0.5938, + "step": 264 + }, + { + "epoch": 0.09, + "grad_norm": 7.955028057098389, + "learning_rate": 0.00015539999999999998, + "loss": 0.7309, + "step": 265 + }, + { + "epoch": 0.09, + "grad_norm": 8.135099411010742, + "learning_rate": 0.000156, + "loss": 0.9604, + "step": 266 + }, + { + "epoch": 0.09, + "grad_norm": 17.383867263793945, + "learning_rate": 0.00015659999999999998, + "loss": 0.966, + "step": 267 + }, + { + "epoch": 0.09, + "grad_norm": 7.752403736114502, + "learning_rate": 0.0001572, + "loss": 1.5528, + "step": 268 + }, + { + "epoch": 0.09, + "grad_norm": 9.291197776794434, + "learning_rate": 0.0001578, + "loss": 0.7933, + "step": 269 + }, + { + "epoch": 0.09, + "grad_norm": 7.345749855041504, + "learning_rate": 0.0001584, + "loss": 0.7673, + "step": 270 + }, + { + "epoch": 0.09, + "grad_norm": 4.077545166015625, + "learning_rate": 0.000159, + "loss": 0.4076, + "step": 271 + }, + { + "epoch": 0.1, + "grad_norm": 7.044876575469971, + "learning_rate": 0.0001596, + "loss": 2.2965, + "step": 272 + }, + { + "epoch": 0.1, + "grad_norm": 8.072477340698242, + "learning_rate": 0.0001602, + "loss": 1.0095, + "step": 273 + }, + { + "epoch": 0.1, + "grad_norm": 7.533301830291748, + "learning_rate": 0.0001608, + "loss": 1.6276, + "step": 274 + }, + { + "epoch": 0.1, + "grad_norm": 10.649826049804688, + "learning_rate": 0.0001614, + "loss": 1.1563, + "step": 275 + }, + { + "epoch": 0.1, + "grad_norm": 6.723114013671875, + "learning_rate": 0.000162, + "loss": 1.5564, + "step": 276 + }, + { + "epoch": 0.1, + "grad_norm": 4.508934497833252, + "learning_rate": 0.0001626, + "loss": 1.1316, + "step": 277 + }, + { + "epoch": 0.1, + "grad_norm": 6.111855983734131, + "learning_rate": 0.0001632, + "loss": 1.0925, + "step": 278 + }, + { + "epoch": 0.1, + "grad_norm": 7.4623613357543945, + "learning_rate": 0.0001638, + "loss": 0.7827, + "step": 279 + }, + { + "epoch": 0.1, + "grad_norm": 4.680227756500244, + "learning_rate": 0.0001644, + "loss": 0.9112, + "step": 280 + }, + { + "epoch": 0.1, + "grad_norm": 11.717487335205078, + "learning_rate": 0.000165, + "loss": 1.1662, + "step": 281 + }, + { + "epoch": 0.1, + "grad_norm": 4.690805912017822, + "learning_rate": 0.0001656, + "loss": 1.0872, + "step": 282 + }, + { + "epoch": 0.1, + "grad_norm": 8.276322364807129, + "learning_rate": 0.0001662, + "loss": 1.0738, + "step": 283 + }, + { + "epoch": 0.1, + "grad_norm": 5.989705562591553, + "learning_rate": 0.0001668, + "loss": 1.2949, + "step": 284 + }, + { + "epoch": 0.1, + "grad_norm": 5.663930416107178, + "learning_rate": 0.0001674, + "loss": 0.5814, + "step": 285 + }, + { + "epoch": 0.1, + "grad_norm": 6.9047698974609375, + "learning_rate": 0.000168, + "loss": 1.0676, + "step": 286 + }, + { + "epoch": 0.1, + "grad_norm": 7.110291957855225, + "learning_rate": 0.0001686, + "loss": 1.227, + "step": 287 + }, + { + "epoch": 0.1, + "grad_norm": 8.15951156616211, + "learning_rate": 0.00016919999999999997, + "loss": 0.9983, + "step": 288 + }, + { + "epoch": 0.1, + "grad_norm": 8.399914741516113, + "learning_rate": 0.00016979999999999998, + "loss": 0.4559, + "step": 289 + }, + { + "epoch": 0.1, + "grad_norm": 7.747352123260498, + "learning_rate": 0.00017039999999999997, + "loss": 1.0402, + "step": 290 + }, + { + "epoch": 0.1, + "grad_norm": 5.47703218460083, + "learning_rate": 0.00017099999999999998, + "loss": 1.167, + "step": 291 + }, + { + "epoch": 0.1, + "grad_norm": 6.136383533477783, + "learning_rate": 0.00017159999999999997, + "loss": 0.7194, + "step": 292 + }, + { + "epoch": 0.1, + "grad_norm": 6.892377853393555, + "learning_rate": 0.00017219999999999998, + "loss": 0.9781, + "step": 293 + }, + { + "epoch": 0.1, + "grad_norm": 14.462456703186035, + "learning_rate": 0.00017279999999999997, + "loss": 0.7622, + "step": 294 + }, + { + "epoch": 0.1, + "grad_norm": 7.86848783493042, + "learning_rate": 0.00017339999999999996, + "loss": 1.2177, + "step": 295 + }, + { + "epoch": 0.1, + "grad_norm": 7.074151992797852, + "learning_rate": 0.00017399999999999997, + "loss": 1.0429, + "step": 296 + }, + { + "epoch": 0.1, + "grad_norm": 6.4072675704956055, + "learning_rate": 0.00017459999999999996, + "loss": 0.6744, + "step": 297 + }, + { + "epoch": 0.1, + "grad_norm": 12.912552833557129, + "learning_rate": 0.00017519999999999998, + "loss": 0.888, + "step": 298 + }, + { + "epoch": 0.1, + "grad_norm": 10.346461296081543, + "learning_rate": 0.00017579999999999996, + "loss": 0.7609, + "step": 299 + }, + { + "epoch": 0.11, + "grad_norm": 8.770968437194824, + "learning_rate": 0.00017639999999999998, + "loss": 0.905, + "step": 300 + }, + { + "epoch": 0.11, + "eval_loss": 1.0725961923599243, + "eval_runtime": 51.0006, + "eval_samples_per_second": 42.509, + "eval_steps_per_second": 10.627, + "eval_wer": 0.8373983739837398, + "step": 300 + }, + { + "epoch": 0.11, + "grad_norm": 6.282247066497803, + "learning_rate": 0.00017699999999999997, + "loss": 1.1397, + "step": 301 + }, + { + "epoch": 0.11, + "grad_norm": 4.363534927368164, + "learning_rate": 0.00017759999999999998, + "loss": 0.9052, + "step": 302 + }, + { + "epoch": 0.11, + "grad_norm": 7.095592021942139, + "learning_rate": 0.00017819999999999997, + "loss": 0.7081, + "step": 303 + }, + { + "epoch": 0.11, + "grad_norm": 6.591117858886719, + "learning_rate": 0.00017879999999999998, + "loss": 1.198, + "step": 304 + }, + { + "epoch": 0.11, + "grad_norm": 7.835860252380371, + "learning_rate": 0.00017939999999999997, + "loss": 1.4741, + "step": 305 + }, + { + "epoch": 0.11, + "grad_norm": 5.496611595153809, + "learning_rate": 0.00017999999999999998, + "loss": 0.7443, + "step": 306 + }, + { + "epoch": 0.11, + "grad_norm": 5.134020805358887, + "learning_rate": 0.00018059999999999997, + "loss": 0.6569, + "step": 307 + }, + { + "epoch": 0.11, + "grad_norm": 4.844152450561523, + "learning_rate": 0.00018119999999999999, + "loss": 1.3045, + "step": 308 + }, + { + "epoch": 0.11, + "grad_norm": 4.551060676574707, + "learning_rate": 0.00018179999999999997, + "loss": 0.564, + "step": 309 + }, + { + "epoch": 0.11, + "grad_norm": 6.43617582321167, + "learning_rate": 0.0001824, + "loss": 1.0049, + "step": 310 + }, + { + "epoch": 0.11, + "grad_norm": 5.678737640380859, + "learning_rate": 0.00018299999999999998, + "loss": 0.5783, + "step": 311 + }, + { + "epoch": 0.11, + "grad_norm": 4.169671058654785, + "learning_rate": 0.0001836, + "loss": 0.6863, + "step": 312 + }, + { + "epoch": 0.11, + "grad_norm": 8.256979942321777, + "learning_rate": 0.00018419999999999998, + "loss": 1.0964, + "step": 313 + }, + { + "epoch": 0.11, + "grad_norm": 8.771745681762695, + "learning_rate": 0.0001848, + "loss": 1.2424, + "step": 314 + }, + { + "epoch": 0.11, + "grad_norm": 7.281775951385498, + "learning_rate": 0.00018539999999999998, + "loss": 2.047, + "step": 315 + }, + { + "epoch": 0.11, + "grad_norm": 5.94806432723999, + "learning_rate": 0.000186, + "loss": 1.0021, + "step": 316 + }, + { + "epoch": 0.11, + "grad_norm": 11.512472152709961, + "learning_rate": 0.00018659999999999998, + "loss": 1.3445, + "step": 317 + }, + { + "epoch": 0.11, + "grad_norm": 5.457605838775635, + "learning_rate": 0.0001872, + "loss": 0.9049, + "step": 318 + }, + { + "epoch": 0.11, + "grad_norm": 7.318243026733398, + "learning_rate": 0.00018779999999999998, + "loss": 1.0203, + "step": 319 + }, + { + "epoch": 0.11, + "grad_norm": 6.3816118240356445, + "learning_rate": 0.00018839999999999997, + "loss": 1.0586, + "step": 320 + }, + { + "epoch": 0.11, + "grad_norm": 6.394790172576904, + "learning_rate": 0.00018899999999999999, + "loss": 0.9951, + "step": 321 + }, + { + "epoch": 0.11, + "grad_norm": 8.764740943908691, + "learning_rate": 0.00018959999999999997, + "loss": 0.9491, + "step": 322 + }, + { + "epoch": 0.11, + "grad_norm": 7.136832237243652, + "learning_rate": 0.0001902, + "loss": 1.068, + "step": 323 + }, + { + "epoch": 0.11, + "grad_norm": 18.01349639892578, + "learning_rate": 0.00019079999999999998, + "loss": 1.7603, + "step": 324 + }, + { + "epoch": 0.11, + "grad_norm": 7.3441386222839355, + "learning_rate": 0.0001914, + "loss": 0.8072, + "step": 325 + }, + { + "epoch": 0.11, + "grad_norm": 7.931651592254639, + "learning_rate": 0.00019199999999999998, + "loss": 1.7458, + "step": 326 + }, + { + "epoch": 0.11, + "grad_norm": 6.381895542144775, + "learning_rate": 0.0001926, + "loss": 1.2449, + "step": 327 + }, + { + "epoch": 0.11, + "grad_norm": 6.183620452880859, + "learning_rate": 0.00019319999999999998, + "loss": 0.9892, + "step": 328 + }, + { + "epoch": 0.12, + "grad_norm": 5.490755558013916, + "learning_rate": 0.0001938, + "loss": 1.1006, + "step": 329 + }, + { + "epoch": 0.12, + "grad_norm": 5.9131011962890625, + "learning_rate": 0.00019439999999999998, + "loss": 1.2143, + "step": 330 + }, + { + "epoch": 0.12, + "grad_norm": 4.422755718231201, + "learning_rate": 0.000195, + "loss": 1.1383, + "step": 331 + }, + { + "epoch": 0.12, + "grad_norm": 4.351710796356201, + "learning_rate": 0.00019559999999999998, + "loss": 0.9472, + "step": 332 + }, + { + "epoch": 0.12, + "grad_norm": 4.6042375564575195, + "learning_rate": 0.0001962, + "loss": 0.5255, + "step": 333 + }, + { + "epoch": 0.12, + "grad_norm": 4.7591094970703125, + "learning_rate": 0.00019679999999999999, + "loss": 1.0898, + "step": 334 + }, + { + "epoch": 0.12, + "grad_norm": 17.369613647460938, + "learning_rate": 0.0001974, + "loss": 0.8401, + "step": 335 + }, + { + "epoch": 0.12, + "grad_norm": 8.167240142822266, + "learning_rate": 0.000198, + "loss": 1.9533, + "step": 336 + }, + { + "epoch": 0.12, + "grad_norm": 7.36649227142334, + "learning_rate": 0.0001986, + "loss": 1.0694, + "step": 337 + }, + { + "epoch": 0.12, + "grad_norm": 18.003772735595703, + "learning_rate": 0.0001992, + "loss": 0.8262, + "step": 338 + }, + { + "epoch": 0.12, + "grad_norm": 7.110168933868408, + "learning_rate": 0.0001998, + "loss": 1.0649, + "step": 339 + }, + { + "epoch": 0.12, + "grad_norm": 11.043972969055176, + "learning_rate": 0.0002004, + "loss": 0.9792, + "step": 340 + }, + { + "epoch": 0.12, + "grad_norm": 4.065829753875732, + "learning_rate": 0.000201, + "loss": 0.5039, + "step": 341 + }, + { + "epoch": 0.12, + "grad_norm": 8.094096183776855, + "learning_rate": 0.0002016, + "loss": 0.9588, + "step": 342 + }, + { + "epoch": 0.12, + "grad_norm": 9.9978609085083, + "learning_rate": 0.0002022, + "loss": 1.2725, + "step": 343 + }, + { + "epoch": 0.12, + "grad_norm": 13.194984436035156, + "learning_rate": 0.0002028, + "loss": 1.3617, + "step": 344 + }, + { + "epoch": 0.12, + "grad_norm": 8.760825157165527, + "learning_rate": 0.00020339999999999998, + "loss": 1.0096, + "step": 345 + }, + { + "epoch": 0.12, + "grad_norm": 13.242958068847656, + "learning_rate": 0.000204, + "loss": 1.2532, + "step": 346 + }, + { + "epoch": 0.12, + "grad_norm": 10.69446086883545, + "learning_rate": 0.00020459999999999999, + "loss": 0.9107, + "step": 347 + }, + { + "epoch": 0.12, + "grad_norm": 14.089339256286621, + "learning_rate": 0.0002052, + "loss": 1.0958, + "step": 348 + }, + { + "epoch": 0.12, + "grad_norm": 10.158014297485352, + "learning_rate": 0.0002058, + "loss": 1.3443, + "step": 349 + }, + { + "epoch": 0.12, + "grad_norm": 277.1723937988281, + "learning_rate": 0.00020639999999999998, + "loss": 1.2569, + "step": 350 + }, + { + "epoch": 0.12, + "grad_norm": 15.648301124572754, + "learning_rate": 0.00020699999999999996, + "loss": 2.473, + "step": 351 + }, + { + "epoch": 0.12, + "grad_norm": 5.679229259490967, + "learning_rate": 0.00020759999999999998, + "loss": 1.3066, + "step": 352 + }, + { + "epoch": 0.12, + "grad_norm": 6.558560371398926, + "learning_rate": 0.00020819999999999996, + "loss": 1.0691, + "step": 353 + }, + { + "epoch": 0.12, + "grad_norm": 3.9980814456939697, + "learning_rate": 0.00020879999999999998, + "loss": 0.9881, + "step": 354 + }, + { + "epoch": 0.12, + "grad_norm": 4.269549369812012, + "learning_rate": 0.00020939999999999997, + "loss": 0.9437, + "step": 355 + }, + { + "epoch": 0.12, + "grad_norm": 5.434672832489014, + "learning_rate": 0.00020999999999999998, + "loss": 1.139, + "step": 356 + }, + { + "epoch": 0.13, + "grad_norm": 3.682892084121704, + "learning_rate": 0.00021059999999999997, + "loss": 0.9274, + "step": 357 + }, + { + "epoch": 0.13, + "grad_norm": 4.203353404998779, + "learning_rate": 0.00021119999999999996, + "loss": 0.9735, + "step": 358 + }, + { + "epoch": 0.13, + "grad_norm": 4.374783039093018, + "learning_rate": 0.00021179999999999997, + "loss": 0.7179, + "step": 359 + }, + { + "epoch": 0.13, + "grad_norm": 6.496840476989746, + "learning_rate": 0.00021239999999999996, + "loss": 0.6335, + "step": 360 + }, + { + "epoch": 0.13, + "grad_norm": 4.820166110992432, + "learning_rate": 0.00021299999999999997, + "loss": 0.7724, + "step": 361 + }, + { + "epoch": 0.13, + "grad_norm": 4.941391468048096, + "learning_rate": 0.00021359999999999996, + "loss": 0.6864, + "step": 362 + }, + { + "epoch": 0.13, + "grad_norm": 4.761062145233154, + "learning_rate": 0.00021419999999999998, + "loss": 1.3754, + "step": 363 + }, + { + "epoch": 0.13, + "grad_norm": 8.317700386047363, + "learning_rate": 0.00021479999999999996, + "loss": 1.2307, + "step": 364 + }, + { + "epoch": 0.13, + "grad_norm": 7.029150009155273, + "learning_rate": 0.00021539999999999998, + "loss": 0.7201, + "step": 365 + }, + { + "epoch": 0.13, + "grad_norm": 11.40591812133789, + "learning_rate": 0.00021599999999999996, + "loss": 1.1349, + "step": 366 + }, + { + "epoch": 0.13, + "grad_norm": 7.807318687438965, + "learning_rate": 0.00021659999999999998, + "loss": 1.4664, + "step": 367 + }, + { + "epoch": 0.13, + "grad_norm": 7.696564197540283, + "learning_rate": 0.00021719999999999997, + "loss": 0.9169, + "step": 368 + }, + { + "epoch": 0.13, + "grad_norm": 6.1474151611328125, + "learning_rate": 0.00021779999999999998, + "loss": 1.0351, + "step": 369 + }, + { + "epoch": 0.13, + "grad_norm": 4.485285758972168, + "learning_rate": 0.00021839999999999997, + "loss": 0.5278, + "step": 370 + }, + { + "epoch": 0.13, + "grad_norm": 4.146036624908447, + "learning_rate": 0.00021899999999999998, + "loss": 0.6379, + "step": 371 + }, + { + "epoch": 0.13, + "grad_norm": 7.9090800285339355, + "learning_rate": 0.00021959999999999997, + "loss": 1.071, + "step": 372 + }, + { + "epoch": 0.13, + "grad_norm": 9.113584518432617, + "learning_rate": 0.00022019999999999999, + "loss": 0.8886, + "step": 373 + }, + { + "epoch": 0.13, + "grad_norm": 6.609561920166016, + "learning_rate": 0.00022079999999999997, + "loss": 0.7262, + "step": 374 + }, + { + "epoch": 0.13, + "grad_norm": 8.190186500549316, + "learning_rate": 0.0002214, + "loss": 0.7478, + "step": 375 + }, + { + "epoch": 0.13, + "grad_norm": 6.347649097442627, + "learning_rate": 0.00022199999999999998, + "loss": 1.5713, + "step": 376 + }, + { + "epoch": 0.13, + "grad_norm": 6.180627346038818, + "learning_rate": 0.0002226, + "loss": 1.8954, + "step": 377 + }, + { + "epoch": 0.13, + "grad_norm": 5.399271488189697, + "learning_rate": 0.00022319999999999998, + "loss": 1.382, + "step": 378 + }, + { + "epoch": 0.13, + "grad_norm": 6.500628471374512, + "learning_rate": 0.0002238, + "loss": 1.1642, + "step": 379 + }, + { + "epoch": 0.13, + "grad_norm": 8.18594741821289, + "learning_rate": 0.00022439999999999998, + "loss": 1.3459, + "step": 380 + }, + { + "epoch": 0.13, + "grad_norm": 3.980257272720337, + "learning_rate": 0.000225, + "loss": 0.6223, + "step": 381 + }, + { + "epoch": 0.13, + "grad_norm": 5.482236385345459, + "learning_rate": 0.00022559999999999998, + "loss": 1.2836, + "step": 382 + }, + { + "epoch": 0.13, + "grad_norm": 4.334828853607178, + "learning_rate": 0.00022619999999999997, + "loss": 0.9142, + "step": 383 + }, + { + "epoch": 0.13, + "grad_norm": 3.967797040939331, + "learning_rate": 0.00022679999999999998, + "loss": 0.5278, + "step": 384 + }, + { + "epoch": 0.13, + "grad_norm": 4.172309398651123, + "learning_rate": 0.00022739999999999997, + "loss": 0.6968, + "step": 385 + }, + { + "epoch": 0.14, + "grad_norm": 3.380971670150757, + "learning_rate": 0.00022799999999999999, + "loss": 0.6464, + "step": 386 + }, + { + "epoch": 0.14, + "grad_norm": 7.23440408706665, + "learning_rate": 0.00022859999999999997, + "loss": 1.8373, + "step": 387 + }, + { + "epoch": 0.14, + "grad_norm": 6.774138450622559, + "learning_rate": 0.0002292, + "loss": 1.0644, + "step": 388 + }, + { + "epoch": 0.14, + "grad_norm": 8.141240119934082, + "learning_rate": 0.00022979999999999997, + "loss": 1.4621, + "step": 389 + }, + { + "epoch": 0.14, + "grad_norm": 8.437649726867676, + "learning_rate": 0.0002304, + "loss": 1.0165, + "step": 390 + }, + { + "epoch": 0.14, + "grad_norm": 6.1060791015625, + "learning_rate": 0.00023099999999999998, + "loss": 1.5574, + "step": 391 + }, + { + "epoch": 0.14, + "grad_norm": 6.36237907409668, + "learning_rate": 0.0002316, + "loss": 0.8218, + "step": 392 + }, + { + "epoch": 0.14, + "grad_norm": 7.044564247131348, + "learning_rate": 0.00023219999999999998, + "loss": 1.0601, + "step": 393 + }, + { + "epoch": 0.14, + "grad_norm": 4.057678699493408, + "learning_rate": 0.0002328, + "loss": 0.8001, + "step": 394 + }, + { + "epoch": 0.14, + "grad_norm": 6.342691421508789, + "learning_rate": 0.00023339999999999998, + "loss": 0.5296, + "step": 395 + }, + { + "epoch": 0.14, + "grad_norm": 7.52527379989624, + "learning_rate": 0.000234, + "loss": 0.7155, + "step": 396 + }, + { + "epoch": 0.14, + "grad_norm": 8.385841369628906, + "learning_rate": 0.00023459999999999998, + "loss": 0.7998, + "step": 397 + }, + { + "epoch": 0.14, + "grad_norm": 8.12092399597168, + "learning_rate": 0.0002352, + "loss": 1.0234, + "step": 398 + }, + { + "epoch": 0.14, + "grad_norm": 14.459668159484863, + "learning_rate": 0.00023579999999999999, + "loss": 0.457, + "step": 399 + }, + { + "epoch": 0.14, + "grad_norm": 9.308672904968262, + "learning_rate": 0.0002364, + "loss": 2.0688, + "step": 400 + }, + { + "epoch": 0.14, + "eval_loss": 1.1829124689102173, + "eval_runtime": 50.8154, + "eval_samples_per_second": 42.664, + "eval_steps_per_second": 10.666, + "eval_wer": 0.8181975436775645, + "step": 400 + }, + { + "epoch": 0.14, + "grad_norm": 4.8363261222839355, + "learning_rate": 0.000237, + "loss": 1.5778, + "step": 401 + }, + { + "epoch": 0.14, + "grad_norm": 4.74521541595459, + "learning_rate": 0.0002376, + "loss": 0.9614, + "step": 402 + }, + { + "epoch": 0.14, + "grad_norm": 4.534332752227783, + "learning_rate": 0.0002382, + "loss": 0.8923, + "step": 403 + }, + { + "epoch": 0.14, + "grad_norm": 5.4116997718811035, + "learning_rate": 0.0002388, + "loss": 1.2341, + "step": 404 + }, + { + "epoch": 0.14, + "grad_norm": 6.503213882446289, + "learning_rate": 0.0002394, + "loss": 1.2941, + "step": 405 + }, + { + "epoch": 0.14, + "grad_norm": 7.444057941436768, + "learning_rate": 0.00023999999999999998, + "loss": 0.8984, + "step": 406 + }, + { + "epoch": 0.14, + "grad_norm": 5.511045932769775, + "learning_rate": 0.0002406, + "loss": 0.7948, + "step": 407 + }, + { + "epoch": 0.14, + "grad_norm": 7.183623313903809, + "learning_rate": 0.00024119999999999998, + "loss": 1.095, + "step": 408 + }, + { + "epoch": 0.14, + "grad_norm": 6.442593574523926, + "learning_rate": 0.0002418, + "loss": 1.6099, + "step": 409 + }, + { + "epoch": 0.14, + "grad_norm": 3.9881575107574463, + "learning_rate": 0.00024239999999999998, + "loss": 0.8797, + "step": 410 + }, + { + "epoch": 0.14, + "grad_norm": 10.62885570526123, + "learning_rate": 0.000243, + "loss": 1.0199, + "step": 411 + }, + { + "epoch": 0.14, + "grad_norm": 7.63018798828125, + "learning_rate": 0.00024359999999999999, + "loss": 0.9954, + "step": 412 + }, + { + "epoch": 0.14, + "grad_norm": 6.816760540008545, + "learning_rate": 0.00024419999999999997, + "loss": 0.9977, + "step": 413 + }, + { + "epoch": 0.15, + "grad_norm": 15.825918197631836, + "learning_rate": 0.0002448, + "loss": 3.4223, + "step": 414 + }, + { + "epoch": 0.15, + "grad_norm": 5.994204998016357, + "learning_rate": 0.00024539999999999995, + "loss": 1.4219, + "step": 415 + }, + { + "epoch": 0.15, + "grad_norm": 7.240597248077393, + "learning_rate": 0.00024599999999999996, + "loss": 1.4543, + "step": 416 + }, + { + "epoch": 0.15, + "grad_norm": 4.940434455871582, + "learning_rate": 0.0002466, + "loss": 0.6018, + "step": 417 + }, + { + "epoch": 0.15, + "grad_norm": 10.660022735595703, + "learning_rate": 0.0002472, + "loss": 1.586, + "step": 418 + }, + { + "epoch": 0.15, + "grad_norm": 6.618219375610352, + "learning_rate": 0.00024779999999999995, + "loss": 1.0557, + "step": 419 + }, + { + "epoch": 0.15, + "grad_norm": 5.79208517074585, + "learning_rate": 0.00024839999999999997, + "loss": 0.8003, + "step": 420 + }, + { + "epoch": 0.15, + "grad_norm": 5.461696147918701, + "learning_rate": 0.000249, + "loss": 1.1184, + "step": 421 + }, + { + "epoch": 0.15, + "grad_norm": 4.876696586608887, + "learning_rate": 0.00024959999999999994, + "loss": 0.8481, + "step": 422 + }, + { + "epoch": 0.15, + "grad_norm": 12.002443313598633, + "learning_rate": 0.00025019999999999996, + "loss": 1.8712, + "step": 423 + }, + { + "epoch": 0.15, + "grad_norm": 6.414361476898193, + "learning_rate": 0.00025079999999999997, + "loss": 1.0893, + "step": 424 + }, + { + "epoch": 0.15, + "grad_norm": NaN, + "learning_rate": 0.00025079999999999997, + "loss": 0.2905, + "step": 425 + }, + { + "epoch": 0.15, + "grad_norm": 5.970865249633789, + "learning_rate": 0.0002514, + "loss": 1.5082, + "step": 426 + }, + { + "epoch": 0.15, + "grad_norm": 5.599332332611084, + "learning_rate": 0.00025199999999999995, + "loss": 1.8596, + "step": 427 + }, + { + "epoch": 0.15, + "grad_norm": 5.235030651092529, + "learning_rate": 0.00025259999999999996, + "loss": 1.3534, + "step": 428 + }, + { + "epoch": 0.15, + "grad_norm": 5.113259792327881, + "learning_rate": 0.0002532, + "loss": 1.1064, + "step": 429 + }, + { + "epoch": 0.15, + "grad_norm": 4.495222091674805, + "learning_rate": 0.0002538, + "loss": 1.2008, + "step": 430 + }, + { + "epoch": 0.15, + "grad_norm": 4.514638900756836, + "learning_rate": 0.00025439999999999995, + "loss": 1.0261, + "step": 431 + }, + { + "epoch": 0.15, + "grad_norm": 5.7902960777282715, + "learning_rate": 0.00025499999999999996, + "loss": 1.0724, + "step": 432 + }, + { + "epoch": 0.15, + "grad_norm": 8.044351577758789, + "learning_rate": 0.0002556, + "loss": 1.1708, + "step": 433 + }, + { + "epoch": 0.15, + "grad_norm": 5.15432071685791, + "learning_rate": 0.0002562, + "loss": 1.1937, + "step": 434 + }, + { + "epoch": 0.15, + "grad_norm": 7.8752593994140625, + "learning_rate": 0.00025679999999999995, + "loss": 0.8926, + "step": 435 + }, + { + "epoch": 0.15, + "grad_norm": 11.0122652053833, + "learning_rate": 0.00025739999999999997, + "loss": 0.9854, + "step": 436 + }, + { + "epoch": 0.15, + "grad_norm": 6.583104133605957, + "learning_rate": 0.000258, + "loss": 1.3097, + "step": 437 + }, + { + "epoch": 0.15, + "grad_norm": 5.033292770385742, + "learning_rate": 0.0002586, + "loss": 1.1236, + "step": 438 + }, + { + "epoch": 0.15, + "grad_norm": 8.683174133300781, + "learning_rate": 0.00025919999999999996, + "loss": 1.2291, + "step": 439 + }, + { + "epoch": 0.15, + "grad_norm": 5.946511268615723, + "learning_rate": 0.00025979999999999997, + "loss": 0.9983, + "step": 440 + }, + { + "epoch": 0.15, + "grad_norm": 12.835111618041992, + "learning_rate": 0.0002604, + "loss": 1.512, + "step": 441 + }, + { + "epoch": 0.15, + "grad_norm": 8.656193733215332, + "learning_rate": 0.000261, + "loss": 0.9368, + "step": 442 + }, + { + "epoch": 0.16, + "grad_norm": 7.563745498657227, + "learning_rate": 0.00026159999999999996, + "loss": 1.2435, + "step": 443 + }, + { + "epoch": 0.16, + "grad_norm": 6.136525630950928, + "learning_rate": 0.0002622, + "loss": 0.4889, + "step": 444 + }, + { + "epoch": 0.16, + "grad_norm": 5.84512996673584, + "learning_rate": 0.0002628, + "loss": 0.8539, + "step": 445 + }, + { + "epoch": 0.16, + "grad_norm": 6.192235946655273, + "learning_rate": 0.00026339999999999995, + "loss": 1.1632, + "step": 446 + }, + { + "epoch": 0.16, + "grad_norm": 9.75316333770752, + "learning_rate": 0.00026399999999999997, + "loss": 1.2679, + "step": 447 + }, + { + "epoch": 0.16, + "grad_norm": 5.024916648864746, + "learning_rate": 0.0002646, + "loss": 1.1505, + "step": 448 + }, + { + "epoch": 0.16, + "grad_norm": 7.302039623260498, + "learning_rate": 0.0002652, + "loss": 1.0232, + "step": 449 + }, + { + "epoch": 0.16, + "grad_norm": 5.526060104370117, + "learning_rate": 0.00026579999999999996, + "loss": 0.9171, + "step": 450 + }, + { + "epoch": 0.16, + "grad_norm": 4.972250938415527, + "learning_rate": 0.00026639999999999997, + "loss": 1.7666, + "step": 451 + }, + { + "epoch": 0.16, + "grad_norm": 5.7761054039001465, + "learning_rate": 0.000267, + "loss": 1.3188, + "step": 452 + }, + { + "epoch": 0.16, + "grad_norm": 4.936556339263916, + "learning_rate": 0.0002676, + "loss": 0.8506, + "step": 453 + }, + { + "epoch": 0.16, + "grad_norm": 4.347964286804199, + "learning_rate": 0.00026819999999999996, + "loss": 1.0834, + "step": 454 + }, + { + "epoch": 0.16, + "grad_norm": 3.9066038131713867, + "learning_rate": 0.0002688, + "loss": 1.0587, + "step": 455 + }, + { + "epoch": 0.16, + "grad_norm": 10.331660270690918, + "learning_rate": 0.0002694, + "loss": 1.6772, + "step": 456 + }, + { + "epoch": 0.16, + "grad_norm": 6.844241142272949, + "learning_rate": 0.00027, + "loss": 1.0441, + "step": 457 + }, + { + "epoch": 0.16, + "grad_norm": 7.055009365081787, + "learning_rate": 0.00027059999999999996, + "loss": 1.4951, + "step": 458 + }, + { + "epoch": 0.16, + "grad_norm": 5.321764945983887, + "learning_rate": 0.0002712, + "loss": 0.9027, + "step": 459 + }, + { + "epoch": 0.16, + "grad_norm": 6.612902641296387, + "learning_rate": 0.0002718, + "loss": 0.9197, + "step": 460 + }, + { + "epoch": 0.16, + "grad_norm": 7.686680316925049, + "learning_rate": 0.0002724, + "loss": 0.8149, + "step": 461 + }, + { + "epoch": 0.16, + "grad_norm": 5.893856048583984, + "learning_rate": 0.00027299999999999997, + "loss": 0.7485, + "step": 462 + }, + { + "epoch": 0.16, + "grad_norm": 5.316108703613281, + "learning_rate": 0.0002736, + "loss": 0.8778, + "step": 463 + }, + { + "epoch": 0.16, + "grad_norm": 8.641250610351562, + "learning_rate": 0.0002742, + "loss": 1.4959, + "step": 464 + }, + { + "epoch": 0.16, + "grad_norm": 6.9227447509765625, + "learning_rate": 0.0002748, + "loss": 0.9793, + "step": 465 + }, + { + "epoch": 0.16, + "grad_norm": 17.23862075805664, + "learning_rate": 0.00027539999999999997, + "loss": 1.0452, + "step": 466 + }, + { + "epoch": 0.16, + "grad_norm": 6.343098163604736, + "learning_rate": 0.000276, + "loss": 1.271, + "step": 467 + }, + { + "epoch": 0.16, + "grad_norm": 9.62498664855957, + "learning_rate": 0.0002766, + "loss": 0.9764, + "step": 468 + }, + { + "epoch": 0.16, + "grad_norm": 7.0901336669921875, + "learning_rate": 0.0002772, + "loss": 1.3291, + "step": 469 + }, + { + "epoch": 0.16, + "grad_norm": 8.628947257995605, + "learning_rate": 0.0002778, + "loss": 1.2843, + "step": 470 + }, + { + "epoch": 0.17, + "grad_norm": 6.520139217376709, + "learning_rate": 0.0002784, + "loss": 0.8886, + "step": 471 + }, + { + "epoch": 0.17, + "grad_norm": 12.442567825317383, + "learning_rate": 0.000279, + "loss": 1.1537, + "step": 472 + }, + { + "epoch": 0.17, + "grad_norm": 6.3325347900390625, + "learning_rate": 0.00027959999999999997, + "loss": 1.3338, + "step": 473 + }, + { + "epoch": 0.17, + "grad_norm": 7.008511066436768, + "learning_rate": 0.0002802, + "loss": 1.2572, + "step": 474 + }, + { + "epoch": 0.17, + "grad_norm": 10.975461959838867, + "learning_rate": 0.0002808, + "loss": 1.2421, + "step": 475 + }, + { + "epoch": 0.17, + "grad_norm": 6.61250114440918, + "learning_rate": 0.00028139999999999996, + "loss": 2.1506, + "step": 476 + }, + { + "epoch": 0.17, + "grad_norm": 4.0594916343688965, + "learning_rate": 0.00028199999999999997, + "loss": 1.5596, + "step": 477 + }, + { + "epoch": 0.17, + "grad_norm": 4.184998989105225, + "learning_rate": 0.0002826, + "loss": 1.3744, + "step": 478 + }, + { + "epoch": 0.17, + "grad_norm": 5.103187084197998, + "learning_rate": 0.00028319999999999994, + "loss": 0.8721, + "step": 479 + }, + { + "epoch": 0.17, + "grad_norm": 5.862779140472412, + "learning_rate": 0.00028379999999999996, + "loss": 1.2812, + "step": 480 + }, + { + "epoch": 0.17, + "grad_norm": 4.681532859802246, + "learning_rate": 0.0002844, + "loss": 0.7297, + "step": 481 + }, + { + "epoch": 0.17, + "grad_norm": 3.8014369010925293, + "learning_rate": 0.000285, + "loss": 0.6603, + "step": 482 + }, + { + "epoch": 0.17, + "grad_norm": 6.400700092315674, + "learning_rate": 0.00028559999999999995, + "loss": 0.8617, + "step": 483 + }, + { + "epoch": 0.17, + "grad_norm": 7.437506675720215, + "learning_rate": 0.00028619999999999996, + "loss": 0.9853, + "step": 484 + }, + { + "epoch": 0.17, + "grad_norm": 7.865484237670898, + "learning_rate": 0.0002868, + "loss": 1.0431, + "step": 485 + }, + { + "epoch": 0.17, + "grad_norm": 11.700081825256348, + "learning_rate": 0.00028739999999999994, + "loss": 2.4413, + "step": 486 + }, + { + "epoch": 0.17, + "grad_norm": 6.08707332611084, + "learning_rate": 0.00028799999999999995, + "loss": 1.2467, + "step": 487 + }, + { + "epoch": 0.17, + "grad_norm": 5.360887050628662, + "learning_rate": 0.00028859999999999997, + "loss": 1.1013, + "step": 488 + }, + { + "epoch": 0.17, + "grad_norm": 5.402688980102539, + "learning_rate": 0.0002892, + "loss": 1.5754, + "step": 489 + }, + { + "epoch": 0.17, + "grad_norm": 5.761510848999023, + "learning_rate": 0.00028979999999999994, + "loss": 1.1612, + "step": 490 + }, + { + "epoch": 0.17, + "grad_norm": 4.469000339508057, + "learning_rate": 0.00029039999999999996, + "loss": 0.9344, + "step": 491 + }, + { + "epoch": 0.17, + "grad_norm": 8.59809684753418, + "learning_rate": 0.00029099999999999997, + "loss": 1.902, + "step": 492 + }, + { + "epoch": 0.17, + "grad_norm": 10.263930320739746, + "learning_rate": 0.0002916, + "loss": 0.6227, + "step": 493 + }, + { + "epoch": 0.17, + "grad_norm": 5.8753981590271, + "learning_rate": 0.00029219999999999995, + "loss": 0.4528, + "step": 494 + }, + { + "epoch": 0.17, + "grad_norm": 6.5920562744140625, + "learning_rate": 0.00029279999999999996, + "loss": 0.6706, + "step": 495 + }, + { + "epoch": 0.17, + "grad_norm": 10.204047203063965, + "learning_rate": 0.0002934, + "loss": 1.4762, + "step": 496 + }, + { + "epoch": 0.17, + "grad_norm": 14.272616386413574, + "learning_rate": 0.000294, + "loss": 2.0472, + "step": 497 + }, + { + "epoch": 0.17, + "grad_norm": 8.78574275970459, + "learning_rate": 0.00029459999999999995, + "loss": 1.101, + "step": 498 + }, + { + "epoch": 0.17, + "grad_norm": 6.776339530944824, + "learning_rate": 0.00029519999999999997, + "loss": 1.1936, + "step": 499 + }, + { + "epoch": 0.18, + "grad_norm": 6.81951379776001, + "learning_rate": 0.0002958, + "loss": 1.9781, + "step": 500 + }, + { + "epoch": 0.18, + "eval_loss": 1.4941623210906982, + "eval_runtime": 50.4072, + "eval_samples_per_second": 43.01, + "eval_steps_per_second": 10.752, + "eval_wer": 0.8929250994637606, + "step": 500 + }, + { + "epoch": 0.18, + "grad_norm": 8.40855884552002, + "learning_rate": 0.0002964, + "loss": 2.3229, + "step": 501 + }, + { + "epoch": 0.18, + "grad_norm": 9.991337776184082, + "learning_rate": 0.00029699999999999996, + "loss": 1.5519, + "step": 502 + }, + { + "epoch": 0.18, + "grad_norm": 4.177870273590088, + "learning_rate": 0.00029759999999999997, + "loss": 1.3197, + "step": 503 + }, + { + "epoch": 0.18, + "grad_norm": 4.229679107666016, + "learning_rate": 0.0002982, + "loss": 1.2668, + "step": 504 + }, + { + "epoch": 0.18, + "grad_norm": 5.256344795227051, + "learning_rate": 0.0002988, + "loss": 1.2646, + "step": 505 + }, + { + "epoch": 0.18, + "grad_norm": 3.3408243656158447, + "learning_rate": 0.00029939999999999996, + "loss": 0.8331, + "step": 506 + }, + { + "epoch": 0.18, + "grad_norm": 3.4926090240478516, + "learning_rate": 0.0003, + "loss": 0.7081, + "step": 507 + }, + { + "epoch": 0.18, + "grad_norm": 4.9362640380859375, + "learning_rate": 0.0002999627883899777, + "loss": 1.1574, + "step": 508 + }, + { + "epoch": 0.18, + "grad_norm": 3.2679216861724854, + "learning_rate": 0.0002999255767799553, + "loss": 0.546, + "step": 509 + }, + { + "epoch": 0.18, + "grad_norm": 10.322508811950684, + "learning_rate": 0.000299888365169933, + "loss": 0.9139, + "step": 510 + }, + { + "epoch": 0.18, + "grad_norm": 4.090631484985352, + "learning_rate": 0.0002998511535599107, + "loss": 0.5386, + "step": 511 + }, + { + "epoch": 0.18, + "grad_norm": 5.046153545379639, + "learning_rate": 0.0002998139419498883, + "loss": 0.7433, + "step": 512 + }, + { + "epoch": 0.18, + "grad_norm": 10.831178665161133, + "learning_rate": 0.00029977673033986603, + "loss": 1.5149, + "step": 513 + }, + { + "epoch": 0.18, + "grad_norm": 7.8567633628845215, + "learning_rate": 0.0002997395187298437, + "loss": 1.0058, + "step": 514 + }, + { + "epoch": 0.18, + "grad_norm": 4.131727695465088, + "learning_rate": 0.0002997023071198214, + "loss": 0.3814, + "step": 515 + }, + { + "epoch": 0.18, + "grad_norm": 11.242587089538574, + "learning_rate": 0.00029966509550979903, + "loss": 1.0306, + "step": 516 + }, + { + "epoch": 0.18, + "grad_norm": 8.567095756530762, + "learning_rate": 0.0002996278838997767, + "loss": 1.1139, + "step": 517 + }, + { + "epoch": 0.18, + "grad_norm": 6.56288480758667, + "learning_rate": 0.0002995906722897544, + "loss": 1.6086, + "step": 518 + }, + { + "epoch": 0.18, + "grad_norm": 7.899925231933594, + "learning_rate": 0.00029955346067973203, + "loss": 1.5415, + "step": 519 + }, + { + "epoch": 0.18, + "grad_norm": 6.132180213928223, + "learning_rate": 0.00029951624906970973, + "loss": 1.2357, + "step": 520 + }, + { + "epoch": 0.18, + "grad_norm": 4.827673435211182, + "learning_rate": 0.00029947903745968743, + "loss": 0.7701, + "step": 521 + }, + { + "epoch": 0.18, + "grad_norm": 12.996684074401855, + "learning_rate": 0.0002994418258496651, + "loss": 1.4232, + "step": 522 + }, + { + "epoch": 0.18, + "grad_norm": 6.13612174987793, + "learning_rate": 0.00029940461423964273, + "loss": 0.9823, + "step": 523 + }, + { + "epoch": 0.18, + "grad_norm": 6.259670734405518, + "learning_rate": 0.0002993674026296204, + "loss": 1.2875, + "step": 524 + }, + { + "epoch": 0.18, + "grad_norm": NaN, + "learning_rate": 0.0002993674026296204, + "loss": 0.694, + "step": 525 + }, + { + "epoch": 0.18, + "grad_norm": 6.94905424118042, + "learning_rate": 0.0002993301910195981, + "loss": 1.9648, + "step": 526 + }, + { + "epoch": 0.18, + "grad_norm": 3.61449933052063, + "learning_rate": 0.0002992929794095758, + "loss": 1.4221, + "step": 527 + }, + { + "epoch": 0.19, + "grad_norm": 3.9974653720855713, + "learning_rate": 0.00029925576779955344, + "loss": 1.28, + "step": 528 + }, + { + "epoch": 0.19, + "grad_norm": 4.458034992218018, + "learning_rate": 0.00029921855618953114, + "loss": 0.9629, + "step": 529 + }, + { + "epoch": 0.19, + "grad_norm": 4.363546371459961, + "learning_rate": 0.0002991813445795088, + "loss": 0.8967, + "step": 530 + }, + { + "epoch": 0.19, + "grad_norm": 6.293959140777588, + "learning_rate": 0.00029914413296948644, + "loss": 1.022, + "step": 531 + }, + { + "epoch": 0.19, + "grad_norm": 3.35537052154541, + "learning_rate": 0.00029910692135946414, + "loss": 0.6593, + "step": 532 + }, + { + "epoch": 0.19, + "grad_norm": 4.888710021972656, + "learning_rate": 0.0002990697097494418, + "loss": 0.9094, + "step": 533 + }, + { + "epoch": 0.19, + "grad_norm": 5.332656383514404, + "learning_rate": 0.0002990324981394195, + "loss": 1.7321, + "step": 534 + }, + { + "epoch": 0.19, + "grad_norm": 8.324007034301758, + "learning_rate": 0.00029899528652939714, + "loss": 1.4225, + "step": 535 + }, + { + "epoch": 0.19, + "grad_norm": 5.71950101852417, + "learning_rate": 0.0002989580749193748, + "loss": 0.626, + "step": 536 + }, + { + "epoch": 0.19, + "grad_norm": 6.610879421234131, + "learning_rate": 0.0002989208633093525, + "loss": 1.0425, + "step": 537 + }, + { + "epoch": 0.19, + "grad_norm": 6.266473770141602, + "learning_rate": 0.00029888365169933014, + "loss": 1.2166, + "step": 538 + }, + { + "epoch": 0.19, + "grad_norm": 5.074027061462402, + "learning_rate": 0.00029884644008930784, + "loss": 0.9753, + "step": 539 + }, + { + "epoch": 0.19, + "grad_norm": 5.849838733673096, + "learning_rate": 0.00029880922847928554, + "loss": 1.1584, + "step": 540 + }, + { + "epoch": 0.19, + "grad_norm": 4.537852764129639, + "learning_rate": 0.0002987720168692632, + "loss": 0.907, + "step": 541 + }, + { + "epoch": 0.19, + "grad_norm": 5.0589985847473145, + "learning_rate": 0.00029873480525924084, + "loss": 0.6799, + "step": 542 + }, + { + "epoch": 0.19, + "grad_norm": 6.327163219451904, + "learning_rate": 0.0002986975936492185, + "loss": 1.8219, + "step": 543 + }, + { + "epoch": 0.19, + "grad_norm": 7.557645797729492, + "learning_rate": 0.0002986603820391962, + "loss": 1.4392, + "step": 544 + }, + { + "epoch": 0.19, + "grad_norm": 5.596197128295898, + "learning_rate": 0.0002986231704291739, + "loss": 1.1227, + "step": 545 + }, + { + "epoch": 0.19, + "grad_norm": 5.67082405090332, + "learning_rate": 0.00029858595881915155, + "loss": 1.029, + "step": 546 + }, + { + "epoch": 0.19, + "grad_norm": 7.202919006347656, + "learning_rate": 0.00029854874720912925, + "loss": 0.736, + "step": 547 + }, + { + "epoch": 0.19, + "grad_norm": 6.42061185836792, + "learning_rate": 0.0002985115355991069, + "loss": 1.1307, + "step": 548 + }, + { + "epoch": 0.19, + "grad_norm": 7.092513561248779, + "learning_rate": 0.00029847432398908455, + "loss": 1.2644, + "step": 549 + }, + { + "epoch": 0.19, + "grad_norm": 5.857559680938721, + "learning_rate": 0.00029843711237906225, + "loss": 0.8481, + "step": 550 + }, + { + "epoch": 0.19, + "grad_norm": 5.834782123565674, + "learning_rate": 0.0002983999007690399, + "loss": 1.6768, + "step": 551 + }, + { + "epoch": 0.19, + "grad_norm": 6.073868751525879, + "learning_rate": 0.0002983626891590176, + "loss": 1.9901, + "step": 552 + }, + { + "epoch": 0.19, + "grad_norm": 3.636265754699707, + "learning_rate": 0.00029832547754899525, + "loss": 0.855, + "step": 553 + }, + { + "epoch": 0.19, + "grad_norm": 4.978489398956299, + "learning_rate": 0.00029828826593897295, + "loss": 1.5895, + "step": 554 + }, + { + "epoch": 0.19, + "grad_norm": 3.5552754402160645, + "learning_rate": 0.0002982510543289506, + "loss": 1.0363, + "step": 555 + }, + { + "epoch": 0.19, + "grad_norm": 6.47123908996582, + "learning_rate": 0.00029821384271892825, + "loss": 1.305, + "step": 556 + }, + { + "epoch": 0.2, + "grad_norm": 3.921194076538086, + "learning_rate": 0.00029817663110890595, + "loss": 1.571, + "step": 557 + }, + { + "epoch": 0.2, + "grad_norm": 3.359584331512451, + "learning_rate": 0.00029813941949888366, + "loss": 1.0663, + "step": 558 + }, + { + "epoch": 0.2, + "grad_norm": 5.973052501678467, + "learning_rate": 0.0002981022078888613, + "loss": 1.1589, + "step": 559 + }, + { + "epoch": 0.2, + "grad_norm": 6.5657124519348145, + "learning_rate": 0.00029806499627883895, + "loss": 0.8758, + "step": 560 + }, + { + "epoch": 0.2, + "grad_norm": 3.143813371658325, + "learning_rate": 0.00029802778466881666, + "loss": 0.9989, + "step": 561 + }, + { + "epoch": 0.2, + "grad_norm": 3.4931013584136963, + "learning_rate": 0.0002979905730587943, + "loss": 0.5586, + "step": 562 + }, + { + "epoch": 0.2, + "grad_norm": 4.445590496063232, + "learning_rate": 0.000297953361448772, + "loss": 1.5847, + "step": 563 + }, + { + "epoch": 0.2, + "grad_norm": 7.393503665924072, + "learning_rate": 0.00029791614983874966, + "loss": 1.226, + "step": 564 + }, + { + "epoch": 0.2, + "grad_norm": 4.212005138397217, + "learning_rate": 0.00029787893822872736, + "loss": 1.1496, + "step": 565 + }, + { + "epoch": 0.2, + "grad_norm": 6.066720962524414, + "learning_rate": 0.000297841726618705, + "loss": 1.0976, + "step": 566 + }, + { + "epoch": 0.2, + "grad_norm": 6.2609734535217285, + "learning_rate": 0.00029780451500868266, + "loss": 0.8392, + "step": 567 + }, + { + "epoch": 0.2, + "grad_norm": 3.803570508956909, + "learning_rate": 0.00029776730339866036, + "loss": 0.7692, + "step": 568 + }, + { + "epoch": 0.2, + "grad_norm": 5.159245491027832, + "learning_rate": 0.000297730091788638, + "loss": 0.8532, + "step": 569 + }, + { + "epoch": 0.2, + "grad_norm": 4.429141998291016, + "learning_rate": 0.0002976928801786157, + "loss": 0.7445, + "step": 570 + }, + { + "epoch": 0.2, + "grad_norm": 9.767818450927734, + "learning_rate": 0.0002976556685685934, + "loss": 1.5825, + "step": 571 + }, + { + "epoch": 0.2, + "grad_norm": 6.809798717498779, + "learning_rate": 0.00029761845695857106, + "loss": 1.5131, + "step": 572 + }, + { + "epoch": 0.2, + "grad_norm": 20.902917861938477, + "learning_rate": 0.0002975812453485487, + "loss": 1.8095, + "step": 573 + }, + { + "epoch": 0.2, + "grad_norm": 5.394655704498291, + "learning_rate": 0.0002975440337385264, + "loss": 1.2016, + "step": 574 + }, + { + "epoch": 0.2, + "grad_norm": NaN, + "learning_rate": 0.0002975440337385264, + "loss": 0.531, + "step": 575 + }, + { + "epoch": 0.2, + "grad_norm": 9.339009284973145, + "learning_rate": 0.00029750682212850406, + "loss": 3.3416, + "step": 576 + }, + { + "epoch": 0.2, + "grad_norm": 5.734882354736328, + "learning_rate": 0.00029746961051848177, + "loss": 2.2444, + "step": 577 + }, + { + "epoch": 0.2, + "grad_norm": 3.9475908279418945, + "learning_rate": 0.0002974323989084594, + "loss": 1.2054, + "step": 578 + }, + { + "epoch": 0.2, + "grad_norm": 3.528080463409424, + "learning_rate": 0.00029739518729843706, + "loss": 1.3752, + "step": 579 + }, + { + "epoch": 0.2, + "grad_norm": 3.754209041595459, + "learning_rate": 0.00029735797568841477, + "loss": 0.7848, + "step": 580 + }, + { + "epoch": 0.2, + "grad_norm": 3.216021776199341, + "learning_rate": 0.0002973207640783924, + "loss": 0.6936, + "step": 581 + }, + { + "epoch": 0.2, + "grad_norm": 3.249885082244873, + "learning_rate": 0.0002972835524683701, + "loss": 0.8303, + "step": 582 + }, + { + "epoch": 0.2, + "grad_norm": 4.777367115020752, + "learning_rate": 0.00029724634085834777, + "loss": 0.6014, + "step": 583 + }, + { + "epoch": 0.2, + "grad_norm": 9.312623023986816, + "learning_rate": 0.00029720912924832547, + "loss": 1.1901, + "step": 584 + }, + { + "epoch": 0.2, + "grad_norm": 4.88458776473999, + "learning_rate": 0.0002971719176383031, + "loss": 1.1543, + "step": 585 + }, + { + "epoch": 0.21, + "grad_norm": 4.8880615234375, + "learning_rate": 0.00029713470602828077, + "loss": 1.0548, + "step": 586 + }, + { + "epoch": 0.21, + "grad_norm": 4.879009246826172, + "learning_rate": 0.00029709749441825847, + "loss": 1.0961, + "step": 587 + }, + { + "epoch": 0.21, + "grad_norm": 4.066569805145264, + "learning_rate": 0.0002970602828082361, + "loss": 0.6876, + "step": 588 + }, + { + "epoch": 0.21, + "grad_norm": 5.1321892738342285, + "learning_rate": 0.0002970230711982138, + "loss": 1.0765, + "step": 589 + }, + { + "epoch": 0.21, + "grad_norm": 4.651081085205078, + "learning_rate": 0.0002969858595881915, + "loss": 0.6319, + "step": 590 + }, + { + "epoch": 0.21, + "grad_norm": 4.676162242889404, + "learning_rate": 0.00029694864797816917, + "loss": 0.8857, + "step": 591 + }, + { + "epoch": 0.21, + "grad_norm": 4.472525119781494, + "learning_rate": 0.0002969114363681468, + "loss": 1.2666, + "step": 592 + }, + { + "epoch": 0.21, + "grad_norm": 5.793959140777588, + "learning_rate": 0.0002968742247581245, + "loss": 0.8785, + "step": 593 + }, + { + "epoch": 0.21, + "grad_norm": 7.9074931144714355, + "learning_rate": 0.00029683701314810217, + "loss": 1.4696, + "step": 594 + }, + { + "epoch": 0.21, + "grad_norm": 4.401146411895752, + "learning_rate": 0.0002967998015380799, + "loss": 0.5815, + "step": 595 + }, + { + "epoch": 0.21, + "grad_norm": 7.1119513511657715, + "learning_rate": 0.0002967625899280575, + "loss": 1.0864, + "step": 596 + }, + { + "epoch": 0.21, + "grad_norm": 7.709166049957275, + "learning_rate": 0.0002967253783180352, + "loss": 1.6677, + "step": 597 + }, + { + "epoch": 0.21, + "grad_norm": 7.580686092376709, + "learning_rate": 0.0002966881667080129, + "loss": 1.2696, + "step": 598 + }, + { + "epoch": 0.21, + "grad_norm": 5.200621128082275, + "learning_rate": 0.0002966509550979905, + "loss": 0.6652, + "step": 599 + }, + { + "epoch": 0.21, + "grad_norm": 8.517735481262207, + "learning_rate": 0.0002966137434879682, + "loss": 1.5409, + "step": 600 + }, + { + "epoch": 0.21, + "eval_loss": 1.319638729095459, + "eval_runtime": 51.1269, + "eval_samples_per_second": 42.404, + "eval_steps_per_second": 10.601, + "eval_wer": 0.909271752291991, + "step": 600 + }, + { + "epoch": 0.21, + "grad_norm": 12.722813606262207, + "learning_rate": 0.0002965765318779459, + "loss": 2.4553, + "step": 601 + }, + { + "epoch": 0.21, + "grad_norm": 5.727902412414551, + "learning_rate": 0.0002965393202679236, + "loss": 1.4027, + "step": 602 + }, + { + "epoch": 0.21, + "grad_norm": 6.360785007476807, + "learning_rate": 0.00029650210865790123, + "loss": 1.1641, + "step": 603 + }, + { + "epoch": 0.21, + "grad_norm": 4.100715637207031, + "learning_rate": 0.0002964648970478789, + "loss": 1.0047, + "step": 604 + }, + { + "epoch": 0.21, + "grad_norm": 5.119365692138672, + "learning_rate": 0.0002964276854378566, + "loss": 1.1648, + "step": 605 + }, + { + "epoch": 0.21, + "grad_norm": 5.546939373016357, + "learning_rate": 0.0002963904738278343, + "loss": 1.3271, + "step": 606 + }, + { + "epoch": 0.21, + "grad_norm": 4.981604099273682, + "learning_rate": 0.00029635326221781193, + "loss": 1.2142, + "step": 607 + }, + { + "epoch": 0.21, + "grad_norm": 6.839823246002197, + "learning_rate": 0.00029631605060778963, + "loss": 1.6792, + "step": 608 + }, + { + "epoch": 0.21, + "grad_norm": 7.334500789642334, + "learning_rate": 0.0002962788389977673, + "loss": 1.2355, + "step": 609 + }, + { + "epoch": 0.21, + "grad_norm": 4.874765872955322, + "learning_rate": 0.00029624162738774493, + "loss": 0.9184, + "step": 610 + }, + { + "epoch": 0.21, + "grad_norm": 3.3265280723571777, + "learning_rate": 0.00029620441577772263, + "loss": 0.6558, + "step": 611 + }, + { + "epoch": 0.21, + "grad_norm": 3.5789058208465576, + "learning_rate": 0.0002961672041677003, + "loss": 0.8824, + "step": 612 + }, + { + "epoch": 0.21, + "grad_norm": 3.2460458278656006, + "learning_rate": 0.000296129992557678, + "loss": 1.0423, + "step": 613 + }, + { + "epoch": 0.22, + "grad_norm": 6.92147970199585, + "learning_rate": 0.00029609278094765563, + "loss": 0.7398, + "step": 614 + }, + { + "epoch": 0.22, + "grad_norm": 3.5256998538970947, + "learning_rate": 0.00029605556933763334, + "loss": 0.6854, + "step": 615 + }, + { + "epoch": 0.22, + "grad_norm": 4.17782735824585, + "learning_rate": 0.000296018357727611, + "loss": 1.0298, + "step": 616 + }, + { + "epoch": 0.22, + "grad_norm": 8.018060684204102, + "learning_rate": 0.00029598114611758863, + "loss": 2.2126, + "step": 617 + }, + { + "epoch": 0.22, + "grad_norm": 7.263321876525879, + "learning_rate": 0.00029594393450756634, + "loss": 0.9248, + "step": 618 + }, + { + "epoch": 0.22, + "grad_norm": 6.241823196411133, + "learning_rate": 0.00029590672289754404, + "loss": 1.1756, + "step": 619 + }, + { + "epoch": 0.22, + "grad_norm": 4.013612747192383, + "learning_rate": 0.0002958695112875217, + "loss": 0.882, + "step": 620 + }, + { + "epoch": 0.22, + "grad_norm": 4.5317792892456055, + "learning_rate": 0.00029583229967749934, + "loss": 1.0661, + "step": 621 + }, + { + "epoch": 0.22, + "grad_norm": 8.019999504089355, + "learning_rate": 0.00029579508806747704, + "loss": 0.9743, + "step": 622 + }, + { + "epoch": 0.22, + "grad_norm": 10.188430786132812, + "learning_rate": 0.0002957578764574547, + "loss": 1.3912, + "step": 623 + }, + { + "epoch": 0.22, + "grad_norm": 6.105098724365234, + "learning_rate": 0.0002957206648474324, + "loss": 0.8787, + "step": 624 + }, + { + "epoch": 0.22, + "grad_norm": 8.475005149841309, + "learning_rate": 0.00029568345323741004, + "loss": 2.6934, + "step": 625 + }, + { + "epoch": 0.22, + "grad_norm": 3.846712589263916, + "learning_rate": 0.00029564624162738774, + "loss": 1.4757, + "step": 626 + }, + { + "epoch": 0.22, + "grad_norm": 3.2802951335906982, + "learning_rate": 0.0002956090300173654, + "loss": 0.9426, + "step": 627 + }, + { + "epoch": 0.22, + "grad_norm": 3.0451314449310303, + "learning_rate": 0.00029557181840734304, + "loss": 0.8553, + "step": 628 + }, + { + "epoch": 0.22, + "grad_norm": 4.837835788726807, + "learning_rate": 0.00029553460679732074, + "loss": 1.0035, + "step": 629 + }, + { + "epoch": 0.22, + "grad_norm": 3.3272557258605957, + "learning_rate": 0.0002954973951872984, + "loss": 1.7012, + "step": 630 + }, + { + "epoch": 0.22, + "grad_norm": 3.1126511096954346, + "learning_rate": 0.0002954601835772761, + "loss": 0.8654, + "step": 631 + }, + { + "epoch": 0.22, + "grad_norm": 4.995132923126221, + "learning_rate": 0.00029542297196725374, + "loss": 0.9708, + "step": 632 + }, + { + "epoch": 0.22, + "grad_norm": 5.542963027954102, + "learning_rate": 0.00029538576035723145, + "loss": 1.0861, + "step": 633 + }, + { + "epoch": 0.22, + "grad_norm": 3.722038984298706, + "learning_rate": 0.0002953485487472091, + "loss": 1.012, + "step": 634 + }, + { + "epoch": 0.22, + "grad_norm": 4.468753337860107, + "learning_rate": 0.00029531133713718674, + "loss": 1.139, + "step": 635 + }, + { + "epoch": 0.22, + "grad_norm": 4.822729110717773, + "learning_rate": 0.00029527412552716445, + "loss": 1.1173, + "step": 636 + }, + { + "epoch": 0.22, + "grad_norm": 4.884435653686523, + "learning_rate": 0.00029523691391714215, + "loss": 1.0669, + "step": 637 + }, + { + "epoch": 0.22, + "grad_norm": 4.150437831878662, + "learning_rate": 0.0002951997023071198, + "loss": 0.871, + "step": 638 + }, + { + "epoch": 0.22, + "grad_norm": 4.12093448638916, + "learning_rate": 0.0002951624906970975, + "loss": 0.6693, + "step": 639 + }, + { + "epoch": 0.22, + "grad_norm": 7.429576396942139, + "learning_rate": 0.00029512527908707515, + "loss": 1.0517, + "step": 640 + }, + { + "epoch": 0.22, + "grad_norm": 3.6653037071228027, + "learning_rate": 0.0002950880674770528, + "loss": 0.8529, + "step": 641 + }, + { + "epoch": 0.22, + "grad_norm": 2.8426244258880615, + "learning_rate": 0.0002950508558670305, + "loss": 0.7732, + "step": 642 + }, + { + "epoch": 0.23, + "grad_norm": 7.198141574859619, + "learning_rate": 0.00029501364425700815, + "loss": 1.6585, + "step": 643 + }, + { + "epoch": 0.23, + "grad_norm": 7.202881813049316, + "learning_rate": 0.00029497643264698585, + "loss": 1.4548, + "step": 644 + }, + { + "epoch": 0.23, + "grad_norm": 6.224870681762695, + "learning_rate": 0.0002949392210369635, + "loss": 1.6397, + "step": 645 + }, + { + "epoch": 0.23, + "grad_norm": 3.892850160598755, + "learning_rate": 0.00029490200942694115, + "loss": 1.0224, + "step": 646 + }, + { + "epoch": 0.23, + "grad_norm": 6.112697601318359, + "learning_rate": 0.00029486479781691885, + "loss": 0.8937, + "step": 647 + }, + { + "epoch": 0.23, + "grad_norm": 10.07210636138916, + "learning_rate": 0.0002948275862068965, + "loss": 1.0152, + "step": 648 + }, + { + "epoch": 0.23, + "grad_norm": 6.240150451660156, + "learning_rate": 0.0002947903745968742, + "loss": 1.0351, + "step": 649 + }, + { + "epoch": 0.23, + "grad_norm": 9.268518447875977, + "learning_rate": 0.0002947531629868519, + "loss": 0.9157, + "step": 650 + }, + { + "epoch": 0.23, + "grad_norm": 4.466054916381836, + "learning_rate": 0.00029471595137682956, + "loss": 1.6634, + "step": 651 + }, + { + "epoch": 0.23, + "grad_norm": 5.009162425994873, + "learning_rate": 0.0002946787397668072, + "loss": 1.2699, + "step": 652 + }, + { + "epoch": 0.23, + "grad_norm": 4.312174320220947, + "learning_rate": 0.00029464152815678485, + "loss": 1.2239, + "step": 653 + }, + { + "epoch": 0.23, + "grad_norm": 3.121635675430298, + "learning_rate": 0.00029460431654676256, + "loss": 0.9369, + "step": 654 + }, + { + "epoch": 0.23, + "grad_norm": 4.073437213897705, + "learning_rate": 0.00029456710493674026, + "loss": 1.1005, + "step": 655 + }, + { + "epoch": 0.23, + "grad_norm": 3.3540802001953125, + "learning_rate": 0.0002945298933267179, + "loss": 1.0565, + "step": 656 + }, + { + "epoch": 0.23, + "grad_norm": 4.2247724533081055, + "learning_rate": 0.0002944926817166956, + "loss": 0.7415, + "step": 657 + }, + { + "epoch": 0.23, + "grad_norm": 3.133108139038086, + "learning_rate": 0.00029445547010667326, + "loss": 0.4355, + "step": 658 + }, + { + "epoch": 0.23, + "grad_norm": 5.775803089141846, + "learning_rate": 0.0002944182584966509, + "loss": 1.0846, + "step": 659 + }, + { + "epoch": 0.23, + "grad_norm": 6.800411701202393, + "learning_rate": 0.0002943810468866286, + "loss": 1.1723, + "step": 660 + }, + { + "epoch": 0.23, + "grad_norm": 5.831652641296387, + "learning_rate": 0.00029434383527660626, + "loss": 0.8722, + "step": 661 + }, + { + "epoch": 0.23, + "grad_norm": 5.7799882888793945, + "learning_rate": 0.00029430662366658396, + "loss": 1.0813, + "step": 662 + }, + { + "epoch": 0.23, + "grad_norm": 9.425108909606934, + "learning_rate": 0.0002942694120565616, + "loss": 2.4706, + "step": 663 + }, + { + "epoch": 0.23, + "grad_norm": 7.258337020874023, + "learning_rate": 0.0002942322004465393, + "loss": 1.0422, + "step": 664 + }, + { + "epoch": 0.23, + "grad_norm": 6.805200576782227, + "learning_rate": 0.00029419498883651696, + "loss": 0.8724, + "step": 665 + }, + { + "epoch": 0.23, + "grad_norm": 5.374764442443848, + "learning_rate": 0.0002941577772264946, + "loss": 0.8653, + "step": 666 + }, + { + "epoch": 0.23, + "grad_norm": 30.488962173461914, + "learning_rate": 0.0002941205656164723, + "loss": 2.5592, + "step": 667 + }, + { + "epoch": 0.23, + "grad_norm": 9.162358283996582, + "learning_rate": 0.00029408335400645, + "loss": 1.6403, + "step": 668 + }, + { + "epoch": 0.23, + "grad_norm": 6.208140850067139, + "learning_rate": 0.00029404614239642767, + "loss": 1.0371, + "step": 669 + }, + { + "epoch": 0.23, + "grad_norm": 4.24813175201416, + "learning_rate": 0.0002940089307864053, + "loss": 0.6602, + "step": 670 + }, + { + "epoch": 0.24, + "grad_norm": 6.219293594360352, + "learning_rate": 0.00029397171917638296, + "loss": 1.2641, + "step": 671 + }, + { + "epoch": 0.24, + "grad_norm": 10.14699935913086, + "learning_rate": 0.00029393450756636067, + "loss": 1.5108, + "step": 672 + }, + { + "epoch": 0.24, + "grad_norm": 8.945874214172363, + "learning_rate": 0.00029389729595633837, + "loss": 0.8659, + "step": 673 + }, + { + "epoch": 0.24, + "grad_norm": 9.979119300842285, + "learning_rate": 0.000293860084346316, + "loss": 0.6198, + "step": 674 + }, + { + "epoch": 0.24, + "grad_norm": 10.116988182067871, + "learning_rate": 0.0002938228727362937, + "loss": 1.1682, + "step": 675 + }, + { + "epoch": 0.24, + "grad_norm": 7.378312587738037, + "learning_rate": 0.00029378566112627137, + "loss": 2.0674, + "step": 676 + }, + { + "epoch": 0.24, + "grad_norm": 7.4542012214660645, + "learning_rate": 0.000293748449516249, + "loss": 1.8939, + "step": 677 + }, + { + "epoch": 0.24, + "grad_norm": 6.61478328704834, + "learning_rate": 0.0002937112379062267, + "loss": 1.6356, + "step": 678 + }, + { + "epoch": 0.24, + "grad_norm": 5.97426176071167, + "learning_rate": 0.00029367402629620437, + "loss": 0.8941, + "step": 679 + }, + { + "epoch": 0.24, + "grad_norm": 3.164785385131836, + "learning_rate": 0.0002936368146861821, + "loss": 0.6823, + "step": 680 + }, + { + "epoch": 0.24, + "grad_norm": 4.324411869049072, + "learning_rate": 0.0002935996030761598, + "loss": 0.6235, + "step": 681 + }, + { + "epoch": 0.24, + "grad_norm": 4.255089282989502, + "learning_rate": 0.0002935623914661374, + "loss": 0.9023, + "step": 682 + }, + { + "epoch": 0.24, + "grad_norm": 4.362627029418945, + "learning_rate": 0.0002935251798561151, + "loss": 1.3556, + "step": 683 + }, + { + "epoch": 0.24, + "grad_norm": 3.432969570159912, + "learning_rate": 0.0002934879682460927, + "loss": 0.8282, + "step": 684 + }, + { + "epoch": 0.24, + "grad_norm": 2.972658395767212, + "learning_rate": 0.0002934507566360704, + "loss": 1.0565, + "step": 685 + }, + { + "epoch": 0.24, + "grad_norm": 4.701770305633545, + "learning_rate": 0.00029341354502604813, + "loss": 1.2406, + "step": 686 + }, + { + "epoch": 0.24, + "grad_norm": 4.701326370239258, + "learning_rate": 0.0002933763334160258, + "loss": 0.7059, + "step": 687 + }, + { + "epoch": 0.24, + "grad_norm": 3.311460256576538, + "learning_rate": 0.0002933391218060034, + "loss": 0.9308, + "step": 688 + }, + { + "epoch": 0.24, + "grad_norm": 5.932095050811768, + "learning_rate": 0.00029330191019598113, + "loss": 1.8754, + "step": 689 + }, + { + "epoch": 0.24, + "grad_norm": 4.051765441894531, + "learning_rate": 0.0002932646985859588, + "loss": 0.7799, + "step": 690 + }, + { + "epoch": 0.24, + "grad_norm": 4.459011554718018, + "learning_rate": 0.0002932274869759365, + "loss": 0.8546, + "step": 691 + }, + { + "epoch": 0.24, + "grad_norm": 4.098698616027832, + "learning_rate": 0.00029319027536591413, + "loss": 0.5279, + "step": 692 + }, + { + "epoch": 0.24, + "grad_norm": 4.640591621398926, + "learning_rate": 0.00029315306375589183, + "loss": 0.9102, + "step": 693 + }, + { + "epoch": 0.24, + "grad_norm": 6.9347004890441895, + "learning_rate": 0.0002931158521458695, + "loss": 0.7036, + "step": 694 + }, + { + "epoch": 0.24, + "grad_norm": 4.845668792724609, + "learning_rate": 0.00029307864053584713, + "loss": 0.5378, + "step": 695 + }, + { + "epoch": 0.24, + "grad_norm": 5.8146281242370605, + "learning_rate": 0.00029304142892582483, + "loss": 1.1422, + "step": 696 + }, + { + "epoch": 0.24, + "grad_norm": 6.487931251525879, + "learning_rate": 0.0002930042173158025, + "loss": 1.0314, + "step": 697 + }, + { + "epoch": 0.24, + "grad_norm": 6.625868797302246, + "learning_rate": 0.0002929670057057802, + "loss": 1.5303, + "step": 698 + }, + { + "epoch": 0.24, + "grad_norm": 14.07421588897705, + "learning_rate": 0.0002929297940957579, + "loss": 2.0771, + "step": 699 + }, + { + "epoch": 0.25, + "grad_norm": 7.190564155578613, + "learning_rate": 0.00029289258248573554, + "loss": 1.1797, + "step": 700 + }, + { + "epoch": 0.25, + "eval_loss": 1.2068666219711304, + "eval_runtime": 51.0804, + "eval_samples_per_second": 42.443, + "eval_steps_per_second": 10.611, + "eval_wer": 0.8056564608199274, + "step": 700 + }, + { + "epoch": 0.25, + "grad_norm": 6.680876731872559, + "learning_rate": 0.0002928553708757132, + "loss": 2.1408, + "step": 701 + }, + { + "epoch": 0.25, + "grad_norm": 3.9025561809539795, + "learning_rate": 0.00029281815926569083, + "loss": 1.5205, + "step": 702 + }, + { + "epoch": 0.25, + "grad_norm": 4.40712833404541, + "learning_rate": 0.00029278094765566854, + "loss": 1.4955, + "step": 703 + }, + { + "epoch": 0.25, + "grad_norm": 3.2378780841827393, + "learning_rate": 0.00029274373604564624, + "loss": 0.9419, + "step": 704 + }, + { + "epoch": 0.25, + "grad_norm": 3.353944778442383, + "learning_rate": 0.0002927065244356239, + "loss": 1.0087, + "step": 705 + }, + { + "epoch": 0.25, + "grad_norm": 4.185000896453857, + "learning_rate": 0.0002926693128256016, + "loss": 1.2567, + "step": 706 + }, + { + "epoch": 0.25, + "grad_norm": 3.718811511993408, + "learning_rate": 0.00029263210121557924, + "loss": 0.8007, + "step": 707 + }, + { + "epoch": 0.25, + "grad_norm": 4.834765911102295, + "learning_rate": 0.0002925948896055569, + "loss": 0.9665, + "step": 708 + }, + { + "epoch": 0.25, + "grad_norm": 5.415790557861328, + "learning_rate": 0.0002925576779955346, + "loss": 0.6987, + "step": 709 + }, + { + "epoch": 0.25, + "grad_norm": 3.706892490386963, + "learning_rate": 0.00029252046638551224, + "loss": 0.7334, + "step": 710 + }, + { + "epoch": 0.25, + "grad_norm": 3.0536246299743652, + "learning_rate": 0.00029248325477548994, + "loss": 0.9511, + "step": 711 + }, + { + "epoch": 0.25, + "grad_norm": 4.1888933181762695, + "learning_rate": 0.0002924460431654676, + "loss": 1.2429, + "step": 712 + }, + { + "epoch": 0.25, + "grad_norm": 3.904407262802124, + "learning_rate": 0.0002924088315554453, + "loss": 1.1109, + "step": 713 + }, + { + "epoch": 0.25, + "grad_norm": 4.773108005523682, + "learning_rate": 0.00029237161994542294, + "loss": 0.9709, + "step": 714 + }, + { + "epoch": 0.25, + "grad_norm": 2.608194351196289, + "learning_rate": 0.0002923344083354006, + "loss": 0.402, + "step": 715 + }, + { + "epoch": 0.25, + "grad_norm": 5.155666351318359, + "learning_rate": 0.0002922971967253783, + "loss": 1.4452, + "step": 716 + }, + { + "epoch": 0.25, + "grad_norm": 11.795857429504395, + "learning_rate": 0.000292259985115356, + "loss": 3.5267, + "step": 717 + }, + { + "epoch": 0.25, + "grad_norm": 4.475924491882324, + "learning_rate": 0.00029222277350533365, + "loss": 0.6192, + "step": 718 + }, + { + "epoch": 0.25, + "grad_norm": 4.139604568481445, + "learning_rate": 0.0002921855618953113, + "loss": 0.7032, + "step": 719 + }, + { + "epoch": 0.25, + "grad_norm": 5.846983909606934, + "learning_rate": 0.00029214835028528894, + "loss": 1.1969, + "step": 720 + }, + { + "epoch": 0.25, + "grad_norm": 4.0570387840271, + "learning_rate": 0.00029211113867526665, + "loss": 0.6138, + "step": 721 + }, + { + "epoch": 0.25, + "grad_norm": 4.584964275360107, + "learning_rate": 0.00029207392706524435, + "loss": 0.6882, + "step": 722 + }, + { + "epoch": 0.25, + "grad_norm": 8.10388469696045, + "learning_rate": 0.000292036715455222, + "loss": 0.3923, + "step": 723 + }, + { + "epoch": 0.25, + "grad_norm": 6.056919097900391, + "learning_rate": 0.0002919995038451997, + "loss": 1.0729, + "step": 724 + }, + { + "epoch": 0.25, + "grad_norm": 7.191409111022949, + "learning_rate": 0.00029196229223517735, + "loss": 1.0135, + "step": 725 + }, + { + "epoch": 0.25, + "grad_norm": 6.211596488952637, + "learning_rate": 0.000291925080625155, + "loss": 2.1364, + "step": 726 + }, + { + "epoch": 0.25, + "grad_norm": 3.2215120792388916, + "learning_rate": 0.0002918878690151327, + "loss": 0.9194, + "step": 727 + }, + { + "epoch": 0.26, + "grad_norm": 3.045462131500244, + "learning_rate": 0.00029185065740511035, + "loss": 0.7808, + "step": 728 + }, + { + "epoch": 0.26, + "grad_norm": 2.9927256107330322, + "learning_rate": 0.00029181344579508805, + "loss": 0.7217, + "step": 729 + }, + { + "epoch": 0.26, + "grad_norm": 3.7817091941833496, + "learning_rate": 0.0002917762341850657, + "loss": 0.724, + "step": 730 + }, + { + "epoch": 0.26, + "grad_norm": 3.1122050285339355, + "learning_rate": 0.0002917390225750434, + "loss": 0.7772, + "step": 731 + }, + { + "epoch": 0.26, + "grad_norm": 4.8279852867126465, + "learning_rate": 0.00029170181096502105, + "loss": 1.2088, + "step": 732 + }, + { + "epoch": 0.26, + "grad_norm": 3.453472852706909, + "learning_rate": 0.0002916645993549987, + "loss": 0.6257, + "step": 733 + }, + { + "epoch": 0.26, + "grad_norm": 4.543291091918945, + "learning_rate": 0.0002916273877449764, + "loss": 0.7873, + "step": 734 + }, + { + "epoch": 0.26, + "grad_norm": 3.899169445037842, + "learning_rate": 0.0002915901761349541, + "loss": 0.7794, + "step": 735 + }, + { + "epoch": 0.26, + "grad_norm": 4.045598030090332, + "learning_rate": 0.00029155296452493176, + "loss": 0.4835, + "step": 736 + }, + { + "epoch": 0.26, + "grad_norm": 4.990403175354004, + "learning_rate": 0.0002915157529149094, + "loss": 1.0899, + "step": 737 + }, + { + "epoch": 0.26, + "grad_norm": 7.7346110343933105, + "learning_rate": 0.0002914785413048871, + "loss": 1.0977, + "step": 738 + }, + { + "epoch": 0.26, + "grad_norm": 3.3284356594085693, + "learning_rate": 0.00029144132969486476, + "loss": 0.4629, + "step": 739 + }, + { + "epoch": 0.26, + "grad_norm": 3.3941850662231445, + "learning_rate": 0.00029140411808484246, + "loss": 0.9461, + "step": 740 + }, + { + "epoch": 0.26, + "grad_norm": 3.9711191654205322, + "learning_rate": 0.0002913669064748201, + "loss": 1.1025, + "step": 741 + }, + { + "epoch": 0.26, + "grad_norm": 19.457284927368164, + "learning_rate": 0.0002913296948647978, + "loss": 2.8168, + "step": 742 + }, + { + "epoch": 0.26, + "grad_norm": 5.4924750328063965, + "learning_rate": 0.00029129248325477546, + "loss": 1.3301, + "step": 743 + }, + { + "epoch": 0.26, + "grad_norm": 5.154421806335449, + "learning_rate": 0.0002912552716447531, + "loss": 0.8412, + "step": 744 + }, + { + "epoch": 0.26, + "grad_norm": 5.290253162384033, + "learning_rate": 0.0002912180600347308, + "loss": 0.9267, + "step": 745 + }, + { + "epoch": 0.26, + "grad_norm": 4.45554780960083, + "learning_rate": 0.00029118084842470846, + "loss": 0.6853, + "step": 746 + }, + { + "epoch": 0.26, + "grad_norm": 5.069665431976318, + "learning_rate": 0.00029114363681468616, + "loss": 0.6652, + "step": 747 + }, + { + "epoch": 0.26, + "grad_norm": 4.831662178039551, + "learning_rate": 0.00029110642520466387, + "loss": 0.7098, + "step": 748 + }, + { + "epoch": 0.26, + "grad_norm": 4.927679538726807, + "learning_rate": 0.0002910692135946415, + "loss": 0.7609, + "step": 749 + }, + { + "epoch": 0.26, + "grad_norm": 3.876534938812256, + "learning_rate": 0.00029103200198461916, + "loss": 0.7453, + "step": 750 + }, + { + "epoch": 0.26, + "grad_norm": 4.562718868255615, + "learning_rate": 0.00029099479037459687, + "loss": 1.7246, + "step": 751 + }, + { + "epoch": 0.26, + "grad_norm": 3.7739195823669434, + "learning_rate": 0.0002909575787645745, + "loss": 1.3101, + "step": 752 + }, + { + "epoch": 0.26, + "grad_norm": 3.139188051223755, + "learning_rate": 0.0002909203671545522, + "loss": 1.1718, + "step": 753 + }, + { + "epoch": 0.26, + "grad_norm": 2.7190756797790527, + "learning_rate": 0.00029088315554452987, + "loss": 0.7607, + "step": 754 + }, + { + "epoch": 0.26, + "grad_norm": 3.391061544418335, + "learning_rate": 0.00029084594393450757, + "loss": 0.937, + "step": 755 + }, + { + "epoch": 0.26, + "grad_norm": 2.2688794136047363, + "learning_rate": 0.0002908087323244852, + "loss": 0.4747, + "step": 756 + }, + { + "epoch": 0.27, + "grad_norm": 2.645151376724243, + "learning_rate": 0.00029077152071446287, + "loss": 0.6512, + "step": 757 + }, + { + "epoch": 0.27, + "grad_norm": 5.204065322875977, + "learning_rate": 0.00029073430910444057, + "loss": 0.7823, + "step": 758 + }, + { + "epoch": 0.27, + "grad_norm": 4.213620185852051, + "learning_rate": 0.0002906970974944182, + "loss": 1.1063, + "step": 759 + }, + { + "epoch": 0.27, + "grad_norm": 7.5675129890441895, + "learning_rate": 0.0002906598858843959, + "loss": 1.9301, + "step": 760 + }, + { + "epoch": 0.27, + "grad_norm": 3.451794385910034, + "learning_rate": 0.00029062267427437357, + "loss": 0.7759, + "step": 761 + }, + { + "epoch": 0.27, + "grad_norm": 3.535918712615967, + "learning_rate": 0.0002905854626643512, + "loss": 0.7944, + "step": 762 + }, + { + "epoch": 0.27, + "grad_norm": 5.344732761383057, + "learning_rate": 0.0002905482510543289, + "loss": 1.0658, + "step": 763 + }, + { + "epoch": 0.27, + "grad_norm": 3.685420274734497, + "learning_rate": 0.0002905110394443066, + "loss": 0.5017, + "step": 764 + }, + { + "epoch": 0.27, + "grad_norm": 5.2186055183410645, + "learning_rate": 0.00029047382783428427, + "loss": 2.056, + "step": 765 + }, + { + "epoch": 0.27, + "grad_norm": 6.418582439422607, + "learning_rate": 0.000290436616224262, + "loss": 1.7033, + "step": 766 + }, + { + "epoch": 0.27, + "grad_norm": 6.189699649810791, + "learning_rate": 0.0002903994046142396, + "loss": 1.5274, + "step": 767 + }, + { + "epoch": 0.27, + "grad_norm": 6.971052646636963, + "learning_rate": 0.00029036219300421727, + "loss": 0.889, + "step": 768 + }, + { + "epoch": 0.27, + "grad_norm": 5.264997959136963, + "learning_rate": 0.000290324981394195, + "loss": 0.7537, + "step": 769 + }, + { + "epoch": 0.27, + "grad_norm": 9.451274871826172, + "learning_rate": 0.0002902877697841726, + "loss": 0.6874, + "step": 770 + }, + { + "epoch": 0.27, + "grad_norm": 4.780513763427734, + "learning_rate": 0.0002902505581741503, + "loss": 0.9426, + "step": 771 + }, + { + "epoch": 0.27, + "grad_norm": 7.542695999145508, + "learning_rate": 0.000290213346564128, + "loss": 1.5722, + "step": 772 + }, + { + "epoch": 0.27, + "grad_norm": 9.267386436462402, + "learning_rate": 0.0002901761349541057, + "loss": 0.9906, + "step": 773 + }, + { + "epoch": 0.27, + "grad_norm": 4.18009090423584, + "learning_rate": 0.00029013892334408333, + "loss": 0.9282, + "step": 774 + }, + { + "epoch": 0.27, + "grad_norm": 15.079828262329102, + "learning_rate": 0.000290101711734061, + "loss": 2.3483, + "step": 775 + }, + { + "epoch": 0.27, + "grad_norm": 6.639895439147949, + "learning_rate": 0.0002900645001240387, + "loss": 1.7343, + "step": 776 + }, + { + "epoch": 0.27, + "grad_norm": 5.052302360534668, + "learning_rate": 0.00029002728851401633, + "loss": 1.4465, + "step": 777 + }, + { + "epoch": 0.27, + "grad_norm": 4.000107765197754, + "learning_rate": 0.00028999007690399403, + "loss": 0.9901, + "step": 778 + }, + { + "epoch": 0.27, + "grad_norm": 3.0668177604675293, + "learning_rate": 0.0002899528652939717, + "loss": 0.8454, + "step": 779 + }, + { + "epoch": 0.27, + "grad_norm": 2.6663155555725098, + "learning_rate": 0.0002899156536839494, + "loss": 0.7529, + "step": 780 + }, + { + "epoch": 0.27, + "grad_norm": 3.555816650390625, + "learning_rate": 0.00028987844207392703, + "loss": 0.6824, + "step": 781 + }, + { + "epoch": 0.27, + "grad_norm": 4.055764675140381, + "learning_rate": 0.00028984123046390473, + "loss": 1.1432, + "step": 782 + }, + { + "epoch": 0.27, + "grad_norm": 4.053997039794922, + "learning_rate": 0.0002898040188538824, + "loss": 1.0801, + "step": 783 + }, + { + "epoch": 0.27, + "grad_norm": 3.9936132431030273, + "learning_rate": 0.0002897668072438601, + "loss": 1.2037, + "step": 784 + }, + { + "epoch": 0.28, + "grad_norm": 4.756950378417969, + "learning_rate": 0.00028972959563383773, + "loss": 1.5041, + "step": 785 + }, + { + "epoch": 0.28, + "grad_norm": 3.913256883621216, + "learning_rate": 0.0002896923840238154, + "loss": 0.6958, + "step": 786 + }, + { + "epoch": 0.28, + "grad_norm": 4.463769912719727, + "learning_rate": 0.0002896551724137931, + "loss": 1.1124, + "step": 787 + }, + { + "epoch": 0.28, + "grad_norm": 4.305164813995361, + "learning_rate": 0.00028961796080377073, + "loss": 1.2586, + "step": 788 + }, + { + "epoch": 0.28, + "grad_norm": 2.605264663696289, + "learning_rate": 0.00028958074919374844, + "loss": 0.4304, + "step": 789 + }, + { + "epoch": 0.28, + "grad_norm": 5.45980167388916, + "learning_rate": 0.0002895435375837261, + "loss": 1.613, + "step": 790 + }, + { + "epoch": 0.28, + "grad_norm": 4.24216365814209, + "learning_rate": 0.0002895063259737038, + "loss": 1.0244, + "step": 791 + }, + { + "epoch": 0.28, + "grad_norm": 3.666222333908081, + "learning_rate": 0.00028946911436368144, + "loss": 0.6473, + "step": 792 + }, + { + "epoch": 0.28, + "grad_norm": 4.788506031036377, + "learning_rate": 0.0002894319027536591, + "loss": 0.8939, + "step": 793 + }, + { + "epoch": 0.28, + "grad_norm": 4.17887020111084, + "learning_rate": 0.0002893946911436368, + "loss": 1.0748, + "step": 794 + }, + { + "epoch": 0.28, + "grad_norm": 5.128133773803711, + "learning_rate": 0.0002893574795336145, + "loss": 0.864, + "step": 795 + }, + { + "epoch": 0.28, + "grad_norm": 5.645290851593018, + "learning_rate": 0.00028932026792359214, + "loss": 1.656, + "step": 796 + }, + { + "epoch": 0.28, + "grad_norm": 5.542060375213623, + "learning_rate": 0.00028928305631356984, + "loss": 1.016, + "step": 797 + }, + { + "epoch": 0.28, + "grad_norm": 3.639885425567627, + "learning_rate": 0.0002892458447035475, + "loss": 0.6474, + "step": 798 + }, + { + "epoch": 0.28, + "grad_norm": 4.543883800506592, + "learning_rate": 0.00028920863309352514, + "loss": 0.895, + "step": 799 + }, + { + "epoch": 0.28, + "grad_norm": 6.362172603607178, + "learning_rate": 0.00028917142148350284, + "loss": 2.4732, + "step": 800 + }, + { + "epoch": 0.28, + "eval_loss": 0.987212598323822, + "eval_runtime": 50.7349, + "eval_samples_per_second": 42.732, + "eval_steps_per_second": 10.683, + "eval_wer": 0.7189932537623248, + "step": 800 + }, + { + "epoch": 0.28, + "grad_norm": 4.333866596221924, + "learning_rate": 0.0002891342098734805, + "loss": 1.3953, + "step": 801 + }, + { + "epoch": 0.28, + "grad_norm": 4.874297142028809, + "learning_rate": 0.0002890969982634582, + "loss": 1.6125, + "step": 802 + }, + { + "epoch": 0.28, + "grad_norm": 4.766399383544922, + "learning_rate": 0.00028905978665343584, + "loss": 1.0827, + "step": 803 + }, + { + "epoch": 0.28, + "grad_norm": 4.576672077178955, + "learning_rate": 0.0002890225750434135, + "loss": 0.8876, + "step": 804 + }, + { + "epoch": 0.28, + "grad_norm": 2.9895853996276855, + "learning_rate": 0.0002889853634333912, + "loss": 0.7372, + "step": 805 + }, + { + "epoch": 0.28, + "grad_norm": 5.91171407699585, + "learning_rate": 0.00028894815182336884, + "loss": 2.1011, + "step": 806 + }, + { + "epoch": 0.28, + "grad_norm": 5.903454303741455, + "learning_rate": 0.00028891094021334655, + "loss": 1.009, + "step": 807 + }, + { + "epoch": 0.28, + "grad_norm": 5.659839630126953, + "learning_rate": 0.00028887372860332425, + "loss": 0.8499, + "step": 808 + }, + { + "epoch": 0.28, + "grad_norm": 5.591513156890869, + "learning_rate": 0.0002888365169933019, + "loss": 0.9237, + "step": 809 + }, + { + "epoch": 0.28, + "grad_norm": 6.837412357330322, + "learning_rate": 0.00028879930538327955, + "loss": 1.2858, + "step": 810 + }, + { + "epoch": 0.28, + "grad_norm": 5.4030585289001465, + "learning_rate": 0.0002887620937732572, + "loss": 1.3576, + "step": 811 + }, + { + "epoch": 0.28, + "grad_norm": 4.567882537841797, + "learning_rate": 0.0002887248821632349, + "loss": 1.4714, + "step": 812 + }, + { + "epoch": 0.28, + "grad_norm": 3.555302381515503, + "learning_rate": 0.0002886876705532126, + "loss": 0.4524, + "step": 813 + }, + { + "epoch": 0.29, + "grad_norm": 2.767725706100464, + "learning_rate": 0.00028865045894319025, + "loss": 0.4047, + "step": 814 + }, + { + "epoch": 0.29, + "grad_norm": 4.69365930557251, + "learning_rate": 0.00028861324733316795, + "loss": 0.835, + "step": 815 + }, + { + "epoch": 0.29, + "grad_norm": 6.788897514343262, + "learning_rate": 0.0002885760357231456, + "loss": 1.5947, + "step": 816 + }, + { + "epoch": 0.29, + "grad_norm": 3.98279070854187, + "learning_rate": 0.00028853882411312325, + "loss": 0.8821, + "step": 817 + }, + { + "epoch": 0.29, + "grad_norm": 4.854119777679443, + "learning_rate": 0.00028850161250310095, + "loss": 0.9184, + "step": 818 + }, + { + "epoch": 0.29, + "grad_norm": 4.863973617553711, + "learning_rate": 0.0002884644008930786, + "loss": 0.9386, + "step": 819 + }, + { + "epoch": 0.29, + "grad_norm": 7.1155266761779785, + "learning_rate": 0.0002884271892830563, + "loss": 1.128, + "step": 820 + }, + { + "epoch": 0.29, + "grad_norm": 5.065074920654297, + "learning_rate": 0.00028838997767303395, + "loss": 0.7205, + "step": 821 + }, + { + "epoch": 0.29, + "grad_norm": 4.343176364898682, + "learning_rate": 0.00028835276606301166, + "loss": 0.6354, + "step": 822 + }, + { + "epoch": 0.29, + "grad_norm": 5.124356746673584, + "learning_rate": 0.0002883155544529893, + "loss": 0.6844, + "step": 823 + }, + { + "epoch": 0.29, + "grad_norm": 5.882658958435059, + "learning_rate": 0.00028827834284296695, + "loss": 1.1918, + "step": 824 + }, + { + "epoch": 0.29, + "grad_norm": 6.812358379364014, + "learning_rate": 0.00028824113123294466, + "loss": 1.0781, + "step": 825 + }, + { + "epoch": 0.29, + "grad_norm": 4.14019250869751, + "learning_rate": 0.00028820391962292236, + "loss": 1.4099, + "step": 826 + }, + { + "epoch": 0.29, + "grad_norm": 4.570350646972656, + "learning_rate": 0.0002881667080129, + "loss": 1.0887, + "step": 827 + }, + { + "epoch": 0.29, + "grad_norm": 3.517451047897339, + "learning_rate": 0.00028812949640287766, + "loss": 0.6863, + "step": 828 + }, + { + "epoch": 0.29, + "grad_norm": 5.128011703491211, + "learning_rate": 0.0002880922847928553, + "loss": 0.8951, + "step": 829 + }, + { + "epoch": 0.29, + "grad_norm": 6.206776142120361, + "learning_rate": 0.000288055073182833, + "loss": 0.6493, + "step": 830 + }, + { + "epoch": 0.29, + "grad_norm": 2.9472367763519287, + "learning_rate": 0.0002880178615728107, + "loss": 0.6847, + "step": 831 + }, + { + "epoch": 0.29, + "grad_norm": 4.013580799102783, + "learning_rate": 0.00028798064996278836, + "loss": 0.553, + "step": 832 + }, + { + "epoch": 0.29, + "grad_norm": 2.9942123889923096, + "learning_rate": 0.00028794343835276606, + "loss": 1.1586, + "step": 833 + }, + { + "epoch": 0.29, + "grad_norm": 5.667693138122559, + "learning_rate": 0.0002879062267427437, + "loss": 1.1743, + "step": 834 + }, + { + "epoch": 0.29, + "grad_norm": 3.7603282928466797, + "learning_rate": 0.00028786901513272136, + "loss": 0.8572, + "step": 835 + }, + { + "epoch": 0.29, + "grad_norm": 3.591860294342041, + "learning_rate": 0.00028783180352269906, + "loss": 0.6201, + "step": 836 + }, + { + "epoch": 0.29, + "grad_norm": 4.3808979988098145, + "learning_rate": 0.0002877945919126767, + "loss": 0.7408, + "step": 837 + }, + { + "epoch": 0.29, + "grad_norm": 2.802478313446045, + "learning_rate": 0.0002877573803026544, + "loss": 0.5719, + "step": 838 + }, + { + "epoch": 0.29, + "grad_norm": 3.7019598484039307, + "learning_rate": 0.0002877201686926321, + "loss": 0.7113, + "step": 839 + }, + { + "epoch": 0.29, + "grad_norm": 4.928816318511963, + "learning_rate": 0.00028768295708260977, + "loss": 1.0754, + "step": 840 + }, + { + "epoch": 0.29, + "grad_norm": 4.269992828369141, + "learning_rate": 0.0002876457454725874, + "loss": 0.5499, + "step": 841 + }, + { + "epoch": 0.3, + "grad_norm": 5.93079948425293, + "learning_rate": 0.00028760853386256506, + "loss": 0.9921, + "step": 842 + }, + { + "epoch": 0.3, + "grad_norm": 3.9162845611572266, + "learning_rate": 0.00028757132225254277, + "loss": 0.4579, + "step": 843 + }, + { + "epoch": 0.3, + "grad_norm": 4.143979549407959, + "learning_rate": 0.00028753411064252047, + "loss": 0.5102, + "step": 844 + }, + { + "epoch": 0.3, + "grad_norm": 7.756710052490234, + "learning_rate": 0.0002874968990324981, + "loss": 0.9789, + "step": 845 + }, + { + "epoch": 0.3, + "grad_norm": 9.213879585266113, + "learning_rate": 0.00028745968742247577, + "loss": 1.778, + "step": 846 + }, + { + "epoch": 0.3, + "grad_norm": 6.540843486785889, + "learning_rate": 0.00028742247581245347, + "loss": 2.657, + "step": 847 + }, + { + "epoch": 0.3, + "grad_norm": 3.8558125495910645, + "learning_rate": 0.0002873852642024311, + "loss": 0.6151, + "step": 848 + }, + { + "epoch": 0.3, + "grad_norm": 4.576344013214111, + "learning_rate": 0.0002873480525924088, + "loss": 0.6595, + "step": 849 + }, + { + "epoch": 0.3, + "grad_norm": 10.87994384765625, + "learning_rate": 0.00028731084098238647, + "loss": 0.8652, + "step": 850 + }, + { + "epoch": 0.3, + "grad_norm": 3.8747811317443848, + "learning_rate": 0.0002872736293723642, + "loss": 1.7178, + "step": 851 + }, + { + "epoch": 0.3, + "grad_norm": 4.656328201293945, + "learning_rate": 0.0002872364177623418, + "loss": 1.8586, + "step": 852 + }, + { + "epoch": 0.3, + "grad_norm": 3.2285523414611816, + "learning_rate": 0.00028719920615231947, + "loss": 1.324, + "step": 853 + }, + { + "epoch": 0.3, + "grad_norm": 5.688818454742432, + "learning_rate": 0.0002871619945422972, + "loss": 1.2527, + "step": 854 + }, + { + "epoch": 0.3, + "grad_norm": 4.996300220489502, + "learning_rate": 0.0002871247829322748, + "loss": 0.8493, + "step": 855 + }, + { + "epoch": 0.3, + "grad_norm": 4.3291850090026855, + "learning_rate": 0.0002870875713222525, + "loss": 0.9196, + "step": 856 + }, + { + "epoch": 0.3, + "grad_norm": 3.986222267150879, + "learning_rate": 0.00028705035971223023, + "loss": 0.9148, + "step": 857 + }, + { + "epoch": 0.3, + "grad_norm": 3.2459797859191895, + "learning_rate": 0.0002870131481022079, + "loss": 0.6929, + "step": 858 + }, + { + "epoch": 0.3, + "grad_norm": 4.315959930419922, + "learning_rate": 0.0002869759364921855, + "loss": 0.9603, + "step": 859 + }, + { + "epoch": 0.3, + "grad_norm": 3.7050576210021973, + "learning_rate": 0.0002869387248821632, + "loss": 0.9597, + "step": 860 + }, + { + "epoch": 0.3, + "grad_norm": 3.6986582279205322, + "learning_rate": 0.0002869015132721409, + "loss": 0.8796, + "step": 861 + }, + { + "epoch": 0.3, + "grad_norm": 2.9992449283599854, + "learning_rate": 0.0002868643016621186, + "loss": 0.7877, + "step": 862 + }, + { + "epoch": 0.3, + "grad_norm": 3.221742630004883, + "learning_rate": 0.00028682709005209623, + "loss": 1.0128, + "step": 863 + }, + { + "epoch": 0.3, + "grad_norm": 3.570732593536377, + "learning_rate": 0.00028678987844207393, + "loss": 0.5447, + "step": 864 + }, + { + "epoch": 0.3, + "grad_norm": 5.575841903686523, + "learning_rate": 0.0002867526668320516, + "loss": 1.1387, + "step": 865 + }, + { + "epoch": 0.3, + "grad_norm": 3.252793788909912, + "learning_rate": 0.00028671545522202923, + "loss": 0.659, + "step": 866 + }, + { + "epoch": 0.3, + "grad_norm": 4.882000923156738, + "learning_rate": 0.00028667824361200693, + "loss": 0.4831, + "step": 867 + }, + { + "epoch": 0.3, + "grad_norm": 4.8467326164245605, + "learning_rate": 0.0002866410320019846, + "loss": 1.0795, + "step": 868 + }, + { + "epoch": 0.3, + "grad_norm": 6.496115684509277, + "learning_rate": 0.0002866038203919623, + "loss": 1.1627, + "step": 869 + }, + { + "epoch": 0.3, + "grad_norm": 4.330075263977051, + "learning_rate": 0.00028656660878193993, + "loss": 0.5286, + "step": 870 + }, + { + "epoch": 0.31, + "grad_norm": 7.284979820251465, + "learning_rate": 0.0002865293971719176, + "loss": 1.3597, + "step": 871 + }, + { + "epoch": 0.31, + "grad_norm": 3.145815372467041, + "learning_rate": 0.0002864921855618953, + "loss": 0.5407, + "step": 872 + }, + { + "epoch": 0.31, + "grad_norm": 6.272894382476807, + "learning_rate": 0.00028645497395187293, + "loss": 1.104, + "step": 873 + }, + { + "epoch": 0.31, + "grad_norm": 5.079260349273682, + "learning_rate": 0.00028641776234185064, + "loss": 0.9572, + "step": 874 + }, + { + "epoch": 0.31, + "grad_norm": 9.606317520141602, + "learning_rate": 0.00028638055073182834, + "loss": 0.9342, + "step": 875 + }, + { + "epoch": 0.31, + "grad_norm": 4.756997585296631, + "learning_rate": 0.000286343339121806, + "loss": 1.7561, + "step": 876 + }, + { + "epoch": 0.31, + "grad_norm": 3.379587173461914, + "learning_rate": 0.00028630612751178364, + "loss": 0.8307, + "step": 877 + }, + { + "epoch": 0.31, + "grad_norm": 2.6264638900756836, + "learning_rate": 0.0002862689159017613, + "loss": 0.7802, + "step": 878 + }, + { + "epoch": 0.31, + "grad_norm": 4.880773544311523, + "learning_rate": 0.000286231704291739, + "loss": 0.7768, + "step": 879 + }, + { + "epoch": 0.31, + "grad_norm": 4.7575883865356445, + "learning_rate": 0.0002861944926817167, + "loss": 1.4659, + "step": 880 + }, + { + "epoch": 0.31, + "grad_norm": 3.334516763687134, + "learning_rate": 0.00028615728107169434, + "loss": 1.3035, + "step": 881 + }, + { + "epoch": 0.31, + "grad_norm": 3.547193765640259, + "learning_rate": 0.00028612006946167204, + "loss": 0.5167, + "step": 882 + }, + { + "epoch": 0.31, + "grad_norm": 2.4741361141204834, + "learning_rate": 0.0002860828578516497, + "loss": 0.582, + "step": 883 + }, + { + "epoch": 0.31, + "grad_norm": 3.9844954013824463, + "learning_rate": 0.00028604564624162734, + "loss": 0.5924, + "step": 884 + }, + { + "epoch": 0.31, + "grad_norm": 2.942441940307617, + "learning_rate": 0.00028600843463160504, + "loss": 0.6893, + "step": 885 + }, + { + "epoch": 0.31, + "grad_norm": 4.320768356323242, + "learning_rate": 0.0002859712230215827, + "loss": 0.8256, + "step": 886 + }, + { + "epoch": 0.31, + "grad_norm": 4.074968338012695, + "learning_rate": 0.0002859340114115604, + "loss": 1.0719, + "step": 887 + }, + { + "epoch": 0.31, + "grad_norm": 3.55584716796875, + "learning_rate": 0.00028589679980153804, + "loss": 1.0453, + "step": 888 + }, + { + "epoch": 0.31, + "grad_norm": 4.58350133895874, + "learning_rate": 0.00028585958819151575, + "loss": 0.4654, + "step": 889 + }, + { + "epoch": 0.31, + "grad_norm": 5.6324543952941895, + "learning_rate": 0.0002858223765814934, + "loss": 1.0127, + "step": 890 + }, + { + "epoch": 0.31, + "grad_norm": 4.809018611907959, + "learning_rate": 0.00028578516497147104, + "loss": 1.1784, + "step": 891 + }, + { + "epoch": 0.31, + "grad_norm": 4.3140645027160645, + "learning_rate": 0.00028574795336144875, + "loss": 0.9088, + "step": 892 + }, + { + "epoch": 0.31, + "grad_norm": 5.170782566070557, + "learning_rate": 0.00028571074175142645, + "loss": 1.2303, + "step": 893 + }, + { + "epoch": 0.31, + "grad_norm": 3.350752592086792, + "learning_rate": 0.0002856735301414041, + "loss": 0.8323, + "step": 894 + }, + { + "epoch": 0.31, + "grad_norm": 4.265800476074219, + "learning_rate": 0.00028563631853138175, + "loss": 0.9239, + "step": 895 + }, + { + "epoch": 0.31, + "grad_norm": 3.861973762512207, + "learning_rate": 0.00028559910692135945, + "loss": 0.5933, + "step": 896 + }, + { + "epoch": 0.31, + "grad_norm": 5.536797046661377, + "learning_rate": 0.0002855618953113371, + "loss": 0.5246, + "step": 897 + }, + { + "epoch": 0.31, + "grad_norm": 6.882984638214111, + "learning_rate": 0.0002855246837013148, + "loss": 0.782, + "step": 898 + }, + { + "epoch": 0.31, + "grad_norm": 3.6057496070861816, + "learning_rate": 0.00028548747209129245, + "loss": 0.8962, + "step": 899 + }, + { + "epoch": 0.32, + "grad_norm": 10.800071716308594, + "learning_rate": 0.00028545026048127015, + "loss": 0.6917, + "step": 900 + }, + { + "epoch": 0.32, + "eval_loss": 0.9631061553955078, + "eval_runtime": 51.078, + "eval_samples_per_second": 42.445, + "eval_steps_per_second": 10.611, + "eval_wer": 0.7361183186299948, + "step": 900 + }, + { + "epoch": 0.32, + "grad_norm": 3.3842124938964844, + "learning_rate": 0.0002854130488712478, + "loss": 1.3777, + "step": 901 + }, + { + "epoch": 0.32, + "grad_norm": 3.1951818466186523, + "learning_rate": 0.00028537583726122545, + "loss": 1.0767, + "step": 902 + }, + { + "epoch": 0.32, + "grad_norm": 2.5450639724731445, + "learning_rate": 0.00028533862565120315, + "loss": 0.714, + "step": 903 + }, + { + "epoch": 0.32, + "grad_norm": 2.42718505859375, + "learning_rate": 0.0002853014140411808, + "loss": 0.5684, + "step": 904 + }, + { + "epoch": 0.32, + "grad_norm": 5.6511759757995605, + "learning_rate": 0.0002852642024311585, + "loss": 1.1985, + "step": 905 + }, + { + "epoch": 0.32, + "grad_norm": 3.286999225616455, + "learning_rate": 0.0002852269908211362, + "loss": 0.9587, + "step": 906 + }, + { + "epoch": 0.32, + "grad_norm": 3.237370014190674, + "learning_rate": 0.00028518977921111386, + "loss": 0.4929, + "step": 907 + }, + { + "epoch": 0.32, + "grad_norm": 3.4097900390625, + "learning_rate": 0.0002851525676010915, + "loss": 0.8108, + "step": 908 + }, + { + "epoch": 0.32, + "grad_norm": 3.5256292819976807, + "learning_rate": 0.00028511535599106915, + "loss": 1.0193, + "step": 909 + }, + { + "epoch": 0.32, + "grad_norm": 3.9239373207092285, + "learning_rate": 0.00028507814438104686, + "loss": 1.0205, + "step": 910 + }, + { + "epoch": 0.32, + "grad_norm": 6.755927085876465, + "learning_rate": 0.00028504093277102456, + "loss": 1.2051, + "step": 911 + }, + { + "epoch": 0.32, + "grad_norm": 2.922299385070801, + "learning_rate": 0.0002850037211610022, + "loss": 0.6658, + "step": 912 + }, + { + "epoch": 0.32, + "grad_norm": 3.0863943099975586, + "learning_rate": 0.00028496650955097986, + "loss": 0.8485, + "step": 913 + }, + { + "epoch": 0.32, + "grad_norm": 3.762369394302368, + "learning_rate": 0.00028492929794095756, + "loss": 0.8154, + "step": 914 + }, + { + "epoch": 0.32, + "grad_norm": 2.955242395401001, + "learning_rate": 0.0002848920863309352, + "loss": 0.8395, + "step": 915 + }, + { + "epoch": 0.32, + "grad_norm": 4.491308212280273, + "learning_rate": 0.0002848548747209129, + "loss": 0.7699, + "step": 916 + }, + { + "epoch": 0.32, + "grad_norm": 6.529202938079834, + "learning_rate": 0.00028481766311089056, + "loss": 2.1599, + "step": 917 + }, + { + "epoch": 0.32, + "grad_norm": 2.4373815059661865, + "learning_rate": 0.00028478045150086826, + "loss": 0.3231, + "step": 918 + }, + { + "epoch": 0.32, + "grad_norm": 6.301473140716553, + "learning_rate": 0.0002847432398908459, + "loss": 0.4571, + "step": 919 + }, + { + "epoch": 0.32, + "grad_norm": 4.855682373046875, + "learning_rate": 0.00028470602828082356, + "loss": 0.5254, + "step": 920 + }, + { + "epoch": 0.32, + "grad_norm": 3.8739993572235107, + "learning_rate": 0.00028466881667080126, + "loss": 0.5581, + "step": 921 + }, + { + "epoch": 0.32, + "grad_norm": 5.096916198730469, + "learning_rate": 0.0002846316050607789, + "loss": 0.8322, + "step": 922 + }, + { + "epoch": 0.32, + "grad_norm": 6.568998336791992, + "learning_rate": 0.0002845943934507566, + "loss": 0.4273, + "step": 923 + }, + { + "epoch": 0.32, + "grad_norm": 6.560142993927002, + "learning_rate": 0.0002845571818407343, + "loss": 0.9331, + "step": 924 + }, + { + "epoch": 0.32, + "grad_norm": 3.5637927055358887, + "learning_rate": 0.00028451997023071197, + "loss": 0.3868, + "step": 925 + }, + { + "epoch": 0.32, + "grad_norm": 6.0603837966918945, + "learning_rate": 0.0002844827586206896, + "loss": 1.7794, + "step": 926 + }, + { + "epoch": 0.32, + "grad_norm": 6.264075756072998, + "learning_rate": 0.0002844455470106673, + "loss": 1.1889, + "step": 927 + }, + { + "epoch": 0.33, + "grad_norm": 4.174050331115723, + "learning_rate": 0.00028440833540064497, + "loss": 0.9896, + "step": 928 + }, + { + "epoch": 0.33, + "grad_norm": 2.8910465240478516, + "learning_rate": 0.00028437112379062267, + "loss": 0.7981, + "step": 929 + }, + { + "epoch": 0.33, + "grad_norm": 3.8150928020477295, + "learning_rate": 0.0002843339121806003, + "loss": 0.9314, + "step": 930 + }, + { + "epoch": 0.33, + "grad_norm": 5.244769096374512, + "learning_rate": 0.000284296700570578, + "loss": 0.7626, + "step": 931 + }, + { + "epoch": 0.33, + "grad_norm": 2.8552732467651367, + "learning_rate": 0.00028425948896055567, + "loss": 0.6992, + "step": 932 + }, + { + "epoch": 0.33, + "grad_norm": 3.5984230041503906, + "learning_rate": 0.0002842222773505333, + "loss": 1.0394, + "step": 933 + }, + { + "epoch": 0.33, + "grad_norm": 7.442532062530518, + "learning_rate": 0.000284185065740511, + "loss": 1.0194, + "step": 934 + }, + { + "epoch": 0.33, + "grad_norm": 4.221444606781006, + "learning_rate": 0.00028414785413048867, + "loss": 1.116, + "step": 935 + }, + { + "epoch": 0.33, + "grad_norm": 3.7174394130706787, + "learning_rate": 0.00028411064252046637, + "loss": 1.2957, + "step": 936 + }, + { + "epoch": 0.33, + "grad_norm": 4.641437530517578, + "learning_rate": 0.000284073430910444, + "loss": 0.9645, + "step": 937 + }, + { + "epoch": 0.33, + "grad_norm": 3.738492965698242, + "learning_rate": 0.0002840362193004217, + "loss": 1.0473, + "step": 938 + }, + { + "epoch": 0.33, + "grad_norm": 4.242377758026123, + "learning_rate": 0.00028399900769039937, + "loss": 0.9279, + "step": 939 + }, + { + "epoch": 0.33, + "grad_norm": 5.320493221282959, + "learning_rate": 0.0002839617960803771, + "loss": 0.6088, + "step": 940 + }, + { + "epoch": 0.33, + "grad_norm": 4.363595962524414, + "learning_rate": 0.0002839245844703547, + "loss": 0.9191, + "step": 941 + }, + { + "epoch": 0.33, + "grad_norm": 3.4770991802215576, + "learning_rate": 0.0002838873728603324, + "loss": 0.6281, + "step": 942 + }, + { + "epoch": 0.33, + "grad_norm": 7.542201519012451, + "learning_rate": 0.0002838501612503101, + "loss": 2.1469, + "step": 943 + }, + { + "epoch": 0.33, + "grad_norm": 7.0690202713012695, + "learning_rate": 0.0002838129496402877, + "loss": 1.5071, + "step": 944 + }, + { + "epoch": 0.33, + "grad_norm": 5.570958614349365, + "learning_rate": 0.00028377573803026543, + "loss": 0.7142, + "step": 945 + }, + { + "epoch": 0.33, + "grad_norm": 6.251632213592529, + "learning_rate": 0.0002837385264202431, + "loss": 1.2639, + "step": 946 + }, + { + "epoch": 0.33, + "grad_norm": 4.675748348236084, + "learning_rate": 0.0002837013148102208, + "loss": 0.9949, + "step": 947 + }, + { + "epoch": 0.33, + "grad_norm": 4.878772258758545, + "learning_rate": 0.00028366410320019843, + "loss": 0.9438, + "step": 948 + }, + { + "epoch": 0.33, + "grad_norm": 4.873948574066162, + "learning_rate": 0.00028362689159017613, + "loss": 0.8105, + "step": 949 + }, + { + "epoch": 0.33, + "grad_norm": 2.9942407608032227, + "learning_rate": 0.0002835896799801538, + "loss": 0.7128, + "step": 950 + }, + { + "epoch": 0.33, + "grad_norm": 4.542831897735596, + "learning_rate": 0.00028355246837013143, + "loss": 1.2922, + "step": 951 + }, + { + "epoch": 0.33, + "grad_norm": 2.6374423503875732, + "learning_rate": 0.00028351525676010913, + "loss": 0.912, + "step": 952 + }, + { + "epoch": 0.33, + "grad_norm": 3.016441822052002, + "learning_rate": 0.00028347804515008683, + "loss": 0.9093, + "step": 953 + }, + { + "epoch": 0.33, + "grad_norm": 2.8477137088775635, + "learning_rate": 0.0002834408335400645, + "loss": 0.7698, + "step": 954 + }, + { + "epoch": 0.33, + "grad_norm": 4.108532428741455, + "learning_rate": 0.00028340362193004213, + "loss": 1.0006, + "step": 955 + }, + { + "epoch": 0.33, + "grad_norm": 5.331216335296631, + "learning_rate": 0.00028336641032001983, + "loss": 1.0243, + "step": 956 + }, + { + "epoch": 0.34, + "grad_norm": 3.312037467956543, + "learning_rate": 0.0002833291987099975, + "loss": 0.6803, + "step": 957 + }, + { + "epoch": 0.34, + "grad_norm": 3.1434242725372314, + "learning_rate": 0.0002832919870999752, + "loss": 0.3665, + "step": 958 + }, + { + "epoch": 0.34, + "grad_norm": 5.74772834777832, + "learning_rate": 0.00028325477548995283, + "loss": 1.1212, + "step": 959 + }, + { + "epoch": 0.34, + "grad_norm": 3.111370325088501, + "learning_rate": 0.00028321756387993054, + "loss": 0.4247, + "step": 960 + }, + { + "epoch": 0.34, + "grad_norm": 4.159219741821289, + "learning_rate": 0.0002831803522699082, + "loss": 0.8041, + "step": 961 + }, + { + "epoch": 0.34, + "grad_norm": 5.435075759887695, + "learning_rate": 0.00028314314065988583, + "loss": 2.0447, + "step": 962 + }, + { + "epoch": 0.34, + "grad_norm": 4.009119987487793, + "learning_rate": 0.00028310592904986354, + "loss": 0.5044, + "step": 963 + }, + { + "epoch": 0.34, + "grad_norm": 3.7699248790740967, + "learning_rate": 0.0002830687174398412, + "loss": 0.3857, + "step": 964 + }, + { + "epoch": 0.34, + "grad_norm": 2.6067960262298584, + "learning_rate": 0.0002830315058298189, + "loss": 0.3094, + "step": 965 + }, + { + "epoch": 0.34, + "grad_norm": 2.571464776992798, + "learning_rate": 0.00028299429421979654, + "loss": 0.3871, + "step": 966 + }, + { + "epoch": 0.34, + "grad_norm": 9.788469314575195, + "learning_rate": 0.00028295708260977424, + "loss": 1.292, + "step": 967 + }, + { + "epoch": 0.34, + "grad_norm": 4.8277058601379395, + "learning_rate": 0.0002829198709997519, + "loss": 0.5882, + "step": 968 + }, + { + "epoch": 0.34, + "grad_norm": 5.459136486053467, + "learning_rate": 0.00028288265938972954, + "loss": 1.0885, + "step": 969 + }, + { + "epoch": 0.34, + "grad_norm": 3.053255558013916, + "learning_rate": 0.00028284544777970724, + "loss": 0.4897, + "step": 970 + }, + { + "epoch": 0.34, + "grad_norm": 4.948587417602539, + "learning_rate": 0.00028280823616968494, + "loss": 1.4657, + "step": 971 + }, + { + "epoch": 0.34, + "grad_norm": 6.1781206130981445, + "learning_rate": 0.0002827710245596626, + "loss": 0.903, + "step": 972 + }, + { + "epoch": 0.34, + "grad_norm": 7.313625812530518, + "learning_rate": 0.0002827338129496403, + "loss": 2.1275, + "step": 973 + }, + { + "epoch": 0.34, + "grad_norm": 5.631922721862793, + "learning_rate": 0.00028269660133961794, + "loss": 1.1911, + "step": 974 + }, + { + "epoch": 0.34, + "grad_norm": 11.282684326171875, + "learning_rate": 0.0002826593897295956, + "loss": 2.9956, + "step": 975 + }, + { + "epoch": 0.34, + "grad_norm": 7.1073317527771, + "learning_rate": 0.0002826221781195733, + "loss": 2.037, + "step": 976 + }, + { + "epoch": 0.34, + "grad_norm": 18.51368522644043, + "learning_rate": 0.00028258496650955094, + "loss": 2.3501, + "step": 977 + }, + { + "epoch": 0.34, + "grad_norm": 3.6810758113861084, + "learning_rate": 0.00028254775489952865, + "loss": 1.236, + "step": 978 + }, + { + "epoch": 0.34, + "grad_norm": 3.3135673999786377, + "learning_rate": 0.0002825105432895063, + "loss": 0.8256, + "step": 979 + }, + { + "epoch": 0.34, + "grad_norm": 3.1581242084503174, + "learning_rate": 0.000282473331679484, + "loss": 0.7498, + "step": 980 + }, + { + "epoch": 0.34, + "grad_norm": 3.0917932987213135, + "learning_rate": 0.00028243612006946165, + "loss": 0.7558, + "step": 981 + }, + { + "epoch": 0.34, + "grad_norm": 3.050999164581299, + "learning_rate": 0.0002823989084594393, + "loss": 0.7646, + "step": 982 + }, + { + "epoch": 0.34, + "grad_norm": 4.006269454956055, + "learning_rate": 0.000282361696849417, + "loss": 0.5423, + "step": 983 + }, + { + "epoch": 0.34, + "grad_norm": 3.775635242462158, + "learning_rate": 0.0002823244852393947, + "loss": 0.5903, + "step": 984 + }, + { + "epoch": 0.35, + "grad_norm": 2.8768789768218994, + "learning_rate": 0.00028228727362937235, + "loss": 0.8378, + "step": 985 + }, + { + "epoch": 0.35, + "grad_norm": 5.105539321899414, + "learning_rate": 0.00028225006201935, + "loss": 0.7609, + "step": 986 + }, + { + "epoch": 0.35, + "grad_norm": 3.927220344543457, + "learning_rate": 0.00028221285040932765, + "loss": 0.5829, + "step": 987 + }, + { + "epoch": 0.35, + "grad_norm": 4.363030433654785, + "learning_rate": 0.00028217563879930535, + "loss": 0.5635, + "step": 988 + }, + { + "epoch": 0.35, + "grad_norm": 3.495129108428955, + "learning_rate": 0.00028213842718928305, + "loss": 0.8921, + "step": 989 + }, + { + "epoch": 0.35, + "grad_norm": 4.773660659790039, + "learning_rate": 0.0002821012155792607, + "loss": 1.5913, + "step": 990 + }, + { + "epoch": 0.35, + "grad_norm": 3.911088228225708, + "learning_rate": 0.0002820640039692384, + "loss": 1.5924, + "step": 991 + }, + { + "epoch": 0.35, + "grad_norm": 4.539827823638916, + "learning_rate": 0.00028202679235921605, + "loss": 0.8482, + "step": 992 + }, + { + "epoch": 0.35, + "grad_norm": 3.734384298324585, + "learning_rate": 0.0002819895807491937, + "loss": 0.8819, + "step": 993 + }, + { + "epoch": 0.35, + "grad_norm": 5.120278835296631, + "learning_rate": 0.0002819523691391714, + "loss": 0.8981, + "step": 994 + }, + { + "epoch": 0.35, + "grad_norm": 4.272894382476807, + "learning_rate": 0.00028191515752914905, + "loss": 0.7838, + "step": 995 + }, + { + "epoch": 0.35, + "grad_norm": 5.84804105758667, + "learning_rate": 0.00028187794591912676, + "loss": 0.46, + "step": 996 + }, + { + "epoch": 0.35, + "grad_norm": 9.430734634399414, + "learning_rate": 0.0002818407343091044, + "loss": 0.6851, + "step": 997 + }, + { + "epoch": 0.35, + "grad_norm": 3.4790775775909424, + "learning_rate": 0.0002818035226990821, + "loss": 0.6983, + "step": 998 + }, + { + "epoch": 0.35, + "grad_norm": 3.3269646167755127, + "learning_rate": 0.00028176631108905976, + "loss": 0.2755, + "step": 999 + }, + { + "epoch": 0.35, + "grad_norm": 9.969862937927246, + "learning_rate": 0.0002817290994790374, + "loss": 1.5394, + "step": 1000 + }, + { + "epoch": 0.35, + "eval_loss": 0.8975178599357605, + "eval_runtime": 51.1763, + "eval_samples_per_second": 42.363, + "eval_steps_per_second": 10.591, + "eval_wer": 0.7244421380384016, + "step": 1000 + }, + { + "epoch": 0.35, + "grad_norm": 8.002113342285156, + "learning_rate": 0.0002816918878690151, + "loss": 1.4136, + "step": 1001 + }, + { + "epoch": 0.35, + "grad_norm": 2.763845920562744, + "learning_rate": 0.0002816546762589928, + "loss": 0.6863, + "step": 1002 + }, + { + "epoch": 0.35, + "grad_norm": 3.5052077770233154, + "learning_rate": 0.00028161746464897046, + "loss": 1.2483, + "step": 1003 + }, + { + "epoch": 0.35, + "grad_norm": 3.184037685394287, + "learning_rate": 0.0002815802530389481, + "loss": 0.7632, + "step": 1004 + }, + { + "epoch": 0.35, + "grad_norm": 2.645569086074829, + "learning_rate": 0.0002815430414289258, + "loss": 0.5727, + "step": 1005 + }, + { + "epoch": 0.35, + "grad_norm": 2.4027628898620605, + "learning_rate": 0.00028150582981890346, + "loss": 0.5045, + "step": 1006 + }, + { + "epoch": 0.35, + "grad_norm": 3.2467432022094727, + "learning_rate": 0.00028146861820888116, + "loss": 0.4922, + "step": 1007 + }, + { + "epoch": 0.35, + "grad_norm": 3.64123272895813, + "learning_rate": 0.0002814314065988588, + "loss": 0.5161, + "step": 1008 + }, + { + "epoch": 0.35, + "grad_norm": 3.1751201152801514, + "learning_rate": 0.0002813941949888365, + "loss": 1.003, + "step": 1009 + }, + { + "epoch": 0.35, + "grad_norm": 3.5006515979766846, + "learning_rate": 0.00028135698337881416, + "loss": 0.6148, + "step": 1010 + }, + { + "epoch": 0.35, + "grad_norm": 2.372243881225586, + "learning_rate": 0.0002813197717687918, + "loss": 0.5718, + "step": 1011 + }, + { + "epoch": 0.35, + "grad_norm": 5.657968044281006, + "learning_rate": 0.0002812825601587695, + "loss": 1.1868, + "step": 1012 + }, + { + "epoch": 0.35, + "grad_norm": 4.172140598297119, + "learning_rate": 0.00028124534854874716, + "loss": 0.7887, + "step": 1013 + }, + { + "epoch": 0.36, + "grad_norm": 4.085210800170898, + "learning_rate": 0.00028120813693872487, + "loss": 0.9341, + "step": 1014 + }, + { + "epoch": 0.36, + "grad_norm": 2.6052396297454834, + "learning_rate": 0.00028117092532870257, + "loss": 0.6269, + "step": 1015 + }, + { + "epoch": 0.36, + "grad_norm": 5.70512580871582, + "learning_rate": 0.0002811337137186802, + "loss": 1.2359, + "step": 1016 + }, + { + "epoch": 0.36, + "grad_norm": 8.653592109680176, + "learning_rate": 0.00028109650210865787, + "loss": 1.07, + "step": 1017 + }, + { + "epoch": 0.36, + "grad_norm": 5.807731628417969, + "learning_rate": 0.0002810592904986355, + "loss": 0.8944, + "step": 1018 + }, + { + "epoch": 0.36, + "grad_norm": 3.5197043418884277, + "learning_rate": 0.0002810220788886132, + "loss": 0.9289, + "step": 1019 + }, + { + "epoch": 0.36, + "grad_norm": 3.4789111614227295, + "learning_rate": 0.0002809848672785909, + "loss": 0.9998, + "step": 1020 + }, + { + "epoch": 0.36, + "grad_norm": 5.096599102020264, + "learning_rate": 0.00028094765566856857, + "loss": 0.939, + "step": 1021 + }, + { + "epoch": 0.36, + "grad_norm": 3.344417095184326, + "learning_rate": 0.0002809104440585463, + "loss": 1.0038, + "step": 1022 + }, + { + "epoch": 0.36, + "grad_norm": 3.7382874488830566, + "learning_rate": 0.0002808732324485239, + "loss": 1.4407, + "step": 1023 + }, + { + "epoch": 0.36, + "grad_norm": 2.9947118759155273, + "learning_rate": 0.00028083602083850157, + "loss": 0.5359, + "step": 1024 + }, + { + "epoch": 0.36, + "grad_norm": 3.5792839527130127, + "learning_rate": 0.0002807988092284793, + "loss": 0.5672, + "step": 1025 + }, + { + "epoch": 0.36, + "grad_norm": 3.1083927154541016, + "learning_rate": 0.0002807615976184569, + "loss": 0.9157, + "step": 1026 + }, + { + "epoch": 0.36, + "grad_norm": 2.818694829940796, + "learning_rate": 0.0002807243860084346, + "loss": 0.9535, + "step": 1027 + }, + { + "epoch": 0.36, + "grad_norm": 3.34951114654541, + "learning_rate": 0.0002806871743984123, + "loss": 1.2982, + "step": 1028 + }, + { + "epoch": 0.36, + "grad_norm": 3.1415767669677734, + "learning_rate": 0.0002806499627883899, + "loss": 0.6431, + "step": 1029 + }, + { + "epoch": 0.36, + "grad_norm": 3.301403284072876, + "learning_rate": 0.0002806127511783676, + "loss": 1.0773, + "step": 1030 + }, + { + "epoch": 0.36, + "grad_norm": 3.992616653442383, + "learning_rate": 0.0002805755395683453, + "loss": 0.9013, + "step": 1031 + }, + { + "epoch": 0.36, + "grad_norm": 3.0520200729370117, + "learning_rate": 0.000280538327958323, + "loss": 0.5579, + "step": 1032 + }, + { + "epoch": 0.36, + "grad_norm": 2.932624340057373, + "learning_rate": 0.0002805011163483007, + "loss": 1.0126, + "step": 1033 + }, + { + "epoch": 0.36, + "grad_norm": 3.583620309829712, + "learning_rate": 0.00028046390473827833, + "loss": 0.5483, + "step": 1034 + }, + { + "epoch": 0.36, + "grad_norm": 3.1285436153411865, + "learning_rate": 0.000280426693128256, + "loss": 0.4759, + "step": 1035 + }, + { + "epoch": 0.36, + "grad_norm": 2.9035234451293945, + "learning_rate": 0.0002803894815182336, + "loss": 0.3353, + "step": 1036 + }, + { + "epoch": 0.36, + "grad_norm": 3.205357789993286, + "learning_rate": 0.00028035226990821133, + "loss": 1.0646, + "step": 1037 + }, + { + "epoch": 0.36, + "grad_norm": 4.966174602508545, + "learning_rate": 0.00028031505829818903, + "loss": 2.3098, + "step": 1038 + }, + { + "epoch": 0.36, + "grad_norm": 5.033513069152832, + "learning_rate": 0.0002802778466881667, + "loss": 0.9838, + "step": 1039 + }, + { + "epoch": 0.36, + "grad_norm": 3.171358823776245, + "learning_rate": 0.0002802406350781444, + "loss": 0.5542, + "step": 1040 + }, + { + "epoch": 0.36, + "grad_norm": 3.6895203590393066, + "learning_rate": 0.00028020342346812203, + "loss": 0.8648, + "step": 1041 + }, + { + "epoch": 0.37, + "grad_norm": 5.115908145904541, + "learning_rate": 0.0002801662118580997, + "loss": 0.8138, + "step": 1042 + }, + { + "epoch": 0.37, + "grad_norm": 4.41919469833374, + "learning_rate": 0.0002801290002480774, + "loss": 0.6559, + "step": 1043 + }, + { + "epoch": 0.37, + "grad_norm": 3.736941337585449, + "learning_rate": 0.00028009178863805503, + "loss": 0.7366, + "step": 1044 + }, + { + "epoch": 0.37, + "grad_norm": 4.1285319328308105, + "learning_rate": 0.00028005457702803274, + "loss": 0.5219, + "step": 1045 + }, + { + "epoch": 0.37, + "grad_norm": 4.831568241119385, + "learning_rate": 0.0002800173654180104, + "loss": 0.5053, + "step": 1046 + }, + { + "epoch": 0.37, + "grad_norm": 4.898710250854492, + "learning_rate": 0.0002799801538079881, + "loss": 1.0036, + "step": 1047 + }, + { + "epoch": 0.37, + "grad_norm": 5.523413181304932, + "learning_rate": 0.00027994294219796574, + "loss": 1.398, + "step": 1048 + }, + { + "epoch": 0.37, + "grad_norm": 3.2347447872161865, + "learning_rate": 0.0002799057305879434, + "loss": 0.4556, + "step": 1049 + }, + { + "epoch": 0.37, + "grad_norm": 5.337017059326172, + "learning_rate": 0.0002798685189779211, + "loss": 1.328, + "step": 1050 + }, + { + "epoch": 0.37, + "grad_norm": 4.164899826049805, + "learning_rate": 0.0002798313073678988, + "loss": 1.2521, + "step": 1051 + }, + { + "epoch": 0.37, + "grad_norm": 2.6211090087890625, + "learning_rate": 0.00027979409575787644, + "loss": 1.0136, + "step": 1052 + }, + { + "epoch": 0.37, + "grad_norm": 2.821180820465088, + "learning_rate": 0.0002797568841478541, + "loss": 0.9512, + "step": 1053 + }, + { + "epoch": 0.37, + "grad_norm": 3.4610302448272705, + "learning_rate": 0.00027971967253783174, + "loss": 0.7071, + "step": 1054 + }, + { + "epoch": 0.37, + "grad_norm": 2.7207629680633545, + "learning_rate": 0.00027968246092780944, + "loss": 0.6617, + "step": 1055 + }, + { + "epoch": 0.37, + "grad_norm": 4.068587779998779, + "learning_rate": 0.00027964524931778714, + "loss": 0.7244, + "step": 1056 + }, + { + "epoch": 0.37, + "grad_norm": 3.698821544647217, + "learning_rate": 0.0002796080377077648, + "loss": 0.8169, + "step": 1057 + }, + { + "epoch": 0.37, + "grad_norm": 2.56290864944458, + "learning_rate": 0.0002795708260977425, + "loss": 0.5039, + "step": 1058 + }, + { + "epoch": 0.37, + "grad_norm": 3.410244941711426, + "learning_rate": 0.00027953361448772014, + "loss": 0.7284, + "step": 1059 + }, + { + "epoch": 0.37, + "grad_norm": 2.4827516078948975, + "learning_rate": 0.0002794964028776978, + "loss": 0.675, + "step": 1060 + }, + { + "epoch": 0.37, + "grad_norm": 2.349926471710205, + "learning_rate": 0.0002794591912676755, + "loss": 0.3923, + "step": 1061 + }, + { + "epoch": 0.37, + "grad_norm": 4.622032165527344, + "learning_rate": 0.00027942197965765314, + "loss": 0.7249, + "step": 1062 + }, + { + "epoch": 0.37, + "grad_norm": 4.842290878295898, + "learning_rate": 0.00027938476804763085, + "loss": 0.5359, + "step": 1063 + }, + { + "epoch": 0.37, + "grad_norm": 3.4208180904388428, + "learning_rate": 0.00027934755643760855, + "loss": 1.2509, + "step": 1064 + }, + { + "epoch": 0.37, + "grad_norm": 3.2167344093322754, + "learning_rate": 0.0002793103448275862, + "loss": 0.3897, + "step": 1065 + }, + { + "epoch": 0.37, + "grad_norm": 6.12382698059082, + "learning_rate": 0.00027927313321756385, + "loss": 0.8148, + "step": 1066 + }, + { + "epoch": 0.37, + "grad_norm": 3.50907301902771, + "learning_rate": 0.0002792359216075415, + "loss": 0.3775, + "step": 1067 + }, + { + "epoch": 0.37, + "grad_norm": 4.680488109588623, + "learning_rate": 0.0002791987099975192, + "loss": 0.7939, + "step": 1068 + }, + { + "epoch": 0.37, + "grad_norm": 5.077605247497559, + "learning_rate": 0.0002791614983874969, + "loss": 0.8747, + "step": 1069 + }, + { + "epoch": 0.37, + "grad_norm": 8.638588905334473, + "learning_rate": 0.00027912428677747455, + "loss": 1.3101, + "step": 1070 + }, + { + "epoch": 0.38, + "grad_norm": 6.577147006988525, + "learning_rate": 0.0002790870751674522, + "loss": 2.0627, + "step": 1071 + }, + { + "epoch": 0.38, + "grad_norm": 5.956910133361816, + "learning_rate": 0.0002790498635574299, + "loss": 1.1331, + "step": 1072 + }, + { + "epoch": 0.38, + "grad_norm": 8.133764266967773, + "learning_rate": 0.00027901265194740755, + "loss": 1.0287, + "step": 1073 + }, + { + "epoch": 0.38, + "grad_norm": 11.1182279586792, + "learning_rate": 0.00027897544033738525, + "loss": 1.0364, + "step": 1074 + }, + { + "epoch": 0.38, + "grad_norm": 3.266451835632324, + "learning_rate": 0.0002789382287273629, + "loss": 0.6508, + "step": 1075 + }, + { + "epoch": 0.38, + "grad_norm": 14.238346099853516, + "learning_rate": 0.0002789010171173406, + "loss": 2.2819, + "step": 1076 + }, + { + "epoch": 0.38, + "grad_norm": 3.7446582317352295, + "learning_rate": 0.00027886380550731825, + "loss": 0.922, + "step": 1077 + }, + { + "epoch": 0.38, + "grad_norm": 4.616486072540283, + "learning_rate": 0.0002788265938972959, + "loss": 1.1808, + "step": 1078 + }, + { + "epoch": 0.38, + "grad_norm": 3.3517651557922363, + "learning_rate": 0.0002787893822872736, + "loss": 1.3126, + "step": 1079 + }, + { + "epoch": 0.38, + "grad_norm": 2.9327385425567627, + "learning_rate": 0.00027875217067725125, + "loss": 0.805, + "step": 1080 + }, + { + "epoch": 0.38, + "grad_norm": 5.411552906036377, + "learning_rate": 0.00027871495906722896, + "loss": 0.8779, + "step": 1081 + }, + { + "epoch": 0.38, + "grad_norm": 4.122033596038818, + "learning_rate": 0.00027867774745720666, + "loss": 1.3039, + "step": 1082 + }, + { + "epoch": 0.38, + "grad_norm": 3.9304003715515137, + "learning_rate": 0.0002786405358471843, + "loss": 1.1372, + "step": 1083 + }, + { + "epoch": 0.38, + "grad_norm": 3.756955862045288, + "learning_rate": 0.00027860332423716196, + "loss": 1.0457, + "step": 1084 + }, + { + "epoch": 0.38, + "grad_norm": 4.7756242752075195, + "learning_rate": 0.00027856611262713966, + "loss": 0.8883, + "step": 1085 + }, + { + "epoch": 0.38, + "grad_norm": 5.649970054626465, + "learning_rate": 0.0002785289010171173, + "loss": 2.0304, + "step": 1086 + }, + { + "epoch": 0.38, + "grad_norm": 4.984282970428467, + "learning_rate": 0.000278491689407095, + "loss": 1.1931, + "step": 1087 + }, + { + "epoch": 0.38, + "grad_norm": 4.8974289894104, + "learning_rate": 0.00027845447779707266, + "loss": 0.7087, + "step": 1088 + }, + { + "epoch": 0.38, + "grad_norm": 3.27323579788208, + "learning_rate": 0.00027841726618705036, + "loss": 0.4356, + "step": 1089 + }, + { + "epoch": 0.38, + "grad_norm": 6.953647136688232, + "learning_rate": 0.000278380054577028, + "loss": 1.3964, + "step": 1090 + }, + { + "epoch": 0.38, + "grad_norm": 2.8305001258850098, + "learning_rate": 0.00027834284296700566, + "loss": 0.6397, + "step": 1091 + }, + { + "epoch": 0.38, + "grad_norm": 5.174875736236572, + "learning_rate": 0.00027830563135698336, + "loss": 0.6587, + "step": 1092 + }, + { + "epoch": 0.38, + "grad_norm": 3.3889546394348145, + "learning_rate": 0.000278268419746961, + "loss": 0.6804, + "step": 1093 + }, + { + "epoch": 0.38, + "grad_norm": 6.435478210449219, + "learning_rate": 0.0002782312081369387, + "loss": 1.0459, + "step": 1094 + }, + { + "epoch": 0.38, + "grad_norm": 9.19431209564209, + "learning_rate": 0.00027819399652691636, + "loss": 1.5923, + "step": 1095 + }, + { + "epoch": 0.38, + "grad_norm": 2.4626758098602295, + "learning_rate": 0.000278156784916894, + "loss": 0.7921, + "step": 1096 + }, + { + "epoch": 0.38, + "grad_norm": 5.5386457443237305, + "learning_rate": 0.0002781195733068717, + "loss": 0.9378, + "step": 1097 + }, + { + "epoch": 0.38, + "grad_norm": 4.50201940536499, + "learning_rate": 0.00027808236169684936, + "loss": 1.7841, + "step": 1098 + }, + { + "epoch": 0.39, + "grad_norm": 3.9906821250915527, + "learning_rate": 0.00027804515008682707, + "loss": 0.7195, + "step": 1099 + }, + { + "epoch": 0.39, + "grad_norm": 6.178668022155762, + "learning_rate": 0.00027800793847680477, + "loss": 0.5866, + "step": 1100 + }, + { + "epoch": 0.39, + "eval_loss": 0.858989417552948, + "eval_runtime": 50.9765, + "eval_samples_per_second": 42.529, + "eval_steps_per_second": 10.632, + "eval_wer": 0.7215014703338523, + "step": 1100 + }, + { + "epoch": 0.39, + "grad_norm": 4.665261268615723, + "learning_rate": 0.0002779707268667824, + "loss": 1.8917, + "step": 1101 + }, + { + "epoch": 0.39, + "grad_norm": 2.5688321590423584, + "learning_rate": 0.00027793351525676007, + "loss": 1.1618, + "step": 1102 + }, + { + "epoch": 0.39, + "grad_norm": 2.149432420730591, + "learning_rate": 0.00027789630364673777, + "loss": 0.7372, + "step": 1103 + }, + { + "epoch": 0.39, + "grad_norm": 2.077228307723999, + "learning_rate": 0.0002778590920367154, + "loss": 0.7426, + "step": 1104 + }, + { + "epoch": 0.39, + "grad_norm": 2.8044614791870117, + "learning_rate": 0.0002778218804266931, + "loss": 1.0178, + "step": 1105 + }, + { + "epoch": 0.39, + "grad_norm": 3.4295260906219482, + "learning_rate": 0.00027778466881667077, + "loss": 0.6801, + "step": 1106 + }, + { + "epoch": 0.39, + "grad_norm": 3.1818017959594727, + "learning_rate": 0.00027774745720664847, + "loss": 0.5587, + "step": 1107 + }, + { + "epoch": 0.39, + "grad_norm": 3.3339715003967285, + "learning_rate": 0.0002777102455966261, + "loss": 0.7365, + "step": 1108 + }, + { + "epoch": 0.39, + "grad_norm": 4.128452301025391, + "learning_rate": 0.00027767303398660377, + "loss": 1.1199, + "step": 1109 + }, + { + "epoch": 0.39, + "grad_norm": 3.8575356006622314, + "learning_rate": 0.00027763582237658147, + "loss": 1.1287, + "step": 1110 + }, + { + "epoch": 0.39, + "grad_norm": 3.410104751586914, + "learning_rate": 0.0002775986107665591, + "loss": 0.6495, + "step": 1111 + }, + { + "epoch": 0.39, + "grad_norm": 4.475276947021484, + "learning_rate": 0.0002775613991565368, + "loss": 0.7845, + "step": 1112 + }, + { + "epoch": 0.39, + "grad_norm": 2.5406651496887207, + "learning_rate": 0.00027752418754651447, + "loss": 0.476, + "step": 1113 + }, + { + "epoch": 0.39, + "grad_norm": 3.599978446960449, + "learning_rate": 0.0002774869759364922, + "loss": 0.9411, + "step": 1114 + }, + { + "epoch": 0.39, + "grad_norm": 2.8049709796905518, + "learning_rate": 0.0002774497643264698, + "loss": 0.7746, + "step": 1115 + }, + { + "epoch": 0.39, + "grad_norm": 5.1293182373046875, + "learning_rate": 0.00027741255271644753, + "loss": 1.4263, + "step": 1116 + }, + { + "epoch": 0.39, + "grad_norm": 4.558226585388184, + "learning_rate": 0.0002773753411064252, + "loss": 0.8449, + "step": 1117 + }, + { + "epoch": 0.39, + "grad_norm": 3.3060882091522217, + "learning_rate": 0.0002773381294964029, + "loss": 0.7272, + "step": 1118 + }, + { + "epoch": 0.39, + "grad_norm": 6.07926607131958, + "learning_rate": 0.00027730091788638053, + "loss": 0.9118, + "step": 1119 + }, + { + "epoch": 0.39, + "grad_norm": 4.133337020874023, + "learning_rate": 0.0002772637062763582, + "loss": 0.483, + "step": 1120 + }, + { + "epoch": 0.39, + "grad_norm": 5.365363121032715, + "learning_rate": 0.0002772264946663359, + "loss": 0.8083, + "step": 1121 + }, + { + "epoch": 0.39, + "grad_norm": 3.9955897331237793, + "learning_rate": 0.00027718928305631353, + "loss": 1.1863, + "step": 1122 + }, + { + "epoch": 0.39, + "grad_norm": 3.5983023643493652, + "learning_rate": 0.00027715207144629123, + "loss": 0.7021, + "step": 1123 + }, + { + "epoch": 0.39, + "grad_norm": 3.3443710803985596, + "learning_rate": 0.0002771148598362689, + "loss": 0.6462, + "step": 1124 + }, + { + "epoch": 0.39, + "grad_norm": 3.6084046363830566, + "learning_rate": 0.0002770776482262466, + "loss": 0.723, + "step": 1125 + }, + { + "epoch": 0.39, + "grad_norm": 3.003911256790161, + "learning_rate": 0.00027704043661622423, + "loss": 1.1826, + "step": 1126 + }, + { + "epoch": 0.39, + "grad_norm": 3.219900608062744, + "learning_rate": 0.0002770032250062019, + "loss": 1.0835, + "step": 1127 + }, + { + "epoch": 0.4, + "grad_norm": 2.942505121231079, + "learning_rate": 0.0002769660133961796, + "loss": 0.9503, + "step": 1128 + }, + { + "epoch": 0.4, + "grad_norm": 3.2380802631378174, + "learning_rate": 0.0002769288017861573, + "loss": 0.8017, + "step": 1129 + }, + { + "epoch": 0.4, + "grad_norm": 2.9053637981414795, + "learning_rate": 0.00027689159017613493, + "loss": 0.6966, + "step": 1130 + }, + { + "epoch": 0.4, + "grad_norm": 3.158212900161743, + "learning_rate": 0.00027685437856611264, + "loss": 0.4872, + "step": 1131 + }, + { + "epoch": 0.4, + "grad_norm": 3.566776990890503, + "learning_rate": 0.0002768171669560903, + "loss": 0.7114, + "step": 1132 + }, + { + "epoch": 0.4, + "grad_norm": 2.0422821044921875, + "learning_rate": 0.00027677995534606793, + "loss": 0.6142, + "step": 1133 + }, + { + "epoch": 0.4, + "grad_norm": 1.843910813331604, + "learning_rate": 0.00027674274373604564, + "loss": 0.4941, + "step": 1134 + }, + { + "epoch": 0.4, + "grad_norm": 4.197638034820557, + "learning_rate": 0.0002767055321260233, + "loss": 1.1024, + "step": 1135 + }, + { + "epoch": 0.4, + "grad_norm": 2.642378568649292, + "learning_rate": 0.000276668320516001, + "loss": 0.5026, + "step": 1136 + }, + { + "epoch": 0.4, + "grad_norm": 4.609485626220703, + "learning_rate": 0.00027663110890597864, + "loss": 1.2151, + "step": 1137 + }, + { + "epoch": 0.4, + "grad_norm": 2.2409887313842773, + "learning_rate": 0.0002765938972959563, + "loss": 0.3389, + "step": 1138 + }, + { + "epoch": 0.4, + "grad_norm": 4.53881311416626, + "learning_rate": 0.000276556685685934, + "loss": 0.9275, + "step": 1139 + }, + { + "epoch": 0.4, + "grad_norm": 5.820657730102539, + "learning_rate": 0.00027651947407591164, + "loss": 0.7122, + "step": 1140 + }, + { + "epoch": 0.4, + "grad_norm": 4.550056457519531, + "learning_rate": 0.00027648226246588934, + "loss": 1.9592, + "step": 1141 + }, + { + "epoch": 0.4, + "grad_norm": 3.5938735008239746, + "learning_rate": 0.000276445050855867, + "loss": 0.765, + "step": 1142 + }, + { + "epoch": 0.4, + "grad_norm": 3.5738070011138916, + "learning_rate": 0.0002764078392458447, + "loss": 0.6004, + "step": 1143 + }, + { + "epoch": 0.4, + "grad_norm": 3.4456381797790527, + "learning_rate": 0.00027637062763582234, + "loss": 0.5931, + "step": 1144 + }, + { + "epoch": 0.4, + "grad_norm": 7.033105850219727, + "learning_rate": 0.0002763334160258, + "loss": 0.8942, + "step": 1145 + }, + { + "epoch": 0.4, + "grad_norm": 3.166602611541748, + "learning_rate": 0.0002762962044157777, + "loss": 0.5482, + "step": 1146 + }, + { + "epoch": 0.4, + "grad_norm": 4.609311103820801, + "learning_rate": 0.0002762589928057554, + "loss": 0.9272, + "step": 1147 + }, + { + "epoch": 0.4, + "grad_norm": 3.320896625518799, + "learning_rate": 0.00027622178119573304, + "loss": 0.6604, + "step": 1148 + }, + { + "epoch": 0.4, + "grad_norm": 6.4265875816345215, + "learning_rate": 0.00027618456958571075, + "loss": 1.238, + "step": 1149 + }, + { + "epoch": 0.4, + "grad_norm": 8.044411659240723, + "learning_rate": 0.0002761473579756884, + "loss": 0.9712, + "step": 1150 + }, + { + "epoch": 0.4, + "grad_norm": 7.890890121459961, + "learning_rate": 0.00027611014636566604, + "loss": 2.561, + "step": 1151 + }, + { + "epoch": 0.4, + "grad_norm": 3.3083295822143555, + "learning_rate": 0.00027607293475564375, + "loss": 1.3886, + "step": 1152 + }, + { + "epoch": 0.4, + "grad_norm": 3.2230350971221924, + "learning_rate": 0.0002760357231456214, + "loss": 0.9359, + "step": 1153 + }, + { + "epoch": 0.4, + "grad_norm": 4.595873832702637, + "learning_rate": 0.0002759985115355991, + "loss": 1.0819, + "step": 1154 + }, + { + "epoch": 0.4, + "grad_norm": 3.82143497467041, + "learning_rate": 0.00027596129992557675, + "loss": 0.9526, + "step": 1155 + }, + { + "epoch": 0.41, + "grad_norm": 3.152099132537842, + "learning_rate": 0.00027592408831555445, + "loss": 0.5482, + "step": 1156 + }, + { + "epoch": 0.41, + "grad_norm": 2.9585154056549072, + "learning_rate": 0.0002758868767055321, + "loss": 1.1054, + "step": 1157 + }, + { + "epoch": 0.41, + "grad_norm": 2.825305700302124, + "learning_rate": 0.00027584966509550975, + "loss": 0.7547, + "step": 1158 + }, + { + "epoch": 0.41, + "grad_norm": 2.47904634475708, + "learning_rate": 0.00027581245348548745, + "loss": 0.7835, + "step": 1159 + }, + { + "epoch": 0.41, + "grad_norm": 2.429239511489868, + "learning_rate": 0.00027577524187546515, + "loss": 1.1264, + "step": 1160 + }, + { + "epoch": 0.41, + "grad_norm": 3.2548816204071045, + "learning_rate": 0.0002757380302654428, + "loss": 0.7843, + "step": 1161 + }, + { + "epoch": 0.41, + "grad_norm": 5.171494007110596, + "learning_rate": 0.00027570081865542045, + "loss": 0.8388, + "step": 1162 + }, + { + "epoch": 0.41, + "grad_norm": 4.407280921936035, + "learning_rate": 0.00027566360704539815, + "loss": 0.5067, + "step": 1163 + }, + { + "epoch": 0.41, + "grad_norm": 2.2985994815826416, + "learning_rate": 0.0002756263954353758, + "loss": 0.7189, + "step": 1164 + }, + { + "epoch": 0.41, + "grad_norm": 3.708470106124878, + "learning_rate": 0.0002755891838253535, + "loss": 0.7342, + "step": 1165 + }, + { + "epoch": 0.41, + "grad_norm": 3.204012393951416, + "learning_rate": 0.00027555197221533115, + "loss": 0.5299, + "step": 1166 + }, + { + "epoch": 0.41, + "grad_norm": 4.998049259185791, + "learning_rate": 0.00027551476060530886, + "loss": 0.3688, + "step": 1167 + }, + { + "epoch": 0.41, + "grad_norm": 2.720385789871216, + "learning_rate": 0.0002754775489952865, + "loss": 0.7139, + "step": 1168 + }, + { + "epoch": 0.41, + "grad_norm": 4.8262248039245605, + "learning_rate": 0.00027544033738526415, + "loss": 0.4921, + "step": 1169 + }, + { + "epoch": 0.41, + "grad_norm": 3.7257118225097656, + "learning_rate": 0.00027540312577524186, + "loss": 0.7639, + "step": 1170 + }, + { + "epoch": 0.41, + "grad_norm": 5.001168251037598, + "learning_rate": 0.0002753659141652195, + "loss": 0.6115, + "step": 1171 + }, + { + "epoch": 0.41, + "grad_norm": 3.162776470184326, + "learning_rate": 0.0002753287025551972, + "loss": 0.2777, + "step": 1172 + }, + { + "epoch": 0.41, + "grad_norm": 5.089084148406982, + "learning_rate": 0.0002752914909451749, + "loss": 1.4753, + "step": 1173 + }, + { + "epoch": 0.41, + "grad_norm": 2.25437068939209, + "learning_rate": 0.00027525427933515256, + "loss": 0.2112, + "step": 1174 + }, + { + "epoch": 0.41, + "grad_norm": 5.870993137359619, + "learning_rate": 0.0002752170677251302, + "loss": 0.6987, + "step": 1175 + }, + { + "epoch": 0.41, + "grad_norm": 3.5580074787139893, + "learning_rate": 0.00027517985611510786, + "loss": 0.8806, + "step": 1176 + }, + { + "epoch": 0.41, + "grad_norm": 5.758315563201904, + "learning_rate": 0.00027514264450508556, + "loss": 1.0525, + "step": 1177 + }, + { + "epoch": 0.41, + "grad_norm": 3.0043349266052246, + "learning_rate": 0.00027510543289506326, + "loss": 1.0657, + "step": 1178 + }, + { + "epoch": 0.41, + "grad_norm": 4.536125183105469, + "learning_rate": 0.0002750682212850409, + "loss": 1.1375, + "step": 1179 + }, + { + "epoch": 0.41, + "grad_norm": 22.37661361694336, + "learning_rate": 0.00027503100967501856, + "loss": 4.7936, + "step": 1180 + }, + { + "epoch": 0.41, + "grad_norm": 3.035590171813965, + "learning_rate": 0.00027499379806499626, + "loss": 0.9853, + "step": 1181 + }, + { + "epoch": 0.41, + "grad_norm": 2.8873214721679688, + "learning_rate": 0.0002749565864549739, + "loss": 0.5476, + "step": 1182 + }, + { + "epoch": 0.41, + "grad_norm": 2.164041042327881, + "learning_rate": 0.0002749193748449516, + "loss": 0.3647, + "step": 1183 + }, + { + "epoch": 0.41, + "grad_norm": 3.414952039718628, + "learning_rate": 0.00027488216323492926, + "loss": 1.0755, + "step": 1184 + }, + { + "epoch": 0.42, + "grad_norm": 4.234614849090576, + "learning_rate": 0.00027484495162490697, + "loss": 1.3185, + "step": 1185 + }, + { + "epoch": 0.42, + "grad_norm": 2.8845980167388916, + "learning_rate": 0.0002748077400148846, + "loss": 0.7329, + "step": 1186 + }, + { + "epoch": 0.42, + "grad_norm": 2.165452718734741, + "learning_rate": 0.00027477052840486226, + "loss": 0.4976, + "step": 1187 + }, + { + "epoch": 0.42, + "grad_norm": 3.6230342388153076, + "learning_rate": 0.00027473331679483997, + "loss": 1.2895, + "step": 1188 + }, + { + "epoch": 0.42, + "grad_norm": 2.947673797607422, + "learning_rate": 0.0002746961051848176, + "loss": 0.7188, + "step": 1189 + }, + { + "epoch": 0.42, + "grad_norm": 3.158074140548706, + "learning_rate": 0.0002746588935747953, + "loss": 0.608, + "step": 1190 + }, + { + "epoch": 0.42, + "grad_norm": 2.541929006576538, + "learning_rate": 0.000274621681964773, + "loss": 0.6903, + "step": 1191 + }, + { + "epoch": 0.42, + "grad_norm": 2.1511118412017822, + "learning_rate": 0.00027458447035475067, + "loss": 0.3223, + "step": 1192 + }, + { + "epoch": 0.42, + "grad_norm": 4.088685989379883, + "learning_rate": 0.0002745472587447283, + "loss": 0.9799, + "step": 1193 + }, + { + "epoch": 0.42, + "grad_norm": 4.290481090545654, + "learning_rate": 0.00027451004713470597, + "loss": 0.7953, + "step": 1194 + }, + { + "epoch": 0.42, + "grad_norm": 4.309513092041016, + "learning_rate": 0.00027447283552468367, + "loss": 0.7685, + "step": 1195 + }, + { + "epoch": 0.42, + "grad_norm": 3.8015859127044678, + "learning_rate": 0.0002744356239146614, + "loss": 0.5714, + "step": 1196 + }, + { + "epoch": 0.42, + "grad_norm": 5.4302568435668945, + "learning_rate": 0.000274398412304639, + "loss": 0.6369, + "step": 1197 + }, + { + "epoch": 0.42, + "grad_norm": 4.336185932159424, + "learning_rate": 0.0002743612006946167, + "loss": 0.3784, + "step": 1198 + }, + { + "epoch": 0.42, + "grad_norm": 3.980928659439087, + "learning_rate": 0.0002743239890845944, + "loss": 0.5137, + "step": 1199 + }, + { + "epoch": 0.42, + "grad_norm": 7.486271381378174, + "learning_rate": 0.000274286777474572, + "loss": 0.4206, + "step": 1200 + }, + { + "epoch": 0.42, + "eval_loss": 0.7752430438995361, + "eval_runtime": 49.918, + "eval_samples_per_second": 43.431, + "eval_steps_per_second": 10.858, + "eval_wer": 0.6498875627054143, + "step": 1200 + }, + { + "epoch": 0.42, + "grad_norm": 3.420276641845703, + "learning_rate": 0.0002742495658645497, + "loss": 1.3815, + "step": 1201 + }, + { + "epoch": 0.42, + "grad_norm": 3.3125431537628174, + "learning_rate": 0.0002742123542545274, + "loss": 1.3345, + "step": 1202 + }, + { + "epoch": 0.42, + "grad_norm": 6.291903495788574, + "learning_rate": 0.0002741751426445051, + "loss": 1.1505, + "step": 1203 + }, + { + "epoch": 0.42, + "grad_norm": 3.3073489665985107, + "learning_rate": 0.0002741379310344827, + "loss": 0.5659, + "step": 1204 + }, + { + "epoch": 0.42, + "grad_norm": 2.7754597663879395, + "learning_rate": 0.00027410071942446043, + "loss": 0.8152, + "step": 1205 + }, + { + "epoch": 0.42, + "grad_norm": 2.6730542182922363, + "learning_rate": 0.0002740635078144381, + "loss": 1.41, + "step": 1206 + }, + { + "epoch": 0.42, + "grad_norm": 4.539772987365723, + "learning_rate": 0.0002740262962044157, + "loss": 1.0475, + "step": 1207 + }, + { + "epoch": 0.42, + "grad_norm": 2.224508762359619, + "learning_rate": 0.00027398908459439343, + "loss": 0.5957, + "step": 1208 + }, + { + "epoch": 0.42, + "grad_norm": 2.530787467956543, + "learning_rate": 0.00027395187298437113, + "loss": 0.6799, + "step": 1209 + }, + { + "epoch": 0.42, + "grad_norm": 2.891000509262085, + "learning_rate": 0.0002739146613743488, + "loss": 1.2861, + "step": 1210 + }, + { + "epoch": 0.42, + "grad_norm": 3.0431253910064697, + "learning_rate": 0.00027387744976432643, + "loss": 0.5401, + "step": 1211 + }, + { + "epoch": 0.42, + "grad_norm": 2.738537549972534, + "learning_rate": 0.0002738402381543041, + "loss": 0.8909, + "step": 1212 + }, + { + "epoch": 0.43, + "grad_norm": 4.205295085906982, + "learning_rate": 0.0002738030265442818, + "loss": 0.8714, + "step": 1213 + }, + { + "epoch": 0.43, + "grad_norm": 2.5161707401275635, + "learning_rate": 0.0002737658149342595, + "loss": 0.5855, + "step": 1214 + }, + { + "epoch": 0.43, + "grad_norm": 3.841719150543213, + "learning_rate": 0.00027372860332423713, + "loss": 0.9467, + "step": 1215 + }, + { + "epoch": 0.43, + "grad_norm": 3.4140450954437256, + "learning_rate": 0.00027369139171421484, + "loss": 1.0405, + "step": 1216 + }, + { + "epoch": 0.43, + "grad_norm": 5.440001487731934, + "learning_rate": 0.0002736541801041925, + "loss": 0.5273, + "step": 1217 + }, + { + "epoch": 0.43, + "grad_norm": 3.5466346740722656, + "learning_rate": 0.00027361696849417013, + "loss": 0.8691, + "step": 1218 + }, + { + "epoch": 0.43, + "grad_norm": 4.198976516723633, + "learning_rate": 0.00027357975688414784, + "loss": 0.9419, + "step": 1219 + }, + { + "epoch": 0.43, + "grad_norm": 3.357349395751953, + "learning_rate": 0.0002735425452741255, + "loss": 0.7402, + "step": 1220 + }, + { + "epoch": 0.43, + "grad_norm": 5.02566385269165, + "learning_rate": 0.0002735053336641032, + "loss": 0.7176, + "step": 1221 + }, + { + "epoch": 0.43, + "grad_norm": 6.563841819763184, + "learning_rate": 0.00027346812205408084, + "loss": 2.2978, + "step": 1222 + }, + { + "epoch": 0.43, + "grad_norm": 3.9487085342407227, + "learning_rate": 0.00027343091044405854, + "loss": 0.6593, + "step": 1223 + }, + { + "epoch": 0.43, + "grad_norm": 4.61473274230957, + "learning_rate": 0.0002733936988340362, + "loss": 1.9731, + "step": 1224 + }, + { + "epoch": 0.43, + "grad_norm": 5.610717296600342, + "learning_rate": 0.00027335648722401384, + "loss": 2.6307, + "step": 1225 + }, + { + "epoch": 0.43, + "grad_norm": 4.044619083404541, + "learning_rate": 0.00027331927561399154, + "loss": 1.2597, + "step": 1226 + }, + { + "epoch": 0.43, + "grad_norm": 4.211724758148193, + "learning_rate": 0.00027328206400396924, + "loss": 1.6048, + "step": 1227 + }, + { + "epoch": 0.43, + "grad_norm": 2.5721964836120605, + "learning_rate": 0.0002732448523939469, + "loss": 0.686, + "step": 1228 + }, + { + "epoch": 0.43, + "grad_norm": 2.3949391841888428, + "learning_rate": 0.00027320764078392454, + "loss": 0.6873, + "step": 1229 + }, + { + "epoch": 0.43, + "grad_norm": 18.452733993530273, + "learning_rate": 0.00027317042917390224, + "loss": 3.4468, + "step": 1230 + }, + { + "epoch": 0.43, + "grad_norm": 3.9526307582855225, + "learning_rate": 0.0002731332175638799, + "loss": 0.6906, + "step": 1231 + }, + { + "epoch": 0.43, + "grad_norm": 4.902743339538574, + "learning_rate": 0.0002730960059538576, + "loss": 0.5812, + "step": 1232 + }, + { + "epoch": 0.43, + "grad_norm": 3.806962013244629, + "learning_rate": 0.00027305879434383524, + "loss": 0.6684, + "step": 1233 + }, + { + "epoch": 0.43, + "grad_norm": 4.608047008514404, + "learning_rate": 0.00027302158273381295, + "loss": 0.8382, + "step": 1234 + }, + { + "epoch": 0.43, + "grad_norm": 8.201254844665527, + "learning_rate": 0.0002729843711237906, + "loss": 0.645, + "step": 1235 + }, + { + "epoch": 0.43, + "grad_norm": 3.0138301849365234, + "learning_rate": 0.00027294715951376824, + "loss": 0.7516, + "step": 1236 + }, + { + "epoch": 0.43, + "grad_norm": 4.389562606811523, + "learning_rate": 0.00027290994790374595, + "loss": 1.2125, + "step": 1237 + }, + { + "epoch": 0.43, + "grad_norm": 2.5127413272857666, + "learning_rate": 0.0002728727362937236, + "loss": 0.4319, + "step": 1238 + }, + { + "epoch": 0.43, + "grad_norm": 3.1524224281311035, + "learning_rate": 0.0002728355246837013, + "loss": 0.586, + "step": 1239 + }, + { + "epoch": 0.43, + "grad_norm": 5.574815273284912, + "learning_rate": 0.000272798313073679, + "loss": 0.8344, + "step": 1240 + }, + { + "epoch": 0.43, + "grad_norm": 4.405158996582031, + "learning_rate": 0.00027276110146365665, + "loss": 0.8623, + "step": 1241 + }, + { + "epoch": 0.44, + "grad_norm": 3.2911202907562256, + "learning_rate": 0.0002727238898536343, + "loss": 1.0748, + "step": 1242 + }, + { + "epoch": 0.44, + "grad_norm": 4.5247392654418945, + "learning_rate": 0.00027268667824361195, + "loss": 0.5555, + "step": 1243 + }, + { + "epoch": 0.44, + "grad_norm": 4.708747386932373, + "learning_rate": 0.00027264946663358965, + "loss": 0.988, + "step": 1244 + }, + { + "epoch": 0.44, + "grad_norm": 3.7301928997039795, + "learning_rate": 0.00027261225502356735, + "loss": 0.7501, + "step": 1245 + }, + { + "epoch": 0.44, + "grad_norm": 4.694626331329346, + "learning_rate": 0.000272575043413545, + "loss": 1.3788, + "step": 1246 + }, + { + "epoch": 0.44, + "grad_norm": 4.803793907165527, + "learning_rate": 0.0002725378318035227, + "loss": 0.8157, + "step": 1247 + }, + { + "epoch": 0.44, + "grad_norm": 3.017162561416626, + "learning_rate": 0.00027250062019350035, + "loss": 0.7995, + "step": 1248 + }, + { + "epoch": 0.44, + "grad_norm": 3.8873093128204346, + "learning_rate": 0.000272463408583478, + "loss": 1.574, + "step": 1249 + }, + { + "epoch": 0.44, + "grad_norm": 2.3652150630950928, + "learning_rate": 0.0002724261969734557, + "loss": 0.3168, + "step": 1250 + }, + { + "epoch": 0.44, + "grad_norm": 4.411014556884766, + "learning_rate": 0.00027238898536343335, + "loss": 1.3121, + "step": 1251 + }, + { + "epoch": 0.44, + "grad_norm": 2.4930648803710938, + "learning_rate": 0.00027235177375341106, + "loss": 0.7999, + "step": 1252 + }, + { + "epoch": 0.44, + "grad_norm": 5.38267183303833, + "learning_rate": 0.0002723145621433887, + "loss": 0.9561, + "step": 1253 + }, + { + "epoch": 0.44, + "grad_norm": 2.609616756439209, + "learning_rate": 0.00027227735053336635, + "loss": 0.8172, + "step": 1254 + }, + { + "epoch": 0.44, + "grad_norm": 2.3476998805999756, + "learning_rate": 0.00027224013892334406, + "loss": 1.4765, + "step": 1255 + }, + { + "epoch": 0.44, + "grad_norm": 2.823747158050537, + "learning_rate": 0.0002722029273133217, + "loss": 0.5941, + "step": 1256 + }, + { + "epoch": 0.44, + "grad_norm": 2.738922595977783, + "learning_rate": 0.0002721657157032994, + "loss": 0.8431, + "step": 1257 + }, + { + "epoch": 0.44, + "grad_norm": 3.6117281913757324, + "learning_rate": 0.0002721285040932771, + "loss": 0.8236, + "step": 1258 + }, + { + "epoch": 0.44, + "grad_norm": 4.166642665863037, + "learning_rate": 0.00027209129248325476, + "loss": 0.8192, + "step": 1259 + }, + { + "epoch": 0.44, + "grad_norm": 2.9680120944976807, + "learning_rate": 0.0002720540808732324, + "loss": 0.5717, + "step": 1260 + }, + { + "epoch": 0.44, + "grad_norm": 3.276177406311035, + "learning_rate": 0.0002720168692632101, + "loss": 0.9233, + "step": 1261 + }, + { + "epoch": 0.44, + "grad_norm": 3.2780745029449463, + "learning_rate": 0.00027197965765318776, + "loss": 0.7867, + "step": 1262 + }, + { + "epoch": 0.44, + "grad_norm": 5.421667575836182, + "learning_rate": 0.00027194244604316546, + "loss": 0.8406, + "step": 1263 + }, + { + "epoch": 0.44, + "grad_norm": 3.3682520389556885, + "learning_rate": 0.0002719052344331431, + "loss": 0.5538, + "step": 1264 + }, + { + "epoch": 0.44, + "grad_norm": 2.914537191390991, + "learning_rate": 0.0002718680228231208, + "loss": 0.7114, + "step": 1265 + }, + { + "epoch": 0.44, + "grad_norm": 4.220479965209961, + "learning_rate": 0.00027183081121309846, + "loss": 0.8671, + "step": 1266 + }, + { + "epoch": 0.44, + "grad_norm": 5.633809566497803, + "learning_rate": 0.0002717935996030761, + "loss": 1.037, + "step": 1267 + }, + { + "epoch": 0.44, + "grad_norm": 3.3338539600372314, + "learning_rate": 0.0002717563879930538, + "loss": 0.5996, + "step": 1268 + }, + { + "epoch": 0.44, + "grad_norm": 6.159158706665039, + "learning_rate": 0.00027171917638303146, + "loss": 0.692, + "step": 1269 + }, + { + "epoch": 0.44, + "grad_norm": 3.33358097076416, + "learning_rate": 0.00027168196477300917, + "loss": 0.6227, + "step": 1270 + }, + { + "epoch": 0.45, + "grad_norm": 3.7529525756835938, + "learning_rate": 0.0002716447531629868, + "loss": 0.5452, + "step": 1271 + }, + { + "epoch": 0.45, + "grad_norm": 3.7959513664245605, + "learning_rate": 0.0002716075415529645, + "loss": 0.6587, + "step": 1272 + }, + { + "epoch": 0.45, + "grad_norm": 3.419649600982666, + "learning_rate": 0.00027157032994294217, + "loss": 0.8046, + "step": 1273 + }, + { + "epoch": 0.45, + "grad_norm": 3.8088862895965576, + "learning_rate": 0.00027153311833291987, + "loss": 0.8147, + "step": 1274 + }, + { + "epoch": 0.45, + "grad_norm": 4.987424373626709, + "learning_rate": 0.0002714959067228975, + "loss": 0.8777, + "step": 1275 + }, + { + "epoch": 0.45, + "grad_norm": 3.6485095024108887, + "learning_rate": 0.0002714586951128752, + "loss": 1.4681, + "step": 1276 + }, + { + "epoch": 0.45, + "grad_norm": 4.164416313171387, + "learning_rate": 0.00027142148350285287, + "loss": 0.8455, + "step": 1277 + }, + { + "epoch": 0.45, + "grad_norm": 4.422079086303711, + "learning_rate": 0.0002713842718928305, + "loss": 1.0435, + "step": 1278 + }, + { + "epoch": 0.45, + "grad_norm": 2.755207061767578, + "learning_rate": 0.0002713470602828082, + "loss": 0.6229, + "step": 1279 + }, + { + "epoch": 0.45, + "grad_norm": 2.0500385761260986, + "learning_rate": 0.00027130984867278587, + "loss": 0.6549, + "step": 1280 + }, + { + "epoch": 0.45, + "grad_norm": 2.116751194000244, + "learning_rate": 0.00027127263706276357, + "loss": 0.6984, + "step": 1281 + }, + { + "epoch": 0.45, + "grad_norm": 2.164412498474121, + "learning_rate": 0.0002712354254527412, + "loss": 0.6526, + "step": 1282 + }, + { + "epoch": 0.45, + "grad_norm": 2.9012765884399414, + "learning_rate": 0.0002711982138427189, + "loss": 0.7316, + "step": 1283 + }, + { + "epoch": 0.45, + "grad_norm": 2.856905937194824, + "learning_rate": 0.00027116100223269657, + "loss": 0.6951, + "step": 1284 + }, + { + "epoch": 0.45, + "grad_norm": 2.1080093383789062, + "learning_rate": 0.0002711237906226742, + "loss": 0.5701, + "step": 1285 + }, + { + "epoch": 0.45, + "grad_norm": 1.3872121572494507, + "learning_rate": 0.0002710865790126519, + "loss": 0.3921, + "step": 1286 + }, + { + "epoch": 0.45, + "grad_norm": 5.119579792022705, + "learning_rate": 0.0002710493674026296, + "loss": 0.4822, + "step": 1287 + }, + { + "epoch": 0.45, + "grad_norm": 3.3475329875946045, + "learning_rate": 0.0002710121557926073, + "loss": 0.4675, + "step": 1288 + }, + { + "epoch": 0.45, + "grad_norm": 4.7075090408325195, + "learning_rate": 0.000270974944182585, + "loss": 0.8302, + "step": 1289 + }, + { + "epoch": 0.45, + "grad_norm": 5.20393180847168, + "learning_rate": 0.00027093773257256263, + "loss": 1.3705, + "step": 1290 + }, + { + "epoch": 0.45, + "grad_norm": 3.181398391723633, + "learning_rate": 0.0002709005209625403, + "loss": 0.5417, + "step": 1291 + }, + { + "epoch": 0.45, + "grad_norm": NaN, + "learning_rate": 0.0002709005209625403, + "loss": 0.097, + "step": 1292 + }, + { + "epoch": 0.45, + "grad_norm": 3.0050837993621826, + "learning_rate": 0.000270863309352518, + "loss": 0.5245, + "step": 1293 + }, + { + "epoch": 0.45, + "grad_norm": 4.091763973236084, + "learning_rate": 0.00027082609774249563, + "loss": 1.1929, + "step": 1294 + }, + { + "epoch": 0.45, + "grad_norm": 3.52555513381958, + "learning_rate": 0.00027078888613247333, + "loss": 1.1448, + "step": 1295 + }, + { + "epoch": 0.45, + "grad_norm": 5.984481334686279, + "learning_rate": 0.000270751674522451, + "loss": 0.7998, + "step": 1296 + }, + { + "epoch": 0.45, + "grad_norm": 6.075478553771973, + "learning_rate": 0.00027071446291242863, + "loss": 1.3081, + "step": 1297 + }, + { + "epoch": 0.45, + "grad_norm": 16.068614959716797, + "learning_rate": 0.00027067725130240633, + "loss": 0.6254, + "step": 1298 + }, + { + "epoch": 0.46, + "grad_norm": 3.1314620971679688, + "learning_rate": 0.000270640039692384, + "loss": 0.4411, + "step": 1299 + }, + { + "epoch": 0.46, + "grad_norm": 4.841050624847412, + "learning_rate": 0.0002706028280823617, + "loss": 0.5328, + "step": 1300 + }, + { + "epoch": 0.46, + "eval_loss": 0.718771755695343, + "eval_runtime": 50.1715, + "eval_samples_per_second": 43.212, + "eval_steps_per_second": 10.803, + "eval_wer": 0.6280055353745027, + "step": 1300 + }, + { + "epoch": 0.46, + "grad_norm": 2.5195000171661377, + "learning_rate": 0.00027056561647233933, + "loss": 1.2308, + "step": 1301 + }, + { + "epoch": 0.46, + "grad_norm": 3.1528189182281494, + "learning_rate": 0.00027052840486231703, + "loss": 1.2859, + "step": 1302 + }, + { + "epoch": 0.46, + "grad_norm": 2.554877519607544, + "learning_rate": 0.0002704911932522947, + "loss": 0.6787, + "step": 1303 + }, + { + "epoch": 0.46, + "grad_norm": 3.0567691326141357, + "learning_rate": 0.00027045398164227233, + "loss": 0.8472, + "step": 1304 + }, + { + "epoch": 0.46, + "grad_norm": 1.9456758499145508, + "learning_rate": 0.00027041677003225003, + "loss": 0.5319, + "step": 1305 + }, + { + "epoch": 0.46, + "grad_norm": 2.8807168006896973, + "learning_rate": 0.00027037955842222774, + "loss": 0.5714, + "step": 1306 + }, + { + "epoch": 0.46, + "grad_norm": 3.7060537338256836, + "learning_rate": 0.0002703423468122054, + "loss": 0.8815, + "step": 1307 + }, + { + "epoch": 0.46, + "grad_norm": 4.672762870788574, + "learning_rate": 0.0002703051352021831, + "loss": 0.9812, + "step": 1308 + }, + { + "epoch": 0.46, + "grad_norm": 2.651334285736084, + "learning_rate": 0.00027026792359216074, + "loss": 0.58, + "step": 1309 + }, + { + "epoch": 0.46, + "grad_norm": 5.646411418914795, + "learning_rate": 0.0002702307119821384, + "loss": 1.1246, + "step": 1310 + }, + { + "epoch": 0.46, + "grad_norm": 2.932492971420288, + "learning_rate": 0.0002701935003721161, + "loss": 0.6828, + "step": 1311 + }, + { + "epoch": 0.46, + "grad_norm": 3.369302749633789, + "learning_rate": 0.00027015628876209374, + "loss": 1.0535, + "step": 1312 + }, + { + "epoch": 0.46, + "grad_norm": 3.5227344036102295, + "learning_rate": 0.00027011907715207144, + "loss": 0.7979, + "step": 1313 + }, + { + "epoch": 0.46, + "grad_norm": 3.5197794437408447, + "learning_rate": 0.0002700818655420491, + "loss": 0.818, + "step": 1314 + }, + { + "epoch": 0.46, + "grad_norm": 2.7741096019744873, + "learning_rate": 0.0002700446539320268, + "loss": 0.2863, + "step": 1315 + }, + { + "epoch": 0.46, + "grad_norm": 5.058023929595947, + "learning_rate": 0.00027000744232200444, + "loss": 0.9948, + "step": 1316 + }, + { + "epoch": 0.46, + "grad_norm": 4.746575832366943, + "learning_rate": 0.0002699702307119821, + "loss": 1.0099, + "step": 1317 + }, + { + "epoch": 0.46, + "grad_norm": 3.513601064682007, + "learning_rate": 0.0002699330191019598, + "loss": 0.5189, + "step": 1318 + }, + { + "epoch": 0.46, + "grad_norm": 3.361132860183716, + "learning_rate": 0.0002698958074919375, + "loss": 0.7626, + "step": 1319 + }, + { + "epoch": 0.46, + "grad_norm": 3.748187780380249, + "learning_rate": 0.00026985859588191514, + "loss": 0.4882, + "step": 1320 + }, + { + "epoch": 0.46, + "grad_norm": 3.661416530609131, + "learning_rate": 0.0002698213842718928, + "loss": 0.5571, + "step": 1321 + }, + { + "epoch": 0.46, + "grad_norm": 6.057156562805176, + "learning_rate": 0.00026978417266187044, + "loss": 0.6306, + "step": 1322 + }, + { + "epoch": 0.46, + "grad_norm": 4.695087432861328, + "learning_rate": 0.00026974696105184814, + "loss": 0.617, + "step": 1323 + }, + { + "epoch": 0.46, + "grad_norm": 3.9815876483917236, + "learning_rate": 0.00026970974944182585, + "loss": 0.4741, + "step": 1324 + }, + { + "epoch": 0.46, + "grad_norm": 5.866700649261475, + "learning_rate": 0.0002696725378318035, + "loss": 0.7968, + "step": 1325 + }, + { + "epoch": 0.46, + "grad_norm": 3.9536242485046387, + "learning_rate": 0.0002696353262217812, + "loss": 1.1302, + "step": 1326 + }, + { + "epoch": 0.46, + "grad_norm": 3.9558680057525635, + "learning_rate": 0.00026959811461175885, + "loss": 0.7709, + "step": 1327 + }, + { + "epoch": 0.47, + "grad_norm": 2.1161468029022217, + "learning_rate": 0.0002695609030017365, + "loss": 0.8176, + "step": 1328 + }, + { + "epoch": 0.47, + "grad_norm": 3.3528337478637695, + "learning_rate": 0.0002695236913917142, + "loss": 1.0837, + "step": 1329 + }, + { + "epoch": 0.47, + "grad_norm": 2.7077548503875732, + "learning_rate": 0.00026948647978169185, + "loss": 1.0823, + "step": 1330 + }, + { + "epoch": 0.47, + "grad_norm": 2.875311851501465, + "learning_rate": 0.00026944926817166955, + "loss": 0.7376, + "step": 1331 + }, + { + "epoch": 0.47, + "grad_norm": 3.183072090148926, + "learning_rate": 0.0002694120565616472, + "loss": 0.9196, + "step": 1332 + }, + { + "epoch": 0.47, + "grad_norm": 2.7492129802703857, + "learning_rate": 0.0002693748449516249, + "loss": 0.4203, + "step": 1333 + }, + { + "epoch": 0.47, + "grad_norm": 3.388514518737793, + "learning_rate": 0.00026933763334160255, + "loss": 1.483, + "step": 1334 + }, + { + "epoch": 0.47, + "grad_norm": 1.9908661842346191, + "learning_rate": 0.0002693004217315802, + "loss": 0.3537, + "step": 1335 + }, + { + "epoch": 0.47, + "grad_norm": 1.7505980730056763, + "learning_rate": 0.0002692632101215579, + "loss": 0.3952, + "step": 1336 + }, + { + "epoch": 0.47, + "grad_norm": 8.66080379486084, + "learning_rate": 0.0002692259985115356, + "loss": 2.9207, + "step": 1337 + }, + { + "epoch": 0.47, + "grad_norm": 3.433154821395874, + "learning_rate": 0.00026918878690151325, + "loss": 0.8745, + "step": 1338 + }, + { + "epoch": 0.47, + "grad_norm": 2.7627408504486084, + "learning_rate": 0.0002691515752914909, + "loss": 0.3728, + "step": 1339 + }, + { + "epoch": 0.47, + "grad_norm": 2.7169649600982666, + "learning_rate": 0.0002691143636814686, + "loss": 0.4879, + "step": 1340 + }, + { + "epoch": 0.47, + "grad_norm": 3.4643096923828125, + "learning_rate": 0.00026907715207144625, + "loss": 0.4795, + "step": 1341 + }, + { + "epoch": 0.47, + "grad_norm": 4.340239524841309, + "learning_rate": 0.00026903994046142396, + "loss": 0.8171, + "step": 1342 + }, + { + "epoch": 0.47, + "grad_norm": 3.2308506965637207, + "learning_rate": 0.0002690027288514016, + "loss": 0.7844, + "step": 1343 + }, + { + "epoch": 0.47, + "grad_norm": 4.7775092124938965, + "learning_rate": 0.0002689655172413793, + "loss": 0.7076, + "step": 1344 + }, + { + "epoch": 0.47, + "grad_norm": 4.886669635772705, + "learning_rate": 0.00026892830563135696, + "loss": 0.5041, + "step": 1345 + }, + { + "epoch": 0.47, + "grad_norm": 4.971267223358154, + "learning_rate": 0.0002688910940213346, + "loss": 0.8198, + "step": 1346 + }, + { + "epoch": 0.47, + "grad_norm": 2.96894907951355, + "learning_rate": 0.0002688538824113123, + "loss": 0.4911, + "step": 1347 + }, + { + "epoch": 0.47, + "grad_norm": 4.252577781677246, + "learning_rate": 0.00026881667080128996, + "loss": 0.7421, + "step": 1348 + }, + { + "epoch": 0.47, + "grad_norm": 5.704499244689941, + "learning_rate": 0.00026877945919126766, + "loss": 0.8419, + "step": 1349 + }, + { + "epoch": 0.47, + "grad_norm": 6.062288761138916, + "learning_rate": 0.00026874224758124536, + "loss": 2.6257, + "step": 1350 + }, + { + "epoch": 0.47, + "grad_norm": 2.9042396545410156, + "learning_rate": 0.000268705035971223, + "loss": 1.0895, + "step": 1351 + }, + { + "epoch": 0.47, + "grad_norm": 3.361873149871826, + "learning_rate": 0.00026866782436120066, + "loss": 1.0075, + "step": 1352 + }, + { + "epoch": 0.47, + "grad_norm": 1.925755262374878, + "learning_rate": 0.0002686306127511783, + "loss": 0.5891, + "step": 1353 + }, + { + "epoch": 0.47, + "grad_norm": 1.8617517948150635, + "learning_rate": 0.000268593401141156, + "loss": 0.6051, + "step": 1354 + }, + { + "epoch": 0.47, + "grad_norm": 3.268332004547119, + "learning_rate": 0.0002685561895311337, + "loss": 1.3279, + "step": 1355 + }, + { + "epoch": 0.48, + "grad_norm": 2.6798906326293945, + "learning_rate": 0.00026851897792111136, + "loss": 0.7166, + "step": 1356 + }, + { + "epoch": 0.48, + "grad_norm": 3.4666085243225098, + "learning_rate": 0.00026848176631108907, + "loss": 0.8236, + "step": 1357 + }, + { + "epoch": 0.48, + "grad_norm": 4.445189952850342, + "learning_rate": 0.0002684445547010667, + "loss": 1.0953, + "step": 1358 + }, + { + "epoch": 0.48, + "grad_norm": 3.491154909133911, + "learning_rate": 0.00026840734309104436, + "loss": 0.86, + "step": 1359 + }, + { + "epoch": 0.48, + "grad_norm": 1.873551368713379, + "learning_rate": 0.00026837013148102207, + "loss": 0.3485, + "step": 1360 + }, + { + "epoch": 0.48, + "grad_norm": 4.520420551300049, + "learning_rate": 0.0002683329198709997, + "loss": 0.5189, + "step": 1361 + }, + { + "epoch": 0.48, + "grad_norm": 2.5249836444854736, + "learning_rate": 0.0002682957082609774, + "loss": 0.6036, + "step": 1362 + }, + { + "epoch": 0.48, + "grad_norm": 6.196778774261475, + "learning_rate": 0.00026825849665095507, + "loss": 1.0194, + "step": 1363 + }, + { + "epoch": 0.48, + "grad_norm": 2.7715699672698975, + "learning_rate": 0.0002682212850409327, + "loss": 0.6127, + "step": 1364 + }, + { + "epoch": 0.48, + "grad_norm": 4.2750773429870605, + "learning_rate": 0.0002681840734309104, + "loss": 0.5814, + "step": 1365 + }, + { + "epoch": 0.48, + "grad_norm": 6.068713188171387, + "learning_rate": 0.00026814686182088807, + "loss": 0.8995, + "step": 1366 + }, + { + "epoch": 0.48, + "grad_norm": 2.5809454917907715, + "learning_rate": 0.00026810965021086577, + "loss": 0.4658, + "step": 1367 + }, + { + "epoch": 0.48, + "grad_norm": 8.29049301147461, + "learning_rate": 0.0002680724386008435, + "loss": 0.942, + "step": 1368 + }, + { + "epoch": 0.48, + "grad_norm": 2.825986385345459, + "learning_rate": 0.0002680352269908211, + "loss": 0.2832, + "step": 1369 + }, + { + "epoch": 0.48, + "grad_norm": 4.205315113067627, + "learning_rate": 0.00026799801538079877, + "loss": 0.3227, + "step": 1370 + }, + { + "epoch": 0.48, + "grad_norm": 3.076242685317993, + "learning_rate": 0.0002679608037707764, + "loss": 0.6167, + "step": 1371 + }, + { + "epoch": 0.48, + "grad_norm": 3.471637487411499, + "learning_rate": 0.0002679235921607541, + "loss": 0.4435, + "step": 1372 + }, + { + "epoch": 0.48, + "grad_norm": 6.81670618057251, + "learning_rate": 0.0002678863805507318, + "loss": 0.6166, + "step": 1373 + }, + { + "epoch": 0.48, + "grad_norm": 10.90646743774414, + "learning_rate": 0.0002678491689407095, + "loss": 1.4561, + "step": 1374 + }, + { + "epoch": 0.48, + "grad_norm": 10.379186630249023, + "learning_rate": 0.0002678119573306872, + "loss": 1.1457, + "step": 1375 + }, + { + "epoch": 0.48, + "grad_norm": 10.573211669921875, + "learning_rate": 0.0002677747457206648, + "loss": 1.8288, + "step": 1376 + }, + { + "epoch": 0.48, + "grad_norm": 3.0730042457580566, + "learning_rate": 0.0002677375341106425, + "loss": 1.1138, + "step": 1377 + }, + { + "epoch": 0.48, + "grad_norm": 3.4380507469177246, + "learning_rate": 0.0002677003225006202, + "loss": 0.6603, + "step": 1378 + }, + { + "epoch": 0.48, + "grad_norm": 2.709672212600708, + "learning_rate": 0.0002676631108905978, + "loss": 0.7853, + "step": 1379 + }, + { + "epoch": 0.48, + "grad_norm": 3.5759897232055664, + "learning_rate": 0.00026762589928057553, + "loss": 0.7393, + "step": 1380 + }, + { + "epoch": 0.48, + "grad_norm": 2.016786813735962, + "learning_rate": 0.0002675886876705532, + "loss": 0.9876, + "step": 1381 + }, + { + "epoch": 0.48, + "grad_norm": 2.079416036605835, + "learning_rate": 0.0002675514760605309, + "loss": 0.4571, + "step": 1382 + }, + { + "epoch": 0.48, + "grad_norm": 3.269665241241455, + "learning_rate": 0.00026751426445050853, + "loss": 0.9098, + "step": 1383 + }, + { + "epoch": 0.48, + "grad_norm": 3.512655735015869, + "learning_rate": 0.0002674770528404862, + "loss": 1.1042, + "step": 1384 + }, + { + "epoch": 0.49, + "grad_norm": 2.725264072418213, + "learning_rate": 0.0002674398412304639, + "loss": 0.8414, + "step": 1385 + }, + { + "epoch": 0.49, + "grad_norm": 4.0862860679626465, + "learning_rate": 0.0002674026296204416, + "loss": 1.6329, + "step": 1386 + }, + { + "epoch": 0.49, + "grad_norm": 4.442559242248535, + "learning_rate": 0.00026736541801041923, + "loss": 1.1877, + "step": 1387 + }, + { + "epoch": 0.49, + "grad_norm": 3.18272066116333, + "learning_rate": 0.0002673282064003969, + "loss": 0.8395, + "step": 1388 + }, + { + "epoch": 0.49, + "grad_norm": 3.8753631114959717, + "learning_rate": 0.0002672909947903746, + "loss": 0.5428, + "step": 1389 + }, + { + "epoch": 0.49, + "grad_norm": 3.923710823059082, + "learning_rate": 0.00026725378318035223, + "loss": 1.0107, + "step": 1390 + }, + { + "epoch": 0.49, + "grad_norm": 2.947282552719116, + "learning_rate": 0.00026721657157032994, + "loss": 0.4316, + "step": 1391 + }, + { + "epoch": 0.49, + "grad_norm": 3.441121816635132, + "learning_rate": 0.0002671793599603076, + "loss": 1.4819, + "step": 1392 + }, + { + "epoch": 0.49, + "grad_norm": 2.639099597930908, + "learning_rate": 0.0002671421483502853, + "loss": 0.4009, + "step": 1393 + }, + { + "epoch": 0.49, + "grad_norm": 4.281423091888428, + "learning_rate": 0.00026710493674026294, + "loss": 0.8285, + "step": 1394 + }, + { + "epoch": 0.49, + "grad_norm": 5.369678020477295, + "learning_rate": 0.0002670677251302406, + "loss": 1.3822, + "step": 1395 + }, + { + "epoch": 0.49, + "grad_norm": 3.5579185485839844, + "learning_rate": 0.0002670305135202183, + "loss": 0.4678, + "step": 1396 + }, + { + "epoch": 0.49, + "grad_norm": 4.262704849243164, + "learning_rate": 0.00026699330191019594, + "loss": 0.5735, + "step": 1397 + }, + { + "epoch": 0.49, + "grad_norm": 3.989433526992798, + "learning_rate": 0.00026695609030017364, + "loss": 0.6595, + "step": 1398 + }, + { + "epoch": 0.49, + "grad_norm": 7.13302755355835, + "learning_rate": 0.00026691887869015134, + "loss": 1.1522, + "step": 1399 + }, + { + "epoch": 0.49, + "grad_norm": 10.057518005371094, + "learning_rate": 0.000266881667080129, + "loss": 1.3733, + "step": 1400 + }, + { + "epoch": 0.49, + "eval_loss": 0.6848714351654053, + "eval_runtime": 50.8029, + "eval_samples_per_second": 42.675, + "eval_steps_per_second": 10.669, + "eval_wer": 0.6019719771665801, + "step": 1400 + }, + { + "epoch": 0.49, + "grad_norm": 3.617882251739502, + "learning_rate": 0.00026684445547010664, + "loss": 1.3938, + "step": 1401 + }, + { + "epoch": 0.49, + "grad_norm": 2.1804263591766357, + "learning_rate": 0.0002668072438600843, + "loss": 0.9542, + "step": 1402 + }, + { + "epoch": 0.49, + "grad_norm": 2.778125286102295, + "learning_rate": 0.000266770032250062, + "loss": 0.6878, + "step": 1403 + }, + { + "epoch": 0.49, + "grad_norm": 2.387033700942993, + "learning_rate": 0.0002667328206400397, + "loss": 0.6723, + "step": 1404 + }, + { + "epoch": 0.49, + "grad_norm": 3.006556272506714, + "learning_rate": 0.00026669560903001734, + "loss": 0.8387, + "step": 1405 + }, + { + "epoch": 0.49, + "grad_norm": 1.9671204090118408, + "learning_rate": 0.000266658397419995, + "loss": 0.6155, + "step": 1406 + }, + { + "epoch": 0.49, + "grad_norm": 2.056529998779297, + "learning_rate": 0.0002666211858099727, + "loss": 0.5556, + "step": 1407 + }, + { + "epoch": 0.49, + "grad_norm": 1.5942492485046387, + "learning_rate": 0.00026658397419995034, + "loss": 0.3429, + "step": 1408 + }, + { + "epoch": 0.49, + "grad_norm": 6.031838417053223, + "learning_rate": 0.00026654676258992805, + "loss": 1.1818, + "step": 1409 + }, + { + "epoch": 0.49, + "grad_norm": 2.842709541320801, + "learning_rate": 0.0002665095509799057, + "loss": 0.9802, + "step": 1410 + }, + { + "epoch": 0.49, + "grad_norm": 2.389888286590576, + "learning_rate": 0.0002664723393698834, + "loss": 0.4917, + "step": 1411 + }, + { + "epoch": 0.49, + "grad_norm": 3.805605173110962, + "learning_rate": 0.00026643512775986105, + "loss": 0.6546, + "step": 1412 + }, + { + "epoch": 0.5, + "grad_norm": 4.952422142028809, + "learning_rate": 0.0002663979161498387, + "loss": 0.6714, + "step": 1413 + }, + { + "epoch": 0.5, + "grad_norm": 3.914050340652466, + "learning_rate": 0.0002663607045398164, + "loss": 0.6369, + "step": 1414 + }, + { + "epoch": 0.5, + "grad_norm": 2.893771171569824, + "learning_rate": 0.00026632349292979405, + "loss": 0.721, + "step": 1415 + }, + { + "epoch": 0.5, + "grad_norm": 4.831104755401611, + "learning_rate": 0.00026628628131977175, + "loss": 1.0663, + "step": 1416 + }, + { + "epoch": 0.5, + "grad_norm": 2.74212384223938, + "learning_rate": 0.00026624906970974945, + "loss": 0.6219, + "step": 1417 + }, + { + "epoch": 0.5, + "grad_norm": 5.251964569091797, + "learning_rate": 0.0002662118580997271, + "loss": 1.5889, + "step": 1418 + }, + { + "epoch": 0.5, + "grad_norm": 4.620336532592773, + "learning_rate": 0.00026617464648970475, + "loss": 1.05, + "step": 1419 + }, + { + "epoch": 0.5, + "grad_norm": 5.674115180969238, + "learning_rate": 0.00026613743487968245, + "loss": 0.7279, + "step": 1420 + }, + { + "epoch": 0.5, + "grad_norm": 3.6265783309936523, + "learning_rate": 0.0002661002232696601, + "loss": 0.4997, + "step": 1421 + }, + { + "epoch": 0.5, + "grad_norm": 4.726874828338623, + "learning_rate": 0.0002660630116596378, + "loss": 0.9038, + "step": 1422 + }, + { + "epoch": 0.5, + "grad_norm": 5.306706428527832, + "learning_rate": 0.00026602580004961545, + "loss": 1.3036, + "step": 1423 + }, + { + "epoch": 0.5, + "grad_norm": 4.389394283294678, + "learning_rate": 0.00026598858843959316, + "loss": 1.1654, + "step": 1424 + }, + { + "epoch": 0.5, + "grad_norm": 3.7293262481689453, + "learning_rate": 0.0002659513768295708, + "loss": 0.4846, + "step": 1425 + }, + { + "epoch": 0.5, + "grad_norm": 3.0907764434814453, + "learning_rate": 0.00026591416521954845, + "loss": 1.5577, + "step": 1426 + }, + { + "epoch": 0.5, + "grad_norm": 4.340789318084717, + "learning_rate": 0.00026587695360952616, + "loss": 0.8854, + "step": 1427 + }, + { + "epoch": 0.5, + "grad_norm": 5.936422348022461, + "learning_rate": 0.0002658397419995038, + "loss": 1.3372, + "step": 1428 + }, + { + "epoch": 0.5, + "grad_norm": 2.7207283973693848, + "learning_rate": 0.0002658025303894815, + "loss": 0.9362, + "step": 1429 + }, + { + "epoch": 0.5, + "grad_norm": 2.6525309085845947, + "learning_rate": 0.00026576531877945916, + "loss": 0.7226, + "step": 1430 + }, + { + "epoch": 0.5, + "grad_norm": 2.381458282470703, + "learning_rate": 0.00026572810716943686, + "loss": 0.6479, + "step": 1431 + }, + { + "epoch": 0.5, + "grad_norm": 2.5098137855529785, + "learning_rate": 0.0002656908955594145, + "loss": 0.6157, + "step": 1432 + }, + { + "epoch": 0.5, + "grad_norm": 2.9847960472106934, + "learning_rate": 0.00026565368394939216, + "loss": 0.5089, + "step": 1433 + }, + { + "epoch": 0.5, + "grad_norm": 4.012515544891357, + "learning_rate": 0.00026561647233936986, + "loss": 0.4906, + "step": 1434 + }, + { + "epoch": 0.5, + "grad_norm": 2.7779381275177, + "learning_rate": 0.00026557926072934756, + "loss": 0.5565, + "step": 1435 + }, + { + "epoch": 0.5, + "grad_norm": 3.0392794609069824, + "learning_rate": 0.0002655420491193252, + "loss": 0.631, + "step": 1436 + }, + { + "epoch": 0.5, + "grad_norm": 5.07166051864624, + "learning_rate": 0.00026550483750930286, + "loss": 0.4397, + "step": 1437 + }, + { + "epoch": 0.5, + "grad_norm": 2.711935520172119, + "learning_rate": 0.00026546762589928056, + "loss": 0.5008, + "step": 1438 + }, + { + "epoch": 0.5, + "grad_norm": 2.4669036865234375, + "learning_rate": 0.0002654304142892582, + "loss": 0.5278, + "step": 1439 + }, + { + "epoch": 0.5, + "grad_norm": 4.123307228088379, + "learning_rate": 0.0002653932026792359, + "loss": 0.7766, + "step": 1440 + }, + { + "epoch": 0.5, + "grad_norm": 3.164463758468628, + "learning_rate": 0.00026535599106921356, + "loss": 0.6879, + "step": 1441 + }, + { + "epoch": 0.51, + "grad_norm": 4.237036228179932, + "learning_rate": 0.00026531877945919127, + "loss": 0.6142, + "step": 1442 + }, + { + "epoch": 0.51, + "grad_norm": 3.148812770843506, + "learning_rate": 0.0002652815678491689, + "loss": 0.469, + "step": 1443 + }, + { + "epoch": 0.51, + "grad_norm": 5.390077590942383, + "learning_rate": 0.00026524435623914656, + "loss": 0.7129, + "step": 1444 + }, + { + "epoch": 0.51, + "grad_norm": 3.447877883911133, + "learning_rate": 0.00026520714462912427, + "loss": 1.0652, + "step": 1445 + }, + { + "epoch": 0.51, + "grad_norm": 4.019307613372803, + "learning_rate": 0.0002651699330191019, + "loss": 0.6862, + "step": 1446 + }, + { + "epoch": 0.51, + "grad_norm": 5.470080375671387, + "learning_rate": 0.0002651327214090796, + "loss": 0.9423, + "step": 1447 + }, + { + "epoch": 0.51, + "grad_norm": 4.0855607986450195, + "learning_rate": 0.00026509550979905727, + "loss": 0.7612, + "step": 1448 + }, + { + "epoch": 0.51, + "grad_norm": 4.5919671058654785, + "learning_rate": 0.00026505829818903497, + "loss": 0.7675, + "step": 1449 + }, + { + "epoch": 0.51, + "grad_norm": 3.587916612625122, + "learning_rate": 0.0002650210865790126, + "loss": 0.2524, + "step": 1450 + }, + { + "epoch": 0.51, + "grad_norm": 6.112603664398193, + "learning_rate": 0.0002649838749689903, + "loss": 1.4048, + "step": 1451 + }, + { + "epoch": 0.51, + "grad_norm": 2.97434139251709, + "learning_rate": 0.00026494666335896797, + "loss": 1.2854, + "step": 1452 + }, + { + "epoch": 0.51, + "grad_norm": 2.173609972000122, + "learning_rate": 0.00026490945174894567, + "loss": 0.5143, + "step": 1453 + }, + { + "epoch": 0.51, + "grad_norm": 2.1337573528289795, + "learning_rate": 0.0002648722401389233, + "loss": 0.7012, + "step": 1454 + }, + { + "epoch": 0.51, + "grad_norm": 10.74719524383545, + "learning_rate": 0.00026483502852890097, + "loss": 2.8888, + "step": 1455 + }, + { + "epoch": 0.51, + "grad_norm": 2.577202558517456, + "learning_rate": 0.0002647978169188787, + "loss": 0.5115, + "step": 1456 + }, + { + "epoch": 0.51, + "grad_norm": 2.5629165172576904, + "learning_rate": 0.0002647606053088563, + "loss": 0.9478, + "step": 1457 + }, + { + "epoch": 0.51, + "grad_norm": 2.9556891918182373, + "learning_rate": 0.000264723393698834, + "loss": 0.5669, + "step": 1458 + }, + { + "epoch": 0.51, + "grad_norm": 2.905815601348877, + "learning_rate": 0.0002646861820888117, + "loss": 0.732, + "step": 1459 + }, + { + "epoch": 0.51, + "grad_norm": 3.5102758407592773, + "learning_rate": 0.0002646489704787894, + "loss": 0.875, + "step": 1460 + }, + { + "epoch": 0.51, + "grad_norm": 2.5579817295074463, + "learning_rate": 0.000264611758868767, + "loss": 0.5173, + "step": 1461 + }, + { + "epoch": 0.51, + "grad_norm": 3.0412824153900146, + "learning_rate": 0.0002645745472587447, + "loss": 0.95, + "step": 1462 + }, + { + "epoch": 0.51, + "grad_norm": 2.4420363903045654, + "learning_rate": 0.0002645373356487224, + "loss": 0.5622, + "step": 1463 + }, + { + "epoch": 0.51, + "grad_norm": 3.9669206142425537, + "learning_rate": 0.0002645001240387001, + "loss": 1.3098, + "step": 1464 + }, + { + "epoch": 0.51, + "grad_norm": 3.515052080154419, + "learning_rate": 0.00026446291242867773, + "loss": 0.6694, + "step": 1465 + }, + { + "epoch": 0.51, + "grad_norm": 4.764845848083496, + "learning_rate": 0.00026442570081865543, + "loss": 0.4299, + "step": 1466 + }, + { + "epoch": 0.51, + "grad_norm": 3.278150796890259, + "learning_rate": 0.0002643884892086331, + "loss": 0.5183, + "step": 1467 + }, + { + "epoch": 0.51, + "grad_norm": 5.361281394958496, + "learning_rate": 0.00026435127759861073, + "loss": 0.8904, + "step": 1468 + }, + { + "epoch": 0.51, + "grad_norm": 4.450416088104248, + "learning_rate": 0.00026431406598858843, + "loss": 0.5072, + "step": 1469 + }, + { + "epoch": 0.52, + "grad_norm": 3.9510233402252197, + "learning_rate": 0.0002642768543785661, + "loss": 0.5038, + "step": 1470 + }, + { + "epoch": 0.52, + "grad_norm": 4.09872579574585, + "learning_rate": 0.0002642396427685438, + "loss": 0.8801, + "step": 1471 + }, + { + "epoch": 0.52, + "grad_norm": 3.2074294090270996, + "learning_rate": 0.00026420243115852143, + "loss": 0.5852, + "step": 1472 + }, + { + "epoch": 0.52, + "grad_norm": 6.999640941619873, + "learning_rate": 0.00026416521954849913, + "loss": 0.4915, + "step": 1473 + }, + { + "epoch": 0.52, + "grad_norm": 3.909586191177368, + "learning_rate": 0.0002641280079384768, + "loss": 0.811, + "step": 1474 + }, + { + "epoch": 0.52, + "grad_norm": 8.129749298095703, + "learning_rate": 0.00026409079632845443, + "loss": 2.2248, + "step": 1475 + }, + { + "epoch": 0.52, + "grad_norm": 4.023890018463135, + "learning_rate": 0.00026405358471843213, + "loss": 1.2998, + "step": 1476 + }, + { + "epoch": 0.52, + "grad_norm": 3.378636598587036, + "learning_rate": 0.0002640163731084098, + "loss": 0.7717, + "step": 1477 + }, + { + "epoch": 0.52, + "grad_norm": 2.6731226444244385, + "learning_rate": 0.0002639791614983875, + "loss": 0.7744, + "step": 1478 + }, + { + "epoch": 0.52, + "grad_norm": 4.031921863555908, + "learning_rate": 0.00026394194988836513, + "loss": 0.9579, + "step": 1479 + }, + { + "epoch": 0.52, + "grad_norm": 3.2059433460235596, + "learning_rate": 0.0002639047382783428, + "loss": 0.7075, + "step": 1480 + }, + { + "epoch": 0.52, + "grad_norm": 2.67105770111084, + "learning_rate": 0.0002638675266683205, + "loss": 0.7341, + "step": 1481 + }, + { + "epoch": 0.52, + "grad_norm": 2.016000986099243, + "learning_rate": 0.0002638303150582982, + "loss": 0.4283, + "step": 1482 + }, + { + "epoch": 0.52, + "grad_norm": 2.9530158042907715, + "learning_rate": 0.00026379310344827584, + "loss": 0.6266, + "step": 1483 + }, + { + "epoch": 0.52, + "grad_norm": 2.5564568042755127, + "learning_rate": 0.00026375589183825354, + "loss": 0.7194, + "step": 1484 + }, + { + "epoch": 0.52, + "grad_norm": 2.650444269180298, + "learning_rate": 0.0002637186802282312, + "loss": 0.5366, + "step": 1485 + }, + { + "epoch": 0.52, + "grad_norm": 2.470327854156494, + "learning_rate": 0.00026368146861820884, + "loss": 0.6349, + "step": 1486 + }, + { + "epoch": 0.52, + "grad_norm": 3.796874761581421, + "learning_rate": 0.00026364425700818654, + "loss": 0.6011, + "step": 1487 + }, + { + "epoch": 0.52, + "grad_norm": 3.449688673019409, + "learning_rate": 0.0002636070453981642, + "loss": 0.6161, + "step": 1488 + }, + { + "epoch": 0.52, + "grad_norm": 2.23581600189209, + "learning_rate": 0.0002635698337881419, + "loss": 0.4338, + "step": 1489 + }, + { + "epoch": 0.52, + "grad_norm": 5.239893436431885, + "learning_rate": 0.00026353262217811954, + "loss": 0.9053, + "step": 1490 + }, + { + "epoch": 0.52, + "grad_norm": 3.8986151218414307, + "learning_rate": 0.00026349541056809724, + "loss": 0.3529, + "step": 1491 + }, + { + "epoch": 0.52, + "grad_norm": 3.6103169918060303, + "learning_rate": 0.0002634581989580749, + "loss": 0.8344, + "step": 1492 + }, + { + "epoch": 0.52, + "grad_norm": 4.4422993659973145, + "learning_rate": 0.00026342098734805254, + "loss": 0.6958, + "step": 1493 + }, + { + "epoch": 0.52, + "grad_norm": 3.637202501296997, + "learning_rate": 0.00026338377573803024, + "loss": 0.9728, + "step": 1494 + }, + { + "epoch": 0.52, + "grad_norm": 4.514386177062988, + "learning_rate": 0.00026334656412800795, + "loss": 0.625, + "step": 1495 + }, + { + "epoch": 0.52, + "grad_norm": 3.6551239490509033, + "learning_rate": 0.0002633093525179856, + "loss": 0.7805, + "step": 1496 + }, + { + "epoch": 0.52, + "grad_norm": 4.161332130432129, + "learning_rate": 0.00026327214090796324, + "loss": 0.7602, + "step": 1497 + }, + { + "epoch": 0.52, + "grad_norm": 3.9714465141296387, + "learning_rate": 0.00026323492929794095, + "loss": 0.5906, + "step": 1498 + }, + { + "epoch": 0.53, + "grad_norm": 3.682180404663086, + "learning_rate": 0.0002631977176879186, + "loss": 0.4266, + "step": 1499 + }, + { + "epoch": 0.53, + "grad_norm": 3.590712785720825, + "learning_rate": 0.0002631605060778963, + "loss": 0.4957, + "step": 1500 + }, + { + "epoch": 0.53, + "eval_loss": 0.735007107257843, + "eval_runtime": 50.1513, + "eval_samples_per_second": 43.229, + "eval_steps_per_second": 10.807, + "eval_wer": 0.630254281266217, + "step": 1500 + }, + { + "epoch": 0.53, + "grad_norm": 3.7293701171875, + "learning_rate": 0.00026312329446787395, + "loss": 0.8107, + "step": 1501 + }, + { + "epoch": 0.53, + "grad_norm": 3.134906053543091, + "learning_rate": 0.00026308608285785165, + "loss": 0.6081, + "step": 1502 + }, + { + "epoch": 0.53, + "grad_norm": 3.5551223754882812, + "learning_rate": 0.0002630488712478293, + "loss": 0.7375, + "step": 1503 + }, + { + "epoch": 0.53, + "grad_norm": 5.314273357391357, + "learning_rate": 0.00026301165963780695, + "loss": 0.8512, + "step": 1504 + }, + { + "epoch": 0.53, + "grad_norm": 2.985286235809326, + "learning_rate": 0.00026297444802778465, + "loss": 0.5388, + "step": 1505 + }, + { + "epoch": 0.53, + "grad_norm": 2.9923386573791504, + "learning_rate": 0.0002629372364177623, + "loss": 0.9692, + "step": 1506 + }, + { + "epoch": 0.53, + "grad_norm": 2.3773350715637207, + "learning_rate": 0.00026290002480774, + "loss": 0.9575, + "step": 1507 + }, + { + "epoch": 0.53, + "grad_norm": 3.891386032104492, + "learning_rate": 0.0002628628131977177, + "loss": 0.8126, + "step": 1508 + }, + { + "epoch": 0.53, + "grad_norm": 2.9968199729919434, + "learning_rate": 0.00026282560158769535, + "loss": 0.5441, + "step": 1509 + }, + { + "epoch": 0.53, + "grad_norm": 2.9973716735839844, + "learning_rate": 0.000262788389977673, + "loss": 0.618, + "step": 1510 + }, + { + "epoch": 0.53, + "grad_norm": 3.8595619201660156, + "learning_rate": 0.00026275117836765065, + "loss": 0.904, + "step": 1511 + }, + { + "epoch": 0.53, + "grad_norm": 3.169881820678711, + "learning_rate": 0.00026271396675762835, + "loss": 0.5045, + "step": 1512 + }, + { + "epoch": 0.53, + "grad_norm": 2.6832542419433594, + "learning_rate": 0.00026267675514760606, + "loss": 0.3136, + "step": 1513 + }, + { + "epoch": 0.53, + "grad_norm": 2.8257126808166504, + "learning_rate": 0.0002626395435375837, + "loss": 0.4304, + "step": 1514 + }, + { + "epoch": 0.53, + "grad_norm": 2.0919361114501953, + "learning_rate": 0.0002626023319275614, + "loss": 0.3809, + "step": 1515 + }, + { + "epoch": 0.53, + "grad_norm": 3.3260436058044434, + "learning_rate": 0.00026256512031753906, + "loss": 0.5717, + "step": 1516 + }, + { + "epoch": 0.53, + "grad_norm": 3.5611228942871094, + "learning_rate": 0.0002625279087075167, + "loss": 0.5815, + "step": 1517 + }, + { + "epoch": 0.53, + "grad_norm": 5.097376346588135, + "learning_rate": 0.0002624906970974944, + "loss": 0.7399, + "step": 1518 + }, + { + "epoch": 0.53, + "grad_norm": 3.300809621810913, + "learning_rate": 0.00026245348548747206, + "loss": 0.7625, + "step": 1519 + }, + { + "epoch": 0.53, + "grad_norm": 6.034483432769775, + "learning_rate": 0.00026241627387744976, + "loss": 1.3396, + "step": 1520 + }, + { + "epoch": 0.53, + "grad_norm": 3.987584114074707, + "learning_rate": 0.0002623790622674274, + "loss": 0.8065, + "step": 1521 + }, + { + "epoch": 0.53, + "grad_norm": 3.958810567855835, + "learning_rate": 0.00026234185065740506, + "loss": 0.5565, + "step": 1522 + }, + { + "epoch": 0.53, + "grad_norm": 3.113748550415039, + "learning_rate": 0.00026230463904738276, + "loss": 0.4262, + "step": 1523 + }, + { + "epoch": 0.53, + "grad_norm": 3.8435819149017334, + "learning_rate": 0.0002622674274373604, + "loss": 0.6072, + "step": 1524 + }, + { + "epoch": 0.53, + "grad_norm": 3.7314817905426025, + "learning_rate": 0.0002622302158273381, + "loss": 0.6308, + "step": 1525 + }, + { + "epoch": 0.53, + "grad_norm": 2.7327356338500977, + "learning_rate": 0.0002621930042173158, + "loss": 0.904, + "step": 1526 + }, + { + "epoch": 0.54, + "grad_norm": 3.118708848953247, + "learning_rate": 0.00026215579260729346, + "loss": 0.784, + "step": 1527 + }, + { + "epoch": 0.54, + "grad_norm": 2.2013180255889893, + "learning_rate": 0.0002621185809972711, + "loss": 0.8928, + "step": 1528 + }, + { + "epoch": 0.54, + "grad_norm": 2.631147861480713, + "learning_rate": 0.00026208136938724876, + "loss": 0.6634, + "step": 1529 + }, + { + "epoch": 0.54, + "grad_norm": 1.8624056577682495, + "learning_rate": 0.00026204415777722646, + "loss": 0.3717, + "step": 1530 + }, + { + "epoch": 0.54, + "grad_norm": 2.32458233833313, + "learning_rate": 0.00026200694616720417, + "loss": 0.4845, + "step": 1531 + }, + { + "epoch": 0.54, + "grad_norm": 2.4256484508514404, + "learning_rate": 0.0002619697345571818, + "loss": 0.6534, + "step": 1532 + }, + { + "epoch": 0.54, + "grad_norm": 3.6999833583831787, + "learning_rate": 0.0002619325229471595, + "loss": 0.6113, + "step": 1533 + }, + { + "epoch": 0.54, + "grad_norm": 2.6705386638641357, + "learning_rate": 0.00026189531133713717, + "loss": 0.6816, + "step": 1534 + }, + { + "epoch": 0.54, + "grad_norm": 2.588336706161499, + "learning_rate": 0.0002618580997271148, + "loss": 0.5221, + "step": 1535 + }, + { + "epoch": 0.54, + "grad_norm": 3.662912368774414, + "learning_rate": 0.0002618208881170925, + "loss": 1.0771, + "step": 1536 + }, + { + "epoch": 0.54, + "grad_norm": 3.598870277404785, + "learning_rate": 0.00026178367650707017, + "loss": 0.3864, + "step": 1537 + }, + { + "epoch": 0.54, + "grad_norm": 4.303178310394287, + "learning_rate": 0.00026174646489704787, + "loss": 0.8527, + "step": 1538 + }, + { + "epoch": 0.54, + "grad_norm": 1.631732702255249, + "learning_rate": 0.0002617092532870255, + "loss": 0.2559, + "step": 1539 + }, + { + "epoch": 0.54, + "grad_norm": 3.9511115550994873, + "learning_rate": 0.0002616720416770032, + "loss": 1.2759, + "step": 1540 + }, + { + "epoch": 0.54, + "grad_norm": 4.213435173034668, + "learning_rate": 0.00026163483006698087, + "loss": 0.9761, + "step": 1541 + }, + { + "epoch": 0.54, + "grad_norm": 4.040883541107178, + "learning_rate": 0.0002615976184569585, + "loss": 1.2503, + "step": 1542 + }, + { + "epoch": 0.54, + "grad_norm": 3.2061383724212646, + "learning_rate": 0.0002615604068469362, + "loss": 0.4728, + "step": 1543 + }, + { + "epoch": 0.54, + "grad_norm": 3.8172144889831543, + "learning_rate": 0.0002615231952369139, + "loss": 0.8298, + "step": 1544 + }, + { + "epoch": 0.54, + "grad_norm": 4.939277648925781, + "learning_rate": 0.0002614859836268916, + "loss": 1.1306, + "step": 1545 + }, + { + "epoch": 0.54, + "grad_norm": 2.502394676208496, + "learning_rate": 0.0002614487720168692, + "loss": 0.4795, + "step": 1546 + }, + { + "epoch": 0.54, + "grad_norm": 2.375518321990967, + "learning_rate": 0.00026141156040684687, + "loss": 0.3351, + "step": 1547 + }, + { + "epoch": 0.54, + "grad_norm": 6.979621410369873, + "learning_rate": 0.0002613743487968246, + "loss": 1.9284, + "step": 1548 + }, + { + "epoch": 0.54, + "grad_norm": 2.901841878890991, + "learning_rate": 0.0002613371371868023, + "loss": 0.4371, + "step": 1549 + }, + { + "epoch": 0.54, + "grad_norm": 4.348433971405029, + "learning_rate": 0.0002612999255767799, + "loss": 0.6329, + "step": 1550 + }, + { + "epoch": 0.54, + "grad_norm": 3.033573865890503, + "learning_rate": 0.00026126271396675763, + "loss": 1.1795, + "step": 1551 + }, + { + "epoch": 0.54, + "grad_norm": 2.457150459289551, + "learning_rate": 0.0002612255023567353, + "loss": 0.5499, + "step": 1552 + }, + { + "epoch": 0.54, + "grad_norm": 1.8844290971755981, + "learning_rate": 0.0002611882907467129, + "loss": 0.7523, + "step": 1553 + }, + { + "epoch": 0.54, + "grad_norm": 2.3162200450897217, + "learning_rate": 0.00026115107913669063, + "loss": 0.884, + "step": 1554 + }, + { + "epoch": 0.54, + "grad_norm": 2.550788164138794, + "learning_rate": 0.0002611138675266683, + "loss": 0.7523, + "step": 1555 + }, + { + "epoch": 0.55, + "grad_norm": 3.001537561416626, + "learning_rate": 0.000261076655916646, + "loss": 0.5522, + "step": 1556 + }, + { + "epoch": 0.55, + "grad_norm": 2.1897246837615967, + "learning_rate": 0.0002610394443066237, + "loss": 0.4203, + "step": 1557 + }, + { + "epoch": 0.55, + "grad_norm": 2.3803975582122803, + "learning_rate": 0.00026100223269660133, + "loss": 0.9045, + "step": 1558 + }, + { + "epoch": 0.55, + "grad_norm": 3.0036349296569824, + "learning_rate": 0.000260965021086579, + "loss": 0.6177, + "step": 1559 + }, + { + "epoch": 0.55, + "grad_norm": 4.269506454467773, + "learning_rate": 0.00026092780947655663, + "loss": 1.6503, + "step": 1560 + }, + { + "epoch": 0.55, + "grad_norm": 2.8251953125, + "learning_rate": 0.00026089059786653433, + "loss": 0.6453, + "step": 1561 + }, + { + "epoch": 0.55, + "grad_norm": 3.577118396759033, + "learning_rate": 0.00026085338625651204, + "loss": 1.0677, + "step": 1562 + }, + { + "epoch": 0.55, + "grad_norm": 2.7220301628112793, + "learning_rate": 0.0002608161746464897, + "loss": 1.0965, + "step": 1563 + }, + { + "epoch": 0.55, + "grad_norm": 2.421391010284424, + "learning_rate": 0.00026077896303646733, + "loss": 0.4751, + "step": 1564 + }, + { + "epoch": 0.55, + "grad_norm": 3.9482858180999756, + "learning_rate": 0.00026074175142644504, + "loss": 0.5636, + "step": 1565 + }, + { + "epoch": 0.55, + "grad_norm": 4.108609199523926, + "learning_rate": 0.0002607045398164227, + "loss": 0.6971, + "step": 1566 + }, + { + "epoch": 0.55, + "grad_norm": 2.993597984313965, + "learning_rate": 0.0002606673282064004, + "loss": 0.6064, + "step": 1567 + }, + { + "epoch": 0.55, + "grad_norm": 3.803035259246826, + "learning_rate": 0.00026063011659637804, + "loss": 0.5257, + "step": 1568 + }, + { + "epoch": 0.55, + "grad_norm": 3.5908119678497314, + "learning_rate": 0.00026059290498635574, + "loss": 0.8194, + "step": 1569 + }, + { + "epoch": 0.55, + "grad_norm": 2.1012234687805176, + "learning_rate": 0.0002605556933763334, + "loss": 0.5269, + "step": 1570 + }, + { + "epoch": 0.55, + "grad_norm": 2.691065549850464, + "learning_rate": 0.00026051848176631104, + "loss": 0.638, + "step": 1571 + }, + { + "epoch": 0.55, + "grad_norm": 4.872591018676758, + "learning_rate": 0.00026048127015628874, + "loss": 0.8042, + "step": 1572 + }, + { + "epoch": 0.55, + "grad_norm": 4.684198379516602, + "learning_rate": 0.0002604440585462664, + "loss": 0.8874, + "step": 1573 + }, + { + "epoch": 0.55, + "grad_norm": 4.133639812469482, + "learning_rate": 0.0002604068469362441, + "loss": 0.933, + "step": 1574 + }, + { + "epoch": 0.55, + "grad_norm": 5.2042012214660645, + "learning_rate": 0.0002603696353262218, + "loss": 1.4783, + "step": 1575 + }, + { + "epoch": 0.55, + "grad_norm": 4.421505928039551, + "learning_rate": 0.00026033242371619944, + "loss": 1.4112, + "step": 1576 + }, + { + "epoch": 0.55, + "grad_norm": 4.258162975311279, + "learning_rate": 0.0002602952121061771, + "loss": 1.1223, + "step": 1577 + }, + { + "epoch": 0.55, + "grad_norm": 3.783501386642456, + "learning_rate": 0.00026025800049615474, + "loss": 0.6567, + "step": 1578 + }, + { + "epoch": 0.55, + "grad_norm": 3.9202888011932373, + "learning_rate": 0.00026022078888613244, + "loss": 1.1697, + "step": 1579 + }, + { + "epoch": 0.55, + "grad_norm": 2.2230772972106934, + "learning_rate": 0.00026018357727611015, + "loss": 0.6542, + "step": 1580 + }, + { + "epoch": 0.55, + "grad_norm": 2.5005552768707275, + "learning_rate": 0.0002601463656660878, + "loss": 0.484, + "step": 1581 + }, + { + "epoch": 0.55, + "grad_norm": 2.2738864421844482, + "learning_rate": 0.0002601091540560655, + "loss": 0.5697, + "step": 1582 + }, + { + "epoch": 0.55, + "grad_norm": 2.813744306564331, + "learning_rate": 0.00026007194244604315, + "loss": 0.7979, + "step": 1583 + }, + { + "epoch": 0.56, + "grad_norm": 2.8571112155914307, + "learning_rate": 0.0002600347308360208, + "loss": 0.6285, + "step": 1584 + }, + { + "epoch": 0.56, + "grad_norm": 2.8317923545837402, + "learning_rate": 0.0002599975192259985, + "loss": 0.4895, + "step": 1585 + }, + { + "epoch": 0.56, + "grad_norm": 3.4439423084259033, + "learning_rate": 0.00025996030761597615, + "loss": 0.4935, + "step": 1586 + }, + { + "epoch": 0.56, + "grad_norm": 2.2733328342437744, + "learning_rate": 0.00025992309600595385, + "loss": 0.8773, + "step": 1587 + }, + { + "epoch": 0.56, + "grad_norm": 2.249488592147827, + "learning_rate": 0.0002598858843959315, + "loss": 0.4, + "step": 1588 + }, + { + "epoch": 0.56, + "grad_norm": 1.9486145973205566, + "learning_rate": 0.00025984867278590915, + "loss": 0.48, + "step": 1589 + }, + { + "epoch": 0.56, + "grad_norm": 4.063397407531738, + "learning_rate": 0.00025981146117588685, + "loss": 0.7096, + "step": 1590 + }, + { + "epoch": 0.56, + "grad_norm": 3.8297181129455566, + "learning_rate": 0.0002597742495658645, + "loss": 0.5382, + "step": 1591 + }, + { + "epoch": 0.56, + "grad_norm": 3.13065505027771, + "learning_rate": 0.0002597370379558422, + "loss": 0.6512, + "step": 1592 + }, + { + "epoch": 0.56, + "grad_norm": 3.807405710220337, + "learning_rate": 0.0002596998263458199, + "loss": 0.4922, + "step": 1593 + }, + { + "epoch": 0.56, + "grad_norm": 4.6079254150390625, + "learning_rate": 0.00025966261473579755, + "loss": 0.5638, + "step": 1594 + }, + { + "epoch": 0.56, + "grad_norm": 2.535735607147217, + "learning_rate": 0.0002596254031257752, + "loss": 0.7087, + "step": 1595 + }, + { + "epoch": 0.56, + "grad_norm": 2.7222447395324707, + "learning_rate": 0.0002595881915157529, + "loss": 0.4831, + "step": 1596 + }, + { + "epoch": 0.56, + "grad_norm": 3.0065388679504395, + "learning_rate": 0.00025955097990573055, + "loss": 0.7503, + "step": 1597 + }, + { + "epoch": 0.56, + "grad_norm": 3.3113796710968018, + "learning_rate": 0.00025951376829570826, + "loss": 0.5052, + "step": 1598 + }, + { + "epoch": 0.56, + "grad_norm": 4.114770412445068, + "learning_rate": 0.0002594765566856859, + "loss": 0.6825, + "step": 1599 + }, + { + "epoch": 0.56, + "grad_norm": 3.18342924118042, + "learning_rate": 0.0002594393450756636, + "loss": 0.4084, + "step": 1600 + }, + { + "epoch": 0.56, + "eval_loss": 0.7260645627975464, + "eval_runtime": 49.8256, + "eval_samples_per_second": 43.512, + "eval_steps_per_second": 10.878, + "eval_wer": 0.6434872859366891, + "step": 1600 + }, + { + "epoch": 0.56, + "grad_norm": 2.540189504623413, + "learning_rate": 0.00025940213346564126, + "loss": 0.9917, + "step": 1601 + }, + { + "epoch": 0.56, + "grad_norm": 5.168649673461914, + "learning_rate": 0.0002593649218556189, + "loss": 1.1896, + "step": 1602 + }, + { + "epoch": 0.56, + "grad_norm": 3.469858407974243, + "learning_rate": 0.0002593277102455966, + "loss": 0.588, + "step": 1603 + }, + { + "epoch": 0.56, + "grad_norm": 2.415588617324829, + "learning_rate": 0.00025929049863557426, + "loss": 1.2277, + "step": 1604 + }, + { + "epoch": 0.56, + "grad_norm": 1.8117973804473877, + "learning_rate": 0.00025925328702555196, + "loss": 0.5193, + "step": 1605 + }, + { + "epoch": 0.56, + "grad_norm": 2.193356990814209, + "learning_rate": 0.0002592160754155296, + "loss": 0.5361, + "step": 1606 + }, + { + "epoch": 0.56, + "grad_norm": 4.278368949890137, + "learning_rate": 0.0002591788638055073, + "loss": 0.7885, + "step": 1607 + }, + { + "epoch": 0.56, + "grad_norm": 2.5760748386383057, + "learning_rate": 0.00025914165219548496, + "loss": 0.6032, + "step": 1608 + }, + { + "epoch": 0.56, + "grad_norm": 2.544609308242798, + "learning_rate": 0.00025910444058546266, + "loss": 0.7552, + "step": 1609 + }, + { + "epoch": 0.56, + "grad_norm": 3.166447639465332, + "learning_rate": 0.0002590672289754403, + "loss": 0.9139, + "step": 1610 + }, + { + "epoch": 0.56, + "grad_norm": 5.605889797210693, + "learning_rate": 0.000259030017365418, + "loss": 1.2701, + "step": 1611 + }, + { + "epoch": 0.56, + "grad_norm": 3.2346339225769043, + "learning_rate": 0.00025899280575539566, + "loss": 0.807, + "step": 1612 + }, + { + "epoch": 0.57, + "grad_norm": 2.9761223793029785, + "learning_rate": 0.0002589555941453733, + "loss": 0.6815, + "step": 1613 + }, + { + "epoch": 0.57, + "grad_norm": 2.0151445865631104, + "learning_rate": 0.000258918382535351, + "loss": 0.4635, + "step": 1614 + }, + { + "epoch": 0.57, + "grad_norm": 1.8519450426101685, + "learning_rate": 0.00025888117092532866, + "loss": 0.3119, + "step": 1615 + }, + { + "epoch": 0.57, + "grad_norm": 4.429759502410889, + "learning_rate": 0.00025884395931530637, + "loss": 0.7836, + "step": 1616 + }, + { + "epoch": 0.57, + "grad_norm": 17.506669998168945, + "learning_rate": 0.000258806747705284, + "loss": 0.8412, + "step": 1617 + }, + { + "epoch": 0.57, + "grad_norm": 5.09642219543457, + "learning_rate": 0.0002587695360952617, + "loss": 0.8578, + "step": 1618 + }, + { + "epoch": 0.57, + "grad_norm": 7.458622932434082, + "learning_rate": 0.00025873232448523937, + "loss": 0.9713, + "step": 1619 + }, + { + "epoch": 0.57, + "grad_norm": 4.237120628356934, + "learning_rate": 0.000258695112875217, + "loss": 1.0024, + "step": 1620 + }, + { + "epoch": 0.57, + "grad_norm": 4.813836097717285, + "learning_rate": 0.0002586579012651947, + "loss": 0.8012, + "step": 1621 + }, + { + "epoch": 0.57, + "grad_norm": 6.002732753753662, + "learning_rate": 0.00025862068965517237, + "loss": 1.0697, + "step": 1622 + }, + { + "epoch": 0.57, + "grad_norm": 5.764606475830078, + "learning_rate": 0.00025858347804515007, + "loss": 0.8174, + "step": 1623 + }, + { + "epoch": 0.57, + "grad_norm": 3.286224842071533, + "learning_rate": 0.00025854626643512777, + "loss": 0.3334, + "step": 1624 + }, + { + "epoch": 0.57, + "grad_norm": 3.0024983882904053, + "learning_rate": 0.0002585090548251054, + "loss": 0.3869, + "step": 1625 + }, + { + "epoch": 0.57, + "grad_norm": 2.4190189838409424, + "learning_rate": 0.00025847184321508307, + "loss": 0.7463, + "step": 1626 + }, + { + "epoch": 0.57, + "grad_norm": 2.419149875640869, + "learning_rate": 0.0002584346316050608, + "loss": 0.8414, + "step": 1627 + }, + { + "epoch": 0.57, + "grad_norm": 2.5799694061279297, + "learning_rate": 0.0002583974199950384, + "loss": 0.8311, + "step": 1628 + }, + { + "epoch": 0.57, + "grad_norm": 1.2012771368026733, + "learning_rate": 0.0002583602083850161, + "loss": 0.2703, + "step": 1629 + }, + { + "epoch": 0.57, + "grad_norm": 2.410515785217285, + "learning_rate": 0.0002583229967749938, + "loss": 0.7188, + "step": 1630 + }, + { + "epoch": 0.57, + "grad_norm": 2.3258681297302246, + "learning_rate": 0.0002582857851649714, + "loss": 0.4769, + "step": 1631 + }, + { + "epoch": 0.57, + "grad_norm": 3.23000431060791, + "learning_rate": 0.0002582485735549491, + "loss": 0.3131, + "step": 1632 + }, + { + "epoch": 0.57, + "grad_norm": 3.1836373805999756, + "learning_rate": 0.0002582113619449268, + "loss": 0.91, + "step": 1633 + }, + { + "epoch": 0.57, + "grad_norm": 2.7543880939483643, + "learning_rate": 0.0002581741503349045, + "loss": 1.0753, + "step": 1634 + }, + { + "epoch": 0.57, + "grad_norm": 2.235100030899048, + "learning_rate": 0.0002581369387248821, + "loss": 0.5141, + "step": 1635 + }, + { + "epoch": 0.57, + "grad_norm": 2.6288204193115234, + "learning_rate": 0.00025809972711485983, + "loss": 0.8646, + "step": 1636 + }, + { + "epoch": 0.57, + "grad_norm": 3.652747869491577, + "learning_rate": 0.0002580625155048375, + "loss": 1.2427, + "step": 1637 + }, + { + "epoch": 0.57, + "grad_norm": 5.494747161865234, + "learning_rate": 0.0002580253038948151, + "loss": 0.784, + "step": 1638 + }, + { + "epoch": 0.57, + "grad_norm": 2.1231942176818848, + "learning_rate": 0.00025798809228479283, + "loss": 0.462, + "step": 1639 + }, + { + "epoch": 0.57, + "grad_norm": 4.513335704803467, + "learning_rate": 0.00025795088067477053, + "loss": 0.7612, + "step": 1640 + }, + { + "epoch": 0.57, + "grad_norm": 3.353682279586792, + "learning_rate": 0.0002579136690647482, + "loss": 0.6168, + "step": 1641 + }, + { + "epoch": 0.58, + "grad_norm": 2.6810176372528076, + "learning_rate": 0.0002578764574547259, + "loss": 0.6426, + "step": 1642 + }, + { + "epoch": 0.58, + "grad_norm": 6.6810126304626465, + "learning_rate": 0.00025783924584470353, + "loss": 2.6482, + "step": 1643 + }, + { + "epoch": 0.58, + "grad_norm": 3.389218330383301, + "learning_rate": 0.0002578020342346812, + "loss": 0.3808, + "step": 1644 + }, + { + "epoch": 0.58, + "grad_norm": 3.735605239868164, + "learning_rate": 0.0002577648226246589, + "loss": 0.5647, + "step": 1645 + }, + { + "epoch": 0.58, + "grad_norm": 40.25916290283203, + "learning_rate": 0.00025772761101463653, + "loss": 0.6441, + "step": 1646 + }, + { + "epoch": 0.58, + "grad_norm": 2.7484211921691895, + "learning_rate": 0.00025769039940461423, + "loss": 0.7046, + "step": 1647 + }, + { + "epoch": 0.58, + "grad_norm": 3.8884170055389404, + "learning_rate": 0.0002576531877945919, + "loss": 0.9336, + "step": 1648 + }, + { + "epoch": 0.58, + "grad_norm": 5.420833110809326, + "learning_rate": 0.0002576159761845696, + "loss": 1.2645, + "step": 1649 + }, + { + "epoch": 0.58, + "grad_norm": 4.9601593017578125, + "learning_rate": 0.00025757876457454723, + "loss": 0.7021, + "step": 1650 + }, + { + "epoch": 0.58, + "grad_norm": 2.882598400115967, + "learning_rate": 0.0002575415529645249, + "loss": 1.571, + "step": 1651 + }, + { + "epoch": 0.58, + "grad_norm": 2.408437490463257, + "learning_rate": 0.0002575043413545026, + "loss": 0.6583, + "step": 1652 + }, + { + "epoch": 0.58, + "grad_norm": 2.7199838161468506, + "learning_rate": 0.0002574671297444803, + "loss": 1.0358, + "step": 1653 + }, + { + "epoch": 0.58, + "grad_norm": 2.46736216545105, + "learning_rate": 0.00025742991813445794, + "loss": 0.7032, + "step": 1654 + }, + { + "epoch": 0.58, + "grad_norm": 1.8680689334869385, + "learning_rate": 0.0002573927065244356, + "loss": 0.3942, + "step": 1655 + }, + { + "epoch": 0.58, + "grad_norm": 2.436976671218872, + "learning_rate": 0.0002573554949144133, + "loss": 0.928, + "step": 1656 + }, + { + "epoch": 0.58, + "grad_norm": 1.8157801628112793, + "learning_rate": 0.00025731828330439094, + "loss": 0.5851, + "step": 1657 + }, + { + "epoch": 0.58, + "grad_norm": 2.2245826721191406, + "learning_rate": 0.00025728107169436864, + "loss": 0.829, + "step": 1658 + }, + { + "epoch": 0.58, + "grad_norm": 2.383336067199707, + "learning_rate": 0.0002572438600843463, + "loss": 0.5824, + "step": 1659 + }, + { + "epoch": 0.58, + "grad_norm": 2.794093370437622, + "learning_rate": 0.000257206648474324, + "loss": 0.7381, + "step": 1660 + }, + { + "epoch": 0.58, + "grad_norm": 6.125317573547363, + "learning_rate": 0.00025716943686430164, + "loss": 0.6719, + "step": 1661 + }, + { + "epoch": 0.58, + "grad_norm": 4.031485080718994, + "learning_rate": 0.0002571322252542793, + "loss": 1.0915, + "step": 1662 + }, + { + "epoch": 0.58, + "grad_norm": 2.9629569053649902, + "learning_rate": 0.000257095013644257, + "loss": 0.8506, + "step": 1663 + }, + { + "epoch": 0.58, + "grad_norm": 3.803297281265259, + "learning_rate": 0.00025705780203423464, + "loss": 0.6614, + "step": 1664 + }, + { + "epoch": 0.58, + "grad_norm": 7.903172016143799, + "learning_rate": 0.00025702059042421234, + "loss": 1.5172, + "step": 1665 + }, + { + "epoch": 0.58, + "grad_norm": 4.132394313812256, + "learning_rate": 0.00025698337881419, + "loss": 0.6185, + "step": 1666 + }, + { + "epoch": 0.58, + "grad_norm": 1.9272037744522095, + "learning_rate": 0.0002569461672041677, + "loss": 0.2627, + "step": 1667 + }, + { + "epoch": 0.58, + "grad_norm": 3.5171618461608887, + "learning_rate": 0.00025690895559414534, + "loss": 0.8559, + "step": 1668 + }, + { + "epoch": 0.58, + "grad_norm": 3.361370325088501, + "learning_rate": 0.000256871743984123, + "loss": 0.6559, + "step": 1669 + }, + { + "epoch": 0.59, + "grad_norm": 4.13026762008667, + "learning_rate": 0.0002568345323741007, + "loss": 1.6543, + "step": 1670 + }, + { + "epoch": 0.59, + "grad_norm": 3.8648040294647217, + "learning_rate": 0.0002567973207640784, + "loss": 0.784, + "step": 1671 + }, + { + "epoch": 0.59, + "grad_norm": 4.692941188812256, + "learning_rate": 0.00025676010915405605, + "loss": 0.5386, + "step": 1672 + }, + { + "epoch": 0.59, + "grad_norm": 2.755861520767212, + "learning_rate": 0.0002567228975440337, + "loss": 0.2788, + "step": 1673 + }, + { + "epoch": 0.59, + "grad_norm": 5.936482906341553, + "learning_rate": 0.0002566856859340114, + "loss": 1.7085, + "step": 1674 + }, + { + "epoch": 0.59, + "grad_norm": NaN, + "learning_rate": 0.0002566856859340114, + "loss": 0.1353, + "step": 1675 + }, + { + "epoch": 0.59, + "grad_norm": 2.012645959854126, + "learning_rate": 0.00025664847432398905, + "loss": 0.9096, + "step": 1676 + }, + { + "epoch": 0.59, + "grad_norm": 2.613264799118042, + "learning_rate": 0.00025661126271396675, + "loss": 0.5428, + "step": 1677 + }, + { + "epoch": 0.59, + "grad_norm": 3.0009877681732178, + "learning_rate": 0.0002565740511039444, + "loss": 0.5408, + "step": 1678 + }, + { + "epoch": 0.59, + "grad_norm": 3.448207378387451, + "learning_rate": 0.0002565368394939221, + "loss": 0.5128, + "step": 1679 + }, + { + "epoch": 0.59, + "grad_norm": 1.7628707885742188, + "learning_rate": 0.00025649962788389975, + "loss": 0.2766, + "step": 1680 + }, + { + "epoch": 0.59, + "grad_norm": 18.24505043029785, + "learning_rate": 0.0002564624162738774, + "loss": 4.3011, + "step": 1681 + }, + { + "epoch": 0.59, + "grad_norm": 2.7175798416137695, + "learning_rate": 0.0002564252046638551, + "loss": 0.9797, + "step": 1682 + }, + { + "epoch": 0.59, + "grad_norm": 2.9099786281585693, + "learning_rate": 0.00025638799305383275, + "loss": 0.6673, + "step": 1683 + }, + { + "epoch": 0.59, + "grad_norm": 2.798135280609131, + "learning_rate": 0.00025635078144381045, + "loss": 0.6912, + "step": 1684 + }, + { + "epoch": 0.59, + "grad_norm": 3.6960597038269043, + "learning_rate": 0.00025631356983378816, + "loss": 0.3386, + "step": 1685 + }, + { + "epoch": 0.59, + "grad_norm": 1.622521162033081, + "learning_rate": 0.0002562763582237658, + "loss": 0.3305, + "step": 1686 + }, + { + "epoch": 0.59, + "grad_norm": 1.7763274908065796, + "learning_rate": 0.00025623914661374346, + "loss": 0.3437, + "step": 1687 + }, + { + "epoch": 0.59, + "grad_norm": 2.253156900405884, + "learning_rate": 0.0002562019350037211, + "loss": 0.374, + "step": 1688 + }, + { + "epoch": 0.59, + "grad_norm": 3.9651002883911133, + "learning_rate": 0.0002561647233936988, + "loss": 0.5962, + "step": 1689 + }, + { + "epoch": 0.59, + "grad_norm": 3.5626463890075684, + "learning_rate": 0.0002561275117836765, + "loss": 0.7751, + "step": 1690 + }, + { + "epoch": 0.59, + "grad_norm": 3.0992138385772705, + "learning_rate": 0.00025609030017365416, + "loss": 0.7268, + "step": 1691 + }, + { + "epoch": 0.59, + "grad_norm": 3.610893487930298, + "learning_rate": 0.00025605308856363186, + "loss": 0.3764, + "step": 1692 + }, + { + "epoch": 0.59, + "grad_norm": 5.090748310089111, + "learning_rate": 0.0002560158769536095, + "loss": 1.1423, + "step": 1693 + }, + { + "epoch": 0.59, + "grad_norm": 4.872439861297607, + "learning_rate": 0.00025597866534358716, + "loss": 0.7031, + "step": 1694 + }, + { + "epoch": 0.59, + "grad_norm": 4.71915340423584, + "learning_rate": 0.00025594145373356486, + "loss": 1.1228, + "step": 1695 + }, + { + "epoch": 0.59, + "grad_norm": 3.626314163208008, + "learning_rate": 0.0002559042421235425, + "loss": 0.5694, + "step": 1696 + }, + { + "epoch": 0.59, + "grad_norm": 6.56594181060791, + "learning_rate": 0.0002558670305135202, + "loss": 1.0496, + "step": 1697 + }, + { + "epoch": 0.59, + "grad_norm": 6.181338310241699, + "learning_rate": 0.00025582981890349786, + "loss": 2.4125, + "step": 1698 + }, + { + "epoch": 0.6, + "grad_norm": 2.708691358566284, + "learning_rate": 0.00025579260729347556, + "loss": 0.3454, + "step": 1699 + }, + { + "epoch": 0.6, + "grad_norm": 3.9663429260253906, + "learning_rate": 0.0002557553956834532, + "loss": 0.5035, + "step": 1700 + }, + { + "epoch": 0.6, + "eval_loss": 0.7400824427604675, + "eval_runtime": 50.2926, + "eval_samples_per_second": 43.108, + "eval_steps_per_second": 10.777, + "eval_wer": 0.6105345096004151, + "step": 1700 + }, + { + "epoch": 0.6, + "grad_norm": 2.6513137817382812, + "learning_rate": 0.00025571818407343086, + "loss": 1.472, + "step": 1701 + }, + { + "epoch": 0.6, + "grad_norm": 1.8909034729003906, + "learning_rate": 0.00025568097246340856, + "loss": 0.7735, + "step": 1702 + }, + { + "epoch": 0.6, + "grad_norm": 1.8732975721359253, + "learning_rate": 0.00025564376085338627, + "loss": 0.6441, + "step": 1703 + }, + { + "epoch": 0.6, + "grad_norm": 1.949060082435608, + "learning_rate": 0.0002556065492433639, + "loss": 0.6141, + "step": 1704 + }, + { + "epoch": 0.6, + "grad_norm": 1.6796951293945312, + "learning_rate": 0.00025556933763334157, + "loss": 0.3948, + "step": 1705 + }, + { + "epoch": 0.6, + "grad_norm": 2.200995922088623, + "learning_rate": 0.0002555321260233192, + "loss": 0.4608, + "step": 1706 + }, + { + "epoch": 0.6, + "grad_norm": 2.7621562480926514, + "learning_rate": 0.0002554949144132969, + "loss": 0.6803, + "step": 1707 + }, + { + "epoch": 0.6, + "grad_norm": 4.656009197235107, + "learning_rate": 0.0002554577028032746, + "loss": 0.7062, + "step": 1708 + }, + { + "epoch": 0.6, + "grad_norm": 3.225820302963257, + "learning_rate": 0.00025542049119325227, + "loss": 1.0558, + "step": 1709 + }, + { + "epoch": 0.6, + "grad_norm": 3.8542261123657227, + "learning_rate": 0.00025538327958322997, + "loss": 1.0637, + "step": 1710 + }, + { + "epoch": 0.6, + "grad_norm": 2.9913618564605713, + "learning_rate": 0.0002553460679732076, + "loss": 0.4731, + "step": 1711 + }, + { + "epoch": 0.6, + "grad_norm": 2.990241289138794, + "learning_rate": 0.00025530885636318527, + "loss": 0.6727, + "step": 1712 + }, + { + "epoch": 0.6, + "grad_norm": 2.4291019439697266, + "learning_rate": 0.00025527164475316297, + "loss": 0.4963, + "step": 1713 + }, + { + "epoch": 0.6, + "grad_norm": 2.358236312866211, + "learning_rate": 0.0002552344331431406, + "loss": 0.3102, + "step": 1714 + }, + { + "epoch": 0.6, + "grad_norm": 2.8951985836029053, + "learning_rate": 0.0002551972215331183, + "loss": 0.6374, + "step": 1715 + }, + { + "epoch": 0.6, + "grad_norm": 3.7932424545288086, + "learning_rate": 0.00025516000992309597, + "loss": 0.3472, + "step": 1716 + }, + { + "epoch": 0.6, + "grad_norm": 3.9364991188049316, + "learning_rate": 0.0002551227983130737, + "loss": 1.3303, + "step": 1717 + }, + { + "epoch": 0.6, + "grad_norm": 2.639146566390991, + "learning_rate": 0.0002550855867030513, + "loss": 0.2635, + "step": 1718 + }, + { + "epoch": 0.6, + "grad_norm": 3.6595664024353027, + "learning_rate": 0.00025504837509302897, + "loss": 0.4851, + "step": 1719 + }, + { + "epoch": 0.6, + "grad_norm": 3.7507216930389404, + "learning_rate": 0.0002550111634830067, + "loss": 0.7927, + "step": 1720 + }, + { + "epoch": 0.6, + "grad_norm": 6.605137348175049, + "learning_rate": 0.0002549739518729844, + "loss": 1.9443, + "step": 1721 + }, + { + "epoch": 0.6, + "grad_norm": 3.5037612915039062, + "learning_rate": 0.000254936740262962, + "loss": 0.4611, + "step": 1722 + }, + { + "epoch": 0.6, + "grad_norm": 2.2894933223724365, + "learning_rate": 0.0002548995286529397, + "loss": 0.4025, + "step": 1723 + }, + { + "epoch": 0.6, + "grad_norm": 3.477785587310791, + "learning_rate": 0.0002548623170429174, + "loss": 1.0904, + "step": 1724 + }, + { + "epoch": 0.6, + "grad_norm": 1.9483599662780762, + "learning_rate": 0.000254825105432895, + "loss": 0.276, + "step": 1725 + }, + { + "epoch": 0.6, + "grad_norm": 2.0058553218841553, + "learning_rate": 0.00025478789382287273, + "loss": 0.5288, + "step": 1726 + }, + { + "epoch": 0.61, + "grad_norm": 3.122645616531372, + "learning_rate": 0.0002547506822128504, + "loss": 0.5928, + "step": 1727 + }, + { + "epoch": 0.61, + "grad_norm": 2.18483829498291, + "learning_rate": 0.0002547134706028281, + "loss": 0.579, + "step": 1728 + }, + { + "epoch": 0.61, + "grad_norm": 2.4548044204711914, + "learning_rate": 0.00025467625899280573, + "loss": 0.5435, + "step": 1729 + }, + { + "epoch": 0.61, + "grad_norm": 4.351948261260986, + "learning_rate": 0.0002546390473827834, + "loss": 0.7439, + "step": 1730 + }, + { + "epoch": 0.61, + "grad_norm": 2.7342119216918945, + "learning_rate": 0.0002546018357727611, + "loss": 0.5848, + "step": 1731 + }, + { + "epoch": 0.61, + "grad_norm": 2.438290596008301, + "learning_rate": 0.00025456462416273873, + "loss": 0.4699, + "step": 1732 + }, + { + "epoch": 0.61, + "grad_norm": 2.6222329139709473, + "learning_rate": 0.00025452741255271643, + "loss": 0.267, + "step": 1733 + }, + { + "epoch": 0.61, + "grad_norm": 3.6478686332702637, + "learning_rate": 0.00025449020094269414, + "loss": 0.5306, + "step": 1734 + }, + { + "epoch": 0.61, + "grad_norm": 3.7666268348693848, + "learning_rate": 0.0002544529893326718, + "loss": 0.676, + "step": 1735 + }, + { + "epoch": 0.61, + "grad_norm": 3.8781347274780273, + "learning_rate": 0.00025441577772264943, + "loss": 0.7332, + "step": 1736 + }, + { + "epoch": 0.61, + "grad_norm": 5.474586486816406, + "learning_rate": 0.0002543785661126271, + "loss": 1.1677, + "step": 1737 + }, + { + "epoch": 0.61, + "grad_norm": 3.475315809249878, + "learning_rate": 0.0002543413545026048, + "loss": 0.4186, + "step": 1738 + }, + { + "epoch": 0.61, + "grad_norm": 2.4638478755950928, + "learning_rate": 0.0002543041428925825, + "loss": 0.5006, + "step": 1739 + }, + { + "epoch": 0.61, + "grad_norm": 2.56996750831604, + "learning_rate": 0.00025426693128256014, + "loss": 0.3888, + "step": 1740 + }, + { + "epoch": 0.61, + "grad_norm": 6.963738918304443, + "learning_rate": 0.00025422971967253784, + "loss": 0.6737, + "step": 1741 + }, + { + "epoch": 0.61, + "grad_norm": 2.476889133453369, + "learning_rate": 0.0002541925080625155, + "loss": 0.2271, + "step": 1742 + }, + { + "epoch": 0.61, + "grad_norm": 3.8654210567474365, + "learning_rate": 0.00025415529645249314, + "loss": 1.5383, + "step": 1743 + }, + { + "epoch": 0.61, + "grad_norm": 6.272684097290039, + "learning_rate": 0.00025411808484247084, + "loss": 1.1683, + "step": 1744 + }, + { + "epoch": 0.61, + "grad_norm": 2.0367603302001953, + "learning_rate": 0.0002540808732324485, + "loss": 0.1529, + "step": 1745 + }, + { + "epoch": 0.61, + "grad_norm": 4.270781993865967, + "learning_rate": 0.0002540436616224262, + "loss": 0.3932, + "step": 1746 + }, + { + "epoch": 0.61, + "grad_norm": 3.1253650188446045, + "learning_rate": 0.00025400645001240384, + "loss": 0.2344, + "step": 1747 + }, + { + "epoch": 0.61, + "grad_norm": 4.384814739227295, + "learning_rate": 0.0002539692384023815, + "loss": 0.6099, + "step": 1748 + }, + { + "epoch": 0.61, + "grad_norm": 3.7127060890197754, + "learning_rate": 0.0002539320267923592, + "loss": 0.7978, + "step": 1749 + }, + { + "epoch": 0.61, + "grad_norm": 8.662912368774414, + "learning_rate": 0.00025389481518233684, + "loss": 1.5128, + "step": 1750 + }, + { + "epoch": 0.61, + "grad_norm": 4.237752914428711, + "learning_rate": 0.00025385760357231454, + "loss": 1.3841, + "step": 1751 + }, + { + "epoch": 0.61, + "grad_norm": 6.472276210784912, + "learning_rate": 0.00025382039196229225, + "loss": 1.1862, + "step": 1752 + }, + { + "epoch": 0.61, + "grad_norm": 5.888750076293945, + "learning_rate": 0.0002537831803522699, + "loss": 1.139, + "step": 1753 + }, + { + "epoch": 0.61, + "grad_norm": 4.32738733291626, + "learning_rate": 0.00025374596874224754, + "loss": 0.6998, + "step": 1754 + }, + { + "epoch": 0.61, + "grad_norm": 4.718627452850342, + "learning_rate": 0.0002537087571322252, + "loss": 1.035, + "step": 1755 + }, + { + "epoch": 0.62, + "grad_norm": 2.6345198154449463, + "learning_rate": 0.0002536715455222029, + "loss": 0.8893, + "step": 1756 + }, + { + "epoch": 0.62, + "grad_norm": 2.600464105606079, + "learning_rate": 0.0002536343339121806, + "loss": 0.7878, + "step": 1757 + }, + { + "epoch": 0.62, + "grad_norm": 3.2108471393585205, + "learning_rate": 0.00025359712230215825, + "loss": 0.8279, + "step": 1758 + }, + { + "epoch": 0.62, + "grad_norm": 3.6531155109405518, + "learning_rate": 0.00025355991069213595, + "loss": 1.1393, + "step": 1759 + }, + { + "epoch": 0.62, + "grad_norm": 3.240154504776001, + "learning_rate": 0.0002535226990821136, + "loss": 0.5395, + "step": 1760 + }, + { + "epoch": 0.62, + "grad_norm": 1.4271785020828247, + "learning_rate": 0.00025348548747209125, + "loss": 0.2189, + "step": 1761 + }, + { + "epoch": 0.62, + "grad_norm": 2.7750396728515625, + "learning_rate": 0.00025344827586206895, + "loss": 0.7323, + "step": 1762 + }, + { + "epoch": 0.62, + "grad_norm": 3.3077313899993896, + "learning_rate": 0.0002534110642520466, + "loss": 0.4554, + "step": 1763 + }, + { + "epoch": 0.62, + "grad_norm": 2.405827760696411, + "learning_rate": 0.0002533738526420243, + "loss": 0.6688, + "step": 1764 + }, + { + "epoch": 0.62, + "grad_norm": 2.642482042312622, + "learning_rate": 0.00025333664103200195, + "loss": 0.2194, + "step": 1765 + }, + { + "epoch": 0.62, + "grad_norm": 2.6313064098358154, + "learning_rate": 0.00025329942942197965, + "loss": 0.6633, + "step": 1766 + }, + { + "epoch": 0.62, + "grad_norm": 10.008116722106934, + "learning_rate": 0.0002532622178119573, + "loss": 2.5217, + "step": 1767 + }, + { + "epoch": 0.62, + "grad_norm": 4.850469589233398, + "learning_rate": 0.00025322500620193495, + "loss": 1.1002, + "step": 1768 + }, + { + "epoch": 0.62, + "grad_norm": 4.958583354949951, + "learning_rate": 0.00025318779459191265, + "loss": 0.5638, + "step": 1769 + }, + { + "epoch": 0.62, + "grad_norm": 2.5077438354492188, + "learning_rate": 0.00025315058298189036, + "loss": 0.4724, + "step": 1770 + }, + { + "epoch": 0.62, + "grad_norm": 5.4337687492370605, + "learning_rate": 0.000253113371371868, + "loss": 1.4577, + "step": 1771 + }, + { + "epoch": 0.62, + "grad_norm": 3.459052801132202, + "learning_rate": 0.00025307615976184565, + "loss": 0.6951, + "step": 1772 + }, + { + "epoch": 0.62, + "grad_norm": 8.31182861328125, + "learning_rate": 0.00025303894815182336, + "loss": 1.0552, + "step": 1773 + }, + { + "epoch": 0.62, + "grad_norm": 3.019193172454834, + "learning_rate": 0.000253001736541801, + "loss": 0.2639, + "step": 1774 + }, + { + "epoch": 0.62, + "grad_norm": NaN, + "learning_rate": 0.000253001736541801, + "loss": 0.1885, + "step": 1775 + }, + { + "epoch": 0.62, + "grad_norm": 4.729272842407227, + "learning_rate": 0.0002529645249317787, + "loss": 1.1689, + "step": 1776 + }, + { + "epoch": 0.62, + "grad_norm": 2.806692361831665, + "learning_rate": 0.00025292731332175636, + "loss": 0.9207, + "step": 1777 + }, + { + "epoch": 0.62, + "grad_norm": 3.110698699951172, + "learning_rate": 0.00025289010171173406, + "loss": 0.6481, + "step": 1778 + }, + { + "epoch": 0.62, + "grad_norm": 3.653905153274536, + "learning_rate": 0.0002528528901017117, + "loss": 0.7404, + "step": 1779 + }, + { + "epoch": 0.62, + "grad_norm": 3.4081614017486572, + "learning_rate": 0.00025281567849168936, + "loss": 0.7589, + "step": 1780 + }, + { + "epoch": 0.62, + "grad_norm": 2.3491172790527344, + "learning_rate": 0.00025277846688166706, + "loss": 0.5285, + "step": 1781 + }, + { + "epoch": 0.62, + "grad_norm": 2.7228968143463135, + "learning_rate": 0.0002527412552716447, + "loss": 0.947, + "step": 1782 + }, + { + "epoch": 0.62, + "grad_norm": 3.284248113632202, + "learning_rate": 0.0002527040436616224, + "loss": 0.4871, + "step": 1783 + }, + { + "epoch": 0.63, + "grad_norm": 3.298612117767334, + "learning_rate": 0.0002526668320516001, + "loss": 0.6996, + "step": 1784 + }, + { + "epoch": 0.63, + "grad_norm": 1.53757643699646, + "learning_rate": 0.00025262962044157776, + "loss": 0.3541, + "step": 1785 + }, + { + "epoch": 0.63, + "grad_norm": 2.7956089973449707, + "learning_rate": 0.0002525924088315554, + "loss": 0.3465, + "step": 1786 + }, + { + "epoch": 0.63, + "grad_norm": 4.37371826171875, + "learning_rate": 0.0002525551972215331, + "loss": 0.5631, + "step": 1787 + }, + { + "epoch": 0.63, + "grad_norm": 3.794459819793701, + "learning_rate": 0.00025251798561151076, + "loss": 0.8957, + "step": 1788 + }, + { + "epoch": 0.63, + "grad_norm": 2.2786507606506348, + "learning_rate": 0.00025248077400148847, + "loss": 0.5798, + "step": 1789 + }, + { + "epoch": 0.63, + "grad_norm": 2.390256404876709, + "learning_rate": 0.0002524435623914661, + "loss": 0.4864, + "step": 1790 + }, + { + "epoch": 0.63, + "grad_norm": 4.0629777908325195, + "learning_rate": 0.00025240635078144376, + "loss": 0.6592, + "step": 1791 + }, + { + "epoch": 0.63, + "grad_norm": 3.212480068206787, + "learning_rate": 0.00025236913917142147, + "loss": 1.3613, + "step": 1792 + }, + { + "epoch": 0.63, + "grad_norm": 4.64257287979126, + "learning_rate": 0.0002523319275613991, + "loss": 0.4347, + "step": 1793 + }, + { + "epoch": 0.63, + "grad_norm": 4.668285369873047, + "learning_rate": 0.0002522947159513768, + "loss": 0.7722, + "step": 1794 + }, + { + "epoch": 0.63, + "grad_norm": 3.8437860012054443, + "learning_rate": 0.00025225750434135447, + "loss": 0.3561, + "step": 1795 + }, + { + "epoch": 0.63, + "grad_norm": 3.092412233352661, + "learning_rate": 0.00025222029273133217, + "loss": 0.4881, + "step": 1796 + }, + { + "epoch": 0.63, + "grad_norm": 1.986132264137268, + "learning_rate": 0.0002521830811213098, + "loss": 0.1549, + "step": 1797 + }, + { + "epoch": 0.63, + "grad_norm": 2.6645147800445557, + "learning_rate": 0.00025214586951128747, + "loss": 0.4356, + "step": 1798 + }, + { + "epoch": 0.63, + "grad_norm": 3.3920583724975586, + "learning_rate": 0.00025210865790126517, + "loss": 0.3476, + "step": 1799 + }, + { + "epoch": 0.63, + "grad_norm": 3.257504463195801, + "learning_rate": 0.0002520714462912429, + "loss": 0.6923, + "step": 1800 + }, + { + "epoch": 0.63, + "eval_loss": 0.6619295477867126, + "eval_runtime": 50.7331, + "eval_samples_per_second": 42.733, + "eval_steps_per_second": 10.683, + "eval_wer": 0.5835495588998443, + "step": 1800 + }, + { + "epoch": 0.63, + "grad_norm": 3.3466756343841553, + "learning_rate": 0.0002520342346812205, + "loss": 0.6547, + "step": 1801 + }, + { + "epoch": 0.63, + "grad_norm": 3.27302885055542, + "learning_rate": 0.0002519970230711982, + "loss": 1.0436, + "step": 1802 + }, + { + "epoch": 0.63, + "grad_norm": 2.4561309814453125, + "learning_rate": 0.0002519598114611759, + "loss": 0.8965, + "step": 1803 + }, + { + "epoch": 0.63, + "grad_norm": 3.3739259243011475, + "learning_rate": 0.0002519225998511535, + "loss": 0.705, + "step": 1804 + }, + { + "epoch": 0.63, + "grad_norm": 2.3616445064544678, + "learning_rate": 0.0002518853882411312, + "loss": 0.8433, + "step": 1805 + }, + { + "epoch": 0.63, + "grad_norm": 2.9913322925567627, + "learning_rate": 0.0002518481766311089, + "loss": 0.4734, + "step": 1806 + }, + { + "epoch": 0.63, + "grad_norm": 2.3768415451049805, + "learning_rate": 0.0002518109650210866, + "loss": 0.4895, + "step": 1807 + }, + { + "epoch": 0.63, + "grad_norm": 2.2762320041656494, + "learning_rate": 0.0002517737534110642, + "loss": 0.621, + "step": 1808 + }, + { + "epoch": 0.63, + "grad_norm": 3.1981852054595947, + "learning_rate": 0.00025173654180104193, + "loss": 0.5364, + "step": 1809 + }, + { + "epoch": 0.63, + "grad_norm": 2.924687147140503, + "learning_rate": 0.0002516993301910196, + "loss": 0.2811, + "step": 1810 + }, + { + "epoch": 0.63, + "grad_norm": 2.7158939838409424, + "learning_rate": 0.0002516621185809972, + "loss": 0.6797, + "step": 1811 + }, + { + "epoch": 0.63, + "grad_norm": 2.226498603820801, + "learning_rate": 0.00025162490697097493, + "loss": 0.3998, + "step": 1812 + }, + { + "epoch": 0.64, + "grad_norm": 2.0350279808044434, + "learning_rate": 0.0002515876953609526, + "loss": 0.2877, + "step": 1813 + }, + { + "epoch": 0.64, + "grad_norm": 4.871344089508057, + "learning_rate": 0.0002515504837509303, + "loss": 0.6181, + "step": 1814 + }, + { + "epoch": 0.64, + "grad_norm": 3.3787949085235596, + "learning_rate": 0.00025151327214090793, + "loss": 0.7396, + "step": 1815 + }, + { + "epoch": 0.64, + "grad_norm": 2.748871326446533, + "learning_rate": 0.0002514760605308856, + "loss": 0.2645, + "step": 1816 + }, + { + "epoch": 0.64, + "grad_norm": 3.798830986022949, + "learning_rate": 0.0002514388489208633, + "loss": 0.5294, + "step": 1817 + }, + { + "epoch": 0.64, + "grad_norm": 4.125420570373535, + "learning_rate": 0.000251401637310841, + "loss": 0.565, + "step": 1818 + }, + { + "epoch": 0.64, + "grad_norm": 4.471548080444336, + "learning_rate": 0.00025136442570081863, + "loss": 0.5297, + "step": 1819 + }, + { + "epoch": 0.64, + "grad_norm": 1.885577917098999, + "learning_rate": 0.00025132721409079633, + "loss": 0.3337, + "step": 1820 + }, + { + "epoch": 0.64, + "grad_norm": 1.8695416450500488, + "learning_rate": 0.000251290002480774, + "loss": 0.2729, + "step": 1821 + }, + { + "epoch": 0.64, + "grad_norm": 4.600594520568848, + "learning_rate": 0.00025125279087075163, + "loss": 1.8971, + "step": 1822 + }, + { + "epoch": 0.64, + "grad_norm": 6.382452011108398, + "learning_rate": 0.00025121557926072933, + "loss": 2.4557, + "step": 1823 + }, + { + "epoch": 0.64, + "grad_norm": 7.36460542678833, + "learning_rate": 0.000251178367650707, + "loss": 2.337, + "step": 1824 + }, + { + "epoch": 0.64, + "grad_norm": 5.724518299102783, + "learning_rate": 0.0002511411560406847, + "loss": 1.9224, + "step": 1825 + }, + { + "epoch": 0.64, + "grad_norm": 3.918628454208374, + "learning_rate": 0.00025110394443066234, + "loss": 1.1783, + "step": 1826 + }, + { + "epoch": 0.64, + "grad_norm": 3.5602731704711914, + "learning_rate": 0.00025106673282064004, + "loss": 1.2087, + "step": 1827 + }, + { + "epoch": 0.64, + "grad_norm": 1.9868627786636353, + "learning_rate": 0.0002510295212106177, + "loss": 0.5769, + "step": 1828 + }, + { + "epoch": 0.64, + "grad_norm": 3.202514410018921, + "learning_rate": 0.00025099230960059534, + "loss": 1.2293, + "step": 1829 + }, + { + "epoch": 0.64, + "grad_norm": 1.9510756731033325, + "learning_rate": 0.00025095509799057304, + "loss": 0.4025, + "step": 1830 + }, + { + "epoch": 0.64, + "grad_norm": 2.144929885864258, + "learning_rate": 0.00025091788638055074, + "loss": 0.4471, + "step": 1831 + }, + { + "epoch": 0.64, + "grad_norm": 1.4620763063430786, + "learning_rate": 0.0002508806747705284, + "loss": 0.2977, + "step": 1832 + }, + { + "epoch": 0.64, + "grad_norm": 2.262373208999634, + "learning_rate": 0.00025084346316050604, + "loss": 0.6991, + "step": 1833 + }, + { + "epoch": 0.64, + "grad_norm": 3.069397211074829, + "learning_rate": 0.00025080625155048374, + "loss": 0.4164, + "step": 1834 + }, + { + "epoch": 0.64, + "grad_norm": 13.504817008972168, + "learning_rate": 0.0002507690399404614, + "loss": 3.6967, + "step": 1835 + }, + { + "epoch": 0.64, + "grad_norm": 3.4897937774658203, + "learning_rate": 0.0002507318283304391, + "loss": 0.8833, + "step": 1836 + }, + { + "epoch": 0.64, + "grad_norm": 3.5838449001312256, + "learning_rate": 0.00025069461672041674, + "loss": 0.6879, + "step": 1837 + }, + { + "epoch": 0.64, + "grad_norm": 4.084447860717773, + "learning_rate": 0.00025065740511039444, + "loss": 0.9948, + "step": 1838 + }, + { + "epoch": 0.64, + "grad_norm": 3.7266480922698975, + "learning_rate": 0.0002506201935003721, + "loss": 0.978, + "step": 1839 + }, + { + "epoch": 0.64, + "grad_norm": 2.592857599258423, + "learning_rate": 0.00025058298189034974, + "loss": 0.4904, + "step": 1840 + }, + { + "epoch": 0.65, + "grad_norm": 3.836439609527588, + "learning_rate": 0.00025054577028032744, + "loss": 0.4452, + "step": 1841 + }, + { + "epoch": 0.65, + "grad_norm": 4.47084379196167, + "learning_rate": 0.0002505085586703051, + "loss": 0.7633, + "step": 1842 + }, + { + "epoch": 0.65, + "grad_norm": 3.29998517036438, + "learning_rate": 0.0002504713470602828, + "loss": 1.2435, + "step": 1843 + }, + { + "epoch": 0.65, + "grad_norm": 2.52744460105896, + "learning_rate": 0.0002504341354502605, + "loss": 0.3426, + "step": 1844 + }, + { + "epoch": 0.65, + "grad_norm": 2.3847482204437256, + "learning_rate": 0.00025039692384023815, + "loss": 0.2179, + "step": 1845 + }, + { + "epoch": 0.65, + "grad_norm": 5.141510009765625, + "learning_rate": 0.0002503597122302158, + "loss": 0.6102, + "step": 1846 + }, + { + "epoch": 0.65, + "grad_norm": 4.136857509613037, + "learning_rate": 0.00025032250062019345, + "loss": 1.0573, + "step": 1847 + }, + { + "epoch": 0.65, + "grad_norm": 2.8954155445098877, + "learning_rate": 0.00025028528901017115, + "loss": 0.4377, + "step": 1848 + }, + { + "epoch": 0.65, + "grad_norm": 2.4569027423858643, + "learning_rate": 0.00025024807740014885, + "loss": 0.2517, + "step": 1849 + }, + { + "epoch": 0.65, + "grad_norm": 4.620333671569824, + "learning_rate": 0.0002502108657901265, + "loss": 1.7501, + "step": 1850 + }, + { + "epoch": 0.65, + "grad_norm": 2.291147470474243, + "learning_rate": 0.0002501736541801042, + "loss": 1.2419, + "step": 1851 + }, + { + "epoch": 0.65, + "grad_norm": 3.5810606479644775, + "learning_rate": 0.00025013644257008185, + "loss": 1.0098, + "step": 1852 + }, + { + "epoch": 0.65, + "grad_norm": 3.492764472961426, + "learning_rate": 0.0002500992309600595, + "loss": 0.8263, + "step": 1853 + }, + { + "epoch": 0.65, + "grad_norm": 2.030451536178589, + "learning_rate": 0.0002500620193500372, + "loss": 0.7977, + "step": 1854 + }, + { + "epoch": 0.65, + "grad_norm": 3.7013792991638184, + "learning_rate": 0.00025002480774001485, + "loss": 0.7732, + "step": 1855 + }, + { + "epoch": 0.65, + "grad_norm": 2.162282943725586, + "learning_rate": 0.00024998759612999255, + "loss": 0.8146, + "step": 1856 + }, + { + "epoch": 0.65, + "grad_norm": 3.0203332901000977, + "learning_rate": 0.0002499503845199702, + "loss": 0.8234, + "step": 1857 + }, + { + "epoch": 0.65, + "grad_norm": 2.6350369453430176, + "learning_rate": 0.00024991317290994785, + "loss": 0.3615, + "step": 1858 + }, + { + "epoch": 0.65, + "grad_norm": 3.785946846008301, + "learning_rate": 0.00024987596129992556, + "loss": 0.6719, + "step": 1859 + }, + { + "epoch": 0.65, + "grad_norm": 4.30195426940918, + "learning_rate": 0.0002498387496899032, + "loss": 0.4817, + "step": 1860 + }, + { + "epoch": 0.65, + "grad_norm": 2.285341262817383, + "learning_rate": 0.0002498015380798809, + "loss": 0.4845, + "step": 1861 + }, + { + "epoch": 0.65, + "grad_norm": 2.9334990978240967, + "learning_rate": 0.0002497643264698586, + "loss": 0.6951, + "step": 1862 + }, + { + "epoch": 0.65, + "grad_norm": 1.6328988075256348, + "learning_rate": 0.00024972711485983626, + "loss": 0.3517, + "step": 1863 + }, + { + "epoch": 0.65, + "grad_norm": 3.6665728092193604, + "learning_rate": 0.0002496899032498139, + "loss": 1.0003, + "step": 1864 + }, + { + "epoch": 0.65, + "grad_norm": 2.054015636444092, + "learning_rate": 0.00024965269163979156, + "loss": 0.4366, + "step": 1865 + }, + { + "epoch": 0.65, + "grad_norm": 3.34871506690979, + "learning_rate": 0.00024961548002976926, + "loss": 0.3632, + "step": 1866 + }, + { + "epoch": 0.65, + "grad_norm": 1.7466020584106445, + "learning_rate": 0.00024957826841974696, + "loss": 0.2544, + "step": 1867 + }, + { + "epoch": 0.65, + "grad_norm": 2.3395614624023438, + "learning_rate": 0.0002495410568097246, + "loss": 0.2607, + "step": 1868 + }, + { + "epoch": 0.65, + "grad_norm": 5.0486836433410645, + "learning_rate": 0.0002495038451997023, + "loss": 0.6904, + "step": 1869 + }, + { + "epoch": 0.66, + "grad_norm": 5.611022472381592, + "learning_rate": 0.00024946663358967996, + "loss": 0.643, + "step": 1870 + }, + { + "epoch": 0.66, + "grad_norm": 1.7561452388763428, + "learning_rate": 0.0002494294219796576, + "loss": 0.2442, + "step": 1871 + }, + { + "epoch": 0.66, + "grad_norm": 5.131147384643555, + "learning_rate": 0.0002493922103696353, + "loss": 0.6293, + "step": 1872 + }, + { + "epoch": 0.66, + "grad_norm": 6.987359523773193, + "learning_rate": 0.00024935499875961296, + "loss": 0.5611, + "step": 1873 + }, + { + "epoch": 0.66, + "grad_norm": 5.005058765411377, + "learning_rate": 0.00024931778714959066, + "loss": 1.9181, + "step": 1874 + }, + { + "epoch": 0.66, + "grad_norm": 5.872385501861572, + "learning_rate": 0.0002492805755395683, + "loss": 0.4414, + "step": 1875 + }, + { + "epoch": 0.66, + "grad_norm": 3.087235927581787, + "learning_rate": 0.000249243363929546, + "loss": 1.1648, + "step": 1876 + }, + { + "epoch": 0.66, + "grad_norm": 2.866982936859131, + "learning_rate": 0.00024920615231952367, + "loss": 0.8997, + "step": 1877 + }, + { + "epoch": 0.66, + "grad_norm": 3.6549713611602783, + "learning_rate": 0.0002491689407095013, + "loss": 0.7768, + "step": 1878 + }, + { + "epoch": 0.66, + "grad_norm": 3.923379421234131, + "learning_rate": 0.000249131729099479, + "loss": 0.7754, + "step": 1879 + }, + { + "epoch": 0.66, + "grad_norm": 3.286487340927124, + "learning_rate": 0.0002490945174894567, + "loss": 0.5886, + "step": 1880 + }, + { + "epoch": 0.66, + "grad_norm": 2.357010841369629, + "learning_rate": 0.00024905730587943437, + "loss": 0.5951, + "step": 1881 + }, + { + "epoch": 0.66, + "grad_norm": 1.6229811906814575, + "learning_rate": 0.000249020094269412, + "loss": 0.4376, + "step": 1882 + }, + { + "epoch": 0.66, + "grad_norm": 2.4344239234924316, + "learning_rate": 0.0002489828826593897, + "loss": 0.7466, + "step": 1883 + }, + { + "epoch": 0.66, + "grad_norm": 6.589191436767578, + "learning_rate": 0.00024894567104936737, + "loss": 0.6586, + "step": 1884 + }, + { + "epoch": 0.66, + "grad_norm": 3.2534403800964355, + "learning_rate": 0.00024890845943934507, + "loss": 0.7062, + "step": 1885 + }, + { + "epoch": 0.66, + "grad_norm": 2.400862455368042, + "learning_rate": 0.0002488712478293227, + "loss": 0.306, + "step": 1886 + }, + { + "epoch": 0.66, + "grad_norm": 2.253848075866699, + "learning_rate": 0.0002488340362193004, + "loss": 0.6433, + "step": 1887 + }, + { + "epoch": 0.66, + "grad_norm": 2.553795337677002, + "learning_rate": 0.00024879682460927807, + "loss": 0.4091, + "step": 1888 + }, + { + "epoch": 0.66, + "grad_norm": 2.4395694732666016, + "learning_rate": 0.0002487596129992557, + "loss": 0.5482, + "step": 1889 + }, + { + "epoch": 0.66, + "grad_norm": 3.8321399688720703, + "learning_rate": 0.0002487224013892334, + "loss": 0.6352, + "step": 1890 + }, + { + "epoch": 0.66, + "grad_norm": 2.5882153511047363, + "learning_rate": 0.00024868518977921107, + "loss": 0.4969, + "step": 1891 + }, + { + "epoch": 0.66, + "grad_norm": 4.412918567657471, + "learning_rate": 0.0002486479781691888, + "loss": 1.0272, + "step": 1892 + }, + { + "epoch": 0.66, + "grad_norm": 5.513781547546387, + "learning_rate": 0.0002486107665591665, + "loss": 1.2136, + "step": 1893 + }, + { + "epoch": 0.66, + "grad_norm": 2.6732091903686523, + "learning_rate": 0.0002485735549491441, + "loss": 0.3256, + "step": 1894 + }, + { + "epoch": 0.66, + "grad_norm": 3.6457021236419678, + "learning_rate": 0.0002485363433391218, + "loss": 0.4514, + "step": 1895 + }, + { + "epoch": 0.66, + "grad_norm": 3.561769485473633, + "learning_rate": 0.0002484991317290994, + "loss": 0.6833, + "step": 1896 + }, + { + "epoch": 0.66, + "grad_norm": 2.9853644371032715, + "learning_rate": 0.0002484619201190771, + "loss": 1.2547, + "step": 1897 + }, + { + "epoch": 0.67, + "grad_norm": 1.9570162296295166, + "learning_rate": 0.00024842470850905483, + "loss": 0.2446, + "step": 1898 + }, + { + "epoch": 0.67, + "grad_norm": 2.1816675662994385, + "learning_rate": 0.0002483874968990325, + "loss": 0.4237, + "step": 1899 + }, + { + "epoch": 0.67, + "grad_norm": 8.543338775634766, + "learning_rate": 0.0002483502852890101, + "loss": 1.1266, + "step": 1900 + }, + { + "epoch": 0.67, + "eval_loss": 0.6919850707054138, + "eval_runtime": 51.2516, + "eval_samples_per_second": 42.301, + "eval_steps_per_second": 10.575, + "eval_wer": 0.6012800553537451, + "step": 1900 + }, + { + "epoch": 0.67, + "grad_norm": 2.464935302734375, + "learning_rate": 0.00024831307367898783, + "loss": 0.9893, + "step": 1901 + }, + { + "epoch": 0.67, + "grad_norm": 2.831908941268921, + "learning_rate": 0.0002482758620689655, + "loss": 0.9191, + "step": 1902 + }, + { + "epoch": 0.67, + "grad_norm": 4.71111536026001, + "learning_rate": 0.0002482386504589432, + "loss": 1.5041, + "step": 1903 + }, + { + "epoch": 0.67, + "grad_norm": 2.486187219619751, + "learning_rate": 0.00024820143884892083, + "loss": 0.4669, + "step": 1904 + }, + { + "epoch": 0.67, + "grad_norm": 2.497227191925049, + "learning_rate": 0.00024816422723889853, + "loss": 0.8797, + "step": 1905 + }, + { + "epoch": 0.67, + "grad_norm": 3.937802791595459, + "learning_rate": 0.0002481270156288762, + "loss": 0.7039, + "step": 1906 + }, + { + "epoch": 0.67, + "grad_norm": 2.5400185585021973, + "learning_rate": 0.00024808980401885383, + "loss": 0.7996, + "step": 1907 + }, + { + "epoch": 0.67, + "grad_norm": 2.81622576713562, + "learning_rate": 0.00024805259240883153, + "loss": 0.5797, + "step": 1908 + }, + { + "epoch": 0.67, + "grad_norm": 2.533505916595459, + "learning_rate": 0.0002480153807988092, + "loss": 0.7682, + "step": 1909 + }, + { + "epoch": 0.67, + "grad_norm": 2.5406887531280518, + "learning_rate": 0.0002479781691887869, + "loss": 0.8529, + "step": 1910 + }, + { + "epoch": 0.67, + "grad_norm": 2.635380268096924, + "learning_rate": 0.0002479409575787646, + "loss": 0.5315, + "step": 1911 + }, + { + "epoch": 0.67, + "grad_norm": 3.293522834777832, + "learning_rate": 0.00024790374596874224, + "loss": 0.762, + "step": 1912 + }, + { + "epoch": 0.67, + "grad_norm": 3.7569010257720947, + "learning_rate": 0.0002478665343587199, + "loss": 0.8445, + "step": 1913 + }, + { + "epoch": 0.67, + "grad_norm": 4.208041667938232, + "learning_rate": 0.00024782932274869753, + "loss": 0.7308, + "step": 1914 + }, + { + "epoch": 0.67, + "grad_norm": 2.8499300479888916, + "learning_rate": 0.00024779211113867524, + "loss": 0.7733, + "step": 1915 + }, + { + "epoch": 0.67, + "grad_norm": 1.9192874431610107, + "learning_rate": 0.00024775489952865294, + "loss": 0.3927, + "step": 1916 + }, + { + "epoch": 0.67, + "grad_norm": 2.4525372982025146, + "learning_rate": 0.0002477176879186306, + "loss": 0.2387, + "step": 1917 + }, + { + "epoch": 0.67, + "grad_norm": 2.631014347076416, + "learning_rate": 0.0002476804763086083, + "loss": 0.7168, + "step": 1918 + }, + { + "epoch": 0.67, + "grad_norm": 1.9727121591567993, + "learning_rate": 0.00024764326469858594, + "loss": 0.5055, + "step": 1919 + }, + { + "epoch": 0.67, + "grad_norm": 8.828970909118652, + "learning_rate": 0.0002476060530885636, + "loss": 1.4732, + "step": 1920 + }, + { + "epoch": 0.67, + "grad_norm": 4.060351848602295, + "learning_rate": 0.0002475688414785413, + "loss": 1.418, + "step": 1921 + }, + { + "epoch": 0.67, + "grad_norm": 2.0158917903900146, + "learning_rate": 0.00024753162986851894, + "loss": 0.2002, + "step": 1922 + }, + { + "epoch": 0.67, + "grad_norm": 3.4808578491210938, + "learning_rate": 0.00024749441825849664, + "loss": 0.5887, + "step": 1923 + }, + { + "epoch": 0.67, + "grad_norm": 5.383602142333984, + "learning_rate": 0.0002474572066484743, + "loss": 1.5568, + "step": 1924 + }, + { + "epoch": 0.67, + "grad_norm": 2.389026165008545, + "learning_rate": 0.000247419995038452, + "loss": 0.3765, + "step": 1925 + }, + { + "epoch": 0.67, + "grad_norm": 2.9787375926971436, + "learning_rate": 0.00024738278342842964, + "loss": 0.9215, + "step": 1926 + }, + { + "epoch": 0.68, + "grad_norm": 1.7015265226364136, + "learning_rate": 0.0002473455718184073, + "loss": 0.5214, + "step": 1927 + }, + { + "epoch": 0.68, + "grad_norm": 1.7144050598144531, + "learning_rate": 0.000247308360208385, + "loss": 0.6626, + "step": 1928 + }, + { + "epoch": 0.68, + "grad_norm": 1.8831634521484375, + "learning_rate": 0.0002472711485983627, + "loss": 0.7753, + "step": 1929 + }, + { + "epoch": 0.68, + "grad_norm": 2.180262565612793, + "learning_rate": 0.00024723393698834035, + "loss": 0.5357, + "step": 1930 + }, + { + "epoch": 0.68, + "grad_norm": 2.9402575492858887, + "learning_rate": 0.000247196725378318, + "loss": 0.4598, + "step": 1931 + }, + { + "epoch": 0.68, + "grad_norm": 2.6572482585906982, + "learning_rate": 0.0002471595137682957, + "loss": 0.5624, + "step": 1932 + }, + { + "epoch": 0.68, + "grad_norm": 2.5817763805389404, + "learning_rate": 0.00024712230215827335, + "loss": 0.6215, + "step": 1933 + }, + { + "epoch": 0.68, + "grad_norm": 3.858384132385254, + "learning_rate": 0.00024708509054825105, + "loss": 0.879, + "step": 1934 + }, + { + "epoch": 0.68, + "grad_norm": 2.5925073623657227, + "learning_rate": 0.0002470478789382287, + "loss": 0.7597, + "step": 1935 + }, + { + "epoch": 0.68, + "grad_norm": 1.7075647115707397, + "learning_rate": 0.0002470106673282064, + "loss": 0.3585, + "step": 1936 + }, + { + "epoch": 0.68, + "grad_norm": 2.2654693126678467, + "learning_rate": 0.00024697345571818405, + "loss": 0.4981, + "step": 1937 + }, + { + "epoch": 0.68, + "grad_norm": 2.9442179203033447, + "learning_rate": 0.0002469362441081617, + "loss": 0.5214, + "step": 1938 + }, + { + "epoch": 0.68, + "grad_norm": 2.89056396484375, + "learning_rate": 0.0002468990324981394, + "loss": 0.4043, + "step": 1939 + }, + { + "epoch": 0.68, + "grad_norm": 2.2005202770233154, + "learning_rate": 0.00024686182088811705, + "loss": 0.2301, + "step": 1940 + }, + { + "epoch": 0.68, + "grad_norm": 4.534944534301758, + "learning_rate": 0.00024682460927809475, + "loss": 0.8182, + "step": 1941 + }, + { + "epoch": 0.68, + "grad_norm": 3.317547082901001, + "learning_rate": 0.0002467873976680724, + "loss": 0.8466, + "step": 1942 + }, + { + "epoch": 0.68, + "grad_norm": 3.431687593460083, + "learning_rate": 0.0002467501860580501, + "loss": 0.4799, + "step": 1943 + }, + { + "epoch": 0.68, + "grad_norm": 2.7903997898101807, + "learning_rate": 0.00024671297444802775, + "loss": 0.4264, + "step": 1944 + }, + { + "epoch": 0.68, + "grad_norm": 2.921651840209961, + "learning_rate": 0.0002466757628380054, + "loss": 0.3343, + "step": 1945 + }, + { + "epoch": 0.68, + "grad_norm": 4.756716728210449, + "learning_rate": 0.0002466385512279831, + "loss": 1.4059, + "step": 1946 + }, + { + "epoch": 0.68, + "grad_norm": 2.8157451152801514, + "learning_rate": 0.0002466013396179608, + "loss": 0.2149, + "step": 1947 + }, + { + "epoch": 0.68, + "grad_norm": 4.292349338531494, + "learning_rate": 0.00024656412800793846, + "loss": 0.6947, + "step": 1948 + }, + { + "epoch": 0.68, + "grad_norm": 5.523711204528809, + "learning_rate": 0.0002465269163979161, + "loss": 1.8716, + "step": 1949 + }, + { + "epoch": 0.68, + "grad_norm": 5.184019088745117, + "learning_rate": 0.0002464897047878938, + "loss": 1.5156, + "step": 1950 + }, + { + "epoch": 0.68, + "grad_norm": 9.27737808227539, + "learning_rate": 0.00024645249317787146, + "loss": 1.8832, + "step": 1951 + }, + { + "epoch": 0.68, + "grad_norm": 11.288307189941406, + "learning_rate": 0.00024641528156784916, + "loss": 1.2195, + "step": 1952 + }, + { + "epoch": 0.68, + "grad_norm": 6.941704273223877, + "learning_rate": 0.0002463780699578268, + "loss": 1.0644, + "step": 1953 + }, + { + "epoch": 0.68, + "grad_norm": 5.17105770111084, + "learning_rate": 0.0002463408583478045, + "loss": 1.0977, + "step": 1954 + }, + { + "epoch": 0.69, + "grad_norm": 2.853844165802002, + "learning_rate": 0.00024630364673778216, + "loss": 0.734, + "step": 1955 + }, + { + "epoch": 0.69, + "grad_norm": 3.7086217403411865, + "learning_rate": 0.0002462664351277598, + "loss": 0.9751, + "step": 1956 + }, + { + "epoch": 0.69, + "grad_norm": 2.8849620819091797, + "learning_rate": 0.0002462292235177375, + "loss": 0.6382, + "step": 1957 + }, + { + "epoch": 0.69, + "grad_norm": 2.1819937229156494, + "learning_rate": 0.00024619201190771516, + "loss": 0.5407, + "step": 1958 + }, + { + "epoch": 0.69, + "grad_norm": 3.5230162143707275, + "learning_rate": 0.00024615480029769286, + "loss": 1.6646, + "step": 1959 + }, + { + "epoch": 0.69, + "grad_norm": 2.293956995010376, + "learning_rate": 0.00024611758868767057, + "loss": 0.5963, + "step": 1960 + }, + { + "epoch": 0.69, + "grad_norm": 2.532461643218994, + "learning_rate": 0.0002460803770776482, + "loss": 0.7947, + "step": 1961 + }, + { + "epoch": 0.69, + "grad_norm": 2.1214241981506348, + "learning_rate": 0.00024604316546762586, + "loss": 0.6076, + "step": 1962 + }, + { + "epoch": 0.69, + "grad_norm": 3.7289674282073975, + "learning_rate": 0.00024600595385760357, + "loss": 0.9689, + "step": 1963 + }, + { + "epoch": 0.69, + "grad_norm": 2.5347867012023926, + "learning_rate": 0.0002459687422475812, + "loss": 0.3691, + "step": 1964 + }, + { + "epoch": 0.69, + "grad_norm": 1.878653645515442, + "learning_rate": 0.0002459315306375589, + "loss": 0.3384, + "step": 1965 + }, + { + "epoch": 0.69, + "grad_norm": 1.7828047275543213, + "learning_rate": 0.00024589431902753657, + "loss": 0.6173, + "step": 1966 + }, + { + "epoch": 0.69, + "grad_norm": 2.254936456680298, + "learning_rate": 0.00024585710741751427, + "loss": 0.8577, + "step": 1967 + }, + { + "epoch": 0.69, + "grad_norm": 1.9848906993865967, + "learning_rate": 0.0002458198958074919, + "loss": 0.2193, + "step": 1968 + }, + { + "epoch": 0.69, + "grad_norm": 3.952481985092163, + "learning_rate": 0.00024578268419746957, + "loss": 1.0369, + "step": 1969 + }, + { + "epoch": 0.69, + "grad_norm": 3.548969030380249, + "learning_rate": 0.00024574547258744727, + "loss": 0.749, + "step": 1970 + }, + { + "epoch": 0.69, + "grad_norm": 2.2937052249908447, + "learning_rate": 0.0002457082609774249, + "loss": 0.4634, + "step": 1971 + }, + { + "epoch": 0.69, + "grad_norm": 2.3762097358703613, + "learning_rate": 0.0002456710493674026, + "loss": 0.2366, + "step": 1972 + }, + { + "epoch": 0.69, + "grad_norm": 5.370176792144775, + "learning_rate": 0.00024563383775738027, + "loss": 1.8996, + "step": 1973 + }, + { + "epoch": 0.69, + "grad_norm": 3.1844329833984375, + "learning_rate": 0.0002455966261473579, + "loss": 0.3431, + "step": 1974 + }, + { + "epoch": 0.69, + "grad_norm": 2.5123300552368164, + "learning_rate": 0.0002455594145373356, + "loss": 0.2331, + "step": 1975 + }, + { + "epoch": 0.69, + "grad_norm": 2.754368782043457, + "learning_rate": 0.0002455222029273133, + "loss": 1.245, + "step": 1976 + }, + { + "epoch": 0.69, + "grad_norm": 3.0486223697662354, + "learning_rate": 0.000245484991317291, + "loss": 0.6925, + "step": 1977 + }, + { + "epoch": 0.69, + "grad_norm": 2.9800477027893066, + "learning_rate": 0.0002454477797072687, + "loss": 1.1662, + "step": 1978 + }, + { + "epoch": 0.69, + "grad_norm": 1.9747953414916992, + "learning_rate": 0.0002454105680972463, + "loss": 0.3735, + "step": 1979 + }, + { + "epoch": 0.69, + "grad_norm": 2.3765416145324707, + "learning_rate": 0.000245373356487224, + "loss": 0.7723, + "step": 1980 + }, + { + "epoch": 0.69, + "grad_norm": 3.1413557529449463, + "learning_rate": 0.0002453361448772017, + "loss": 0.666, + "step": 1981 + }, + { + "epoch": 0.69, + "grad_norm": 2.1634583473205566, + "learning_rate": 0.0002452989332671793, + "loss": 0.3816, + "step": 1982 + }, + { + "epoch": 0.69, + "grad_norm": 1.8471031188964844, + "learning_rate": 0.00024526172165715703, + "loss": 0.6517, + "step": 1983 + }, + { + "epoch": 0.7, + "grad_norm": 3.5720808506011963, + "learning_rate": 0.0002452245100471347, + "loss": 0.4428, + "step": 1984 + }, + { + "epoch": 0.7, + "grad_norm": 2.9162683486938477, + "learning_rate": 0.0002451872984371124, + "loss": 0.4646, + "step": 1985 + }, + { + "epoch": 0.7, + "grad_norm": 3.0276424884796143, + "learning_rate": 0.00024515008682709003, + "loss": 0.4527, + "step": 1986 + }, + { + "epoch": 0.7, + "grad_norm": 3.1987595558166504, + "learning_rate": 0.0002451128752170677, + "loss": 0.631, + "step": 1987 + }, + { + "epoch": 0.7, + "grad_norm": 1.891221284866333, + "learning_rate": 0.0002450756636070454, + "loss": 0.4301, + "step": 1988 + }, + { + "epoch": 0.7, + "grad_norm": 2.999260425567627, + "learning_rate": 0.00024503845199702303, + "loss": 0.7106, + "step": 1989 + }, + { + "epoch": 0.7, + "grad_norm": 4.418884754180908, + "learning_rate": 0.00024500124038700073, + "loss": 0.8794, + "step": 1990 + }, + { + "epoch": 0.7, + "grad_norm": 2.5227293968200684, + "learning_rate": 0.0002449640287769784, + "loss": 0.6786, + "step": 1991 + }, + { + "epoch": 0.7, + "grad_norm": 3.8759443759918213, + "learning_rate": 0.0002449268171669561, + "loss": 0.9838, + "step": 1992 + }, + { + "epoch": 0.7, + "grad_norm": 5.6880059242248535, + "learning_rate": 0.00024488960555693373, + "loss": 1.1084, + "step": 1993 + }, + { + "epoch": 0.7, + "grad_norm": 5.6343183517456055, + "learning_rate": 0.00024485239394691143, + "loss": 0.8567, + "step": 1994 + }, + { + "epoch": 0.7, + "grad_norm": 2.4194846153259277, + "learning_rate": 0.0002448151823368891, + "loss": 0.2741, + "step": 1995 + }, + { + "epoch": 0.7, + "grad_norm": 2.982990264892578, + "learning_rate": 0.0002447779707268668, + "loss": 0.3463, + "step": 1996 + }, + { + "epoch": 0.7, + "grad_norm": 3.392058849334717, + "learning_rate": 0.00024474075911684444, + "loss": 0.6745, + "step": 1997 + }, + { + "epoch": 0.7, + "grad_norm": 4.701278209686279, + "learning_rate": 0.0002447035475068221, + "loss": 1.1925, + "step": 1998 + }, + { + "epoch": 0.7, + "grad_norm": 3.1057417392730713, + "learning_rate": 0.0002446663358967998, + "loss": 0.225, + "step": 1999 + }, + { + "epoch": 0.7, + "grad_norm": 2.4700098037719727, + "learning_rate": 0.00024462912428677744, + "loss": 0.2833, + "step": 2000 + }, + { + "epoch": 0.7, + "eval_loss": 0.8290925025939941, + "eval_runtime": 50.9678, + "eval_samples_per_second": 42.537, + "eval_steps_per_second": 10.634, + "eval_wer": 0.6420169520844144, + "step": 2000 + }, + { + "epoch": 0.7, + "grad_norm": 7.508627414703369, + "learning_rate": 0.00024459191267675514, + "loss": 1.6429, + "step": 2001 + }, + { + "epoch": 0.7, + "grad_norm": 3.5678176879882812, + "learning_rate": 0.0002445547010667328, + "loss": 1.0799, + "step": 2002 + }, + { + "epoch": 0.7, + "grad_norm": 2.7853446006774902, + "learning_rate": 0.0002445174894567105, + "loss": 0.7929, + "step": 2003 + }, + { + "epoch": 0.7, + "grad_norm": 2.158714532852173, + "learning_rate": 0.00024448027784668814, + "loss": 0.6522, + "step": 2004 + }, + { + "epoch": 0.7, + "grad_norm": 1.3752233982086182, + "learning_rate": 0.0002444430662366658, + "loss": 0.407, + "step": 2005 + }, + { + "epoch": 0.7, + "grad_norm": 2.856546401977539, + "learning_rate": 0.0002444058546266435, + "loss": 0.649, + "step": 2006 + }, + { + "epoch": 0.7, + "grad_norm": 2.812596321105957, + "learning_rate": 0.0002443686430166212, + "loss": 0.4625, + "step": 2007 + }, + { + "epoch": 0.7, + "grad_norm": 2.5984840393066406, + "learning_rate": 0.00024433143140659884, + "loss": 0.3318, + "step": 2008 + }, + { + "epoch": 0.7, + "grad_norm": 2.0432894229888916, + "learning_rate": 0.00024429421979657654, + "loss": 0.3098, + "step": 2009 + }, + { + "epoch": 0.7, + "grad_norm": 2.646289587020874, + "learning_rate": 0.0002442570081865542, + "loss": 0.4007, + "step": 2010 + }, + { + "epoch": 0.7, + "grad_norm": 2.5235435962677, + "learning_rate": 0.00024421979657653184, + "loss": 0.3968, + "step": 2011 + }, + { + "epoch": 0.7, + "grad_norm": 4.276270866394043, + "learning_rate": 0.00024418258496650954, + "loss": 1.1839, + "step": 2012 + }, + { + "epoch": 0.71, + "grad_norm": 3.5912859439849854, + "learning_rate": 0.0002441453733564872, + "loss": 0.5202, + "step": 2013 + }, + { + "epoch": 0.71, + "grad_norm": 4.616440296173096, + "learning_rate": 0.00024410816174646487, + "loss": 1.8386, + "step": 2014 + }, + { + "epoch": 0.71, + "grad_norm": 4.167287349700928, + "learning_rate": 0.00024407095013644255, + "loss": 0.7833, + "step": 2015 + }, + { + "epoch": 0.71, + "grad_norm": 3.9030117988586426, + "learning_rate": 0.00024403373852642022, + "loss": 0.9212, + "step": 2016 + }, + { + "epoch": 0.71, + "grad_norm": 5.886682987213135, + "learning_rate": 0.0002439965269163979, + "loss": 0.3937, + "step": 2017 + }, + { + "epoch": 0.71, + "grad_norm": 2.818249464035034, + "learning_rate": 0.00024395931530637555, + "loss": 0.305, + "step": 2018 + }, + { + "epoch": 0.71, + "grad_norm": 4.023711204528809, + "learning_rate": 0.00024392210369635325, + "loss": 0.5857, + "step": 2019 + }, + { + "epoch": 0.71, + "grad_norm": 3.1700754165649414, + "learning_rate": 0.00024388489208633092, + "loss": 0.5954, + "step": 2020 + }, + { + "epoch": 0.71, + "grad_norm": 4.295510768890381, + "learning_rate": 0.00024384768047630857, + "loss": 0.742, + "step": 2021 + }, + { + "epoch": 0.71, + "grad_norm": 4.545529365539551, + "learning_rate": 0.00024381046886628628, + "loss": 0.9577, + "step": 2022 + }, + { + "epoch": 0.71, + "grad_norm": 2.260254383087158, + "learning_rate": 0.00024377325725626392, + "loss": 0.2636, + "step": 2023 + }, + { + "epoch": 0.71, + "grad_norm": 2.4573659896850586, + "learning_rate": 0.0002437360456462416, + "loss": 0.313, + "step": 2024 + }, + { + "epoch": 0.71, + "grad_norm": 4.888119220733643, + "learning_rate": 0.0002436988340362193, + "loss": 0.3761, + "step": 2025 + }, + { + "epoch": 0.71, + "grad_norm": 3.143555164337158, + "learning_rate": 0.00024366162242619695, + "loss": 1.1434, + "step": 2026 + }, + { + "epoch": 0.71, + "grad_norm": 2.872159719467163, + "learning_rate": 0.00024362441081617463, + "loss": 0.8784, + "step": 2027 + }, + { + "epoch": 0.71, + "grad_norm": 15.59583568572998, + "learning_rate": 0.00024358719920615228, + "loss": 6.3413, + "step": 2028 + }, + { + "epoch": 0.71, + "grad_norm": 3.5535459518432617, + "learning_rate": 0.00024354998759612998, + "loss": 0.6744, + "step": 2029 + }, + { + "epoch": 0.71, + "grad_norm": 3.3724849224090576, + "learning_rate": 0.00024351277598610766, + "loss": 0.6494, + "step": 2030 + }, + { + "epoch": 0.71, + "grad_norm": 12.914846420288086, + "learning_rate": 0.0002434755643760853, + "loss": 4.2932, + "step": 2031 + }, + { + "epoch": 0.71, + "grad_norm": 2.6765973567962646, + "learning_rate": 0.000243438352766063, + "loss": 0.3652, + "step": 2032 + }, + { + "epoch": 0.71, + "grad_norm": 3.295680284500122, + "learning_rate": 0.00024340114115604068, + "loss": 0.6002, + "step": 2033 + }, + { + "epoch": 0.71, + "grad_norm": 2.5459048748016357, + "learning_rate": 0.00024336392954601833, + "loss": 0.4525, + "step": 2034 + }, + { + "epoch": 0.71, + "grad_norm": 3.9346370697021484, + "learning_rate": 0.000243326717935996, + "loss": 0.4579, + "step": 2035 + }, + { + "epoch": 0.71, + "grad_norm": 3.241410732269287, + "learning_rate": 0.00024328950632597368, + "loss": 0.7072, + "step": 2036 + }, + { + "epoch": 0.71, + "grad_norm": 1.559287667274475, + "learning_rate": 0.00024325229471595136, + "loss": 0.2728, + "step": 2037 + }, + { + "epoch": 0.71, + "grad_norm": 2.5284101963043213, + "learning_rate": 0.00024321508310592903, + "loss": 0.5484, + "step": 2038 + }, + { + "epoch": 0.71, + "grad_norm": 3.7311956882476807, + "learning_rate": 0.00024317787149590668, + "loss": 0.8191, + "step": 2039 + }, + { + "epoch": 0.71, + "grad_norm": 12.553634643554688, + "learning_rate": 0.00024314065988588439, + "loss": 1.2444, + "step": 2040 + }, + { + "epoch": 0.72, + "grad_norm": 6.129791259765625, + "learning_rate": 0.00024310344827586203, + "loss": 0.8698, + "step": 2041 + }, + { + "epoch": 0.72, + "grad_norm": 2.7819488048553467, + "learning_rate": 0.0002430662366658397, + "loss": 0.4961, + "step": 2042 + }, + { + "epoch": 0.72, + "grad_norm": 3.355811357498169, + "learning_rate": 0.0002430290250558174, + "loss": 0.5451, + "step": 2043 + }, + { + "epoch": 0.72, + "grad_norm": 3.024493932723999, + "learning_rate": 0.00024299181344579506, + "loss": 0.7597, + "step": 2044 + }, + { + "epoch": 0.72, + "grad_norm": 3.637416124343872, + "learning_rate": 0.00024295460183577274, + "loss": 0.7338, + "step": 2045 + }, + { + "epoch": 0.72, + "grad_norm": 2.891045570373535, + "learning_rate": 0.0002429173902257504, + "loss": 0.3468, + "step": 2046 + }, + { + "epoch": 0.72, + "grad_norm": 5.023557186126709, + "learning_rate": 0.0002428801786157281, + "loss": 0.424, + "step": 2047 + }, + { + "epoch": 0.72, + "grad_norm": 3.320523262023926, + "learning_rate": 0.00024284296700570577, + "loss": 0.3529, + "step": 2048 + }, + { + "epoch": 0.72, + "grad_norm": 7.243979454040527, + "learning_rate": 0.00024280575539568341, + "loss": 2.7054, + "step": 2049 + }, + { + "epoch": 0.72, + "grad_norm": 1.9604123830795288, + "learning_rate": 0.00024276854378566112, + "loss": 0.2114, + "step": 2050 + }, + { + "epoch": 0.72, + "grad_norm": 4.573541164398193, + "learning_rate": 0.0002427313321756388, + "loss": 1.3774, + "step": 2051 + }, + { + "epoch": 0.72, + "grad_norm": 2.40055251121521, + "learning_rate": 0.00024269412056561644, + "loss": 0.8643, + "step": 2052 + }, + { + "epoch": 0.72, + "grad_norm": 3.0245659351348877, + "learning_rate": 0.00024265690895559414, + "loss": 0.4089, + "step": 2053 + }, + { + "epoch": 0.72, + "grad_norm": 3.8446860313415527, + "learning_rate": 0.0002426196973455718, + "loss": 0.8581, + "step": 2054 + }, + { + "epoch": 0.72, + "grad_norm": 2.76045823097229, + "learning_rate": 0.00024258248573554947, + "loss": 0.4217, + "step": 2055 + }, + { + "epoch": 0.72, + "grad_norm": 2.384861707687378, + "learning_rate": 0.00024254527412552714, + "loss": 0.5413, + "step": 2056 + }, + { + "epoch": 0.72, + "grad_norm": 1.9222930669784546, + "learning_rate": 0.00024250806251550482, + "loss": 0.3125, + "step": 2057 + }, + { + "epoch": 0.72, + "grad_norm": 3.540152072906494, + "learning_rate": 0.0002424708509054825, + "loss": 1.8312, + "step": 2058 + }, + { + "epoch": 0.72, + "grad_norm": 2.780682325363159, + "learning_rate": 0.00024243363929546014, + "loss": 0.6677, + "step": 2059 + }, + { + "epoch": 0.72, + "grad_norm": 3.9040141105651855, + "learning_rate": 0.00024239642768543782, + "loss": 0.7344, + "step": 2060 + }, + { + "epoch": 0.72, + "grad_norm": 4.254075050354004, + "learning_rate": 0.00024235921607541552, + "loss": 0.9292, + "step": 2061 + }, + { + "epoch": 0.72, + "grad_norm": 3.1678977012634277, + "learning_rate": 0.00024232200446539317, + "loss": 1.1215, + "step": 2062 + }, + { + "epoch": 0.72, + "grad_norm": 2.6753976345062256, + "learning_rate": 0.00024228479285537085, + "loss": 0.4991, + "step": 2063 + }, + { + "epoch": 0.72, + "grad_norm": 4.377896308898926, + "learning_rate": 0.00024224758124534855, + "loss": 0.5978, + "step": 2064 + }, + { + "epoch": 0.72, + "grad_norm": 3.7869486808776855, + "learning_rate": 0.0002422103696353262, + "loss": 0.8578, + "step": 2065 + }, + { + "epoch": 0.72, + "grad_norm": 3.29856014251709, + "learning_rate": 0.00024217315802530388, + "loss": 1.6217, + "step": 2066 + }, + { + "epoch": 0.72, + "grad_norm": 3.67818021774292, + "learning_rate": 0.00024213594641528152, + "loss": 0.5943, + "step": 2067 + }, + { + "epoch": 0.72, + "grad_norm": 4.469092845916748, + "learning_rate": 0.00024209873480525923, + "loss": 1.1634, + "step": 2068 + }, + { + "epoch": 0.72, + "grad_norm": 2.7627439498901367, + "learning_rate": 0.0002420615231952369, + "loss": 0.591, + "step": 2069 + }, + { + "epoch": 0.73, + "grad_norm": 2.3488175868988037, + "learning_rate": 0.00024202431158521455, + "loss": 0.2379, + "step": 2070 + }, + { + "epoch": 0.73, + "grad_norm": 3.005652904510498, + "learning_rate": 0.00024198709997519225, + "loss": 0.2445, + "step": 2071 + }, + { + "epoch": 0.73, + "grad_norm": 4.345755100250244, + "learning_rate": 0.0002419498883651699, + "loss": 0.7675, + "step": 2072 + }, + { + "epoch": 0.73, + "grad_norm": 7.79234504699707, + "learning_rate": 0.00024191267675514758, + "loss": 0.6093, + "step": 2073 + }, + { + "epoch": 0.73, + "grad_norm": 6.379326820373535, + "learning_rate": 0.00024187546514512528, + "loss": 1.4354, + "step": 2074 + }, + { + "epoch": 0.73, + "grad_norm": 3.6521170139312744, + "learning_rate": 0.00024183825353510293, + "loss": 0.7191, + "step": 2075 + }, + { + "epoch": 0.73, + "grad_norm": 3.690326452255249, + "learning_rate": 0.0002418010419250806, + "loss": 1.3118, + "step": 2076 + }, + { + "epoch": 0.73, + "grad_norm": 4.035961627960205, + "learning_rate": 0.00024176383031505828, + "loss": 1.0016, + "step": 2077 + }, + { + "epoch": 0.73, + "grad_norm": 4.05783748626709, + "learning_rate": 0.00024172661870503596, + "loss": 0.6541, + "step": 2078 + }, + { + "epoch": 0.73, + "grad_norm": 1.9695038795471191, + "learning_rate": 0.00024168940709501363, + "loss": 0.4606, + "step": 2079 + }, + { + "epoch": 0.73, + "grad_norm": 1.9987822771072388, + "learning_rate": 0.00024165219548499128, + "loss": 0.6701, + "step": 2080 + }, + { + "epoch": 0.73, + "grad_norm": 2.004821538925171, + "learning_rate": 0.00024161498387496896, + "loss": 0.4184, + "step": 2081 + }, + { + "epoch": 0.73, + "grad_norm": 2.6122078895568848, + "learning_rate": 0.00024157777226494666, + "loss": 0.3568, + "step": 2082 + }, + { + "epoch": 0.73, + "grad_norm": 3.2118942737579346, + "learning_rate": 0.0002415405606549243, + "loss": 0.4723, + "step": 2083 + }, + { + "epoch": 0.73, + "grad_norm": 2.2981133460998535, + "learning_rate": 0.00024150334904490199, + "loss": 0.5278, + "step": 2084 + }, + { + "epoch": 0.73, + "grad_norm": 3.0347321033477783, + "learning_rate": 0.00024146613743487963, + "loss": 0.8326, + "step": 2085 + }, + { + "epoch": 0.73, + "grad_norm": 3.122243881225586, + "learning_rate": 0.00024142892582485734, + "loss": 0.6792, + "step": 2086 + }, + { + "epoch": 0.73, + "grad_norm": 3.2204928398132324, + "learning_rate": 0.000241391714214835, + "loss": 0.6001, + "step": 2087 + }, + { + "epoch": 0.73, + "grad_norm": 4.347808837890625, + "learning_rate": 0.00024135450260481266, + "loss": 1.9925, + "step": 2088 + }, + { + "epoch": 0.73, + "grad_norm": 2.3485236167907715, + "learning_rate": 0.00024131729099479036, + "loss": 0.3445, + "step": 2089 + }, + { + "epoch": 0.73, + "grad_norm": 2.8277785778045654, + "learning_rate": 0.000241280079384768, + "loss": 0.5412, + "step": 2090 + }, + { + "epoch": 0.73, + "grad_norm": 1.600690484046936, + "learning_rate": 0.0002412428677747457, + "loss": 0.1389, + "step": 2091 + }, + { + "epoch": 0.73, + "grad_norm": 1.351870059967041, + "learning_rate": 0.0002412056561647234, + "loss": 0.1752, + "step": 2092 + }, + { + "epoch": 0.73, + "grad_norm": 3.944504499435425, + "learning_rate": 0.00024116844455470104, + "loss": 0.5421, + "step": 2093 + }, + { + "epoch": 0.73, + "grad_norm": 2.7904212474823, + "learning_rate": 0.00024113123294467872, + "loss": 0.481, + "step": 2094 + }, + { + "epoch": 0.73, + "grad_norm": 4.337879180908203, + "learning_rate": 0.00024109402133465642, + "loss": 1.1637, + "step": 2095 + }, + { + "epoch": 0.73, + "grad_norm": 3.739457607269287, + "learning_rate": 0.00024105680972463407, + "loss": 0.6533, + "step": 2096 + }, + { + "epoch": 0.73, + "grad_norm": 1.9053199291229248, + "learning_rate": 0.00024101959811461174, + "loss": 0.326, + "step": 2097 + }, + { + "epoch": 0.74, + "grad_norm": 10.310988426208496, + "learning_rate": 0.0002409823865045894, + "loss": 1.6222, + "step": 2098 + }, + { + "epoch": 0.74, + "grad_norm": 2.2947075366973877, + "learning_rate": 0.0002409451748945671, + "loss": 0.2548, + "step": 2099 + }, + { + "epoch": 0.74, + "grad_norm": 3.7225122451782227, + "learning_rate": 0.00024090796328454477, + "loss": 0.2533, + "step": 2100 + }, + { + "epoch": 0.74, + "eval_loss": 0.7072895169258118, + "eval_runtime": 50.5869, + "eval_samples_per_second": 42.857, + "eval_steps_per_second": 10.714, + "eval_wer": 0.5960906417574814, + "step": 2100 + }, + { + "epoch": 0.74, + "grad_norm": 4.087221622467041, + "learning_rate": 0.00024087075167452242, + "loss": 0.9829, + "step": 2101 + }, + { + "epoch": 0.74, + "grad_norm": 5.295423984527588, + "learning_rate": 0.0002408335400645001, + "loss": 1.442, + "step": 2102 + }, + { + "epoch": 0.74, + "grad_norm": 1.7258617877960205, + "learning_rate": 0.00024079632845447777, + "loss": 0.4635, + "step": 2103 + }, + { + "epoch": 0.74, + "grad_norm": 2.04913067817688, + "learning_rate": 0.00024075911684445545, + "loss": 0.462, + "step": 2104 + }, + { + "epoch": 0.74, + "grad_norm": 2.6161487102508545, + "learning_rate": 0.00024072190523443312, + "loss": 0.6827, + "step": 2105 + }, + { + "epoch": 0.74, + "grad_norm": 3.9730608463287354, + "learning_rate": 0.00024068469362441077, + "loss": 0.9988, + "step": 2106 + }, + { + "epoch": 0.74, + "grad_norm": 2.8048250675201416, + "learning_rate": 0.00024064748201438847, + "loss": 1.0969, + "step": 2107 + }, + { + "epoch": 0.74, + "grad_norm": 2.332301616668701, + "learning_rate": 0.00024061027040436615, + "loss": 0.4455, + "step": 2108 + }, + { + "epoch": 0.74, + "grad_norm": 3.4177818298339844, + "learning_rate": 0.0002405730587943438, + "loss": 2.1572, + "step": 2109 + }, + { + "epoch": 0.74, + "grad_norm": 5.298447132110596, + "learning_rate": 0.0002405358471843215, + "loss": 0.3914, + "step": 2110 + }, + { + "epoch": 0.74, + "grad_norm": 2.914675235748291, + "learning_rate": 0.00024049863557429915, + "loss": 1.0013, + "step": 2111 + }, + { + "epoch": 0.74, + "grad_norm": 4.703636646270752, + "learning_rate": 0.00024046142396427683, + "loss": 0.4293, + "step": 2112 + }, + { + "epoch": 0.74, + "grad_norm": 2.7955198287963867, + "learning_rate": 0.00024042421235425453, + "loss": 0.5958, + "step": 2113 + }, + { + "epoch": 0.74, + "grad_norm": 2.913386583328247, + "learning_rate": 0.00024038700074423218, + "loss": 0.7706, + "step": 2114 + }, + { + "epoch": 0.74, + "grad_norm": 3.9934210777282715, + "learning_rate": 0.00024034978913420985, + "loss": 0.4863, + "step": 2115 + }, + { + "epoch": 0.74, + "grad_norm": 1.5534229278564453, + "learning_rate": 0.0002403125775241875, + "loss": 0.2289, + "step": 2116 + }, + { + "epoch": 0.74, + "grad_norm": 3.206246852874756, + "learning_rate": 0.0002402753659141652, + "loss": 1.0357, + "step": 2117 + }, + { + "epoch": 0.74, + "grad_norm": 3.4681451320648193, + "learning_rate": 0.00024023815430414288, + "loss": 0.9145, + "step": 2118 + }, + { + "epoch": 0.74, + "grad_norm": 3.2907421588897705, + "learning_rate": 0.00024020094269412053, + "loss": 0.6565, + "step": 2119 + }, + { + "epoch": 0.74, + "grad_norm": 2.354326009750366, + "learning_rate": 0.00024016373108409823, + "loss": 0.5887, + "step": 2120 + }, + { + "epoch": 0.74, + "grad_norm": 3.702387571334839, + "learning_rate": 0.0002401265194740759, + "loss": 0.4698, + "step": 2121 + }, + { + "epoch": 0.74, + "grad_norm": 2.9041430950164795, + "learning_rate": 0.00024008930786405356, + "loss": 0.5025, + "step": 2122 + }, + { + "epoch": 0.74, + "grad_norm": 2.8005261421203613, + "learning_rate": 0.00024005209625403123, + "loss": 0.4798, + "step": 2123 + }, + { + "epoch": 0.74, + "grad_norm": 2.7690351009368896, + "learning_rate": 0.0002400148846440089, + "loss": 0.2449, + "step": 2124 + }, + { + "epoch": 0.74, + "grad_norm": 5.324347496032715, + "learning_rate": 0.00023997767303398658, + "loss": 1.9174, + "step": 2125 + }, + { + "epoch": 0.74, + "grad_norm": 2.6067378520965576, + "learning_rate": 0.00023994046142396426, + "loss": 0.6974, + "step": 2126 + }, + { + "epoch": 0.75, + "grad_norm": 2.2820823192596436, + "learning_rate": 0.0002399032498139419, + "loss": 0.3203, + "step": 2127 + }, + { + "epoch": 0.75, + "grad_norm": 1.06552255153656, + "learning_rate": 0.0002398660382039196, + "loss": 0.2081, + "step": 2128 + }, + { + "epoch": 0.75, + "grad_norm": 2.3411970138549805, + "learning_rate": 0.00023982882659389726, + "loss": 0.6395, + "step": 2129 + }, + { + "epoch": 0.75, + "grad_norm": 2.0925540924072266, + "learning_rate": 0.00023979161498387494, + "loss": 0.6678, + "step": 2130 + }, + { + "epoch": 0.75, + "grad_norm": 2.3923542499542236, + "learning_rate": 0.00023975440337385264, + "loss": 0.5511, + "step": 2131 + }, + { + "epoch": 0.75, + "grad_norm": 2.139909267425537, + "learning_rate": 0.0002397171917638303, + "loss": 0.4857, + "step": 2132 + }, + { + "epoch": 0.75, + "grad_norm": 5.286214828491211, + "learning_rate": 0.00023967998015380796, + "loss": 0.7586, + "step": 2133 + }, + { + "epoch": 0.75, + "grad_norm": 6.681207656860352, + "learning_rate": 0.0002396427685437856, + "loss": 0.7079, + "step": 2134 + }, + { + "epoch": 0.75, + "grad_norm": 3.236323595046997, + "learning_rate": 0.00023960555693376332, + "loss": 0.6949, + "step": 2135 + }, + { + "epoch": 0.75, + "grad_norm": 2.150146722793579, + "learning_rate": 0.000239568345323741, + "loss": 0.7396, + "step": 2136 + }, + { + "epoch": 0.75, + "grad_norm": 2.668715715408325, + "learning_rate": 0.00023953113371371864, + "loss": 0.7919, + "step": 2137 + }, + { + "epoch": 0.75, + "grad_norm": 2.448146104812622, + "learning_rate": 0.00023949392210369634, + "loss": 0.4682, + "step": 2138 + }, + { + "epoch": 0.75, + "grad_norm": 2.377995014190674, + "learning_rate": 0.00023945671049367402, + "loss": 0.6805, + "step": 2139 + }, + { + "epoch": 0.75, + "grad_norm": 2.0972490310668945, + "learning_rate": 0.00023941949888365167, + "loss": 0.388, + "step": 2140 + }, + { + "epoch": 0.75, + "grad_norm": 4.356851100921631, + "learning_rate": 0.00023938228727362937, + "loss": 0.9222, + "step": 2141 + }, + { + "epoch": 0.75, + "grad_norm": 3.6163222789764404, + "learning_rate": 0.00023934507566360702, + "loss": 0.3178, + "step": 2142 + }, + { + "epoch": 0.75, + "grad_norm": 3.244340181350708, + "learning_rate": 0.0002393078640535847, + "loss": 0.414, + "step": 2143 + }, + { + "epoch": 0.75, + "grad_norm": 2.176497220993042, + "learning_rate": 0.00023927065244356237, + "loss": 0.3438, + "step": 2144 + }, + { + "epoch": 0.75, + "grad_norm": 3.1383144855499268, + "learning_rate": 0.00023923344083354005, + "loss": 0.3254, + "step": 2145 + }, + { + "epoch": 0.75, + "grad_norm": 2.8384389877319336, + "learning_rate": 0.00023919622922351772, + "loss": 0.3931, + "step": 2146 + }, + { + "epoch": 0.75, + "grad_norm": 4.945198059082031, + "learning_rate": 0.00023915901761349537, + "loss": 0.6616, + "step": 2147 + }, + { + "epoch": 0.75, + "grad_norm": 2.2696187496185303, + "learning_rate": 0.00023912180600347305, + "loss": 0.1269, + "step": 2148 + }, + { + "epoch": 0.75, + "grad_norm": 4.868678092956543, + "learning_rate": 0.00023908459439345075, + "loss": 1.2393, + "step": 2149 + }, + { + "epoch": 0.75, + "grad_norm": 4.27547025680542, + "learning_rate": 0.0002390473827834284, + "loss": 0.3182, + "step": 2150 + }, + { + "epoch": 0.75, + "grad_norm": 4.301406383514404, + "learning_rate": 0.00023901017117340607, + "loss": 1.3776, + "step": 2151 + }, + { + "epoch": 0.75, + "grad_norm": 2.5143606662750244, + "learning_rate": 0.00023897295956338378, + "loss": 0.5273, + "step": 2152 + }, + { + "epoch": 0.75, + "grad_norm": 2.4446358680725098, + "learning_rate": 0.00023893574795336143, + "loss": 0.7657, + "step": 2153 + }, + { + "epoch": 0.75, + "grad_norm": 2.8754398822784424, + "learning_rate": 0.0002388985363433391, + "loss": 0.8753, + "step": 2154 + }, + { + "epoch": 0.76, + "grad_norm": 3.9459950923919678, + "learning_rate": 0.00023886132473331675, + "loss": 1.1413, + "step": 2155 + }, + { + "epoch": 0.76, + "grad_norm": 4.867654800415039, + "learning_rate": 0.00023882411312329445, + "loss": 0.612, + "step": 2156 + }, + { + "epoch": 0.76, + "grad_norm": 2.5319912433624268, + "learning_rate": 0.00023878690151327213, + "loss": 0.633, + "step": 2157 + }, + { + "epoch": 0.76, + "grad_norm": 3.040459632873535, + "learning_rate": 0.00023874968990324978, + "loss": 0.3897, + "step": 2158 + }, + { + "epoch": 0.76, + "grad_norm": 3.5026168823242188, + "learning_rate": 0.00023871247829322748, + "loss": 0.6734, + "step": 2159 + }, + { + "epoch": 0.76, + "grad_norm": 2.2834792137145996, + "learning_rate": 0.00023867526668320513, + "loss": 0.4584, + "step": 2160 + }, + { + "epoch": 0.76, + "grad_norm": 1.716819167137146, + "learning_rate": 0.0002386380550731828, + "loss": 0.4489, + "step": 2161 + }, + { + "epoch": 0.76, + "grad_norm": 3.5152437686920166, + "learning_rate": 0.0002386008434631605, + "loss": 1.6475, + "step": 2162 + }, + { + "epoch": 0.76, + "grad_norm": 2.483433961868286, + "learning_rate": 0.00023856363185313816, + "loss": 0.2928, + "step": 2163 + }, + { + "epoch": 0.76, + "grad_norm": 3.2262301445007324, + "learning_rate": 0.00023852642024311583, + "loss": 0.5461, + "step": 2164 + }, + { + "epoch": 0.76, + "grad_norm": 3.866180896759033, + "learning_rate": 0.0002384892086330935, + "loss": 0.4271, + "step": 2165 + }, + { + "epoch": 0.76, + "grad_norm": 2.7532100677490234, + "learning_rate": 0.00023845199702307118, + "loss": 0.3409, + "step": 2166 + }, + { + "epoch": 0.76, + "grad_norm": 3.9978840351104736, + "learning_rate": 0.00023841478541304886, + "loss": 1.1862, + "step": 2167 + }, + { + "epoch": 0.76, + "grad_norm": 1.0184621810913086, + "learning_rate": 0.0002383775738030265, + "loss": 0.1015, + "step": 2168 + }, + { + "epoch": 0.76, + "grad_norm": 2.473518133163452, + "learning_rate": 0.00023834036219300418, + "loss": 0.4473, + "step": 2169 + }, + { + "epoch": 0.76, + "grad_norm": 2.6468117237091064, + "learning_rate": 0.0002383031505829819, + "loss": 0.49, + "step": 2170 + }, + { + "epoch": 0.76, + "grad_norm": 4.993438720703125, + "learning_rate": 0.00023826593897295954, + "loss": 0.6108, + "step": 2171 + }, + { + "epoch": 0.76, + "grad_norm": 3.0172436237335205, + "learning_rate": 0.0002382287273629372, + "loss": 0.4763, + "step": 2172 + }, + { + "epoch": 0.76, + "grad_norm": 5.393608570098877, + "learning_rate": 0.0002381915157529149, + "loss": 0.4268, + "step": 2173 + }, + { + "epoch": 0.76, + "grad_norm": 4.117353439331055, + "learning_rate": 0.00023815430414289256, + "loss": 0.7324, + "step": 2174 + }, + { + "epoch": 0.76, + "grad_norm": 4.40974235534668, + "learning_rate": 0.00023811709253287024, + "loss": 0.3379, + "step": 2175 + }, + { + "epoch": 0.76, + "grad_norm": 5.692419052124023, + "learning_rate": 0.0002380798809228479, + "loss": 1.1812, + "step": 2176 + }, + { + "epoch": 0.76, + "grad_norm": 5.502355098724365, + "learning_rate": 0.0002380426693128256, + "loss": 1.0161, + "step": 2177 + }, + { + "epoch": 0.76, + "grad_norm": 2.216310501098633, + "learning_rate": 0.00023800545770280324, + "loss": 0.488, + "step": 2178 + }, + { + "epoch": 0.76, + "grad_norm": 2.1364212036132812, + "learning_rate": 0.00023796824609278091, + "loss": 0.528, + "step": 2179 + }, + { + "epoch": 0.76, + "grad_norm": 2.4948229789733887, + "learning_rate": 0.00023793103448275862, + "loss": 0.6491, + "step": 2180 + }, + { + "epoch": 0.76, + "grad_norm": 1.0776448249816895, + "learning_rate": 0.00023789382287273627, + "loss": 0.1683, + "step": 2181 + }, + { + "epoch": 0.76, + "grad_norm": 2.0587196350097656, + "learning_rate": 0.00023785661126271394, + "loss": 0.4055, + "step": 2182 + }, + { + "epoch": 0.76, + "grad_norm": 2.165605306625366, + "learning_rate": 0.00023781939965269164, + "loss": 0.7523, + "step": 2183 + }, + { + "epoch": 0.77, + "grad_norm": 2.3306822776794434, + "learning_rate": 0.0002377821880426693, + "loss": 0.2969, + "step": 2184 + }, + { + "epoch": 0.77, + "grad_norm": 3.706977128982544, + "learning_rate": 0.00023774497643264697, + "loss": 0.7947, + "step": 2185 + }, + { + "epoch": 0.77, + "grad_norm": 3.0041239261627197, + "learning_rate": 0.00023770776482262462, + "loss": 0.6185, + "step": 2186 + }, + { + "epoch": 0.77, + "grad_norm": 1.9066877365112305, + "learning_rate": 0.00023767055321260232, + "loss": 0.3012, + "step": 2187 + }, + { + "epoch": 0.77, + "grad_norm": 3.505563974380493, + "learning_rate": 0.00023763334160258, + "loss": 0.321, + "step": 2188 + }, + { + "epoch": 0.77, + "grad_norm": 4.046771049499512, + "learning_rate": 0.00023759612999255765, + "loss": 1.1728, + "step": 2189 + }, + { + "epoch": 0.77, + "grad_norm": 3.1281588077545166, + "learning_rate": 0.00023755891838253532, + "loss": 0.4046, + "step": 2190 + }, + { + "epoch": 0.77, + "grad_norm": 3.7986767292022705, + "learning_rate": 0.000237521706772513, + "loss": 0.8822, + "step": 2191 + }, + { + "epoch": 0.77, + "grad_norm": 3.6561527252197266, + "learning_rate": 0.00023748449516249067, + "loss": 0.6199, + "step": 2192 + }, + { + "epoch": 0.77, + "grad_norm": 4.620680332183838, + "learning_rate": 0.00023744728355246835, + "loss": 0.9583, + "step": 2193 + }, + { + "epoch": 0.77, + "grad_norm": 3.1020591259002686, + "learning_rate": 0.00023741007194244602, + "loss": 0.464, + "step": 2194 + }, + { + "epoch": 0.77, + "grad_norm": 3.0864760875701904, + "learning_rate": 0.0002373728603324237, + "loss": 0.5002, + "step": 2195 + }, + { + "epoch": 0.77, + "grad_norm": 3.774919033050537, + "learning_rate": 0.00023733564872240138, + "loss": 0.7486, + "step": 2196 + }, + { + "epoch": 0.77, + "grad_norm": 3.6889686584472656, + "learning_rate": 0.00023729843711237902, + "loss": 0.5443, + "step": 2197 + }, + { + "epoch": 0.77, + "grad_norm": 3.7951676845550537, + "learning_rate": 0.00023726122550235673, + "loss": 1.2228, + "step": 2198 + }, + { + "epoch": 0.77, + "grad_norm": 4.316519260406494, + "learning_rate": 0.00023722401389233438, + "loss": 1.6501, + "step": 2199 + }, + { + "epoch": 0.77, + "grad_norm": 2.2464311122894287, + "learning_rate": 0.00023718680228231205, + "loss": 0.3804, + "step": 2200 + }, + { + "epoch": 0.77, + "eval_loss": 0.6850249171257019, + "eval_runtime": 50.5746, + "eval_samples_per_second": 42.867, + "eval_steps_per_second": 10.717, + "eval_wer": 0.5974744853831517, + "step": 2200 + }, + { + "epoch": 0.77, + "grad_norm": 8.690844535827637, + "learning_rate": 0.00023714959067228976, + "loss": 2.5763, + "step": 2201 + }, + { + "epoch": 0.77, + "grad_norm": 1.9838602542877197, + "learning_rate": 0.0002371123790622674, + "loss": 0.7831, + "step": 2202 + }, + { + "epoch": 0.77, + "grad_norm": 2.0435171127319336, + "learning_rate": 0.00023707516745224508, + "loss": 0.5686, + "step": 2203 + }, + { + "epoch": 0.77, + "grad_norm": 2.6351277828216553, + "learning_rate": 0.00023703795584222273, + "loss": 0.9036, + "step": 2204 + }, + { + "epoch": 0.77, + "grad_norm": 2.1336121559143066, + "learning_rate": 0.00023700074423220043, + "loss": 0.6445, + "step": 2205 + }, + { + "epoch": 0.77, + "grad_norm": 9.804746627807617, + "learning_rate": 0.0002369635326221781, + "loss": 2.4538, + "step": 2206 + }, + { + "epoch": 0.77, + "grad_norm": 2.9941658973693848, + "learning_rate": 0.00023692632101215576, + "loss": 0.5441, + "step": 2207 + }, + { + "epoch": 0.77, + "grad_norm": 2.662944793701172, + "learning_rate": 0.00023688910940213346, + "loss": 0.2834, + "step": 2208 + }, + { + "epoch": 0.77, + "grad_norm": 2.271035671234131, + "learning_rate": 0.00023685189779211113, + "loss": 0.373, + "step": 2209 + }, + { + "epoch": 0.77, + "grad_norm": 2.727461099624634, + "learning_rate": 0.00023681468618208878, + "loss": 0.4508, + "step": 2210 + }, + { + "epoch": 0.77, + "grad_norm": 2.807447910308838, + "learning_rate": 0.00023677747457206646, + "loss": 0.5131, + "step": 2211 + }, + { + "epoch": 0.78, + "grad_norm": 3.0257365703582764, + "learning_rate": 0.00023674026296204413, + "loss": 0.4543, + "step": 2212 + }, + { + "epoch": 0.78, + "grad_norm": 3.6574392318725586, + "learning_rate": 0.0002367030513520218, + "loss": 0.8255, + "step": 2213 + }, + { + "epoch": 0.78, + "grad_norm": 5.633670330047607, + "learning_rate": 0.00023666583974199949, + "loss": 0.7957, + "step": 2214 + }, + { + "epoch": 0.78, + "grad_norm": 3.175776720046997, + "learning_rate": 0.00023662862813197716, + "loss": 0.7773, + "step": 2215 + }, + { + "epoch": 0.78, + "grad_norm": 2.383112907409668, + "learning_rate": 0.00023659141652195484, + "loss": 0.4066, + "step": 2216 + }, + { + "epoch": 0.78, + "grad_norm": 2.62146258354187, + "learning_rate": 0.0002365542049119325, + "loss": 0.8173, + "step": 2217 + }, + { + "epoch": 0.78, + "grad_norm": 48.06704330444336, + "learning_rate": 0.00023651699330191016, + "loss": 0.7836, + "step": 2218 + }, + { + "epoch": 0.78, + "grad_norm": 3.3420729637145996, + "learning_rate": 0.00023647978169188787, + "loss": 0.2653, + "step": 2219 + }, + { + "epoch": 0.78, + "grad_norm": 3.6878349781036377, + "learning_rate": 0.00023644257008186551, + "loss": 1.2167, + "step": 2220 + }, + { + "epoch": 0.78, + "grad_norm": 2.163689613342285, + "learning_rate": 0.0002364053584718432, + "loss": 0.2967, + "step": 2221 + }, + { + "epoch": 0.78, + "grad_norm": 2.2857308387756348, + "learning_rate": 0.0002363681468618209, + "loss": 0.2476, + "step": 2222 + }, + { + "epoch": 0.78, + "grad_norm": 2.968120574951172, + "learning_rate": 0.00023633093525179854, + "loss": 0.4314, + "step": 2223 + }, + { + "epoch": 0.78, + "grad_norm": 4.3224382400512695, + "learning_rate": 0.00023629372364177622, + "loss": 0.6707, + "step": 2224 + }, + { + "epoch": 0.78, + "grad_norm": 5.102696895599365, + "learning_rate": 0.00023625651203175387, + "loss": 0.7693, + "step": 2225 + }, + { + "epoch": 0.78, + "grad_norm": 4.199782371520996, + "learning_rate": 0.00023621930042173157, + "loss": 1.5092, + "step": 2226 + }, + { + "epoch": 0.78, + "grad_norm": 4.694085121154785, + "learning_rate": 0.00023618208881170924, + "loss": 1.4029, + "step": 2227 + }, + { + "epoch": 0.78, + "grad_norm": 2.0905003547668457, + "learning_rate": 0.0002361448772016869, + "loss": 0.5829, + "step": 2228 + }, + { + "epoch": 0.78, + "grad_norm": 2.0144598484039307, + "learning_rate": 0.0002361076655916646, + "loss": 0.6812, + "step": 2229 + }, + { + "epoch": 0.78, + "grad_norm": 2.288403034210205, + "learning_rate": 0.00023607045398164224, + "loss": 0.5396, + "step": 2230 + }, + { + "epoch": 0.78, + "grad_norm": 1.9742906093597412, + "learning_rate": 0.00023603324237161992, + "loss": 0.3993, + "step": 2231 + }, + { + "epoch": 0.78, + "grad_norm": 2.8184242248535156, + "learning_rate": 0.0002359960307615976, + "loss": 0.6374, + "step": 2232 + }, + { + "epoch": 0.78, + "grad_norm": 1.8115568161010742, + "learning_rate": 0.00023595881915157527, + "loss": 0.6224, + "step": 2233 + }, + { + "epoch": 0.78, + "grad_norm": 1.4084148406982422, + "learning_rate": 0.00023592160754155295, + "loss": 0.3404, + "step": 2234 + }, + { + "epoch": 0.78, + "grad_norm": 2.8344879150390625, + "learning_rate": 0.0002358843959315306, + "loss": 0.7027, + "step": 2235 + }, + { + "epoch": 0.78, + "grad_norm": 3.2461719512939453, + "learning_rate": 0.0002358471843215083, + "loss": 0.4054, + "step": 2236 + }, + { + "epoch": 0.78, + "grad_norm": 3.190657377243042, + "learning_rate": 0.00023580997271148598, + "loss": 0.7986, + "step": 2237 + }, + { + "epoch": 0.78, + "grad_norm": 1.7157618999481201, + "learning_rate": 0.00023577276110146362, + "loss": 0.3132, + "step": 2238 + }, + { + "epoch": 0.78, + "grad_norm": 2.2127954959869385, + "learning_rate": 0.0002357355494914413, + "loss": 0.3935, + "step": 2239 + }, + { + "epoch": 0.78, + "grad_norm": 3.468662738800049, + "learning_rate": 0.000235698337881419, + "loss": 0.6404, + "step": 2240 + }, + { + "epoch": 0.79, + "grad_norm": 2.6316206455230713, + "learning_rate": 0.00023566112627139665, + "loss": 0.5468, + "step": 2241 + }, + { + "epoch": 0.79, + "grad_norm": 3.6631693840026855, + "learning_rate": 0.00023562391466137433, + "loss": 0.7712, + "step": 2242 + }, + { + "epoch": 0.79, + "grad_norm": 4.116514205932617, + "learning_rate": 0.00023558670305135198, + "loss": 0.7397, + "step": 2243 + }, + { + "epoch": 0.79, + "grad_norm": 2.6557669639587402, + "learning_rate": 0.00023554949144132968, + "loss": 0.5441, + "step": 2244 + }, + { + "epoch": 0.79, + "grad_norm": 3.5287699699401855, + "learning_rate": 0.00023551227983130735, + "loss": 0.4418, + "step": 2245 + }, + { + "epoch": 0.79, + "grad_norm": 3.1134016513824463, + "learning_rate": 0.000235475068221285, + "loss": 0.4333, + "step": 2246 + }, + { + "epoch": 0.79, + "grad_norm": 4.637173652648926, + "learning_rate": 0.0002354378566112627, + "loss": 0.5619, + "step": 2247 + }, + { + "epoch": 0.79, + "grad_norm": 4.621034145355225, + "learning_rate": 0.00023540064500124035, + "loss": 0.8087, + "step": 2248 + }, + { + "epoch": 0.79, + "grad_norm": 3.706472635269165, + "learning_rate": 0.00023536343339121803, + "loss": 0.3778, + "step": 2249 + }, + { + "epoch": 0.79, + "grad_norm": 6.068023204803467, + "learning_rate": 0.00023532622178119573, + "loss": 0.5066, + "step": 2250 + }, + { + "epoch": 0.79, + "grad_norm": 3.4167532920837402, + "learning_rate": 0.00023528901017117338, + "loss": 1.2843, + "step": 2251 + }, + { + "epoch": 0.79, + "grad_norm": 1.9952954053878784, + "learning_rate": 0.00023525179856115106, + "loss": 0.7945, + "step": 2252 + }, + { + "epoch": 0.79, + "grad_norm": 3.9978554248809814, + "learning_rate": 0.00023521458695112876, + "loss": 0.5449, + "step": 2253 + }, + { + "epoch": 0.79, + "grad_norm": 2.489971876144409, + "learning_rate": 0.0002351773753411064, + "loss": 0.6356, + "step": 2254 + }, + { + "epoch": 0.79, + "grad_norm": 2.167726993560791, + "learning_rate": 0.00023514016373108409, + "loss": 0.4688, + "step": 2255 + }, + { + "epoch": 0.79, + "grad_norm": 2.427248477935791, + "learning_rate": 0.00023510295212106173, + "loss": 0.7861, + "step": 2256 + }, + { + "epoch": 0.79, + "grad_norm": 2.5070106983184814, + "learning_rate": 0.00023506574051103944, + "loss": 0.9434, + "step": 2257 + }, + { + "epoch": 0.79, + "grad_norm": 2.904707908630371, + "learning_rate": 0.0002350285289010171, + "loss": 0.7917, + "step": 2258 + }, + { + "epoch": 0.79, + "grad_norm": 4.7793803215026855, + "learning_rate": 0.00023499131729099476, + "loss": 1.5236, + "step": 2259 + }, + { + "epoch": 0.79, + "grad_norm": 3.193175792694092, + "learning_rate": 0.00023495410568097244, + "loss": 0.9601, + "step": 2260 + }, + { + "epoch": 0.79, + "grad_norm": 1.6871867179870605, + "learning_rate": 0.0002349168940709501, + "loss": 0.3522, + "step": 2261 + }, + { + "epoch": 0.79, + "grad_norm": 2.900092124938965, + "learning_rate": 0.0002348796824609278, + "loss": 0.9403, + "step": 2262 + }, + { + "epoch": 0.79, + "grad_norm": 3.3442814350128174, + "learning_rate": 0.00023484247085090546, + "loss": 0.807, + "step": 2263 + }, + { + "epoch": 0.79, + "grad_norm": 2.9604439735412598, + "learning_rate": 0.0002348052592408831, + "loss": 0.6192, + "step": 2264 + }, + { + "epoch": 0.79, + "grad_norm": 5.023985862731934, + "learning_rate": 0.00023476804763086082, + "loss": 1.084, + "step": 2265 + }, + { + "epoch": 0.79, + "grad_norm": 2.1374659538269043, + "learning_rate": 0.0002347308360208385, + "loss": 0.2996, + "step": 2266 + }, + { + "epoch": 0.79, + "grad_norm": 2.652350425720215, + "learning_rate": 0.00023469362441081614, + "loss": 0.5887, + "step": 2267 + }, + { + "epoch": 0.79, + "grad_norm": 3.5104193687438965, + "learning_rate": 0.00023465641280079384, + "loss": 0.6121, + "step": 2268 + }, + { + "epoch": 0.8, + "grad_norm": 4.1145853996276855, + "learning_rate": 0.0002346192011907715, + "loss": 0.5084, + "step": 2269 + }, + { + "epoch": 0.8, + "grad_norm": 4.850748538970947, + "learning_rate": 0.00023458198958074917, + "loss": 0.6602, + "step": 2270 + }, + { + "epoch": 0.8, + "grad_norm": 1.4793652296066284, + "learning_rate": 0.00023454477797072687, + "loss": 0.1197, + "step": 2271 + }, + { + "epoch": 0.8, + "grad_norm": 1.932457447052002, + "learning_rate": 0.00023450756636070452, + "loss": 0.1678, + "step": 2272 + }, + { + "epoch": 0.8, + "grad_norm": 1.8412857055664062, + "learning_rate": 0.0002344703547506822, + "loss": 0.1298, + "step": 2273 + }, + { + "epoch": 0.8, + "grad_norm": 5.570624828338623, + "learning_rate": 0.00023443314314065984, + "loss": 0.5715, + "step": 2274 + }, + { + "epoch": 0.8, + "grad_norm": 3.606353759765625, + "learning_rate": 0.00023439593153063755, + "loss": 0.3448, + "step": 2275 + }, + { + "epoch": 0.8, + "grad_norm": 2.9131336212158203, + "learning_rate": 0.00023435871992061522, + "loss": 1.2812, + "step": 2276 + }, + { + "epoch": 0.8, + "grad_norm": 2.4033138751983643, + "learning_rate": 0.00023432150831059287, + "loss": 0.5682, + "step": 2277 + }, + { + "epoch": 0.8, + "grad_norm": 2.5214476585388184, + "learning_rate": 0.00023428429670057057, + "loss": 0.734, + "step": 2278 + }, + { + "epoch": 0.8, + "grad_norm": 3.3294756412506104, + "learning_rate": 0.00023424708509054822, + "loss": 0.685, + "step": 2279 + }, + { + "epoch": 0.8, + "grad_norm": 2.0864174365997314, + "learning_rate": 0.0002342098734805259, + "loss": 0.6529, + "step": 2280 + }, + { + "epoch": 0.8, + "grad_norm": 1.4727028608322144, + "learning_rate": 0.00023417266187050357, + "loss": 0.3351, + "step": 2281 + }, + { + "epoch": 0.8, + "grad_norm": 1.7328686714172363, + "learning_rate": 0.00023413545026048125, + "loss": 0.4686, + "step": 2282 + }, + { + "epoch": 0.8, + "grad_norm": 2.6917636394500732, + "learning_rate": 0.00023409823865045893, + "loss": 0.4922, + "step": 2283 + }, + { + "epoch": 0.8, + "grad_norm": 2.5129282474517822, + "learning_rate": 0.0002340610270404366, + "loss": 0.4395, + "step": 2284 + }, + { + "epoch": 0.8, + "grad_norm": 2.758345603942871, + "learning_rate": 0.00023402381543041425, + "loss": 0.5023, + "step": 2285 + }, + { + "epoch": 0.8, + "grad_norm": 4.338339328765869, + "learning_rate": 0.00023398660382039195, + "loss": 0.6605, + "step": 2286 + }, + { + "epoch": 0.8, + "grad_norm": 2.811253786087036, + "learning_rate": 0.0002339493922103696, + "loss": 0.5628, + "step": 2287 + }, + { + "epoch": 0.8, + "grad_norm": 2.3005897998809814, + "learning_rate": 0.00023391218060034728, + "loss": 0.644, + "step": 2288 + }, + { + "epoch": 0.8, + "grad_norm": 3.50140118598938, + "learning_rate": 0.00023387496899032498, + "loss": 1.2557, + "step": 2289 + }, + { + "epoch": 0.8, + "grad_norm": 2.0913150310516357, + "learning_rate": 0.00023383775738030263, + "loss": 0.4355, + "step": 2290 + }, + { + "epoch": 0.8, + "grad_norm": 2.851500988006592, + "learning_rate": 0.0002338005457702803, + "loss": 0.4956, + "step": 2291 + }, + { + "epoch": 0.8, + "grad_norm": 4.441885471343994, + "learning_rate": 0.00023376333416025795, + "loss": 0.6377, + "step": 2292 + }, + { + "epoch": 0.8, + "grad_norm": 2.17043399810791, + "learning_rate": 0.00023372612255023566, + "loss": 0.5092, + "step": 2293 + }, + { + "epoch": 0.8, + "grad_norm": 2.0366196632385254, + "learning_rate": 0.00023368891094021333, + "loss": 0.1851, + "step": 2294 + }, + { + "epoch": 0.8, + "grad_norm": 3.8057641983032227, + "learning_rate": 0.00023365169933019098, + "loss": 0.733, + "step": 2295 + }, + { + "epoch": 0.8, + "grad_norm": 6.428284168243408, + "learning_rate": 0.00023361448772016868, + "loss": 0.7048, + "step": 2296 + }, + { + "epoch": 0.8, + "grad_norm": 3.9241631031036377, + "learning_rate": 0.00023357727611014636, + "loss": 1.9665, + "step": 2297 + }, + { + "epoch": 0.81, + "grad_norm": 4.794024467468262, + "learning_rate": 0.000233540064500124, + "loss": 0.953, + "step": 2298 + }, + { + "epoch": 0.81, + "grad_norm": 3.998034954071045, + "learning_rate": 0.0002335028528901017, + "loss": 1.7438, + "step": 2299 + }, + { + "epoch": 0.81, + "grad_norm": 4.720144271850586, + "learning_rate": 0.00023346564128007936, + "loss": 0.7473, + "step": 2300 + }, + { + "epoch": 0.81, + "eval_loss": 0.8185052275657654, + "eval_runtime": 50.5567, + "eval_samples_per_second": 42.883, + "eval_steps_per_second": 10.721, + "eval_wer": 0.7170039785504237, + "step": 2300 + }, + { + "epoch": 0.81, + "grad_norm": 3.0130574703216553, + "learning_rate": 0.00023342842967005704, + "loss": 0.6867, + "step": 2301 + }, + { + "epoch": 0.81, + "grad_norm": 3.1833200454711914, + "learning_rate": 0.0002333912180600347, + "loss": 0.9255, + "step": 2302 + }, + { + "epoch": 0.81, + "grad_norm": 3.382704019546509, + "learning_rate": 0.0002333540064500124, + "loss": 0.7009, + "step": 2303 + }, + { + "epoch": 0.81, + "grad_norm": 18.74046516418457, + "learning_rate": 0.00023331679483999006, + "loss": 5.78, + "step": 2304 + }, + { + "epoch": 0.81, + "grad_norm": 2.770467758178711, + "learning_rate": 0.0002332795832299677, + "loss": 0.7705, + "step": 2305 + }, + { + "epoch": 0.81, + "grad_norm": 2.4621565341949463, + "learning_rate": 0.0002332423716199454, + "loss": 0.7438, + "step": 2306 + }, + { + "epoch": 0.81, + "grad_norm": 8.334583282470703, + "learning_rate": 0.0002332051600099231, + "loss": 0.7081, + "step": 2307 + }, + { + "epoch": 0.81, + "grad_norm": 2.128434658050537, + "learning_rate": 0.00023316794839990074, + "loss": 0.4139, + "step": 2308 + }, + { + "epoch": 0.81, + "grad_norm": 5.296910285949707, + "learning_rate": 0.00023313073678987842, + "loss": 0.8335, + "step": 2309 + }, + { + "epoch": 0.81, + "grad_norm": 2.945010185241699, + "learning_rate": 0.00023309352517985612, + "loss": 0.6668, + "step": 2310 + }, + { + "epoch": 0.81, + "grad_norm": 3.273423433303833, + "learning_rate": 0.00023305631356983377, + "loss": 0.7138, + "step": 2311 + }, + { + "epoch": 0.81, + "grad_norm": 1.95408034324646, + "learning_rate": 0.00023301910195981144, + "loss": 0.4044, + "step": 2312 + }, + { + "epoch": 0.81, + "grad_norm": 7.172848224639893, + "learning_rate": 0.0002329818903497891, + "loss": 2.0813, + "step": 2313 + }, + { + "epoch": 0.81, + "grad_norm": 2.5199854373931885, + "learning_rate": 0.0002329446787397668, + "loss": 0.4251, + "step": 2314 + }, + { + "epoch": 0.81, + "grad_norm": 2.866971969604492, + "learning_rate": 0.00023290746712974447, + "loss": 0.5057, + "step": 2315 + }, + { + "epoch": 0.81, + "grad_norm": 2.345081090927124, + "learning_rate": 0.00023287025551972212, + "loss": 0.4534, + "step": 2316 + }, + { + "epoch": 0.81, + "grad_norm": 3.3792688846588135, + "learning_rate": 0.00023283304390969982, + "loss": 0.6358, + "step": 2317 + }, + { + "epoch": 0.81, + "grad_norm": 2.9677326679229736, + "learning_rate": 0.00023279583229967747, + "loss": 1.3059, + "step": 2318 + }, + { + "epoch": 0.81, + "grad_norm": 3.3454091548919678, + "learning_rate": 0.00023275862068965515, + "loss": 0.4036, + "step": 2319 + }, + { + "epoch": 0.81, + "grad_norm": 3.9923884868621826, + "learning_rate": 0.00023272140907963285, + "loss": 1.2093, + "step": 2320 + }, + { + "epoch": 0.81, + "grad_norm": 2.1582930088043213, + "learning_rate": 0.0002326841974696105, + "loss": 0.3846, + "step": 2321 + }, + { + "epoch": 0.81, + "grad_norm": 3.0936267375946045, + "learning_rate": 0.00023264698585958817, + "loss": 0.5771, + "step": 2322 + }, + { + "epoch": 0.81, + "grad_norm": 2.7140424251556396, + "learning_rate": 0.00023260977424956582, + "loss": 0.3977, + "step": 2323 + }, + { + "epoch": 0.81, + "grad_norm": 4.4949116706848145, + "learning_rate": 0.00023257256263954353, + "loss": 1.5159, + "step": 2324 + }, + { + "epoch": 0.81, + "grad_norm": 2.300839424133301, + "learning_rate": 0.0002325353510295212, + "loss": 0.3565, + "step": 2325 + }, + { + "epoch": 0.81, + "grad_norm": 2.1757450103759766, + "learning_rate": 0.00023249813941949885, + "loss": 0.9017, + "step": 2326 + }, + { + "epoch": 0.82, + "grad_norm": 2.34287166595459, + "learning_rate": 0.00023246092780947653, + "loss": 0.5174, + "step": 2327 + }, + { + "epoch": 0.82, + "grad_norm": 2.0251080989837646, + "learning_rate": 0.00023242371619945423, + "loss": 0.678, + "step": 2328 + }, + { + "epoch": 0.82, + "grad_norm": 2.359485149383545, + "learning_rate": 0.00023238650458943188, + "loss": 0.6204, + "step": 2329 + }, + { + "epoch": 0.82, + "grad_norm": 2.325961112976074, + "learning_rate": 0.00023234929297940955, + "loss": 0.6806, + "step": 2330 + }, + { + "epoch": 0.82, + "grad_norm": 2.272260904312134, + "learning_rate": 0.0002323120813693872, + "loss": 1.1038, + "step": 2331 + }, + { + "epoch": 0.82, + "grad_norm": 2.266369581222534, + "learning_rate": 0.0002322748697593649, + "loss": 0.5986, + "step": 2332 + }, + { + "epoch": 0.82, + "grad_norm": 2.7468202114105225, + "learning_rate": 0.00023223765814934258, + "loss": 0.6712, + "step": 2333 + }, + { + "epoch": 0.82, + "grad_norm": 2.0581917762756348, + "learning_rate": 0.00023220044653932023, + "loss": 0.4981, + "step": 2334 + }, + { + "epoch": 0.82, + "grad_norm": 3.185770273208618, + "learning_rate": 0.00023216323492929793, + "loss": 0.8548, + "step": 2335 + }, + { + "epoch": 0.82, + "grad_norm": 2.0419859886169434, + "learning_rate": 0.00023212602331927558, + "loss": 0.2562, + "step": 2336 + }, + { + "epoch": 0.82, + "grad_norm": 3.8037047386169434, + "learning_rate": 0.00023208881170925326, + "loss": 0.625, + "step": 2337 + }, + { + "epoch": 0.82, + "grad_norm": 6.072167873382568, + "learning_rate": 0.00023205160009923096, + "loss": 1.8957, + "step": 2338 + }, + { + "epoch": 0.82, + "grad_norm": 2.633638858795166, + "learning_rate": 0.0002320143884892086, + "loss": 0.7044, + "step": 2339 + }, + { + "epoch": 0.82, + "grad_norm": 2.252713680267334, + "learning_rate": 0.00023197717687918628, + "loss": 0.481, + "step": 2340 + }, + { + "epoch": 0.82, + "grad_norm": 2.7188823223114014, + "learning_rate": 0.000231939965269164, + "loss": 0.6524, + "step": 2341 + }, + { + "epoch": 0.82, + "grad_norm": 3.263317108154297, + "learning_rate": 0.00023190275365914164, + "loss": 0.3887, + "step": 2342 + }, + { + "epoch": 0.82, + "grad_norm": 3.679230213165283, + "learning_rate": 0.0002318655420491193, + "loss": 0.4308, + "step": 2343 + }, + { + "epoch": 0.82, + "grad_norm": 3.2064208984375, + "learning_rate": 0.00023182833043909696, + "loss": 1.0281, + "step": 2344 + }, + { + "epoch": 0.82, + "grad_norm": 4.935098171234131, + "learning_rate": 0.00023179111882907466, + "loss": 0.9202, + "step": 2345 + }, + { + "epoch": 0.82, + "grad_norm": 4.840257167816162, + "learning_rate": 0.00023175390721905234, + "loss": 1.914, + "step": 2346 + }, + { + "epoch": 0.82, + "grad_norm": 3.3795762062072754, + "learning_rate": 0.00023171669560903, + "loss": 0.6141, + "step": 2347 + }, + { + "epoch": 0.82, + "grad_norm": 4.437031269073486, + "learning_rate": 0.00023167948399900766, + "loss": 0.9272, + "step": 2348 + }, + { + "epoch": 0.82, + "grad_norm": 1.7707891464233398, + "learning_rate": 0.00023164227238898534, + "loss": 0.2347, + "step": 2349 + }, + { + "epoch": 0.82, + "grad_norm": 1.9971076250076294, + "learning_rate": 0.00023160506077896301, + "loss": 0.1552, + "step": 2350 + }, + { + "epoch": 0.82, + "grad_norm": 4.799489974975586, + "learning_rate": 0.0002315678491689407, + "loss": 1.1959, + "step": 2351 + }, + { + "epoch": 0.82, + "grad_norm": 1.57328462600708, + "learning_rate": 0.00023153063755891834, + "loss": 0.448, + "step": 2352 + }, + { + "epoch": 0.82, + "grad_norm": 3.8030247688293457, + "learning_rate": 0.00023149342594889604, + "loss": 1.108, + "step": 2353 + }, + { + "epoch": 0.82, + "grad_norm": 2.7969067096710205, + "learning_rate": 0.00023145621433887372, + "loss": 0.9344, + "step": 2354 + }, + { + "epoch": 0.83, + "grad_norm": 1.8098201751708984, + "learning_rate": 0.00023141900272885137, + "loss": 0.2884, + "step": 2355 + }, + { + "epoch": 0.83, + "grad_norm": 3.249875783920288, + "learning_rate": 0.00023138179111882907, + "loss": 0.6573, + "step": 2356 + }, + { + "epoch": 0.83, + "grad_norm": 3.7881550788879395, + "learning_rate": 0.00023134457950880672, + "loss": 1.0587, + "step": 2357 + }, + { + "epoch": 0.83, + "grad_norm": 3.2492504119873047, + "learning_rate": 0.0002313073678987844, + "loss": 0.8628, + "step": 2358 + }, + { + "epoch": 0.83, + "grad_norm": 2.8697025775909424, + "learning_rate": 0.0002312701562887621, + "loss": 0.622, + "step": 2359 + }, + { + "epoch": 0.83, + "grad_norm": 3.8673083782196045, + "learning_rate": 0.00023123294467873975, + "loss": 0.7776, + "step": 2360 + }, + { + "epoch": 0.83, + "grad_norm": 3.7203593254089355, + "learning_rate": 0.00023119573306871742, + "loss": 0.8226, + "step": 2361 + }, + { + "epoch": 0.83, + "grad_norm": 10.66352653503418, + "learning_rate": 0.00023115852145869507, + "loss": 2.7217, + "step": 2362 + }, + { + "epoch": 0.83, + "grad_norm": 2.4907355308532715, + "learning_rate": 0.00023112130984867277, + "loss": 0.3725, + "step": 2363 + }, + { + "epoch": 0.83, + "grad_norm": 6.29020357131958, + "learning_rate": 0.00023108409823865045, + "loss": 2.0326, + "step": 2364 + }, + { + "epoch": 0.83, + "grad_norm": 3.2957611083984375, + "learning_rate": 0.0002310468866286281, + "loss": 0.6962, + "step": 2365 + }, + { + "epoch": 0.83, + "grad_norm": 4.59990930557251, + "learning_rate": 0.0002310096750186058, + "loss": 0.9934, + "step": 2366 + }, + { + "epoch": 0.83, + "grad_norm": 3.0413990020751953, + "learning_rate": 0.00023097246340858345, + "loss": 0.4285, + "step": 2367 + }, + { + "epoch": 0.83, + "grad_norm": 2.2277538776397705, + "learning_rate": 0.00023093525179856112, + "loss": 0.4112, + "step": 2368 + }, + { + "epoch": 0.83, + "grad_norm": 2.3201451301574707, + "learning_rate": 0.0002308980401885388, + "loss": 0.3224, + "step": 2369 + }, + { + "epoch": 0.83, + "grad_norm": 3.672980785369873, + "learning_rate": 0.00023086082857851648, + "loss": 0.4006, + "step": 2370 + }, + { + "epoch": 0.83, + "grad_norm": 2.6918740272521973, + "learning_rate": 0.00023082361696849415, + "loss": 0.4209, + "step": 2371 + }, + { + "epoch": 0.83, + "grad_norm": 4.196498394012451, + "learning_rate": 0.00023078640535847183, + "loss": 0.6705, + "step": 2372 + }, + { + "epoch": 0.83, + "grad_norm": 1.965975046157837, + "learning_rate": 0.00023074919374844948, + "loss": 0.2825, + "step": 2373 + }, + { + "epoch": 0.83, + "grad_norm": 3.151905059814453, + "learning_rate": 0.00023071198213842718, + "loss": 0.5709, + "step": 2374 + }, + { + "epoch": 0.83, + "grad_norm": 9.685364723205566, + "learning_rate": 0.00023067477052840483, + "loss": 2.5129, + "step": 2375 + }, + { + "epoch": 0.83, + "grad_norm": 1.8640772104263306, + "learning_rate": 0.0002306375589183825, + "loss": 0.7282, + "step": 2376 + }, + { + "epoch": 0.83, + "grad_norm": 2.197077989578247, + "learning_rate": 0.0002306003473083602, + "loss": 0.6786, + "step": 2377 + }, + { + "epoch": 0.83, + "grad_norm": 2.610832691192627, + "learning_rate": 0.00023056313569833786, + "loss": 0.4671, + "step": 2378 + }, + { + "epoch": 0.83, + "grad_norm": 2.7640953063964844, + "learning_rate": 0.00023052592408831553, + "loss": 0.4904, + "step": 2379 + }, + { + "epoch": 0.83, + "grad_norm": 1.4002923965454102, + "learning_rate": 0.00023048871247829318, + "loss": 0.3408, + "step": 2380 + }, + { + "epoch": 0.83, + "grad_norm": 2.018538475036621, + "learning_rate": 0.00023045150086827088, + "loss": 0.5375, + "step": 2381 + }, + { + "epoch": 0.83, + "grad_norm": 2.6300442218780518, + "learning_rate": 0.00023041428925824856, + "loss": 0.4204, + "step": 2382 + }, + { + "epoch": 0.83, + "grad_norm": 11.127724647521973, + "learning_rate": 0.0002303770776482262, + "loss": 2.8526, + "step": 2383 + }, + { + "epoch": 0.84, + "grad_norm": 1.2783316373825073, + "learning_rate": 0.0002303398660382039, + "loss": 0.1801, + "step": 2384 + }, + { + "epoch": 0.84, + "grad_norm": 2.074375867843628, + "learning_rate": 0.00023030265442818159, + "loss": 0.5052, + "step": 2385 + }, + { + "epoch": 0.84, + "grad_norm": 2.3270208835601807, + "learning_rate": 0.00023026544281815923, + "loss": 0.306, + "step": 2386 + }, + { + "epoch": 0.84, + "grad_norm": 4.8103556632995605, + "learning_rate": 0.00023022823120813694, + "loss": 1.2487, + "step": 2387 + }, + { + "epoch": 0.84, + "grad_norm": 3.1849074363708496, + "learning_rate": 0.0002301910195981146, + "loss": 0.5772, + "step": 2388 + }, + { + "epoch": 0.84, + "grad_norm": 3.9481422901153564, + "learning_rate": 0.00023015380798809226, + "loss": 1.5224, + "step": 2389 + }, + { + "epoch": 0.84, + "grad_norm": 2.1063621044158936, + "learning_rate": 0.00023011659637806994, + "loss": 0.3233, + "step": 2390 + }, + { + "epoch": 0.84, + "grad_norm": 2.34477162361145, + "learning_rate": 0.00023007938476804761, + "loss": 0.4639, + "step": 2391 + }, + { + "epoch": 0.84, + "grad_norm": 1.563861608505249, + "learning_rate": 0.0002300421731580253, + "loss": 0.203, + "step": 2392 + }, + { + "epoch": 0.84, + "grad_norm": 3.3610353469848633, + "learning_rate": 0.00023000496154800294, + "loss": 0.5348, + "step": 2393 + }, + { + "epoch": 0.84, + "grad_norm": 2.9858109951019287, + "learning_rate": 0.00022996774993798061, + "loss": 0.2315, + "step": 2394 + }, + { + "epoch": 0.84, + "grad_norm": 6.193589210510254, + "learning_rate": 0.00022993053832795832, + "loss": 3.4532, + "step": 2395 + }, + { + "epoch": 0.84, + "grad_norm": 6.4159698486328125, + "learning_rate": 0.00022989332671793597, + "loss": 1.3103, + "step": 2396 + }, + { + "epoch": 0.84, + "grad_norm": 2.0241963863372803, + "learning_rate": 0.00022985611510791364, + "loss": 0.1741, + "step": 2397 + }, + { + "epoch": 0.84, + "grad_norm": 4.740751266479492, + "learning_rate": 0.00022981890349789134, + "loss": 0.7566, + "step": 2398 + }, + { + "epoch": 0.84, + "grad_norm": 2.657395362854004, + "learning_rate": 0.000229781691887869, + "loss": 0.4493, + "step": 2399 + }, + { + "epoch": 0.84, + "grad_norm": 3.8431265354156494, + "learning_rate": 0.00022974448027784667, + "loss": 0.6928, + "step": 2400 + }, + { + "epoch": 0.84, + "eval_loss": 0.6465885639190674, + "eval_runtime": 50.1998, + "eval_samples_per_second": 43.187, + "eval_steps_per_second": 10.797, + "eval_wer": 0.5759384189586577, + "step": 2400 + }, + { + "epoch": 0.84, + "grad_norm": 2.204583168029785, + "learning_rate": 0.00022970726866782432, + "loss": 0.8461, + "step": 2401 + }, + { + "epoch": 0.84, + "grad_norm": 2.2085318565368652, + "learning_rate": 0.00022967005705780202, + "loss": 0.7323, + "step": 2402 + }, + { + "epoch": 0.84, + "grad_norm": 2.1746833324432373, + "learning_rate": 0.0002296328454477797, + "loss": 0.5866, + "step": 2403 + }, + { + "epoch": 0.84, + "grad_norm": 2.1107888221740723, + "learning_rate": 0.00022959563383775734, + "loss": 0.4081, + "step": 2404 + }, + { + "epoch": 0.84, + "grad_norm": 3.436554193496704, + "learning_rate": 0.00022955842222773505, + "loss": 0.5447, + "step": 2405 + }, + { + "epoch": 0.84, + "grad_norm": 1.6409112215042114, + "learning_rate": 0.0002295212106177127, + "loss": 0.4951, + "step": 2406 + }, + { + "epoch": 0.84, + "grad_norm": 2.4227728843688965, + "learning_rate": 0.00022948399900769037, + "loss": 1.4331, + "step": 2407 + }, + { + "epoch": 0.84, + "grad_norm": 1.892978549003601, + "learning_rate": 0.00022944678739766808, + "loss": 0.3022, + "step": 2408 + }, + { + "epoch": 0.84, + "grad_norm": 3.203372001647949, + "learning_rate": 0.00022940957578764572, + "loss": 0.5053, + "step": 2409 + }, + { + "epoch": 0.84, + "grad_norm": 4.97529411315918, + "learning_rate": 0.0002293723641776234, + "loss": 2.5665, + "step": 2410 + }, + { + "epoch": 0.84, + "grad_norm": 3.133910894393921, + "learning_rate": 0.00022933515256760105, + "loss": 0.5585, + "step": 2411 + }, + { + "epoch": 0.85, + "grad_norm": 2.481238842010498, + "learning_rate": 0.00022929794095757875, + "loss": 0.7493, + "step": 2412 + }, + { + "epoch": 0.85, + "grad_norm": 2.110828399658203, + "learning_rate": 0.00022926072934755643, + "loss": 0.6066, + "step": 2413 + }, + { + "epoch": 0.85, + "grad_norm": 2.420081377029419, + "learning_rate": 0.00022922351773753408, + "loss": 0.4951, + "step": 2414 + }, + { + "epoch": 0.85, + "grad_norm": 4.824462890625, + "learning_rate": 0.00022918630612751175, + "loss": 0.6661, + "step": 2415 + }, + { + "epoch": 0.85, + "grad_norm": 3.048468828201294, + "learning_rate": 0.00022914909451748945, + "loss": 0.6657, + "step": 2416 + }, + { + "epoch": 0.85, + "grad_norm": 5.503145217895508, + "learning_rate": 0.0002291118829074671, + "loss": 0.4398, + "step": 2417 + }, + { + "epoch": 0.85, + "grad_norm": 2.5302720069885254, + "learning_rate": 0.00022907467129744478, + "loss": 0.4753, + "step": 2418 + }, + { + "epoch": 0.85, + "grad_norm": 1.1612811088562012, + "learning_rate": 0.00022903745968742245, + "loss": 0.143, + "step": 2419 + }, + { + "epoch": 0.85, + "grad_norm": 8.667667388916016, + "learning_rate": 0.00022900024807740013, + "loss": 0.6554, + "step": 2420 + }, + { + "epoch": 0.85, + "grad_norm": 4.002796649932861, + "learning_rate": 0.0002289630364673778, + "loss": 0.5772, + "step": 2421 + }, + { + "epoch": 0.85, + "grad_norm": 6.211899280548096, + "learning_rate": 0.00022892582485735546, + "loss": 1.3073, + "step": 2422 + }, + { + "epoch": 0.85, + "grad_norm": 3.510040521621704, + "learning_rate": 0.00022888861324733316, + "loss": 0.6746, + "step": 2423 + }, + { + "epoch": 0.85, + "grad_norm": 8.804295539855957, + "learning_rate": 0.0002288514016373108, + "loss": 2.8085, + "step": 2424 + }, + { + "epoch": 0.85, + "grad_norm": 1.8787487745285034, + "learning_rate": 0.00022881419002728848, + "loss": 0.3381, + "step": 2425 + }, + { + "epoch": 0.85, + "grad_norm": 2.092188596725464, + "learning_rate": 0.00022877697841726619, + "loss": 0.9032, + "step": 2426 + }, + { + "epoch": 0.85, + "grad_norm": 3.2084264755249023, + "learning_rate": 0.00022873976680724383, + "loss": 0.6057, + "step": 2427 + }, + { + "epoch": 0.85, + "grad_norm": 3.3689112663269043, + "learning_rate": 0.0002287025551972215, + "loss": 0.9147, + "step": 2428 + }, + { + "epoch": 0.85, + "grad_norm": 1.9687527418136597, + "learning_rate": 0.0002286653435871992, + "loss": 0.5522, + "step": 2429 + }, + { + "epoch": 0.85, + "grad_norm": 2.207888603210449, + "learning_rate": 0.00022862813197717686, + "loss": 0.6443, + "step": 2430 + }, + { + "epoch": 0.85, + "grad_norm": 2.4033074378967285, + "learning_rate": 0.00022859092036715454, + "loss": 0.5481, + "step": 2431 + }, + { + "epoch": 0.85, + "grad_norm": 1.924738883972168, + "learning_rate": 0.00022855370875713219, + "loss": 0.2973, + "step": 2432 + }, + { + "epoch": 0.85, + "grad_norm": 2.1252126693725586, + "learning_rate": 0.0002285164971471099, + "loss": 0.3515, + "step": 2433 + }, + { + "epoch": 0.85, + "grad_norm": 3.03769850730896, + "learning_rate": 0.00022847928553708756, + "loss": 0.5084, + "step": 2434 + }, + { + "epoch": 0.85, + "grad_norm": 2.743135929107666, + "learning_rate": 0.0002284420739270652, + "loss": 0.7894, + "step": 2435 + }, + { + "epoch": 0.85, + "grad_norm": 2.572986602783203, + "learning_rate": 0.00022840486231704292, + "loss": 0.6504, + "step": 2436 + }, + { + "epoch": 0.85, + "grad_norm": 2.5167198181152344, + "learning_rate": 0.00022836765070702056, + "loss": 0.479, + "step": 2437 + }, + { + "epoch": 0.85, + "grad_norm": 2.302143096923828, + "learning_rate": 0.00022833043909699824, + "loss": 0.6252, + "step": 2438 + }, + { + "epoch": 0.85, + "grad_norm": 1.8964296579360962, + "learning_rate": 0.00022829322748697592, + "loss": 0.2638, + "step": 2439 + }, + { + "epoch": 0.85, + "grad_norm": 1.6442536115646362, + "learning_rate": 0.0002282560158769536, + "loss": 0.1767, + "step": 2440 + }, + { + "epoch": 0.86, + "grad_norm": 2.2055866718292236, + "learning_rate": 0.00022821880426693127, + "loss": 0.2613, + "step": 2441 + }, + { + "epoch": 0.86, + "grad_norm": 2.640995979309082, + "learning_rate": 0.00022818159265690894, + "loss": 0.5495, + "step": 2442 + }, + { + "epoch": 0.86, + "grad_norm": 2.59956955909729, + "learning_rate": 0.0002281443810468866, + "loss": 0.5332, + "step": 2443 + }, + { + "epoch": 0.86, + "grad_norm": 4.761136531829834, + "learning_rate": 0.0002281071694368643, + "loss": 1.1939, + "step": 2444 + }, + { + "epoch": 0.86, + "grad_norm": 3.0465445518493652, + "learning_rate": 0.00022806995782684194, + "loss": 0.3178, + "step": 2445 + }, + { + "epoch": 0.86, + "grad_norm": 2.101271390914917, + "learning_rate": 0.00022803274621681962, + "loss": 0.5765, + "step": 2446 + }, + { + "epoch": 0.86, + "grad_norm": 3.0883326530456543, + "learning_rate": 0.00022799553460679732, + "loss": 0.2466, + "step": 2447 + }, + { + "epoch": 0.86, + "grad_norm": 2.459491491317749, + "learning_rate": 0.00022795832299677497, + "loss": 0.3144, + "step": 2448 + }, + { + "epoch": 0.86, + "grad_norm": 5.362196445465088, + "learning_rate": 0.00022792111138675265, + "loss": 0.6192, + "step": 2449 + }, + { + "epoch": 0.86, + "grad_norm": 3.9791548252105713, + "learning_rate": 0.0002278838997767303, + "loss": 0.548, + "step": 2450 + }, + { + "epoch": 0.86, + "grad_norm": 2.309065103530884, + "learning_rate": 0.000227846688166708, + "loss": 1.0592, + "step": 2451 + }, + { + "epoch": 0.86, + "grad_norm": 3.0640029907226562, + "learning_rate": 0.00022780947655668567, + "loss": 1.2627, + "step": 2452 + }, + { + "epoch": 0.86, + "grad_norm": 2.2788937091827393, + "learning_rate": 0.00022777226494666332, + "loss": 0.7759, + "step": 2453 + }, + { + "epoch": 0.86, + "grad_norm": 1.9871941804885864, + "learning_rate": 0.00022773505333664103, + "loss": 0.7314, + "step": 2454 + }, + { + "epoch": 0.86, + "grad_norm": 2.758849620819092, + "learning_rate": 0.0002276978417266187, + "loss": 0.5533, + "step": 2455 + }, + { + "epoch": 0.86, + "grad_norm": 2.1479053497314453, + "learning_rate": 0.00022766063011659635, + "loss": 0.6265, + "step": 2456 + }, + { + "epoch": 0.86, + "grad_norm": 2.5701639652252197, + "learning_rate": 0.00022762341850657405, + "loss": 0.6011, + "step": 2457 + }, + { + "epoch": 0.86, + "grad_norm": 2.6615350246429443, + "learning_rate": 0.0002275862068965517, + "loss": 0.6043, + "step": 2458 + }, + { + "epoch": 0.86, + "grad_norm": 1.9056451320648193, + "learning_rate": 0.00022754899528652938, + "loss": 0.4244, + "step": 2459 + }, + { + "epoch": 0.86, + "grad_norm": 2.3481318950653076, + "learning_rate": 0.00022751178367650705, + "loss": 0.3759, + "step": 2460 + }, + { + "epoch": 0.86, + "grad_norm": 1.501145601272583, + "learning_rate": 0.00022747457206648473, + "loss": 0.3211, + "step": 2461 + }, + { + "epoch": 0.86, + "grad_norm": 1.2316216230392456, + "learning_rate": 0.0002274373604564624, + "loss": 0.213, + "step": 2462 + }, + { + "epoch": 0.86, + "grad_norm": 3.9179749488830566, + "learning_rate": 0.00022740014884644005, + "loss": 0.5273, + "step": 2463 + }, + { + "epoch": 0.86, + "grad_norm": 2.6479461193084717, + "learning_rate": 0.00022736293723641773, + "loss": 0.3405, + "step": 2464 + }, + { + "epoch": 0.86, + "grad_norm": 3.821375846862793, + "learning_rate": 0.00022732572562639543, + "loss": 0.4056, + "step": 2465 + }, + { + "epoch": 0.86, + "grad_norm": 3.5695273876190186, + "learning_rate": 0.00022728851401637308, + "loss": 0.65, + "step": 2466 + }, + { + "epoch": 0.86, + "grad_norm": 3.268481969833374, + "learning_rate": 0.00022725130240635076, + "loss": 0.4224, + "step": 2467 + }, + { + "epoch": 0.86, + "grad_norm": 3.3104357719421387, + "learning_rate": 0.0002272140907963284, + "loss": 0.5315, + "step": 2468 + }, + { + "epoch": 0.87, + "grad_norm": 3.089568853378296, + "learning_rate": 0.0002271768791863061, + "loss": 0.5053, + "step": 2469 + }, + { + "epoch": 0.87, + "grad_norm": 4.867792129516602, + "learning_rate": 0.00022713966757628378, + "loss": 1.8816, + "step": 2470 + }, + { + "epoch": 0.87, + "grad_norm": 2.6889870166778564, + "learning_rate": 0.00022710245596626143, + "loss": 0.3397, + "step": 2471 + }, + { + "epoch": 0.87, + "grad_norm": 5.504836559295654, + "learning_rate": 0.00022706524435623914, + "loss": 0.8586, + "step": 2472 + }, + { + "epoch": 0.87, + "grad_norm": 3.099041700363159, + "learning_rate": 0.0002270280327462168, + "loss": 0.3921, + "step": 2473 + }, + { + "epoch": 0.87, + "grad_norm": 3.710519790649414, + "learning_rate": 0.00022699082113619446, + "loss": 0.3917, + "step": 2474 + }, + { + "epoch": 0.87, + "grad_norm": 2.710888624191284, + "learning_rate": 0.00022695360952617216, + "loss": 0.1841, + "step": 2475 + }, + { + "epoch": 0.87, + "grad_norm": 2.646345376968384, + "learning_rate": 0.0002269163979161498, + "loss": 0.9153, + "step": 2476 + }, + { + "epoch": 0.87, + "grad_norm": 2.3178653717041016, + "learning_rate": 0.0002268791863061275, + "loss": 0.6073, + "step": 2477 + }, + { + "epoch": 0.87, + "grad_norm": 32.63193130493164, + "learning_rate": 0.0002268419746961052, + "loss": 6.8604, + "step": 2478 + }, + { + "epoch": 0.87, + "grad_norm": 2.098633289337158, + "learning_rate": 0.00022680476308608284, + "loss": 0.6981, + "step": 2479 + }, + { + "epoch": 0.87, + "grad_norm": 1.7247930765151978, + "learning_rate": 0.00022676755147606052, + "loss": 0.3698, + "step": 2480 + }, + { + "epoch": 0.87, + "grad_norm": 3.1738440990448, + "learning_rate": 0.00022673033986603816, + "loss": 0.5705, + "step": 2481 + }, + { + "epoch": 0.87, + "grad_norm": 3.0334391593933105, + "learning_rate": 0.00022669312825601587, + "loss": 0.3283, + "step": 2482 + }, + { + "epoch": 0.87, + "grad_norm": 1.9234660863876343, + "learning_rate": 0.00022665591664599354, + "loss": 0.633, + "step": 2483 + }, + { + "epoch": 0.87, + "grad_norm": 2.026090621948242, + "learning_rate": 0.0002266187050359712, + "loss": 0.3883, + "step": 2484 + }, + { + "epoch": 0.87, + "grad_norm": 3.060999631881714, + "learning_rate": 0.00022658149342594887, + "loss": 0.6174, + "step": 2485 + }, + { + "epoch": 0.87, + "grad_norm": 3.9415786266326904, + "learning_rate": 0.00022654428181592657, + "loss": 0.7415, + "step": 2486 + }, + { + "epoch": 0.87, + "grad_norm": 3.4952282905578613, + "learning_rate": 0.00022650707020590422, + "loss": 0.7448, + "step": 2487 + }, + { + "epoch": 0.87, + "grad_norm": 4.7627339363098145, + "learning_rate": 0.0002264698585958819, + "loss": 0.86, + "step": 2488 + }, + { + "epoch": 0.87, + "grad_norm": 2.7875306606292725, + "learning_rate": 0.00022643264698585954, + "loss": 0.6431, + "step": 2489 + }, + { + "epoch": 0.87, + "grad_norm": 4.5709052085876465, + "learning_rate": 0.00022639543537583725, + "loss": 0.4823, + "step": 2490 + }, + { + "epoch": 0.87, + "grad_norm": 4.550630569458008, + "learning_rate": 0.00022635822376581492, + "loss": 0.951, + "step": 2491 + }, + { + "epoch": 0.87, + "grad_norm": 2.399900436401367, + "learning_rate": 0.00022632101215579257, + "loss": 0.3805, + "step": 2492 + }, + { + "epoch": 0.87, + "grad_norm": 2.55071759223938, + "learning_rate": 0.00022628380054577027, + "loss": 0.3896, + "step": 2493 + }, + { + "epoch": 0.87, + "grad_norm": 3.296781063079834, + "learning_rate": 0.00022624658893574792, + "loss": 0.3268, + "step": 2494 + }, + { + "epoch": 0.87, + "grad_norm": 3.0526139736175537, + "learning_rate": 0.0002262093773257256, + "loss": 0.6264, + "step": 2495 + }, + { + "epoch": 0.87, + "grad_norm": 4.58652400970459, + "learning_rate": 0.0002261721657157033, + "loss": 0.5402, + "step": 2496 + }, + { + "epoch": 0.87, + "grad_norm": 6.031370162963867, + "learning_rate": 0.00022613495410568095, + "loss": 1.0957, + "step": 2497 + }, + { + "epoch": 0.88, + "grad_norm": 3.1321797370910645, + "learning_rate": 0.00022609774249565863, + "loss": 0.4783, + "step": 2498 + }, + { + "epoch": 0.88, + "grad_norm": 3.8745222091674805, + "learning_rate": 0.00022606053088563633, + "loss": 0.3831, + "step": 2499 + }, + { + "epoch": 0.88, + "grad_norm": 3.6690893173217773, + "learning_rate": 0.00022602331927561398, + "loss": 0.6712, + "step": 2500 + }, + { + "epoch": 0.88, + "eval_loss": 0.5969449877738953, + "eval_runtime": 50.5023, + "eval_samples_per_second": 42.929, + "eval_steps_per_second": 10.732, + "eval_wer": 0.5697111226431413, + "step": 2500 + }, + { + "epoch": 0.88, + "grad_norm": 3.2524619102478027, + "learning_rate": 0.00022598610766559165, + "loss": 1.1098, + "step": 2501 + }, + { + "epoch": 0.88, + "grad_norm": 3.3563032150268555, + "learning_rate": 0.0002259488960555693, + "loss": 0.8505, + "step": 2502 + }, + { + "epoch": 0.88, + "grad_norm": 1.617898941040039, + "learning_rate": 0.000225911684445547, + "loss": 0.5266, + "step": 2503 + }, + { + "epoch": 0.88, + "grad_norm": 1.1156997680664062, + "learning_rate": 0.00022587447283552468, + "loss": 0.285, + "step": 2504 + }, + { + "epoch": 0.88, + "grad_norm": 2.1009535789489746, + "learning_rate": 0.00022583726122550233, + "loss": 0.4942, + "step": 2505 + }, + { + "epoch": 0.88, + "grad_norm": 2.473078966140747, + "learning_rate": 0.00022580004961548, + "loss": 0.5901, + "step": 2506 + }, + { + "epoch": 0.88, + "grad_norm": 2.716780662536621, + "learning_rate": 0.00022576283800545768, + "loss": 0.834, + "step": 2507 + }, + { + "epoch": 0.88, + "grad_norm": 2.1853911876678467, + "learning_rate": 0.00022572562639543536, + "loss": 0.7706, + "step": 2508 + }, + { + "epoch": 0.88, + "grad_norm": 4.181541919708252, + "learning_rate": 0.00022568841478541303, + "loss": 0.9358, + "step": 2509 + }, + { + "epoch": 0.88, + "grad_norm": 3.0345635414123535, + "learning_rate": 0.00022565120317539068, + "loss": 0.6332, + "step": 2510 + }, + { + "epoch": 0.88, + "grad_norm": 2.495234966278076, + "learning_rate": 0.00022561399156536838, + "loss": 0.5035, + "step": 2511 + }, + { + "epoch": 0.88, + "grad_norm": 2.819892168045044, + "learning_rate": 0.00022557677995534603, + "loss": 0.4648, + "step": 2512 + }, + { + "epoch": 0.88, + "grad_norm": 2.7033729553222656, + "learning_rate": 0.0002255395683453237, + "loss": 0.5328, + "step": 2513 + }, + { + "epoch": 0.88, + "grad_norm": 1.782414197921753, + "learning_rate": 0.0002255023567353014, + "loss": 0.5677, + "step": 2514 + }, + { + "epoch": 0.88, + "grad_norm": 2.6469204425811768, + "learning_rate": 0.00022546514512527906, + "loss": 0.6008, + "step": 2515 + }, + { + "epoch": 0.88, + "grad_norm": 3.3379626274108887, + "learning_rate": 0.00022542793351525674, + "loss": 0.621, + "step": 2516 + }, + { + "epoch": 0.88, + "grad_norm": 3.2679710388183594, + "learning_rate": 0.00022539072190523444, + "loss": 0.5374, + "step": 2517 + }, + { + "epoch": 0.88, + "grad_norm": 1.7875244617462158, + "learning_rate": 0.0002253535102952121, + "loss": 0.2722, + "step": 2518 + }, + { + "epoch": 0.88, + "grad_norm": 1.986699104309082, + "learning_rate": 0.00022531629868518976, + "loss": 0.4861, + "step": 2519 + }, + { + "epoch": 0.88, + "grad_norm": 4.055794715881348, + "learning_rate": 0.0002252790870751674, + "loss": 0.8421, + "step": 2520 + }, + { + "epoch": 0.88, + "grad_norm": 3.854576587677002, + "learning_rate": 0.00022524187546514511, + "loss": 0.2723, + "step": 2521 + }, + { + "epoch": 0.88, + "grad_norm": 4.405861854553223, + "learning_rate": 0.0002252046638551228, + "loss": 0.6845, + "step": 2522 + }, + { + "epoch": 0.88, + "grad_norm": 7.5870161056518555, + "learning_rate": 0.00022516745224510044, + "loss": 2.0838, + "step": 2523 + }, + { + "epoch": 0.88, + "grad_norm": 2.9509167671203613, + "learning_rate": 0.00022513024063507814, + "loss": 0.2952, + "step": 2524 + }, + { + "epoch": 0.88, + "grad_norm": 2.6641058921813965, + "learning_rate": 0.0002250930290250558, + "loss": 0.4337, + "step": 2525 + }, + { + "epoch": 0.89, + "grad_norm": 5.685914993286133, + "learning_rate": 0.00022505581741503347, + "loss": 1.4865, + "step": 2526 + }, + { + "epoch": 0.89, + "grad_norm": 2.8381335735321045, + "learning_rate": 0.00022501860580501114, + "loss": 0.8048, + "step": 2527 + }, + { + "epoch": 0.89, + "grad_norm": 2.5004918575286865, + "learning_rate": 0.00022498139419498882, + "loss": 0.8519, + "step": 2528 + }, + { + "epoch": 0.89, + "grad_norm": 2.1345882415771484, + "learning_rate": 0.0002249441825849665, + "loss": 0.6214, + "step": 2529 + }, + { + "epoch": 0.89, + "grad_norm": 3.871791362762451, + "learning_rate": 0.00022490697097494417, + "loss": 1.7872, + "step": 2530 + }, + { + "epoch": 0.89, + "grad_norm": 2.4108870029449463, + "learning_rate": 0.00022486975936492182, + "loss": 0.4127, + "step": 2531 + }, + { + "epoch": 0.89, + "grad_norm": 2.154738426208496, + "learning_rate": 0.00022483254775489952, + "loss": 0.3094, + "step": 2532 + }, + { + "epoch": 0.89, + "grad_norm": 1.8952206373214722, + "learning_rate": 0.00022479533614487717, + "loss": 0.4161, + "step": 2533 + }, + { + "epoch": 0.89, + "grad_norm": 2.7930335998535156, + "learning_rate": 0.00022475812453485485, + "loss": 0.7349, + "step": 2534 + }, + { + "epoch": 0.89, + "grad_norm": 2.5914433002471924, + "learning_rate": 0.00022472091292483255, + "loss": 0.6303, + "step": 2535 + }, + { + "epoch": 0.89, + "grad_norm": 2.9063329696655273, + "learning_rate": 0.0002246837013148102, + "loss": 0.7971, + "step": 2536 + }, + { + "epoch": 0.89, + "grad_norm": 2.814877986907959, + "learning_rate": 0.00022464648970478787, + "loss": 0.6404, + "step": 2537 + }, + { + "epoch": 0.89, + "grad_norm": 2.2921807765960693, + "learning_rate": 0.00022460927809476552, + "loss": 0.6443, + "step": 2538 + }, + { + "epoch": 0.89, + "grad_norm": 3.2125370502471924, + "learning_rate": 0.00022457206648474322, + "loss": 1.599, + "step": 2539 + }, + { + "epoch": 0.89, + "grad_norm": 5.036359786987305, + "learning_rate": 0.0002245348548747209, + "loss": 0.8502, + "step": 2540 + }, + { + "epoch": 0.89, + "grad_norm": 2.4251275062561035, + "learning_rate": 0.00022449764326469855, + "loss": 0.3993, + "step": 2541 + }, + { + "epoch": 0.89, + "grad_norm": 3.6843395233154297, + "learning_rate": 0.00022446043165467625, + "loss": 1.2595, + "step": 2542 + }, + { + "epoch": 0.89, + "grad_norm": 1.9409282207489014, + "learning_rate": 0.00022442322004465393, + "loss": 0.213, + "step": 2543 + }, + { + "epoch": 0.89, + "grad_norm": 2.4829013347625732, + "learning_rate": 0.00022438600843463158, + "loss": 0.4155, + "step": 2544 + }, + { + "epoch": 0.89, + "grad_norm": 12.528837203979492, + "learning_rate": 0.00022434879682460928, + "loss": 2.7458, + "step": 2545 + }, + { + "epoch": 0.89, + "grad_norm": 3.5831103324890137, + "learning_rate": 0.00022431158521458693, + "loss": 0.9758, + "step": 2546 + }, + { + "epoch": 0.89, + "grad_norm": 3.7440414428710938, + "learning_rate": 0.0002242743736045646, + "loss": 0.8341, + "step": 2547 + }, + { + "epoch": 0.89, + "grad_norm": 3.1389410495758057, + "learning_rate": 0.00022423716199454228, + "loss": 0.3766, + "step": 2548 + }, + { + "epoch": 0.89, + "grad_norm": 3.8604750633239746, + "learning_rate": 0.00022419995038451996, + "loss": 0.4142, + "step": 2549 + }, + { + "epoch": 0.89, + "grad_norm": 5.646600246429443, + "learning_rate": 0.00022416273877449763, + "loss": 0.4988, + "step": 2550 + }, + { + "epoch": 0.89, + "grad_norm": 2.2489912509918213, + "learning_rate": 0.00022412552716447528, + "loss": 1.1134, + "step": 2551 + }, + { + "epoch": 0.89, + "grad_norm": 1.9654885530471802, + "learning_rate": 0.00022408831555445296, + "loss": 0.4167, + "step": 2552 + }, + { + "epoch": 0.89, + "grad_norm": 2.2615177631378174, + "learning_rate": 0.00022405110394443066, + "loss": 0.6725, + "step": 2553 + }, + { + "epoch": 0.89, + "grad_norm": 5.323215961456299, + "learning_rate": 0.0002240138923344083, + "loss": 0.9081, + "step": 2554 + }, + { + "epoch": 0.9, + "grad_norm": 2.1853933334350586, + "learning_rate": 0.00022397668072438598, + "loss": 0.5613, + "step": 2555 + }, + { + "epoch": 0.9, + "grad_norm": 2.7656657695770264, + "learning_rate": 0.00022393946911436363, + "loss": 0.4834, + "step": 2556 + }, + { + "epoch": 0.9, + "grad_norm": 2.5364134311676025, + "learning_rate": 0.00022390225750434133, + "loss": 0.8214, + "step": 2557 + }, + { + "epoch": 0.9, + "grad_norm": 3.2517011165618896, + "learning_rate": 0.000223865045894319, + "loss": 0.6773, + "step": 2558 + }, + { + "epoch": 0.9, + "grad_norm": 6.1538166999816895, + "learning_rate": 0.00022382783428429666, + "loss": 2.0649, + "step": 2559 + }, + { + "epoch": 0.9, + "grad_norm": 3.786186695098877, + "learning_rate": 0.00022379062267427436, + "loss": 0.6274, + "step": 2560 + }, + { + "epoch": 0.9, + "grad_norm": 2.8489811420440674, + "learning_rate": 0.00022375341106425204, + "loss": 0.5626, + "step": 2561 + }, + { + "epoch": 0.9, + "grad_norm": 14.115158081054688, + "learning_rate": 0.0002237161994542297, + "loss": 3.3753, + "step": 2562 + }, + { + "epoch": 0.9, + "grad_norm": 2.628080129623413, + "learning_rate": 0.0002236789878442074, + "loss": 0.333, + "step": 2563 + }, + { + "epoch": 0.9, + "grad_norm": 2.309471607208252, + "learning_rate": 0.00022364177623418504, + "loss": 0.3252, + "step": 2564 + }, + { + "epoch": 0.9, + "grad_norm": 4.064052104949951, + "learning_rate": 0.00022360456462416271, + "loss": 0.7157, + "step": 2565 + }, + { + "epoch": 0.9, + "grad_norm": 1.8947696685791016, + "learning_rate": 0.00022356735301414042, + "loss": 0.3062, + "step": 2566 + }, + { + "epoch": 0.9, + "grad_norm": 3.9529271125793457, + "learning_rate": 0.00022353014140411807, + "loss": 0.3285, + "step": 2567 + }, + { + "epoch": 0.9, + "grad_norm": 2.573422431945801, + "learning_rate": 0.00022349292979409574, + "loss": 0.462, + "step": 2568 + }, + { + "epoch": 0.9, + "grad_norm": 3.323622703552246, + "learning_rate": 0.0002234557181840734, + "loss": 0.4158, + "step": 2569 + }, + { + "epoch": 0.9, + "grad_norm": 3.2690248489379883, + "learning_rate": 0.0002234185065740511, + "loss": 1.531, + "step": 2570 + }, + { + "epoch": 0.9, + "grad_norm": 1.335678219795227, + "learning_rate": 0.00022338129496402877, + "loss": 0.1792, + "step": 2571 + }, + { + "epoch": 0.9, + "grad_norm": 3.555920362472534, + "learning_rate": 0.00022334408335400642, + "loss": 0.3438, + "step": 2572 + }, + { + "epoch": 0.9, + "grad_norm": 1.97938072681427, + "learning_rate": 0.0002233068717439841, + "loss": 0.1992, + "step": 2573 + }, + { + "epoch": 0.9, + "grad_norm": 4.451127529144287, + "learning_rate": 0.0002232696601339618, + "loss": 0.9753, + "step": 2574 + }, + { + "epoch": 0.9, + "grad_norm": 3.8145601749420166, + "learning_rate": 0.00022323244852393944, + "loss": 0.5944, + "step": 2575 + }, + { + "epoch": 0.9, + "grad_norm": 3.062714099884033, + "learning_rate": 0.00022319523691391712, + "loss": 1.197, + "step": 2576 + }, + { + "epoch": 0.9, + "grad_norm": 2.360086441040039, + "learning_rate": 0.00022315802530389477, + "loss": 0.4267, + "step": 2577 + }, + { + "epoch": 0.9, + "grad_norm": 3.727750301361084, + "learning_rate": 0.00022312081369387247, + "loss": 1.1279, + "step": 2578 + }, + { + "epoch": 0.9, + "grad_norm": 1.5544394254684448, + "learning_rate": 0.00022308360208385015, + "loss": 0.5312, + "step": 2579 + }, + { + "epoch": 0.9, + "grad_norm": 2.51163387298584, + "learning_rate": 0.0002230463904738278, + "loss": 0.3286, + "step": 2580 + }, + { + "epoch": 0.9, + "grad_norm": 1.9520326852798462, + "learning_rate": 0.0002230091788638055, + "loss": 0.6118, + "step": 2581 + }, + { + "epoch": 0.9, + "grad_norm": 1.6972302198410034, + "learning_rate": 0.00022297196725378315, + "loss": 0.289, + "step": 2582 + }, + { + "epoch": 0.91, + "grad_norm": 2.5757126808166504, + "learning_rate": 0.00022293475564376082, + "loss": 0.4856, + "step": 2583 + }, + { + "epoch": 0.91, + "grad_norm": 2.543616533279419, + "learning_rate": 0.00022289754403373853, + "loss": 0.5646, + "step": 2584 + }, + { + "epoch": 0.91, + "grad_norm": 3.380565643310547, + "learning_rate": 0.00022286033242371618, + "loss": 0.5098, + "step": 2585 + }, + { + "epoch": 0.91, + "grad_norm": 1.8600701093673706, + "learning_rate": 0.00022282312081369385, + "loss": 0.4146, + "step": 2586 + }, + { + "epoch": 0.91, + "grad_norm": 1.9469465017318726, + "learning_rate": 0.00022278590920367155, + "loss": 0.3405, + "step": 2587 + }, + { + "epoch": 0.91, + "grad_norm": 6.387368679046631, + "learning_rate": 0.0002227486975936492, + "loss": 2.7503, + "step": 2588 + }, + { + "epoch": 0.91, + "grad_norm": 2.1771650314331055, + "learning_rate": 0.00022271148598362688, + "loss": 0.2495, + "step": 2589 + }, + { + "epoch": 0.91, + "grad_norm": 3.2582592964172363, + "learning_rate": 0.00022267427437360453, + "loss": 0.7721, + "step": 2590 + }, + { + "epoch": 0.91, + "grad_norm": 4.26287317276001, + "learning_rate": 0.00022263706276358223, + "loss": 0.3901, + "step": 2591 + }, + { + "epoch": 0.91, + "grad_norm": 4.030311584472656, + "learning_rate": 0.0002225998511535599, + "loss": 1.1871, + "step": 2592 + }, + { + "epoch": 0.91, + "grad_norm": 3.2739689350128174, + "learning_rate": 0.00022256263954353756, + "loss": 0.6562, + "step": 2593 + }, + { + "epoch": 0.91, + "grad_norm": 2.621596574783325, + "learning_rate": 0.00022252542793351523, + "loss": 0.3165, + "step": 2594 + }, + { + "epoch": 0.91, + "grad_norm": 9.08333683013916, + "learning_rate": 0.0002224882163234929, + "loss": 2.6412, + "step": 2595 + }, + { + "epoch": 0.91, + "grad_norm": 2.8135602474212646, + "learning_rate": 0.00022245100471347058, + "loss": 0.5285, + "step": 2596 + }, + { + "epoch": 0.91, + "grad_norm": 2.7928903102874756, + "learning_rate": 0.00022241379310344826, + "loss": 0.3371, + "step": 2597 + }, + { + "epoch": 0.91, + "grad_norm": 1.8602352142333984, + "learning_rate": 0.0002223765814934259, + "loss": 0.2245, + "step": 2598 + }, + { + "epoch": 0.91, + "grad_norm": 3.1992337703704834, + "learning_rate": 0.0002223393698834036, + "loss": 0.5035, + "step": 2599 + }, + { + "epoch": 0.91, + "grad_norm": 7.390182971954346, + "learning_rate": 0.00022230215827338126, + "loss": 0.8551, + "step": 2600 + }, + { + "epoch": 0.91, + "eval_loss": 0.6041048169136047, + "eval_runtime": 51.0519, + "eval_samples_per_second": 42.467, + "eval_steps_per_second": 10.617, + "eval_wer": 0.5422072305829442, + "step": 2600 + }, + { + "epoch": 0.91, + "grad_norm": 2.8501031398773193, + "learning_rate": 0.00022226494666335893, + "loss": 0.9814, + "step": 2601 + }, + { + "epoch": 0.91, + "grad_norm": 3.3961238861083984, + "learning_rate": 0.00022222773505333664, + "loss": 1.0017, + "step": 2602 + }, + { + "epoch": 0.91, + "grad_norm": 3.359489917755127, + "learning_rate": 0.00022219052344331429, + "loss": 0.5428, + "step": 2603 + }, + { + "epoch": 0.91, + "grad_norm": 3.420226573944092, + "learning_rate": 0.00022215331183329196, + "loss": 0.854, + "step": 2604 + }, + { + "epoch": 0.91, + "grad_norm": 1.7213860750198364, + "learning_rate": 0.00022211610022326966, + "loss": 0.2745, + "step": 2605 + }, + { + "epoch": 0.91, + "grad_norm": 3.067789077758789, + "learning_rate": 0.0002220788886132473, + "loss": 0.5725, + "step": 2606 + }, + { + "epoch": 0.91, + "grad_norm": 2.9434151649475098, + "learning_rate": 0.000222041677003225, + "loss": 0.4392, + "step": 2607 + }, + { + "epoch": 0.91, + "grad_norm": 3.986076593399048, + "learning_rate": 0.00022200446539320264, + "loss": 0.5028, + "step": 2608 + }, + { + "epoch": 0.91, + "grad_norm": 3.050105094909668, + "learning_rate": 0.00022196725378318034, + "loss": 0.506, + "step": 2609 + }, + { + "epoch": 0.91, + "grad_norm": 8.282485961914062, + "learning_rate": 0.00022193004217315802, + "loss": 2.1749, + "step": 2610 + }, + { + "epoch": 0.91, + "grad_norm": 4.559239864349365, + "learning_rate": 0.00022189283056313567, + "loss": 0.4975, + "step": 2611 + }, + { + "epoch": 0.92, + "grad_norm": 2.908665895462036, + "learning_rate": 0.00022185561895311337, + "loss": 0.3902, + "step": 2612 + }, + { + "epoch": 0.92, + "grad_norm": 2.392807722091675, + "learning_rate": 0.00022181840734309102, + "loss": 0.2691, + "step": 2613 + }, + { + "epoch": 0.92, + "grad_norm": 3.977257251739502, + "learning_rate": 0.0002217811957330687, + "loss": 0.5115, + "step": 2614 + }, + { + "epoch": 0.92, + "grad_norm": 2.195502758026123, + "learning_rate": 0.00022174398412304637, + "loss": 0.3884, + "step": 2615 + }, + { + "epoch": 0.92, + "grad_norm": 3.966074228286743, + "learning_rate": 0.00022170677251302404, + "loss": 0.7567, + "step": 2616 + }, + { + "epoch": 0.92, + "grad_norm": 1.6738837957382202, + "learning_rate": 0.00022166956090300172, + "loss": 0.238, + "step": 2617 + }, + { + "epoch": 0.92, + "grad_norm": 1.954295039176941, + "learning_rate": 0.0002216323492929794, + "loss": 0.4697, + "step": 2618 + }, + { + "epoch": 0.92, + "grad_norm": 2.3093600273132324, + "learning_rate": 0.00022159513768295704, + "loss": 0.2654, + "step": 2619 + }, + { + "epoch": 0.92, + "grad_norm": 9.452103614807129, + "learning_rate": 0.00022155792607293475, + "loss": 0.5994, + "step": 2620 + }, + { + "epoch": 0.92, + "grad_norm": 2.7643561363220215, + "learning_rate": 0.0002215207144629124, + "loss": 0.2038, + "step": 2621 + }, + { + "epoch": 0.92, + "grad_norm": 4.625439167022705, + "learning_rate": 0.00022148350285289007, + "loss": 0.6295, + "step": 2622 + }, + { + "epoch": 0.92, + "grad_norm": 6.1190996170043945, + "learning_rate": 0.00022144629124286777, + "loss": 0.4905, + "step": 2623 + }, + { + "epoch": 0.92, + "grad_norm": 4.208706378936768, + "learning_rate": 0.00022140907963284542, + "loss": 1.0413, + "step": 2624 + }, + { + "epoch": 0.92, + "grad_norm": 6.129053592681885, + "learning_rate": 0.0002213718680228231, + "loss": 0.58, + "step": 2625 + }, + { + "epoch": 0.92, + "grad_norm": 2.2219321727752686, + "learning_rate": 0.00022133465641280075, + "loss": 1.1687, + "step": 2626 + }, + { + "epoch": 0.92, + "grad_norm": 2.6231391429901123, + "learning_rate": 0.00022129744480277845, + "loss": 0.6352, + "step": 2627 + }, + { + "epoch": 0.92, + "grad_norm": 1.8433243036270142, + "learning_rate": 0.00022126023319275613, + "loss": 0.7305, + "step": 2628 + }, + { + "epoch": 0.92, + "grad_norm": 2.111185073852539, + "learning_rate": 0.00022122302158273378, + "loss": 0.4803, + "step": 2629 + }, + { + "epoch": 0.92, + "grad_norm": 2.3844072818756104, + "learning_rate": 0.00022118580997271148, + "loss": 0.5074, + "step": 2630 + }, + { + "epoch": 0.92, + "grad_norm": 1.9091522693634033, + "learning_rate": 0.00022114859836268915, + "loss": 0.6794, + "step": 2631 + }, + { + "epoch": 0.92, + "grad_norm": 2.5186028480529785, + "learning_rate": 0.0002211113867526668, + "loss": 0.6303, + "step": 2632 + }, + { + "epoch": 0.92, + "grad_norm": 3.9880521297454834, + "learning_rate": 0.0002210741751426445, + "loss": 0.3124, + "step": 2633 + }, + { + "epoch": 0.92, + "grad_norm": 1.2950042486190796, + "learning_rate": 0.00022103696353262215, + "loss": 0.1439, + "step": 2634 + }, + { + "epoch": 0.92, + "grad_norm": 1.8072837591171265, + "learning_rate": 0.00022099975192259983, + "loss": 0.1895, + "step": 2635 + }, + { + "epoch": 0.92, + "grad_norm": 2.449474334716797, + "learning_rate": 0.0002209625403125775, + "loss": 0.5064, + "step": 2636 + }, + { + "epoch": 0.92, + "grad_norm": 2.273451805114746, + "learning_rate": 0.00022092532870255518, + "loss": 0.7899, + "step": 2637 + }, + { + "epoch": 0.92, + "grad_norm": 3.689117193222046, + "learning_rate": 0.00022088811709253286, + "loss": 0.4124, + "step": 2638 + }, + { + "epoch": 0.92, + "grad_norm": 3.5043230056762695, + "learning_rate": 0.0002208509054825105, + "loss": 0.6174, + "step": 2639 + }, + { + "epoch": 0.93, + "grad_norm": 0.7596065998077393, + "learning_rate": 0.0002208136938724882, + "loss": 0.0876, + "step": 2640 + }, + { + "epoch": 0.93, + "grad_norm": 2.0988588333129883, + "learning_rate": 0.00022077648226246588, + "loss": 0.486, + "step": 2641 + }, + { + "epoch": 0.93, + "grad_norm": 3.2585132122039795, + "learning_rate": 0.00022073927065244353, + "loss": 0.7462, + "step": 2642 + }, + { + "epoch": 0.93, + "grad_norm": 2.4397363662719727, + "learning_rate": 0.0002207020590424212, + "loss": 0.2855, + "step": 2643 + }, + { + "epoch": 0.93, + "grad_norm": 3.3539910316467285, + "learning_rate": 0.0002206648474323989, + "loss": 0.6796, + "step": 2644 + }, + { + "epoch": 0.93, + "grad_norm": 6.321644306182861, + "learning_rate": 0.00022062763582237656, + "loss": 1.5811, + "step": 2645 + }, + { + "epoch": 0.93, + "grad_norm": 5.399683952331543, + "learning_rate": 0.00022059042421235424, + "loss": 0.7615, + "step": 2646 + }, + { + "epoch": 0.93, + "grad_norm": 4.508324146270752, + "learning_rate": 0.00022055321260233189, + "loss": 1.0115, + "step": 2647 + }, + { + "epoch": 0.93, + "grad_norm": 4.360750198364258, + "learning_rate": 0.0002205160009923096, + "loss": 0.6244, + "step": 2648 + }, + { + "epoch": 0.93, + "grad_norm": 2.8480136394500732, + "learning_rate": 0.00022047878938228726, + "loss": 0.444, + "step": 2649 + }, + { + "epoch": 0.93, + "grad_norm": NaN, + "learning_rate": 0.00022047878938228726, + "loss": 0.3347, + "step": 2650 + }, + { + "epoch": 0.93, + "grad_norm": 2.590242385864258, + "learning_rate": 0.0002204415777722649, + "loss": 0.506, + "step": 2651 + }, + { + "epoch": 0.93, + "grad_norm": 2.5283608436584473, + "learning_rate": 0.00022040436616224262, + "loss": 0.5284, + "step": 2652 + }, + { + "epoch": 0.93, + "grad_norm": 3.57216477394104, + "learning_rate": 0.00022036715455222026, + "loss": 1.126, + "step": 2653 + }, + { + "epoch": 0.93, + "grad_norm": 2.9254002571105957, + "learning_rate": 0.00022032994294219794, + "loss": 0.7635, + "step": 2654 + }, + { + "epoch": 0.93, + "grad_norm": 5.162715911865234, + "learning_rate": 0.00022029273133217564, + "loss": 0.7601, + "step": 2655 + }, + { + "epoch": 0.93, + "grad_norm": 3.1532511711120605, + "learning_rate": 0.0002202555197221533, + "loss": 0.941, + "step": 2656 + }, + { + "epoch": 0.93, + "grad_norm": 2.203185796737671, + "learning_rate": 0.00022021830811213097, + "loss": 0.4708, + "step": 2657 + }, + { + "epoch": 0.93, + "grad_norm": 2.62638521194458, + "learning_rate": 0.00022018109650210862, + "loss": 0.3295, + "step": 2658 + }, + { + "epoch": 0.93, + "grad_norm": 4.998661994934082, + "learning_rate": 0.00022014388489208632, + "loss": 1.4867, + "step": 2659 + }, + { + "epoch": 0.93, + "grad_norm": 3.389922857284546, + "learning_rate": 0.000220106673282064, + "loss": 1.2759, + "step": 2660 + }, + { + "epoch": 0.93, + "grad_norm": 5.583156108856201, + "learning_rate": 0.00022006946167204164, + "loss": 0.4192, + "step": 2661 + }, + { + "epoch": 0.93, + "grad_norm": 4.610195159912109, + "learning_rate": 0.00022003225006201935, + "loss": 0.5941, + "step": 2662 + }, + { + "epoch": 0.93, + "grad_norm": 2.2611424922943115, + "learning_rate": 0.00021999503845199702, + "loss": 0.7521, + "step": 2663 + }, + { + "epoch": 0.93, + "grad_norm": 2.2116403579711914, + "learning_rate": 0.00021995782684197467, + "loss": 0.4965, + "step": 2664 + }, + { + "epoch": 0.93, + "grad_norm": 3.487800359725952, + "learning_rate": 0.00021992061523195235, + "loss": 0.7573, + "step": 2665 + }, + { + "epoch": 0.93, + "grad_norm": 2.6182491779327393, + "learning_rate": 0.00021988340362193002, + "loss": 0.4018, + "step": 2666 + }, + { + "epoch": 0.93, + "grad_norm": 2.2056775093078613, + "learning_rate": 0.0002198461920119077, + "loss": 0.3728, + "step": 2667 + }, + { + "epoch": 0.93, + "grad_norm": 1.8955894708633423, + "learning_rate": 0.00021980898040188537, + "loss": 0.4454, + "step": 2668 + }, + { + "epoch": 0.94, + "grad_norm": 3.290935754776001, + "learning_rate": 0.00021977176879186302, + "loss": 0.8278, + "step": 2669 + }, + { + "epoch": 0.94, + "grad_norm": 5.875994682312012, + "learning_rate": 0.00021973455718184073, + "loss": 0.3937, + "step": 2670 + }, + { + "epoch": 0.94, + "grad_norm": 7.943798065185547, + "learning_rate": 0.00021969734557181837, + "loss": 2.0068, + "step": 2671 + }, + { + "epoch": 0.94, + "grad_norm": 5.384636878967285, + "learning_rate": 0.00021966013396179605, + "loss": 0.6497, + "step": 2672 + }, + { + "epoch": 0.94, + "grad_norm": 5.006543159484863, + "learning_rate": 0.00021962292235177375, + "loss": 0.4773, + "step": 2673 + }, + { + "epoch": 0.94, + "grad_norm": 2.17258358001709, + "learning_rate": 0.0002195857107417514, + "loss": 0.3188, + "step": 2674 + }, + { + "epoch": 0.94, + "grad_norm": 4.57729434967041, + "learning_rate": 0.00021954849913172908, + "loss": 0.857, + "step": 2675 + }, + { + "epoch": 0.94, + "grad_norm": 2.9307923316955566, + "learning_rate": 0.00021951128752170678, + "loss": 0.6661, + "step": 2676 + }, + { + "epoch": 0.94, + "grad_norm": 2.026794910430908, + "learning_rate": 0.00021947407591168443, + "loss": 0.4217, + "step": 2677 + }, + { + "epoch": 0.94, + "grad_norm": 2.6623916625976562, + "learning_rate": 0.0002194368643016621, + "loss": 0.5877, + "step": 2678 + }, + { + "epoch": 0.94, + "grad_norm": 2.1093175411224365, + "learning_rate": 0.00021939965269163975, + "loss": 0.3837, + "step": 2679 + }, + { + "epoch": 0.94, + "grad_norm": 1.958167552947998, + "learning_rate": 0.00021936244108161746, + "loss": 0.438, + "step": 2680 + }, + { + "epoch": 0.94, + "grad_norm": 2.3107874393463135, + "learning_rate": 0.00021932522947159513, + "loss": 0.2563, + "step": 2681 + }, + { + "epoch": 0.94, + "grad_norm": 2.5440564155578613, + "learning_rate": 0.00021928801786157278, + "loss": 0.5432, + "step": 2682 + }, + { + "epoch": 0.94, + "grad_norm": 2.4444539546966553, + "learning_rate": 0.00021925080625155048, + "loss": 0.8262, + "step": 2683 + }, + { + "epoch": 0.94, + "grad_norm": 3.0069375038146973, + "learning_rate": 0.00021921359464152813, + "loss": 0.8865, + "step": 2684 + }, + { + "epoch": 0.94, + "grad_norm": 2.6746938228607178, + "learning_rate": 0.0002191763830315058, + "loss": 0.425, + "step": 2685 + }, + { + "epoch": 0.94, + "grad_norm": 2.956552028656006, + "learning_rate": 0.00021913917142148348, + "loss": 0.6153, + "step": 2686 + }, + { + "epoch": 0.94, + "grad_norm": 2.7592060565948486, + "learning_rate": 0.00021910195981146116, + "loss": 0.6633, + "step": 2687 + }, + { + "epoch": 0.94, + "grad_norm": 4.972530841827393, + "learning_rate": 0.00021906474820143884, + "loss": 0.5743, + "step": 2688 + }, + { + "epoch": 0.94, + "grad_norm": 2.044546365737915, + "learning_rate": 0.0002190275365914165, + "loss": 0.2586, + "step": 2689 + }, + { + "epoch": 0.94, + "grad_norm": 2.6088757514953613, + "learning_rate": 0.00021899032498139416, + "loss": 0.2942, + "step": 2690 + }, + { + "epoch": 0.94, + "grad_norm": 2.929062843322754, + "learning_rate": 0.00021895311337137186, + "loss": 0.4355, + "step": 2691 + }, + { + "epoch": 0.94, + "grad_norm": 4.158072471618652, + "learning_rate": 0.0002189159017613495, + "loss": 0.597, + "step": 2692 + }, + { + "epoch": 0.94, + "grad_norm": 2.822199821472168, + "learning_rate": 0.0002188786901513272, + "loss": 0.5923, + "step": 2693 + }, + { + "epoch": 0.94, + "grad_norm": 2.6804251670837402, + "learning_rate": 0.0002188414785413049, + "loss": 0.3537, + "step": 2694 + }, + { + "epoch": 0.94, + "grad_norm": 1.3298931121826172, + "learning_rate": 0.00021880426693128254, + "loss": 0.1007, + "step": 2695 + }, + { + "epoch": 0.94, + "grad_norm": 1.8608430624008179, + "learning_rate": 0.00021876705532126022, + "loss": 0.1824, + "step": 2696 + }, + { + "epoch": 0.94, + "grad_norm": 2.879000663757324, + "learning_rate": 0.00021872984371123786, + "loss": 0.3099, + "step": 2697 + }, + { + "epoch": 0.95, + "grad_norm": 4.222035884857178, + "learning_rate": 0.00021869263210121557, + "loss": 0.6009, + "step": 2698 + }, + { + "epoch": 0.95, + "grad_norm": 2.855086326599121, + "learning_rate": 0.00021865542049119324, + "loss": 0.6408, + "step": 2699 + }, + { + "epoch": 0.95, + "grad_norm": 2.642244577407837, + "learning_rate": 0.0002186182088811709, + "loss": 0.2426, + "step": 2700 + }, + { + "epoch": 0.95, + "eval_loss": 0.5336953401565552, + "eval_runtime": 51.4996, + "eval_samples_per_second": 42.097, + "eval_steps_per_second": 10.524, + "eval_wer": 0.48719944646254976, + "step": 2700 + }, + { + "epoch": 0.95, + "grad_norm": 2.4252710342407227, + "learning_rate": 0.0002185809972711486, + "loss": 0.9991, + "step": 2701 + }, + { + "epoch": 0.95, + "grad_norm": 2.518662452697754, + "learning_rate": 0.00021854378566112624, + "loss": 0.5021, + "step": 2702 + }, + { + "epoch": 0.95, + "grad_norm": 2.2588770389556885, + "learning_rate": 0.00021850657405110392, + "loss": 0.6613, + "step": 2703 + }, + { + "epoch": 0.95, + "grad_norm": 2.61147141456604, + "learning_rate": 0.00021846936244108162, + "loss": 0.6095, + "step": 2704 + }, + { + "epoch": 0.95, + "grad_norm": 2.213031053543091, + "learning_rate": 0.00021843215083105927, + "loss": 0.3952, + "step": 2705 + }, + { + "epoch": 0.95, + "grad_norm": 1.7817362546920776, + "learning_rate": 0.00021839493922103695, + "loss": 0.467, + "step": 2706 + }, + { + "epoch": 0.95, + "grad_norm": 1.5945087671279907, + "learning_rate": 0.00021835772761101462, + "loss": 0.2962, + "step": 2707 + }, + { + "epoch": 0.95, + "grad_norm": 3.3003673553466797, + "learning_rate": 0.0002183205160009923, + "loss": 0.5996, + "step": 2708 + }, + { + "epoch": 0.95, + "grad_norm": 2.145582675933838, + "learning_rate": 0.00021828330439096997, + "loss": 0.4746, + "step": 2709 + }, + { + "epoch": 0.95, + "grad_norm": 2.858013153076172, + "learning_rate": 0.00021824609278094762, + "loss": 0.7782, + "step": 2710 + }, + { + "epoch": 0.95, + "grad_norm": 2.5435516834259033, + "learning_rate": 0.0002182088811709253, + "loss": 0.5275, + "step": 2711 + }, + { + "epoch": 0.95, + "grad_norm": 2.836350202560425, + "learning_rate": 0.000218171669560903, + "loss": 0.4082, + "step": 2712 + }, + { + "epoch": 0.95, + "grad_norm": 1.8937970399856567, + "learning_rate": 0.00021813445795088065, + "loss": 0.3822, + "step": 2713 + }, + { + "epoch": 0.95, + "grad_norm": 2.3945305347442627, + "learning_rate": 0.00021809724634085833, + "loss": 0.371, + "step": 2714 + }, + { + "epoch": 0.95, + "grad_norm": 1.8893818855285645, + "learning_rate": 0.00021806003473083597, + "loss": 0.2202, + "step": 2715 + }, + { + "epoch": 0.95, + "grad_norm": 2.0246646404266357, + "learning_rate": 0.00021802282312081368, + "loss": 0.1932, + "step": 2716 + }, + { + "epoch": 0.95, + "grad_norm": 1.8880715370178223, + "learning_rate": 0.00021798561151079135, + "loss": 0.2989, + "step": 2717 + }, + { + "epoch": 0.95, + "grad_norm": 5.3225202560424805, + "learning_rate": 0.000217948399900769, + "loss": 1.991, + "step": 2718 + }, + { + "epoch": 0.95, + "grad_norm": 5.143655776977539, + "learning_rate": 0.0002179111882907467, + "loss": 0.3331, + "step": 2719 + }, + { + "epoch": 0.95, + "grad_norm": 2.7824084758758545, + "learning_rate": 0.00021787397668072438, + "loss": 0.3482, + "step": 2720 + }, + { + "epoch": 0.95, + "grad_norm": 1.4806097745895386, + "learning_rate": 0.00021783676507070203, + "loss": 0.1659, + "step": 2721 + }, + { + "epoch": 0.95, + "grad_norm": 4.375702381134033, + "learning_rate": 0.00021779955346067973, + "loss": 0.7355, + "step": 2722 + }, + { + "epoch": 0.95, + "grad_norm": 3.235441207885742, + "learning_rate": 0.00021776234185065738, + "loss": 0.5138, + "step": 2723 + }, + { + "epoch": 0.95, + "grad_norm": 5.325510501861572, + "learning_rate": 0.00021772513024063506, + "loss": 0.2818, + "step": 2724 + }, + { + "epoch": 0.95, + "grad_norm": 2.0752792358398438, + "learning_rate": 0.00021768791863061276, + "loss": 0.1656, + "step": 2725 + }, + { + "epoch": 0.96, + "grad_norm": 2.6207728385925293, + "learning_rate": 0.0002176507070205904, + "loss": 1.094, + "step": 2726 + }, + { + "epoch": 0.96, + "grad_norm": 1.5188002586364746, + "learning_rate": 0.00021761349541056808, + "loss": 0.2545, + "step": 2727 + }, + { + "epoch": 0.96, + "grad_norm": 2.4537734985351562, + "learning_rate": 0.00021757628380054573, + "loss": 0.476, + "step": 2728 + }, + { + "epoch": 0.96, + "grad_norm": 1.6389610767364502, + "learning_rate": 0.00021753907219052343, + "loss": 0.3701, + "step": 2729 + }, + { + "epoch": 0.96, + "grad_norm": 2.6082892417907715, + "learning_rate": 0.0002175018605805011, + "loss": 0.5414, + "step": 2730 + }, + { + "epoch": 0.96, + "grad_norm": 3.334270477294922, + "learning_rate": 0.00021746464897047876, + "loss": 0.5533, + "step": 2731 + }, + { + "epoch": 0.96, + "grad_norm": 5.05968713760376, + "learning_rate": 0.00021742743736045644, + "loss": 0.7159, + "step": 2732 + }, + { + "epoch": 0.96, + "grad_norm": 3.399036407470703, + "learning_rate": 0.00021739022575043414, + "loss": 1.2499, + "step": 2733 + }, + { + "epoch": 0.96, + "grad_norm": 1.3443745374679565, + "learning_rate": 0.0002173530141404118, + "loss": 0.176, + "step": 2734 + }, + { + "epoch": 0.96, + "grad_norm": 2.077892303466797, + "learning_rate": 0.00021731580253038946, + "loss": 0.2954, + "step": 2735 + }, + { + "epoch": 0.96, + "grad_norm": 2.6642885208129883, + "learning_rate": 0.0002172785909203671, + "loss": 0.2387, + "step": 2736 + }, + { + "epoch": 0.96, + "grad_norm": 4.107677459716797, + "learning_rate": 0.00021724137931034481, + "loss": 0.6326, + "step": 2737 + }, + { + "epoch": 0.96, + "grad_norm": 3.31645131111145, + "learning_rate": 0.0002172041677003225, + "loss": 0.4842, + "step": 2738 + }, + { + "epoch": 0.96, + "grad_norm": 2.741530418395996, + "learning_rate": 0.00021716695609030014, + "loss": 0.3405, + "step": 2739 + }, + { + "epoch": 0.96, + "grad_norm": 1.6625313758850098, + "learning_rate": 0.00021712974448027784, + "loss": 0.1492, + "step": 2740 + }, + { + "epoch": 0.96, + "grad_norm": 4.23145866394043, + "learning_rate": 0.0002170925328702555, + "loss": 0.4991, + "step": 2741 + }, + { + "epoch": 0.96, + "grad_norm": 3.0887067317962646, + "learning_rate": 0.00021705532126023317, + "loss": 0.3526, + "step": 2742 + }, + { + "epoch": 0.96, + "grad_norm": 4.2543463706970215, + "learning_rate": 0.00021701810965021087, + "loss": 0.6271, + "step": 2743 + }, + { + "epoch": 0.96, + "grad_norm": 3.850689172744751, + "learning_rate": 0.00021698089804018852, + "loss": 0.5781, + "step": 2744 + }, + { + "epoch": 0.96, + "grad_norm": 3.8406736850738525, + "learning_rate": 0.0002169436864301662, + "loss": 0.5398, + "step": 2745 + }, + { + "epoch": 0.96, + "grad_norm": 2.770142078399658, + "learning_rate": 0.00021690647482014384, + "loss": 0.1511, + "step": 2746 + }, + { + "epoch": 0.96, + "grad_norm": 2.917293071746826, + "learning_rate": 0.00021686926321012154, + "loss": 1.3509, + "step": 2747 + }, + { + "epoch": 0.96, + "grad_norm": 1.7973790168762207, + "learning_rate": 0.00021683205160009922, + "loss": 0.1045, + "step": 2748 + }, + { + "epoch": 0.96, + "grad_norm": 3.61274790763855, + "learning_rate": 0.00021679483999007687, + "loss": 0.3338, + "step": 2749 + }, + { + "epoch": 0.96, + "grad_norm": 3.060636043548584, + "learning_rate": 0.00021675762838005457, + "loss": 0.2876, + "step": 2750 + }, + { + "epoch": 0.96, + "grad_norm": 3.3534083366394043, + "learning_rate": 0.00021672041677003225, + "loss": 1.3637, + "step": 2751 + }, + { + "epoch": 0.96, + "grad_norm": 3.5700647830963135, + "learning_rate": 0.0002166832051600099, + "loss": 0.9309, + "step": 2752 + }, + { + "epoch": 0.96, + "grad_norm": 1.867060661315918, + "learning_rate": 0.00021664599354998757, + "loss": 0.4095, + "step": 2753 + }, + { + "epoch": 0.96, + "grad_norm": 2.7170629501342773, + "learning_rate": 0.00021660878193996525, + "loss": 0.7493, + "step": 2754 + }, + { + "epoch": 0.97, + "grad_norm": 2.105668783187866, + "learning_rate": 0.00021657157032994292, + "loss": 0.6006, + "step": 2755 + }, + { + "epoch": 0.97, + "grad_norm": 1.1052336692810059, + "learning_rate": 0.0002165343587199206, + "loss": 0.097, + "step": 2756 + }, + { + "epoch": 0.97, + "grad_norm": 2.781369924545288, + "learning_rate": 0.00021649714710989825, + "loss": 0.5647, + "step": 2757 + }, + { + "epoch": 0.97, + "grad_norm": 2.3035714626312256, + "learning_rate": 0.00021645993549987595, + "loss": 0.3785, + "step": 2758 + }, + { + "epoch": 0.97, + "grad_norm": 1.3320902585983276, + "learning_rate": 0.0002164227238898536, + "loss": 0.2026, + "step": 2759 + }, + { + "epoch": 0.97, + "grad_norm": 1.859253168106079, + "learning_rate": 0.00021638551227983128, + "loss": 0.2957, + "step": 2760 + }, + { + "epoch": 0.97, + "grad_norm": 2.7369508743286133, + "learning_rate": 0.00021634830066980898, + "loss": 0.6835, + "step": 2761 + }, + { + "epoch": 0.97, + "grad_norm": 2.1216232776641846, + "learning_rate": 0.00021631108905978663, + "loss": 0.2972, + "step": 2762 + }, + { + "epoch": 0.97, + "grad_norm": 1.9394500255584717, + "learning_rate": 0.0002162738774497643, + "loss": 0.5818, + "step": 2763 + }, + { + "epoch": 0.97, + "grad_norm": 1.9596755504608154, + "learning_rate": 0.000216236665839742, + "loss": 0.4004, + "step": 2764 + }, + { + "epoch": 0.97, + "grad_norm": 4.52447509765625, + "learning_rate": 0.00021619945422971966, + "loss": 0.4622, + "step": 2765 + }, + { + "epoch": 0.97, + "grad_norm": 2.721889019012451, + "learning_rate": 0.00021616224261969733, + "loss": 0.2771, + "step": 2766 + }, + { + "epoch": 0.97, + "grad_norm": 2.8889129161834717, + "learning_rate": 0.00021612503100967498, + "loss": 0.6328, + "step": 2767 + }, + { + "epoch": 0.97, + "grad_norm": 3.644382953643799, + "learning_rate": 0.00021608781939965268, + "loss": 0.5304, + "step": 2768 + }, + { + "epoch": 0.97, + "grad_norm": 4.635915279388428, + "learning_rate": 0.00021605060778963036, + "loss": 0.6429, + "step": 2769 + }, + { + "epoch": 0.97, + "grad_norm": 3.2131359577178955, + "learning_rate": 0.000216013396179608, + "loss": 1.1534, + "step": 2770 + }, + { + "epoch": 0.97, + "grad_norm": 2.65970516204834, + "learning_rate": 0.0002159761845695857, + "loss": 0.3379, + "step": 2771 + }, + { + "epoch": 0.97, + "grad_norm": 1.9941884279251099, + "learning_rate": 0.00021593897295956336, + "loss": 0.2206, + "step": 2772 + }, + { + "epoch": 0.97, + "grad_norm": 3.2422287464141846, + "learning_rate": 0.00021590176134954103, + "loss": 0.2301, + "step": 2773 + }, + { + "epoch": 0.97, + "grad_norm": 2.5298879146575928, + "learning_rate": 0.0002158645497395187, + "loss": 0.2831, + "step": 2774 + }, + { + "epoch": 0.97, + "grad_norm": 4.899098873138428, + "learning_rate": 0.00021582733812949639, + "loss": 2.1931, + "step": 2775 + }, + { + "epoch": 0.97, + "grad_norm": 2.338663101196289, + "learning_rate": 0.00021579012651947406, + "loss": 0.8356, + "step": 2776 + }, + { + "epoch": 0.97, + "grad_norm": 2.9031519889831543, + "learning_rate": 0.00021575291490945174, + "loss": 1.0787, + "step": 2777 + }, + { + "epoch": 0.97, + "grad_norm": 3.205125093460083, + "learning_rate": 0.00021571570329942939, + "loss": 0.9548, + "step": 2778 + }, + { + "epoch": 0.97, + "grad_norm": 2.117971181869507, + "learning_rate": 0.0002156784916894071, + "loss": 0.4246, + "step": 2779 + }, + { + "epoch": 0.97, + "grad_norm": 2.159374713897705, + "learning_rate": 0.00021564128007938474, + "loss": 0.3248, + "step": 2780 + }, + { + "epoch": 0.97, + "grad_norm": 2.537348508834839, + "learning_rate": 0.00021560406846936241, + "loss": 0.7855, + "step": 2781 + }, + { + "epoch": 0.97, + "grad_norm": 3.984494924545288, + "learning_rate": 0.00021556685685934012, + "loss": 0.6787, + "step": 2782 + }, + { + "epoch": 0.98, + "grad_norm": 2.4059112071990967, + "learning_rate": 0.00021552964524931777, + "loss": 0.5901, + "step": 2783 + }, + { + "epoch": 0.98, + "grad_norm": 2.266899824142456, + "learning_rate": 0.00021549243363929544, + "loss": 0.4769, + "step": 2784 + }, + { + "epoch": 0.98, + "grad_norm": 2.490513563156128, + "learning_rate": 0.0002154552220292731, + "loss": 0.5872, + "step": 2785 + }, + { + "epoch": 0.98, + "grad_norm": 2.9180526733398438, + "learning_rate": 0.0002154180104192508, + "loss": 0.3963, + "step": 2786 + }, + { + "epoch": 0.98, + "grad_norm": 1.8597452640533447, + "learning_rate": 0.00021538079880922847, + "loss": 0.5766, + "step": 2787 + }, + { + "epoch": 0.98, + "grad_norm": 1.8061572313308716, + "learning_rate": 0.00021534358719920612, + "loss": 0.4378, + "step": 2788 + }, + { + "epoch": 0.98, + "grad_norm": 2.9399218559265137, + "learning_rate": 0.00021530637558918382, + "loss": 0.5846, + "step": 2789 + }, + { + "epoch": 0.98, + "grad_norm": 2.21833872795105, + "learning_rate": 0.00021526916397916147, + "loss": 0.424, + "step": 2790 + }, + { + "epoch": 0.98, + "grad_norm": 2.508014678955078, + "learning_rate": 0.00021523195236913914, + "loss": 0.3813, + "step": 2791 + }, + { + "epoch": 0.98, + "grad_norm": 1.099500060081482, + "learning_rate": 0.00021519474075911685, + "loss": 0.2054, + "step": 2792 + }, + { + "epoch": 0.98, + "grad_norm": 2.6345198154449463, + "learning_rate": 0.0002151575291490945, + "loss": 0.537, + "step": 2793 + }, + { + "epoch": 0.98, + "grad_norm": 3.041452646255493, + "learning_rate": 0.00021512031753907217, + "loss": 0.5931, + "step": 2794 + }, + { + "epoch": 0.98, + "grad_norm": 2.204749822616577, + "learning_rate": 0.00021508310592904985, + "loss": 0.418, + "step": 2795 + }, + { + "epoch": 0.98, + "grad_norm": 3.116248369216919, + "learning_rate": 0.00021504589431902752, + "loss": 0.4851, + "step": 2796 + }, + { + "epoch": 0.98, + "grad_norm": 4.110528469085693, + "learning_rate": 0.0002150086827090052, + "loss": 0.4382, + "step": 2797 + }, + { + "epoch": 0.98, + "grad_norm": 3.8598759174346924, + "learning_rate": 0.00021497147109898285, + "loss": 0.542, + "step": 2798 + }, + { + "epoch": 0.98, + "grad_norm": 3.3355400562286377, + "learning_rate": 0.00021493425948896052, + "loss": 0.3713, + "step": 2799 + }, + { + "epoch": 0.98, + "grad_norm": 5.150740623474121, + "learning_rate": 0.00021489704787893823, + "loss": 2.0462, + "step": 2800 + }, + { + "epoch": 0.98, + "eval_loss": 0.5526766180992126, + "eval_runtime": 51.4541, + "eval_samples_per_second": 42.135, + "eval_steps_per_second": 10.534, + "eval_wer": 0.49014011416709913, + "step": 2800 + }, + { + "epoch": 0.98, + "grad_norm": 3.123936414718628, + "learning_rate": 0.00021485983626891588, + "loss": 0.9954, + "step": 2801 + }, + { + "epoch": 0.98, + "grad_norm": 25.905872344970703, + "learning_rate": 0.00021482262465889355, + "loss": 5.7803, + "step": 2802 + }, + { + "epoch": 0.98, + "grad_norm": 2.5377426147460938, + "learning_rate": 0.0002147854130488712, + "loss": 0.5039, + "step": 2803 + }, + { + "epoch": 0.98, + "grad_norm": 1.8195780515670776, + "learning_rate": 0.0002147482014388489, + "loss": 0.8649, + "step": 2804 + }, + { + "epoch": 0.98, + "grad_norm": 2.3947935104370117, + "learning_rate": 0.00021471098982882658, + "loss": 0.7395, + "step": 2805 + }, + { + "epoch": 0.98, + "grad_norm": 2.462125062942505, + "learning_rate": 0.00021467377821880423, + "loss": 0.3764, + "step": 2806 + }, + { + "epoch": 0.98, + "grad_norm": 3.7861380577087402, + "learning_rate": 0.00021463656660878193, + "loss": 1.1175, + "step": 2807 + }, + { + "epoch": 0.98, + "grad_norm": 2.9747540950775146, + "learning_rate": 0.0002145993549987596, + "loss": 0.8438, + "step": 2808 + }, + { + "epoch": 0.98, + "grad_norm": 2.355682611465454, + "learning_rate": 0.00021456214338873725, + "loss": 0.6275, + "step": 2809 + }, + { + "epoch": 0.98, + "grad_norm": 1.986989140510559, + "learning_rate": 0.00021452493177871496, + "loss": 0.4578, + "step": 2810 + }, + { + "epoch": 0.98, + "grad_norm": 3.2945938110351562, + "learning_rate": 0.0002144877201686926, + "loss": 0.4254, + "step": 2811 + }, + { + "epoch": 0.99, + "grad_norm": 2.7034192085266113, + "learning_rate": 0.00021445050855867028, + "loss": 0.8129, + "step": 2812 + }, + { + "epoch": 0.99, + "grad_norm": 3.6484246253967285, + "learning_rate": 0.00021441329694864798, + "loss": 0.5471, + "step": 2813 + }, + { + "epoch": 0.99, + "grad_norm": 9.947596549987793, + "learning_rate": 0.00021437608533862563, + "loss": 2.901, + "step": 2814 + }, + { + "epoch": 0.99, + "grad_norm": 2.3516814708709717, + "learning_rate": 0.0002143388737286033, + "loss": 0.3611, + "step": 2815 + }, + { + "epoch": 0.99, + "grad_norm": 3.094783067703247, + "learning_rate": 0.00021430166211858096, + "loss": 0.4288, + "step": 2816 + }, + { + "epoch": 0.99, + "grad_norm": 2.745293617248535, + "learning_rate": 0.00021426445050855866, + "loss": 0.4119, + "step": 2817 + }, + { + "epoch": 0.99, + "grad_norm": 2.0576019287109375, + "learning_rate": 0.00021422723889853634, + "loss": 0.3339, + "step": 2818 + }, + { + "epoch": 0.99, + "grad_norm": 1.2697802782058716, + "learning_rate": 0.00021419002728851399, + "loss": 0.1731, + "step": 2819 + }, + { + "epoch": 0.99, + "grad_norm": 3.58626127243042, + "learning_rate": 0.00021415281567849166, + "loss": 0.4412, + "step": 2820 + }, + { + "epoch": 0.99, + "grad_norm": 2.652714490890503, + "learning_rate": 0.00021411560406846936, + "loss": 0.2896, + "step": 2821 + }, + { + "epoch": 0.99, + "grad_norm": 2.500732660293579, + "learning_rate": 0.000214078392458447, + "loss": 0.2454, + "step": 2822 + }, + { + "epoch": 0.99, + "grad_norm": 1.2118359804153442, + "learning_rate": 0.0002140411808484247, + "loss": 0.0464, + "step": 2823 + }, + { + "epoch": 0.99, + "grad_norm": 2.111560106277466, + "learning_rate": 0.00021400396923840236, + "loss": 0.3128, + "step": 2824 + }, + { + "epoch": 0.99, + "grad_norm": 5.749283790588379, + "learning_rate": 0.00021396675762838004, + "loss": 0.9602, + "step": 2825 + }, + { + "epoch": 0.99, + "grad_norm": 3.080014228820801, + "learning_rate": 0.00021392954601835772, + "loss": 0.9871, + "step": 2826 + }, + { + "epoch": 0.99, + "grad_norm": 2.2783267498016357, + "learning_rate": 0.00021389233440833536, + "loss": 0.6355, + "step": 2827 + }, + { + "epoch": 0.99, + "grad_norm": 1.5478532314300537, + "learning_rate": 0.00021385512279831307, + "loss": 0.3404, + "step": 2828 + }, + { + "epoch": 0.99, + "grad_norm": 2.262725353240967, + "learning_rate": 0.00021381791118829072, + "loss": 0.3556, + "step": 2829 + }, + { + "epoch": 0.99, + "grad_norm": 2.2624759674072266, + "learning_rate": 0.0002137806995782684, + "loss": 0.4647, + "step": 2830 + }, + { + "epoch": 0.99, + "grad_norm": 2.1995623111724854, + "learning_rate": 0.0002137434879682461, + "loss": 0.4685, + "step": 2831 + }, + { + "epoch": 0.99, + "grad_norm": 2.1879045963287354, + "learning_rate": 0.00021370627635822374, + "loss": 0.5635, + "step": 2832 + }, + { + "epoch": 0.99, + "grad_norm": 1.52578604221344, + "learning_rate": 0.00021366906474820142, + "loss": 0.3173, + "step": 2833 + }, + { + "epoch": 0.99, + "grad_norm": 3.537405490875244, + "learning_rate": 0.00021363185313817907, + "loss": 0.7481, + "step": 2834 + }, + { + "epoch": 0.99, + "grad_norm": 1.3475868701934814, + "learning_rate": 0.00021359464152815677, + "loss": 0.2276, + "step": 2835 + }, + { + "epoch": 0.99, + "grad_norm": 2.945054769515991, + "learning_rate": 0.00021355742991813445, + "loss": 0.7077, + "step": 2836 + }, + { + "epoch": 0.99, + "grad_norm": 2.155667304992676, + "learning_rate": 0.0002135202183081121, + "loss": 0.4307, + "step": 2837 + }, + { + "epoch": 0.99, + "grad_norm": 4.234096527099609, + "learning_rate": 0.0002134830066980898, + "loss": 0.3034, + "step": 2838 + }, + { + "epoch": 0.99, + "grad_norm": 2.2338759899139404, + "learning_rate": 0.00021344579508806747, + "loss": 0.4335, + "step": 2839 + }, + { + "epoch": 1.0, + "grad_norm": 4.541741371154785, + "learning_rate": 0.00021340858347804512, + "loss": 1.6635, + "step": 2840 + }, + { + "epoch": 1.0, + "grad_norm": 5.528841495513916, + "learning_rate": 0.0002133713718680228, + "loss": 2.2904, + "step": 2841 + }, + { + "epoch": 1.0, + "grad_norm": 1.8137770891189575, + "learning_rate": 0.00021333416025800047, + "loss": 0.2749, + "step": 2842 + }, + { + "epoch": 1.0, + "grad_norm": 2.021759033203125, + "learning_rate": 0.00021329694864797815, + "loss": 0.2995, + "step": 2843 + }, + { + "epoch": 1.0, + "grad_norm": 4.046627044677734, + "learning_rate": 0.00021325973703795583, + "loss": 0.5484, + "step": 2844 + }, + { + "epoch": 1.0, + "grad_norm": 4.797898292541504, + "learning_rate": 0.0002132225254279335, + "loss": 0.5586, + "step": 2845 + }, + { + "epoch": 1.0, + "grad_norm": 3.8933186531066895, + "learning_rate": 0.00021318531381791118, + "loss": 0.2478, + "step": 2846 + }, + { + "epoch": 1.0, + "grad_norm": 2.601649045944214, + "learning_rate": 0.00021314810220788883, + "loss": 0.2398, + "step": 2847 + }, + { + "epoch": 1.0, + "grad_norm": 2.6861727237701416, + "learning_rate": 0.0002131108905978665, + "loss": 0.3503, + "step": 2848 + }, + { + "epoch": 1.0, + "grad_norm": 3.729224681854248, + "learning_rate": 0.0002130736789878442, + "loss": 0.5147, + "step": 2849 + }, + { + "epoch": 1.0, + "grad_norm": 4.694298267364502, + "learning_rate": 0.00021303646737782185, + "loss": 1.8265, + "step": 2850 + }, + { + "epoch": 1.0, + "grad_norm": 2.0727405548095703, + "learning_rate": 0.00021299925576779953, + "loss": 0.7055, + "step": 2851 + }, + { + "epoch": 1.0, + "grad_norm": 2.8349599838256836, + "learning_rate": 0.00021296204415777723, + "loss": 0.373, + "step": 2852 + }, + { + "epoch": 1.0, + "grad_norm": 1.7524197101593018, + "learning_rate": 0.00021292483254775488, + "loss": 0.3642, + "step": 2853 + }, + { + "epoch": 1.0, + "grad_norm": 1.7273423671722412, + "learning_rate": 0.00021288762093773256, + "loss": 0.4562, + "step": 2854 + }, + { + "epoch": 1.0, + "grad_norm": 1.9599108695983887, + "learning_rate": 0.0002128504093277102, + "loss": 0.8411, + "step": 2855 + }, + { + "epoch": 1.0, + "grad_norm": 2.2834584712982178, + "learning_rate": 0.0002128131977176879, + "loss": 0.6458, + "step": 2856 + }, + { + "epoch": 1.0, + "grad_norm": 2.6620326042175293, + "learning_rate": 0.00021277598610766558, + "loss": 0.7874, + "step": 2857 + }, + { + "epoch": 1.0, + "grad_norm": 1.7331115007400513, + "learning_rate": 0.00021273877449764323, + "loss": 0.4673, + "step": 2858 + }, + { + "epoch": 1.0, + "grad_norm": 1.6492310762405396, + "learning_rate": 0.00021270156288762094, + "loss": 0.4048, + "step": 2859 + }, + { + "epoch": 1.0, + "grad_norm": 1.8001649379730225, + "learning_rate": 0.00021266435127759858, + "loss": 0.5527, + "step": 2860 + }, + { + "epoch": 1.0, + "grad_norm": 3.7418673038482666, + "learning_rate": 0.00021262713966757626, + "loss": 0.8918, + "step": 2861 + }, + { + "epoch": 1.0, + "grad_norm": 1.890509843826294, + "learning_rate": 0.00021258992805755394, + "loss": 0.4264, + "step": 2862 + }, + { + "epoch": 1.0, + "grad_norm": 2.927704334259033, + "learning_rate": 0.0002125527164475316, + "loss": 0.7042, + "step": 2863 + }, + { + "epoch": 1.0, + "grad_norm": 2.7126057147979736, + "learning_rate": 0.0002125155048375093, + "loss": 0.4692, + "step": 2864 + }, + { + "epoch": 1.0, + "grad_norm": 1.3469655513763428, + "learning_rate": 0.00021247829322748696, + "loss": 0.3412, + "step": 2865 + }, + { + "epoch": 1.0, + "grad_norm": 2.2996063232421875, + "learning_rate": 0.00021244108161746464, + "loss": 0.2926, + "step": 2866 + }, + { + "epoch": 1.0, + "grad_norm": 1.5315076112747192, + "learning_rate": 0.00021240387000744232, + "loss": 0.1615, + "step": 2867 + }, + { + "epoch": 1.0, + "grad_norm": 3.6261684894561768, + "learning_rate": 0.00021236665839741996, + "loss": 0.7396, + "step": 2868 + }, + { + "epoch": 1.01, + "grad_norm": 1.5153400897979736, + "learning_rate": 0.00021232944678739764, + "loss": 0.2164, + "step": 2869 + }, + { + "epoch": 1.01, + "grad_norm": 3.3273916244506836, + "learning_rate": 0.00021229223517737534, + "loss": 0.6213, + "step": 2870 + }, + { + "epoch": 1.01, + "grad_norm": 4.1875104904174805, + "learning_rate": 0.000212255023567353, + "loss": 0.7009, + "step": 2871 + }, + { + "epoch": 1.01, + "grad_norm": 1.51561439037323, + "learning_rate": 0.00021221781195733067, + "loss": 0.1144, + "step": 2872 + }, + { + "epoch": 1.01, + "grad_norm": 2.509672164916992, + "learning_rate": 0.00021218060034730832, + "loss": 0.3553, + "step": 2873 + }, + { + "epoch": 1.01, + "grad_norm": 1.6920043230056763, + "learning_rate": 0.00021214338873728602, + "loss": 0.1711, + "step": 2874 + }, + { + "epoch": 1.01, + "grad_norm": 3.063408136367798, + "learning_rate": 0.0002121061771272637, + "loss": 0.195, + "step": 2875 + }, + { + "epoch": 1.01, + "grad_norm": 4.922022819519043, + "learning_rate": 0.00021206896551724134, + "loss": 0.6325, + "step": 2876 + }, + { + "epoch": 1.01, + "grad_norm": 3.254242181777954, + "learning_rate": 0.00021203175390721905, + "loss": 0.4139, + "step": 2877 + }, + { + "epoch": 1.01, + "grad_norm": 1.8609694242477417, + "learning_rate": 0.00021199454229719672, + "loss": 0.3271, + "step": 2878 + }, + { + "epoch": 1.01, + "grad_norm": 4.9110283851623535, + "learning_rate": 0.00021195733068717437, + "loss": 0.4979, + "step": 2879 + }, + { + "epoch": 1.01, + "grad_norm": 2.849888324737549, + "learning_rate": 0.00021192011907715207, + "loss": 0.9667, + "step": 2880 + }, + { + "epoch": 1.01, + "grad_norm": 3.0285425186157227, + "learning_rate": 0.00021188290746712972, + "loss": 0.6187, + "step": 2881 + }, + { + "epoch": 1.01, + "grad_norm": 2.866227388381958, + "learning_rate": 0.0002118456958571074, + "loss": 0.5702, + "step": 2882 + }, + { + "epoch": 1.01, + "grad_norm": 2.6997718811035156, + "learning_rate": 0.00021180848424708507, + "loss": 0.5016, + "step": 2883 + }, + { + "epoch": 1.01, + "grad_norm": 1.718056559562683, + "learning_rate": 0.00021177127263706275, + "loss": 0.289, + "step": 2884 + }, + { + "epoch": 1.01, + "grad_norm": 3.1275880336761475, + "learning_rate": 0.00021173406102704043, + "loss": 0.376, + "step": 2885 + }, + { + "epoch": 1.01, + "grad_norm": 4.316718101501465, + "learning_rate": 0.00021169684941701807, + "loss": 0.8418, + "step": 2886 + }, + { + "epoch": 1.01, + "grad_norm": 2.493377685546875, + "learning_rate": 0.00021165963780699578, + "loss": 0.2839, + "step": 2887 + }, + { + "epoch": 1.01, + "grad_norm": 2.5852391719818115, + "learning_rate": 0.00021162242619697345, + "loss": 0.4074, + "step": 2888 + }, + { + "epoch": 1.01, + "grad_norm": 2.4773120880126953, + "learning_rate": 0.0002115852145869511, + "loss": 0.4167, + "step": 2889 + }, + { + "epoch": 1.01, + "grad_norm": 1.7587331533432007, + "learning_rate": 0.00021154800297692878, + "loss": 0.2211, + "step": 2890 + }, + { + "epoch": 1.01, + "grad_norm": 2.402329444885254, + "learning_rate": 0.00021151079136690645, + "loss": 0.2386, + "step": 2891 + }, + { + "epoch": 1.01, + "grad_norm": 2.4426639080047607, + "learning_rate": 0.00021147357975688413, + "loss": 0.5694, + "step": 2892 + }, + { + "epoch": 1.01, + "grad_norm": 1.6130284070968628, + "learning_rate": 0.0002114363681468618, + "loss": 0.3316, + "step": 2893 + }, + { + "epoch": 1.01, + "grad_norm": 2.9390599727630615, + "learning_rate": 0.00021139915653683945, + "loss": 0.2903, + "step": 2894 + }, + { + "epoch": 1.01, + "grad_norm": 3.764599561691284, + "learning_rate": 0.00021136194492681716, + "loss": 0.5749, + "step": 2895 + }, + { + "epoch": 1.01, + "grad_norm": 1.7966651916503906, + "learning_rate": 0.00021132473331679483, + "loss": 0.1905, + "step": 2896 + }, + { + "epoch": 1.02, + "grad_norm": 2.91357684135437, + "learning_rate": 0.00021128752170677248, + "loss": 0.9233, + "step": 2897 + }, + { + "epoch": 1.02, + "grad_norm": 2.6174521446228027, + "learning_rate": 0.00021125031009675018, + "loss": 0.3764, + "step": 2898 + }, + { + "epoch": 1.02, + "grad_norm": 5.59943151473999, + "learning_rate": 0.00021121309848672783, + "loss": 0.5162, + "step": 2899 + }, + { + "epoch": 1.02, + "grad_norm": 6.940462589263916, + "learning_rate": 0.0002111758868767055, + "loss": 0.2316, + "step": 2900 + }, + { + "epoch": 1.02, + "eval_loss": 0.6327888369560242, + "eval_runtime": 51.2836, + "eval_samples_per_second": 42.275, + "eval_steps_per_second": 10.569, + "eval_wer": 0.5484345268984605, + "step": 2900 + }, + { + "epoch": 1.02, + "grad_norm": 2.0894784927368164, + "learning_rate": 0.0002111386752666832, + "loss": 0.1064, + "step": 2901 + }, + { + "epoch": 1.02, + "grad_norm": 3.177110195159912, + "learning_rate": 0.00021110146365666086, + "loss": 0.3558, + "step": 2902 + }, + { + "epoch": 1.02, + "grad_norm": 4.781301021575928, + "learning_rate": 0.00021106425204663854, + "loss": 0.9566, + "step": 2903 + }, + { + "epoch": 1.02, + "grad_norm": 1.9870142936706543, + "learning_rate": 0.00021102704043661618, + "loss": 0.3168, + "step": 2904 + }, + { + "epoch": 1.02, + "grad_norm": 3.021113395690918, + "learning_rate": 0.0002109898288265939, + "loss": 1.3912, + "step": 2905 + }, + { + "epoch": 1.02, + "grad_norm": 1.378859519958496, + "learning_rate": 0.00021095261721657156, + "loss": 0.6019, + "step": 2906 + }, + { + "epoch": 1.02, + "grad_norm": 2.121788740158081, + "learning_rate": 0.0002109154056065492, + "loss": 0.6099, + "step": 2907 + }, + { + "epoch": 1.02, + "grad_norm": 1.3554753065109253, + "learning_rate": 0.00021087819399652691, + "loss": 0.2695, + "step": 2908 + }, + { + "epoch": 1.02, + "grad_norm": 2.836920738220215, + "learning_rate": 0.0002108409823865046, + "loss": 0.4332, + "step": 2909 + }, + { + "epoch": 1.02, + "grad_norm": 2.007983922958374, + "learning_rate": 0.00021080377077648224, + "loss": 0.3614, + "step": 2910 + }, + { + "epoch": 1.02, + "grad_norm": 2.3177947998046875, + "learning_rate": 0.00021076655916645991, + "loss": 0.4861, + "step": 2911 + }, + { + "epoch": 1.02, + "grad_norm": 2.913270950317383, + "learning_rate": 0.0002107293475564376, + "loss": 0.5023, + "step": 2912 + }, + { + "epoch": 1.02, + "grad_norm": 5.127606391906738, + "learning_rate": 0.00021069213594641527, + "loss": 0.8786, + "step": 2913 + }, + { + "epoch": 1.02, + "grad_norm": 2.110982894897461, + "learning_rate": 0.00021065492433639294, + "loss": 0.4902, + "step": 2914 + }, + { + "epoch": 1.02, + "grad_norm": 4.175901889801025, + "learning_rate": 0.0002106177127263706, + "loss": 0.4782, + "step": 2915 + }, + { + "epoch": 1.02, + "grad_norm": 2.3658077716827393, + "learning_rate": 0.0002105805011163483, + "loss": 0.3401, + "step": 2916 + }, + { + "epoch": 1.02, + "grad_norm": 3.592463970184326, + "learning_rate": 0.00021054328950632594, + "loss": 0.882, + "step": 2917 + }, + { + "epoch": 1.02, + "grad_norm": 2.3057944774627686, + "learning_rate": 0.00021050607789630362, + "loss": 0.5945, + "step": 2918 + }, + { + "epoch": 1.02, + "grad_norm": 3.1992056369781494, + "learning_rate": 0.00021046886628628132, + "loss": 0.7865, + "step": 2919 + }, + { + "epoch": 1.02, + "grad_norm": 5.556008815765381, + "learning_rate": 0.00021043165467625897, + "loss": 0.8176, + "step": 2920 + }, + { + "epoch": 1.02, + "grad_norm": 4.94827127456665, + "learning_rate": 0.00021039444306623665, + "loss": 0.3772, + "step": 2921 + }, + { + "epoch": 1.02, + "grad_norm": 2.1931419372558594, + "learning_rate": 0.00021035723145621435, + "loss": 0.1395, + "step": 2922 + }, + { + "epoch": 1.02, + "grad_norm": 2.6224441528320312, + "learning_rate": 0.000210320019846192, + "loss": 0.3006, + "step": 2923 + }, + { + "epoch": 1.02, + "grad_norm": 14.6669282913208, + "learning_rate": 0.00021028280823616967, + "loss": 3.7282, + "step": 2924 + }, + { + "epoch": 1.02, + "grad_norm": 2.3192381858825684, + "learning_rate": 0.00021024559662614732, + "loss": 0.4179, + "step": 2925 + }, + { + "epoch": 1.03, + "grad_norm": 1.563372254371643, + "learning_rate": 0.00021020838501612502, + "loss": 0.1219, + "step": 2926 + }, + { + "epoch": 1.03, + "grad_norm": 1.8706691265106201, + "learning_rate": 0.0002101711734061027, + "loss": 0.1685, + "step": 2927 + }, + { + "epoch": 1.03, + "grad_norm": 2.756892204284668, + "learning_rate": 0.00021013396179608035, + "loss": 0.2088, + "step": 2928 + }, + { + "epoch": 1.03, + "grad_norm": 4.2238030433654785, + "learning_rate": 0.00021009675018605805, + "loss": 1.3, + "step": 2929 + }, + { + "epoch": 1.03, + "grad_norm": 4.2373576164245605, + "learning_rate": 0.0002100595385760357, + "loss": 0.8429, + "step": 2930 + }, + { + "epoch": 1.03, + "grad_norm": 1.899154782295227, + "learning_rate": 0.00021002232696601338, + "loss": 0.524, + "step": 2931 + }, + { + "epoch": 1.03, + "grad_norm": 2.224109411239624, + "learning_rate": 0.00020998511535599105, + "loss": 0.6571, + "step": 2932 + }, + { + "epoch": 1.03, + "grad_norm": 3.5380167961120605, + "learning_rate": 0.00020994790374596873, + "loss": 0.998, + "step": 2933 + }, + { + "epoch": 1.03, + "grad_norm": 1.9109605550765991, + "learning_rate": 0.0002099106921359464, + "loss": 0.2862, + "step": 2934 + }, + { + "epoch": 1.03, + "grad_norm": 3.0752928256988525, + "learning_rate": 0.00020987348052592405, + "loss": 0.4658, + "step": 2935 + }, + { + "epoch": 1.03, + "grad_norm": 2.45890212059021, + "learning_rate": 0.00020983626891590173, + "loss": 0.7092, + "step": 2936 + }, + { + "epoch": 1.03, + "grad_norm": 3.076648235321045, + "learning_rate": 0.00020979905730587943, + "loss": 0.6868, + "step": 2937 + }, + { + "epoch": 1.03, + "grad_norm": 2.497008800506592, + "learning_rate": 0.00020976184569585708, + "loss": 0.2683, + "step": 2938 + }, + { + "epoch": 1.03, + "grad_norm": 1.9469985961914062, + "learning_rate": 0.00020972463408583476, + "loss": 0.2959, + "step": 2939 + }, + { + "epoch": 1.03, + "grad_norm": 1.4684535264968872, + "learning_rate": 0.00020968742247581246, + "loss": 0.1444, + "step": 2940 + }, + { + "epoch": 1.03, + "grad_norm": 2.822787046432495, + "learning_rate": 0.0002096502108657901, + "loss": 0.4749, + "step": 2941 + }, + { + "epoch": 1.03, + "grad_norm": 2.6502792835235596, + "learning_rate": 0.00020961299925576778, + "loss": 0.5122, + "step": 2942 + }, + { + "epoch": 1.03, + "grad_norm": 9.322734832763672, + "learning_rate": 0.00020957578764574543, + "loss": 2.2989, + "step": 2943 + }, + { + "epoch": 1.03, + "grad_norm": 2.103727340698242, + "learning_rate": 0.00020953857603572313, + "loss": 0.444, + "step": 2944 + }, + { + "epoch": 1.03, + "grad_norm": 3.0473310947418213, + "learning_rate": 0.0002095013644257008, + "loss": 0.4347, + "step": 2945 + }, + { + "epoch": 1.03, + "grad_norm": 1.7083921432495117, + "learning_rate": 0.00020946415281567846, + "loss": 0.3226, + "step": 2946 + }, + { + "epoch": 1.03, + "grad_norm": 3.072146415710449, + "learning_rate": 0.00020942694120565616, + "loss": 0.5612, + "step": 2947 + }, + { + "epoch": 1.03, + "grad_norm": 2.4064691066741943, + "learning_rate": 0.0002093897295956338, + "loss": 0.1788, + "step": 2948 + }, + { + "epoch": 1.03, + "grad_norm": 1.3841482400894165, + "learning_rate": 0.00020935251798561149, + "loss": 0.1171, + "step": 2949 + }, + { + "epoch": 1.03, + "grad_norm": 2.551501989364624, + "learning_rate": 0.0002093153063755892, + "loss": 0.616, + "step": 2950 + }, + { + "epoch": 1.03, + "grad_norm": 3.9594438076019287, + "learning_rate": 0.00020927809476556684, + "loss": 0.7705, + "step": 2951 + }, + { + "epoch": 1.03, + "grad_norm": 4.557310104370117, + "learning_rate": 0.00020924088315554451, + "loss": 1.3497, + "step": 2952 + }, + { + "epoch": 1.03, + "grad_norm": 2.093480348587036, + "learning_rate": 0.0002092036715455222, + "loss": 0.2775, + "step": 2953 + }, + { + "epoch": 1.04, + "grad_norm": 3.45406174659729, + "learning_rate": 0.00020916645993549987, + "loss": 0.619, + "step": 2954 + }, + { + "epoch": 1.04, + "grad_norm": 2.039518356323242, + "learning_rate": 0.00020912924832547754, + "loss": 0.9185, + "step": 2955 + }, + { + "epoch": 1.04, + "grad_norm": 2.3017618656158447, + "learning_rate": 0.0002090920367154552, + "loss": 0.5944, + "step": 2956 + }, + { + "epoch": 1.04, + "grad_norm": 1.7939972877502441, + "learning_rate": 0.00020905482510543287, + "loss": 0.5319, + "step": 2957 + }, + { + "epoch": 1.04, + "grad_norm": 29.27108383178711, + "learning_rate": 0.00020901761349541057, + "loss": 5.4024, + "step": 2958 + }, + { + "epoch": 1.04, + "grad_norm": 3.3019933700561523, + "learning_rate": 0.00020898040188538822, + "loss": 0.8981, + "step": 2959 + }, + { + "epoch": 1.04, + "grad_norm": 2.4643125534057617, + "learning_rate": 0.0002089431902753659, + "loss": 0.8616, + "step": 2960 + }, + { + "epoch": 1.04, + "grad_norm": 2.9230358600616455, + "learning_rate": 0.00020890597866534354, + "loss": 0.5856, + "step": 2961 + }, + { + "epoch": 1.04, + "grad_norm": 1.3859621286392212, + "learning_rate": 0.00020886876705532124, + "loss": 0.3128, + "step": 2962 + }, + { + "epoch": 1.04, + "grad_norm": 3.490842342376709, + "learning_rate": 0.00020883155544529892, + "loss": 0.4453, + "step": 2963 + }, + { + "epoch": 1.04, + "grad_norm": 2.791914701461792, + "learning_rate": 0.00020879434383527657, + "loss": 0.6412, + "step": 2964 + }, + { + "epoch": 1.04, + "grad_norm": 2.955834150314331, + "learning_rate": 0.00020875713222525427, + "loss": 0.3226, + "step": 2965 + }, + { + "epoch": 1.04, + "grad_norm": 1.8695831298828125, + "learning_rate": 0.00020871992061523195, + "loss": 0.3433, + "step": 2966 + }, + { + "epoch": 1.04, + "grad_norm": 2.185141086578369, + "learning_rate": 0.0002086827090052096, + "loss": 0.2187, + "step": 2967 + }, + { + "epoch": 1.04, + "grad_norm": 2.4184865951538086, + "learning_rate": 0.0002086454973951873, + "loss": 0.2672, + "step": 2968 + }, + { + "epoch": 1.04, + "grad_norm": 7.882993221282959, + "learning_rate": 0.00020860828578516495, + "loss": 0.2307, + "step": 2969 + }, + { + "epoch": 1.04, + "grad_norm": 1.4877923727035522, + "learning_rate": 0.00020857107417514262, + "loss": 0.1831, + "step": 2970 + }, + { + "epoch": 1.04, + "grad_norm": 1.8400189876556396, + "learning_rate": 0.00020853386256512033, + "loss": 0.3421, + "step": 2971 + }, + { + "epoch": 1.04, + "grad_norm": 1.835120439529419, + "learning_rate": 0.00020849665095509798, + "loss": 0.2359, + "step": 2972 + }, + { + "epoch": 1.04, + "grad_norm": 2.832714080810547, + "learning_rate": 0.00020845943934507565, + "loss": 0.4349, + "step": 2973 + }, + { + "epoch": 1.04, + "grad_norm": 0.8223691582679749, + "learning_rate": 0.0002084222277350533, + "loss": 0.039, + "step": 2974 + }, + { + "epoch": 1.04, + "grad_norm": 3.099144697189331, + "learning_rate": 0.000208385016125031, + "loss": 0.5629, + "step": 2975 + }, + { + "epoch": 1.04, + "grad_norm": 2.728029251098633, + "learning_rate": 0.00020834780451500868, + "loss": 0.3675, + "step": 2976 + }, + { + "epoch": 1.04, + "grad_norm": 2.1751224994659424, + "learning_rate": 0.00020831059290498633, + "loss": 0.1833, + "step": 2977 + }, + { + "epoch": 1.04, + "grad_norm": 8.328917503356934, + "learning_rate": 0.000208273381294964, + "loss": 0.5198, + "step": 2978 + }, + { + "epoch": 1.04, + "grad_norm": 6.681622505187988, + "learning_rate": 0.00020823616968494168, + "loss": 2.3852, + "step": 2979 + }, + { + "epoch": 1.04, + "grad_norm": 3.279292583465576, + "learning_rate": 0.00020819895807491935, + "loss": 1.1333, + "step": 2980 + }, + { + "epoch": 1.04, + "grad_norm": 1.996398687362671, + "learning_rate": 0.00020816174646489703, + "loss": 0.5301, + "step": 2981 + }, + { + "epoch": 1.04, + "grad_norm": 1.8816949129104614, + "learning_rate": 0.00020812453485487468, + "loss": 0.4437, + "step": 2982 + }, + { + "epoch": 1.05, + "grad_norm": 2.7721152305603027, + "learning_rate": 0.00020808732324485238, + "loss": 0.7248, + "step": 2983 + }, + { + "epoch": 1.05, + "grad_norm": 1.3053263425827026, + "learning_rate": 0.00020805011163483006, + "loss": 0.1844, + "step": 2984 + }, + { + "epoch": 1.05, + "grad_norm": 1.6006126403808594, + "learning_rate": 0.0002080129000248077, + "loss": 0.3389, + "step": 2985 + }, + { + "epoch": 1.05, + "grad_norm": 6.084453582763672, + "learning_rate": 0.0002079756884147854, + "loss": 2.0407, + "step": 2986 + }, + { + "epoch": 1.05, + "grad_norm": 1.4119192361831665, + "learning_rate": 0.00020793847680476306, + "loss": 0.2708, + "step": 2987 + }, + { + "epoch": 1.05, + "grad_norm": 2.911585807800293, + "learning_rate": 0.00020790126519474073, + "loss": 0.3626, + "step": 2988 + }, + { + "epoch": 1.05, + "grad_norm": 1.2379357814788818, + "learning_rate": 0.00020786405358471844, + "loss": 0.1421, + "step": 2989 + }, + { + "epoch": 1.05, + "grad_norm": 1.679255485534668, + "learning_rate": 0.00020782684197469609, + "loss": 0.2169, + "step": 2990 + }, + { + "epoch": 1.05, + "grad_norm": 1.825289011001587, + "learning_rate": 0.00020778963036467376, + "loss": 0.1638, + "step": 2991 + }, + { + "epoch": 1.05, + "grad_norm": 2.66688871383667, + "learning_rate": 0.0002077524187546514, + "loss": 0.6995, + "step": 2992 + }, + { + "epoch": 1.05, + "grad_norm": 3.020879030227661, + "learning_rate": 0.0002077152071446291, + "loss": 0.4142, + "step": 2993 + }, + { + "epoch": 1.05, + "grad_norm": 2.3098480701446533, + "learning_rate": 0.0002076779955346068, + "loss": 0.371, + "step": 2994 + }, + { + "epoch": 1.05, + "grad_norm": 4.051997661590576, + "learning_rate": 0.00020764078392458444, + "loss": 0.4462, + "step": 2995 + }, + { + "epoch": 1.05, + "grad_norm": 2.9029579162597656, + "learning_rate": 0.00020760357231456214, + "loss": 0.3817, + "step": 2996 + }, + { + "epoch": 1.05, + "grad_norm": 5.890556812286377, + "learning_rate": 0.00020756636070453982, + "loss": 1.261, + "step": 2997 + }, + { + "epoch": 1.05, + "grad_norm": 2.089731216430664, + "learning_rate": 0.00020752914909451746, + "loss": 0.3077, + "step": 2998 + }, + { + "epoch": 1.05, + "grad_norm": 4.995520114898682, + "learning_rate": 0.00020749193748449514, + "loss": 1.0239, + "step": 2999 + }, + { + "epoch": 1.05, + "grad_norm": 4.084024429321289, + "learning_rate": 0.00020745472587447282, + "loss": 0.6881, + "step": 3000 + }, + { + "epoch": 1.05, + "eval_loss": 0.6195849776268005, + "eval_runtime": 50.8747, + "eval_samples_per_second": 42.615, + "eval_steps_per_second": 10.654, + "eval_wer": 0.5003459609064176, + "step": 3000 + }, + { + "epoch": 1.05, + "grad_norm": 7.645015239715576, + "learning_rate": 0.0002074175142644505, + "loss": 0.9345, + "step": 3001 + }, + { + "epoch": 1.05, + "grad_norm": 3.707676887512207, + "learning_rate": 0.00020738030265442817, + "loss": 0.3963, + "step": 3002 + }, + { + "epoch": 1.05, + "grad_norm": 2.314399003982544, + "learning_rate": 0.00020734309104440582, + "loss": 0.2629, + "step": 3003 + }, + { + "epoch": 1.05, + "grad_norm": 5.601327896118164, + "learning_rate": 0.00020730587943438352, + "loss": 0.5629, + "step": 3004 + }, + { + "epoch": 1.05, + "grad_norm": 3.5226032733917236, + "learning_rate": 0.00020726866782436117, + "loss": 1.5056, + "step": 3005 + }, + { + "epoch": 1.05, + "grad_norm": 1.585764765739441, + "learning_rate": 0.00020723145621433884, + "loss": 0.4755, + "step": 3006 + }, + { + "epoch": 1.05, + "grad_norm": 1.83793044090271, + "learning_rate": 0.00020719424460431655, + "loss": 0.5136, + "step": 3007 + }, + { + "epoch": 1.05, + "grad_norm": 2.307407855987549, + "learning_rate": 0.0002071570329942942, + "loss": 0.5605, + "step": 3008 + }, + { + "epoch": 1.05, + "grad_norm": 1.9383962154388428, + "learning_rate": 0.00020711982138427187, + "loss": 0.2642, + "step": 3009 + }, + { + "epoch": 1.05, + "grad_norm": 5.103401184082031, + "learning_rate": 0.00020708260977424957, + "loss": 0.685, + "step": 3010 + }, + { + "epoch": 1.06, + "grad_norm": 2.193871259689331, + "learning_rate": 0.00020704539816422722, + "loss": 0.363, + "step": 3011 + }, + { + "epoch": 1.06, + "grad_norm": 2.350487232208252, + "learning_rate": 0.0002070081865542049, + "loss": 0.4608, + "step": 3012 + }, + { + "epoch": 1.06, + "grad_norm": 1.3571010828018188, + "learning_rate": 0.00020697097494418255, + "loss": 0.1649, + "step": 3013 + }, + { + "epoch": 1.06, + "grad_norm": 2.3821628093719482, + "learning_rate": 0.00020693376333416025, + "loss": 0.2691, + "step": 3014 + }, + { + "epoch": 1.06, + "grad_norm": 1.663122296333313, + "learning_rate": 0.00020689655172413793, + "loss": 0.3526, + "step": 3015 + }, + { + "epoch": 1.06, + "grad_norm": 3.3357670307159424, + "learning_rate": 0.00020685934011411557, + "loss": 0.3898, + "step": 3016 + }, + { + "epoch": 1.06, + "grad_norm": 4.479585647583008, + "learning_rate": 0.00020682212850409328, + "loss": 0.2298, + "step": 3017 + }, + { + "epoch": 1.06, + "grad_norm": 3.3683953285217285, + "learning_rate": 0.00020678491689407093, + "loss": 0.4032, + "step": 3018 + }, + { + "epoch": 1.06, + "grad_norm": 12.573981285095215, + "learning_rate": 0.0002067477052840486, + "loss": 3.1628, + "step": 3019 + }, + { + "epoch": 1.06, + "grad_norm": 2.957279682159424, + "learning_rate": 0.00020671049367402628, + "loss": 0.6773, + "step": 3020 + }, + { + "epoch": 1.06, + "grad_norm": 2.875685453414917, + "learning_rate": 0.00020667328206400395, + "loss": 0.6258, + "step": 3021 + }, + { + "epoch": 1.06, + "grad_norm": 4.08288049697876, + "learning_rate": 0.00020663607045398163, + "loss": 1.0724, + "step": 3022 + }, + { + "epoch": 1.06, + "grad_norm": 0.861497163772583, + "learning_rate": 0.00020659885884395928, + "loss": 0.0636, + "step": 3023 + }, + { + "epoch": 1.06, + "grad_norm": 3.3727078437805176, + "learning_rate": 0.00020656164723393695, + "loss": 0.5172, + "step": 3024 + }, + { + "epoch": 1.06, + "grad_norm": 3.8272769451141357, + "learning_rate": 0.00020652443562391466, + "loss": 0.6818, + "step": 3025 + }, + { + "epoch": 1.06, + "grad_norm": 0.6726584434509277, + "learning_rate": 0.0002064872240138923, + "loss": 0.0578, + "step": 3026 + }, + { + "epoch": 1.06, + "grad_norm": 4.285344123840332, + "learning_rate": 0.00020645001240386998, + "loss": 0.7979, + "step": 3027 + }, + { + "epoch": 1.06, + "grad_norm": 7.106195449829102, + "learning_rate": 0.00020641280079384768, + "loss": 3.3654, + "step": 3028 + }, + { + "epoch": 1.06, + "grad_norm": 1.9665549993515015, + "learning_rate": 0.00020637558918382533, + "loss": 0.1718, + "step": 3029 + }, + { + "epoch": 1.06, + "grad_norm": 1.6774803400039673, + "learning_rate": 0.000206338377573803, + "loss": 0.6851, + "step": 3030 + }, + { + "epoch": 1.06, + "grad_norm": 2.010876417160034, + "learning_rate": 0.00020630116596378066, + "loss": 0.4386, + "step": 3031 + }, + { + "epoch": 1.06, + "grad_norm": 1.4450249671936035, + "learning_rate": 0.00020626395435375836, + "loss": 0.3772, + "step": 3032 + }, + { + "epoch": 1.06, + "grad_norm": 2.3104891777038574, + "learning_rate": 0.00020622674274373604, + "loss": 0.3893, + "step": 3033 + }, + { + "epoch": 1.06, + "grad_norm": 1.3111859560012817, + "learning_rate": 0.00020618953113371368, + "loss": 0.2572, + "step": 3034 + }, + { + "epoch": 1.06, + "grad_norm": 2.2951879501342773, + "learning_rate": 0.0002061523195236914, + "loss": 0.3824, + "step": 3035 + }, + { + "epoch": 1.06, + "grad_norm": 2.6300010681152344, + "learning_rate": 0.00020611510791366904, + "loss": 0.5777, + "step": 3036 + }, + { + "epoch": 1.06, + "grad_norm": 3.6638519763946533, + "learning_rate": 0.0002060778963036467, + "loss": 1.1108, + "step": 3037 + }, + { + "epoch": 1.06, + "grad_norm": 2.498910427093506, + "learning_rate": 0.00020604068469362442, + "loss": 0.3768, + "step": 3038 + }, + { + "epoch": 1.06, + "grad_norm": 2.4238033294677734, + "learning_rate": 0.00020600347308360206, + "loss": 0.5088, + "step": 3039 + }, + { + "epoch": 1.07, + "grad_norm": 1.8531930446624756, + "learning_rate": 0.00020596626147357974, + "loss": 0.2276, + "step": 3040 + }, + { + "epoch": 1.07, + "grad_norm": 2.1931774616241455, + "learning_rate": 0.00020592904986355742, + "loss": 0.3416, + "step": 3041 + }, + { + "epoch": 1.07, + "grad_norm": 1.7940934896469116, + "learning_rate": 0.0002058918382535351, + "loss": 0.2504, + "step": 3042 + }, + { + "epoch": 1.07, + "grad_norm": 1.3244794607162476, + "learning_rate": 0.00020585462664351277, + "loss": 0.1899, + "step": 3043 + }, + { + "epoch": 1.07, + "grad_norm": 2.233159303665161, + "learning_rate": 0.00020581741503349042, + "loss": 0.4307, + "step": 3044 + }, + { + "epoch": 1.07, + "grad_norm": 4.452759265899658, + "learning_rate": 0.0002057802034234681, + "loss": 1.5291, + "step": 3045 + }, + { + "epoch": 1.07, + "grad_norm": 6.447901248931885, + "learning_rate": 0.0002057429918134458, + "loss": 0.3252, + "step": 3046 + }, + { + "epoch": 1.07, + "grad_norm": 6.9542765617370605, + "learning_rate": 0.00020570578020342344, + "loss": 0.2644, + "step": 3047 + }, + { + "epoch": 1.07, + "grad_norm": 3.4594507217407227, + "learning_rate": 0.00020566856859340112, + "loss": 0.6206, + "step": 3048 + }, + { + "epoch": 1.07, + "grad_norm": 4.8815388679504395, + "learning_rate": 0.0002056313569833788, + "loss": 1.6332, + "step": 3049 + }, + { + "epoch": 1.07, + "grad_norm": 4.452385902404785, + "learning_rate": 0.00020559414537335647, + "loss": 0.9805, + "step": 3050 + }, + { + "epoch": 1.07, + "grad_norm": 3.0658645629882812, + "learning_rate": 0.00020555693376333415, + "loss": 0.5303, + "step": 3051 + }, + { + "epoch": 1.07, + "grad_norm": 4.26853084564209, + "learning_rate": 0.0002055197221533118, + "loss": 0.3217, + "step": 3052 + }, + { + "epoch": 1.07, + "grad_norm": 2.2705271244049072, + "learning_rate": 0.0002054825105432895, + "loss": 0.3074, + "step": 3053 + }, + { + "epoch": 1.07, + "grad_norm": 2.20687198638916, + "learning_rate": 0.00020544529893326717, + "loss": 0.2464, + "step": 3054 + }, + { + "epoch": 1.07, + "grad_norm": 3.222644090652466, + "learning_rate": 0.00020540808732324482, + "loss": 1.1983, + "step": 3055 + }, + { + "epoch": 1.07, + "grad_norm": 1.951676368713379, + "learning_rate": 0.00020537087571322253, + "loss": 0.4934, + "step": 3056 + }, + { + "epoch": 1.07, + "grad_norm": 1.844739317893982, + "learning_rate": 0.00020533366410320017, + "loss": 0.4015, + "step": 3057 + }, + { + "epoch": 1.07, + "grad_norm": 2.413743019104004, + "learning_rate": 0.00020529645249317785, + "loss": 0.4277, + "step": 3058 + }, + { + "epoch": 1.07, + "grad_norm": 2.190391778945923, + "learning_rate": 0.00020525924088315555, + "loss": 0.3995, + "step": 3059 + }, + { + "epoch": 1.07, + "grad_norm": 3.3840878009796143, + "learning_rate": 0.0002052220292731332, + "loss": 0.5431, + "step": 3060 + }, + { + "epoch": 1.07, + "grad_norm": 3.4068565368652344, + "learning_rate": 0.00020518481766311088, + "loss": 0.2978, + "step": 3061 + }, + { + "epoch": 1.07, + "grad_norm": 1.8028383255004883, + "learning_rate": 0.00020514760605308853, + "loss": 0.5075, + "step": 3062 + }, + { + "epoch": 1.07, + "grad_norm": 2.067969799041748, + "learning_rate": 0.00020511039444306623, + "loss": 0.3496, + "step": 3063 + }, + { + "epoch": 1.07, + "grad_norm": 3.3436696529388428, + "learning_rate": 0.0002050731828330439, + "loss": 0.5107, + "step": 3064 + }, + { + "epoch": 1.07, + "grad_norm": 2.0510003566741943, + "learning_rate": 0.00020503597122302155, + "loss": 0.3864, + "step": 3065 + }, + { + "epoch": 1.07, + "grad_norm": 1.5820317268371582, + "learning_rate": 0.00020499875961299923, + "loss": 0.2344, + "step": 3066 + }, + { + "epoch": 1.07, + "grad_norm": 2.2191803455352783, + "learning_rate": 0.00020496154800297693, + "loss": 0.493, + "step": 3067 + }, + { + "epoch": 1.07, + "grad_norm": 1.7818882465362549, + "learning_rate": 0.00020492433639295458, + "loss": 0.1892, + "step": 3068 + }, + { + "epoch": 1.08, + "grad_norm": 3.912038564682007, + "learning_rate": 0.00020488712478293226, + "loss": 0.6714, + "step": 3069 + }, + { + "epoch": 1.08, + "grad_norm": 2.960134506225586, + "learning_rate": 0.00020484991317290993, + "loss": 0.264, + "step": 3070 + }, + { + "epoch": 1.08, + "grad_norm": 2.104323625564575, + "learning_rate": 0.0002048127015628876, + "loss": 0.5416, + "step": 3071 + }, + { + "epoch": 1.08, + "grad_norm": 4.2121148109436035, + "learning_rate": 0.00020477548995286528, + "loss": 1.9044, + "step": 3072 + }, + { + "epoch": 1.08, + "grad_norm": 2.713250160217285, + "learning_rate": 0.00020473827834284293, + "loss": 0.3652, + "step": 3073 + }, + { + "epoch": 1.08, + "grad_norm": 0.9541451334953308, + "learning_rate": 0.00020470106673282064, + "loss": 0.0514, + "step": 3074 + }, + { + "epoch": 1.08, + "grad_norm": 9.565132141113281, + "learning_rate": 0.00020466385512279828, + "loss": 1.9225, + "step": 3075 + }, + { + "epoch": 1.08, + "grad_norm": 3.1412031650543213, + "learning_rate": 0.00020462664351277596, + "loss": 0.3966, + "step": 3076 + }, + { + "epoch": 1.08, + "grad_norm": 1.8707399368286133, + "learning_rate": 0.00020458943190275366, + "loss": 0.2158, + "step": 3077 + }, + { + "epoch": 1.08, + "grad_norm": 2.589046001434326, + "learning_rate": 0.0002045522202927313, + "loss": 0.3275, + "step": 3078 + }, + { + "epoch": 1.08, + "grad_norm": 4.2253804206848145, + "learning_rate": 0.000204515008682709, + "loss": 0.3441, + "step": 3079 + }, + { + "epoch": 1.08, + "grad_norm": 3.184797525405884, + "learning_rate": 0.00020447779707268664, + "loss": 1.0592, + "step": 3080 + }, + { + "epoch": 1.08, + "grad_norm": 2.5358669757843018, + "learning_rate": 0.00020444058546266434, + "loss": 0.6669, + "step": 3081 + }, + { + "epoch": 1.08, + "grad_norm": 3.5697948932647705, + "learning_rate": 0.00020440337385264201, + "loss": 0.4798, + "step": 3082 + }, + { + "epoch": 1.08, + "grad_norm": 1.44380521774292, + "learning_rate": 0.00020436616224261966, + "loss": 0.4084, + "step": 3083 + }, + { + "epoch": 1.08, + "grad_norm": 12.404438972473145, + "learning_rate": 0.00020432895063259737, + "loss": 4.2819, + "step": 3084 + }, + { + "epoch": 1.08, + "grad_norm": 2.316073417663574, + "learning_rate": 0.00020429173902257504, + "loss": 0.6902, + "step": 3085 + }, + { + "epoch": 1.08, + "grad_norm": 1.3172087669372559, + "learning_rate": 0.0002042545274125527, + "loss": 0.2745, + "step": 3086 + }, + { + "epoch": 1.08, + "grad_norm": 2.7705490589141846, + "learning_rate": 0.00020421731580253037, + "loss": 1.3201, + "step": 3087 + }, + { + "epoch": 1.08, + "grad_norm": 2.1392929553985596, + "learning_rate": 0.00020418010419250804, + "loss": 0.2256, + "step": 3088 + }, + { + "epoch": 1.08, + "grad_norm": 2.1735339164733887, + "learning_rate": 0.00020414289258248572, + "loss": 0.7259, + "step": 3089 + }, + { + "epoch": 1.08, + "grad_norm": 1.7704854011535645, + "learning_rate": 0.0002041056809724634, + "loss": 0.3775, + "step": 3090 + }, + { + "epoch": 1.08, + "grad_norm": 2.1959261894226074, + "learning_rate": 0.00020406846936244107, + "loss": 0.3028, + "step": 3091 + }, + { + "epoch": 1.08, + "grad_norm": 1.4941906929016113, + "learning_rate": 0.00020403125775241875, + "loss": 0.2983, + "step": 3092 + }, + { + "epoch": 1.08, + "grad_norm": 1.2572664022445679, + "learning_rate": 0.0002039940461423964, + "loss": 0.1432, + "step": 3093 + }, + { + "epoch": 1.08, + "grad_norm": 1.722988247871399, + "learning_rate": 0.00020395683453237407, + "loss": 0.252, + "step": 3094 + }, + { + "epoch": 1.08, + "grad_norm": 1.260048508644104, + "learning_rate": 0.00020391962292235177, + "loss": 0.2426, + "step": 3095 + }, + { + "epoch": 1.08, + "grad_norm": 1.990040898323059, + "learning_rate": 0.00020388241131232942, + "loss": 0.2344, + "step": 3096 + }, + { + "epoch": 1.09, + "grad_norm": 4.270902156829834, + "learning_rate": 0.0002038451997023071, + "loss": 0.445, + "step": 3097 + }, + { + "epoch": 1.09, + "grad_norm": 6.141219615936279, + "learning_rate": 0.0002038079880922848, + "loss": 0.8813, + "step": 3098 + }, + { + "epoch": 1.09, + "grad_norm": 4.40023946762085, + "learning_rate": 0.00020377077648226245, + "loss": 0.3991, + "step": 3099 + }, + { + "epoch": 1.09, + "grad_norm": 2.3631558418273926, + "learning_rate": 0.00020373356487224012, + "loss": 0.2522, + "step": 3100 + }, + { + "epoch": 1.09, + "eval_loss": 0.5331594347953796, + "eval_runtime": 50.9883, + "eval_samples_per_second": 42.52, + "eval_steps_per_second": 10.63, + "eval_wer": 0.46116588825462723, + "step": 3100 + }, + { + "epoch": 1.09, + "grad_norm": 6.387331485748291, + "learning_rate": 0.00020369635326221777, + "loss": 0.5592, + "step": 3101 + }, + { + "epoch": 1.09, + "grad_norm": 3.025975227355957, + "learning_rate": 0.00020365914165219548, + "loss": 0.1448, + "step": 3102 + }, + { + "epoch": 1.09, + "grad_norm": 5.364758014678955, + "learning_rate": 0.00020362193004217315, + "loss": 0.854, + "step": 3103 + }, + { + "epoch": 1.09, + "grad_norm": 3.2311348915100098, + "learning_rate": 0.0002035847184321508, + "loss": 0.2019, + "step": 3104 + }, + { + "epoch": 1.09, + "grad_norm": 2.2923085689544678, + "learning_rate": 0.0002035475068221285, + "loss": 0.7306, + "step": 3105 + }, + { + "epoch": 1.09, + "grad_norm": 1.8082457780838013, + "learning_rate": 0.00020351029521210615, + "loss": 0.74, + "step": 3106 + }, + { + "epoch": 1.09, + "grad_norm": 3.0178422927856445, + "learning_rate": 0.00020347308360208383, + "loss": 0.5268, + "step": 3107 + }, + { + "epoch": 1.09, + "grad_norm": 1.8592593669891357, + "learning_rate": 0.0002034358719920615, + "loss": 0.3687, + "step": 3108 + }, + { + "epoch": 1.09, + "grad_norm": 2.91344952583313, + "learning_rate": 0.00020339866038203918, + "loss": 0.3192, + "step": 3109 + }, + { + "epoch": 1.09, + "grad_norm": 4.410944938659668, + "learning_rate": 0.00020336144877201686, + "loss": 0.3268, + "step": 3110 + }, + { + "epoch": 1.09, + "grad_norm": 2.5860300064086914, + "learning_rate": 0.00020332423716199453, + "loss": 0.6365, + "step": 3111 + }, + { + "epoch": 1.09, + "grad_norm": 2.230043411254883, + "learning_rate": 0.0002032870255519722, + "loss": 0.3995, + "step": 3112 + }, + { + "epoch": 1.09, + "grad_norm": 2.6025543212890625, + "learning_rate": 0.00020324981394194988, + "loss": 0.534, + "step": 3113 + }, + { + "epoch": 1.09, + "grad_norm": 3.757107973098755, + "learning_rate": 0.00020321260233192753, + "loss": 0.6327, + "step": 3114 + }, + { + "epoch": 1.09, + "grad_norm": 1.6777108907699585, + "learning_rate": 0.0002031753907219052, + "loss": 0.1812, + "step": 3115 + }, + { + "epoch": 1.09, + "grad_norm": 2.1241323947906494, + "learning_rate": 0.0002031381791118829, + "loss": 0.3462, + "step": 3116 + }, + { + "epoch": 1.09, + "grad_norm": 2.4214656352996826, + "learning_rate": 0.00020310096750186056, + "loss": 0.5252, + "step": 3117 + }, + { + "epoch": 1.09, + "grad_norm": 1.9669643640518188, + "learning_rate": 0.00020306375589183823, + "loss": 0.2442, + "step": 3118 + }, + { + "epoch": 1.09, + "grad_norm": 2.5513925552368164, + "learning_rate": 0.00020302654428181588, + "loss": 0.7533, + "step": 3119 + }, + { + "epoch": 1.09, + "grad_norm": 2.9641880989074707, + "learning_rate": 0.00020298933267179359, + "loss": 0.5389, + "step": 3120 + }, + { + "epoch": 1.09, + "grad_norm": 2.38806414604187, + "learning_rate": 0.00020295212106177126, + "loss": 0.1863, + "step": 3121 + }, + { + "epoch": 1.09, + "grad_norm": 3.5284969806671143, + "learning_rate": 0.0002029149094517489, + "loss": 0.6842, + "step": 3122 + }, + { + "epoch": 1.09, + "grad_norm": 3.640097141265869, + "learning_rate": 0.00020287769784172661, + "loss": 0.9635, + "step": 3123 + }, + { + "epoch": 1.09, + "grad_norm": 3.8287434577941895, + "learning_rate": 0.00020284048623170426, + "loss": 1.0185, + "step": 3124 + }, + { + "epoch": 1.09, + "grad_norm": 2.4016950130462646, + "learning_rate": 0.00020280327462168194, + "loss": 0.3064, + "step": 3125 + }, + { + "epoch": 1.1, + "grad_norm": 4.09964656829834, + "learning_rate": 0.00020276606301165964, + "loss": 0.3252, + "step": 3126 + }, + { + "epoch": 1.1, + "grad_norm": 1.654166579246521, + "learning_rate": 0.0002027288514016373, + "loss": 0.2792, + "step": 3127 + }, + { + "epoch": 1.1, + "grad_norm": 4.5107622146606445, + "learning_rate": 0.00020269163979161497, + "loss": 0.4426, + "step": 3128 + }, + { + "epoch": 1.1, + "grad_norm": 2.8204357624053955, + "learning_rate": 0.00020265442818159264, + "loss": 0.2263, + "step": 3129 + }, + { + "epoch": 1.1, + "grad_norm": 2.2634711265563965, + "learning_rate": 0.00020261721657157032, + "loss": 0.7353, + "step": 3130 + }, + { + "epoch": 1.1, + "grad_norm": 2.142753839492798, + "learning_rate": 0.000202580004961548, + "loss": 0.6885, + "step": 3131 + }, + { + "epoch": 1.1, + "grad_norm": 1.8485157489776611, + "learning_rate": 0.00020254279335152564, + "loss": 0.4566, + "step": 3132 + }, + { + "epoch": 1.1, + "grad_norm": 2.232492685317993, + "learning_rate": 0.00020250558174150334, + "loss": 0.4872, + "step": 3133 + }, + { + "epoch": 1.1, + "grad_norm": 2.160388231277466, + "learning_rate": 0.00020246837013148102, + "loss": 0.3397, + "step": 3134 + }, + { + "epoch": 1.1, + "grad_norm": 1.7792160511016846, + "learning_rate": 0.00020243115852145867, + "loss": 0.5532, + "step": 3135 + }, + { + "epoch": 1.1, + "grad_norm": 1.8902424573898315, + "learning_rate": 0.00020239394691143634, + "loss": 0.3345, + "step": 3136 + }, + { + "epoch": 1.1, + "grad_norm": 2.7055909633636475, + "learning_rate": 0.00020235673530141402, + "loss": 0.5375, + "step": 3137 + }, + { + "epoch": 1.1, + "grad_norm": 1.9411323070526123, + "learning_rate": 0.0002023195236913917, + "loss": 0.3124, + "step": 3138 + }, + { + "epoch": 1.1, + "grad_norm": 1.9080373048782349, + "learning_rate": 0.00020228231208136937, + "loss": 0.2328, + "step": 3139 + }, + { + "epoch": 1.1, + "grad_norm": 2.3703057765960693, + "learning_rate": 0.00020224510047134702, + "loss": 0.4152, + "step": 3140 + }, + { + "epoch": 1.1, + "grad_norm": 3.085244655609131, + "learning_rate": 0.00020220788886132472, + "loss": 0.6424, + "step": 3141 + }, + { + "epoch": 1.1, + "grad_norm": 4.0253801345825195, + "learning_rate": 0.0002021706772513024, + "loss": 0.3865, + "step": 3142 + }, + { + "epoch": 1.1, + "grad_norm": 2.0360500812530518, + "learning_rate": 0.00020213346564128005, + "loss": 0.2447, + "step": 3143 + }, + { + "epoch": 1.1, + "grad_norm": 2.123710870742798, + "learning_rate": 0.00020209625403125775, + "loss": 0.3863, + "step": 3144 + }, + { + "epoch": 1.1, + "grad_norm": 2.0543999671936035, + "learning_rate": 0.0002020590424212354, + "loss": 0.3734, + "step": 3145 + }, + { + "epoch": 1.1, + "grad_norm": 2.6033356189727783, + "learning_rate": 0.00020202183081121308, + "loss": 1.1586, + "step": 3146 + }, + { + "epoch": 1.1, + "grad_norm": 2.208231210708618, + "learning_rate": 0.00020198461920119078, + "loss": 0.4899, + "step": 3147 + }, + { + "epoch": 1.1, + "grad_norm": 2.316422700881958, + "learning_rate": 0.00020194740759116843, + "loss": 0.7458, + "step": 3148 + }, + { + "epoch": 1.1, + "grad_norm": 2.228898525238037, + "learning_rate": 0.0002019101959811461, + "loss": 0.355, + "step": 3149 + }, + { + "epoch": 1.1, + "grad_norm": 3.5317399501800537, + "learning_rate": 0.00020187298437112375, + "loss": 0.3847, + "step": 3150 + }, + { + "epoch": 1.1, + "grad_norm": 2.578821897506714, + "learning_rate": 0.00020183577276110145, + "loss": 0.3668, + "step": 3151 + }, + { + "epoch": 1.1, + "grad_norm": 4.172102928161621, + "learning_rate": 0.00020179856115107913, + "loss": 0.5389, + "step": 3152 + }, + { + "epoch": 1.1, + "grad_norm": 2.7270286083221436, + "learning_rate": 0.00020176134954105678, + "loss": 0.4602, + "step": 3153 + }, + { + "epoch": 1.11, + "grad_norm": 3.5921061038970947, + "learning_rate": 0.00020172413793103448, + "loss": 0.3265, + "step": 3154 + }, + { + "epoch": 1.11, + "grad_norm": 1.7611098289489746, + "learning_rate": 0.00020168692632101216, + "loss": 0.359, + "step": 3155 + }, + { + "epoch": 1.11, + "grad_norm": 2.818851947784424, + "learning_rate": 0.0002016497147109898, + "loss": 0.8464, + "step": 3156 + }, + { + "epoch": 1.11, + "grad_norm": 2.597490072250366, + "learning_rate": 0.00020161250310096748, + "loss": 0.6396, + "step": 3157 + }, + { + "epoch": 1.11, + "grad_norm": 4.247330665588379, + "learning_rate": 0.00020157529149094516, + "loss": 1.4697, + "step": 3158 + }, + { + "epoch": 1.11, + "grad_norm": 2.6339824199676514, + "learning_rate": 0.00020153807988092283, + "loss": 0.6861, + "step": 3159 + }, + { + "epoch": 1.11, + "grad_norm": 1.2723904848098755, + "learning_rate": 0.0002015008682709005, + "loss": 0.2326, + "step": 3160 + }, + { + "epoch": 1.11, + "grad_norm": 2.37565016746521, + "learning_rate": 0.00020146365666087816, + "loss": 0.2286, + "step": 3161 + }, + { + "epoch": 1.11, + "grad_norm": 3.4451630115509033, + "learning_rate": 0.00020142644505085586, + "loss": 0.9374, + "step": 3162 + }, + { + "epoch": 1.11, + "grad_norm": 2.457343101501465, + "learning_rate": 0.0002013892334408335, + "loss": 0.3735, + "step": 3163 + }, + { + "epoch": 1.11, + "grad_norm": 2.033566951751709, + "learning_rate": 0.00020135202183081119, + "loss": 0.2974, + "step": 3164 + }, + { + "epoch": 1.11, + "grad_norm": 1.4532384872436523, + "learning_rate": 0.0002013148102207889, + "loss": 0.2587, + "step": 3165 + }, + { + "epoch": 1.11, + "grad_norm": 9.672016143798828, + "learning_rate": 0.00020127759861076654, + "loss": 2.1447, + "step": 3166 + }, + { + "epoch": 1.11, + "grad_norm": 3.0728158950805664, + "learning_rate": 0.0002012403870007442, + "loss": 0.4088, + "step": 3167 + }, + { + "epoch": 1.11, + "grad_norm": 3.4193506240844727, + "learning_rate": 0.00020120317539072186, + "loss": 0.6485, + "step": 3168 + }, + { + "epoch": 1.11, + "grad_norm": 2.32649302482605, + "learning_rate": 0.00020116596378069956, + "loss": 0.5016, + "step": 3169 + }, + { + "epoch": 1.11, + "grad_norm": 1.4236040115356445, + "learning_rate": 0.00020112875217067724, + "loss": 0.2539, + "step": 3170 + }, + { + "epoch": 1.11, + "grad_norm": 2.348944902420044, + "learning_rate": 0.0002010915405606549, + "loss": 0.3252, + "step": 3171 + }, + { + "epoch": 1.11, + "grad_norm": 0.6811813712120056, + "learning_rate": 0.0002010543289506326, + "loss": 0.0438, + "step": 3172 + }, + { + "epoch": 1.11, + "grad_norm": 2.597822427749634, + "learning_rate": 0.00020101711734061027, + "loss": 0.3622, + "step": 3173 + }, + { + "epoch": 1.11, + "grad_norm": 1.7118090391159058, + "learning_rate": 0.00020097990573058792, + "loss": 0.2325, + "step": 3174 + }, + { + "epoch": 1.11, + "grad_norm": 2.546022415161133, + "learning_rate": 0.00020094269412056562, + "loss": 0.2348, + "step": 3175 + }, + { + "epoch": 1.11, + "grad_norm": 1.8402515649795532, + "learning_rate": 0.00020090548251054327, + "loss": 0.2659, + "step": 3176 + }, + { + "epoch": 1.11, + "grad_norm": 3.6556687355041504, + "learning_rate": 0.00020086827090052094, + "loss": 0.2047, + "step": 3177 + }, + { + "epoch": 1.11, + "grad_norm": 3.188159942626953, + "learning_rate": 0.00020083105929049862, + "loss": 0.3726, + "step": 3178 + }, + { + "epoch": 1.11, + "grad_norm": NaN, + "learning_rate": 0.00020083105929049862, + "loss": 0.0813, + "step": 3179 + }, + { + "epoch": 1.11, + "grad_norm": 2.2536139488220215, + "learning_rate": 0.0002007938476804763, + "loss": 0.7854, + "step": 3180 + }, + { + "epoch": 1.11, + "grad_norm": 1.8012816905975342, + "learning_rate": 0.00020075663607045397, + "loss": 0.406, + "step": 3181 + }, + { + "epoch": 1.11, + "grad_norm": 2.1195740699768066, + "learning_rate": 0.00020071942446043162, + "loss": 0.309, + "step": 3182 + }, + { + "epoch": 1.12, + "grad_norm": 2.053685188293457, + "learning_rate": 0.0002006822128504093, + "loss": 0.485, + "step": 3183 + }, + { + "epoch": 1.12, + "grad_norm": 2.672041654586792, + "learning_rate": 0.000200645001240387, + "loss": 0.4533, + "step": 3184 + }, + { + "epoch": 1.12, + "grad_norm": 3.1025266647338867, + "learning_rate": 0.00020060778963036465, + "loss": 0.9278, + "step": 3185 + }, + { + "epoch": 1.12, + "grad_norm": 1.895678162574768, + "learning_rate": 0.00020057057802034232, + "loss": 0.3792, + "step": 3186 + }, + { + "epoch": 1.12, + "grad_norm": 3.569613218307495, + "learning_rate": 0.00020053336641032003, + "loss": 0.9807, + "step": 3187 + }, + { + "epoch": 1.12, + "grad_norm": 2.383301019668579, + "learning_rate": 0.00020049615480029767, + "loss": 0.4056, + "step": 3188 + }, + { + "epoch": 1.12, + "grad_norm": 5.517592430114746, + "learning_rate": 0.00020045894319027535, + "loss": 0.4775, + "step": 3189 + }, + { + "epoch": 1.12, + "grad_norm": 3.9130759239196777, + "learning_rate": 0.000200421731580253, + "loss": 1.5079, + "step": 3190 + }, + { + "epoch": 1.12, + "grad_norm": 1.221430778503418, + "learning_rate": 0.0002003845199702307, + "loss": 0.1815, + "step": 3191 + }, + { + "epoch": 1.12, + "grad_norm": 1.0000752210617065, + "learning_rate": 0.00020034730836020838, + "loss": 0.1435, + "step": 3192 + }, + { + "epoch": 1.12, + "grad_norm": 2.36952543258667, + "learning_rate": 0.00020031009675018603, + "loss": 0.4677, + "step": 3193 + }, + { + "epoch": 1.12, + "grad_norm": 1.628010869026184, + "learning_rate": 0.00020027288514016373, + "loss": 0.2069, + "step": 3194 + }, + { + "epoch": 1.12, + "grad_norm": 3.232687473297119, + "learning_rate": 0.00020023567353014138, + "loss": 0.4343, + "step": 3195 + }, + { + "epoch": 1.12, + "grad_norm": 2.015259027481079, + "learning_rate": 0.00020019846192011905, + "loss": 0.3253, + "step": 3196 + }, + { + "epoch": 1.12, + "grad_norm": 1.6881203651428223, + "learning_rate": 0.00020016125031009676, + "loss": 0.2276, + "step": 3197 + }, + { + "epoch": 1.12, + "grad_norm": 2.478654384613037, + "learning_rate": 0.0002001240387000744, + "loss": 0.4578, + "step": 3198 + }, + { + "epoch": 1.12, + "grad_norm": 3.1310973167419434, + "learning_rate": 0.00020008682709005208, + "loss": 0.3007, + "step": 3199 + }, + { + "epoch": 1.12, + "grad_norm": 2.1835124492645264, + "learning_rate": 0.00020004961548002976, + "loss": 0.4325, + "step": 3200 + }, + { + "epoch": 1.12, + "eval_loss": 0.5288262367248535, + "eval_runtime": 51.0509, + "eval_samples_per_second": 42.467, + "eval_steps_per_second": 10.617, + "eval_wer": 0.4644525168655942, + "step": 3200 + }, + { + "epoch": 1.12, + "grad_norm": 5.164034843444824, + "learning_rate": 0.00020001240387000743, + "loss": 0.4793, + "step": 3201 + }, + { + "epoch": 1.12, + "grad_norm": 4.324225902557373, + "learning_rate": 0.0001999751922599851, + "loss": 1.9038, + "step": 3202 + }, + { + "epoch": 1.12, + "grad_norm": 3.0840883255004883, + "learning_rate": 0.00019993798064996276, + "loss": 0.2772, + "step": 3203 + }, + { + "epoch": 1.12, + "grad_norm": 4.304738521575928, + "learning_rate": 0.00019990076903994043, + "loss": 0.4444, + "step": 3204 + }, + { + "epoch": 1.12, + "grad_norm": 2.658390998840332, + "learning_rate": 0.00019986355742991814, + "loss": 1.3376, + "step": 3205 + }, + { + "epoch": 1.12, + "grad_norm": 1.7802234888076782, + "learning_rate": 0.00019982634581989578, + "loss": 0.5389, + "step": 3206 + }, + { + "epoch": 1.12, + "grad_norm": 2.279994010925293, + "learning_rate": 0.00019978913420987346, + "loss": 0.5139, + "step": 3207 + }, + { + "epoch": 1.12, + "grad_norm": 2.162747621536255, + "learning_rate": 0.0001997519225998511, + "loss": 0.508, + "step": 3208 + }, + { + "epoch": 1.12, + "grad_norm": 2.005580186843872, + "learning_rate": 0.0001997147109898288, + "loss": 0.5523, + "step": 3209 + }, + { + "epoch": 1.12, + "grad_norm": 3.2639243602752686, + "learning_rate": 0.0001996774993798065, + "loss": 0.7956, + "step": 3210 + }, + { + "epoch": 1.13, + "grad_norm": 1.1911425590515137, + "learning_rate": 0.00019964028776978414, + "loss": 0.1047, + "step": 3211 + }, + { + "epoch": 1.13, + "grad_norm": 2.8164703845977783, + "learning_rate": 0.00019960307615976184, + "loss": 0.3521, + "step": 3212 + }, + { + "epoch": 1.13, + "grad_norm": 1.5338577032089233, + "learning_rate": 0.0001995658645497395, + "loss": 0.3544, + "step": 3213 + }, + { + "epoch": 1.13, + "grad_norm": 2.2264373302459717, + "learning_rate": 0.00019952865293971716, + "loss": 0.5603, + "step": 3214 + }, + { + "epoch": 1.13, + "grad_norm": 2.001485824584961, + "learning_rate": 0.00019949144132969487, + "loss": 0.3046, + "step": 3215 + }, + { + "epoch": 1.13, + "grad_norm": 4.817356586456299, + "learning_rate": 0.00019945422971967252, + "loss": 0.527, + "step": 3216 + }, + { + "epoch": 1.13, + "grad_norm": 2.8212592601776123, + "learning_rate": 0.0001994170181096502, + "loss": 0.595, + "step": 3217 + }, + { + "epoch": 1.13, + "grad_norm": 4.72152853012085, + "learning_rate": 0.0001993798064996279, + "loss": 0.9974, + "step": 3218 + }, + { + "epoch": 1.13, + "grad_norm": 3.7662367820739746, + "learning_rate": 0.00019934259488960554, + "loss": 0.8654, + "step": 3219 + }, + { + "epoch": 1.13, + "grad_norm": 0.8878252506256104, + "learning_rate": 0.00019930538327958322, + "loss": 0.0876, + "step": 3220 + }, + { + "epoch": 1.13, + "grad_norm": 4.1150922775268555, + "learning_rate": 0.00019926817166956087, + "loss": 0.3632, + "step": 3221 + }, + { + "epoch": 1.13, + "grad_norm": 2.1186084747314453, + "learning_rate": 0.00019923096005953857, + "loss": 0.4102, + "step": 3222 + }, + { + "epoch": 1.13, + "grad_norm": 2.5478599071502686, + "learning_rate": 0.00019919374844951625, + "loss": 0.2458, + "step": 3223 + }, + { + "epoch": 1.13, + "grad_norm": 2.566505193710327, + "learning_rate": 0.0001991565368394939, + "loss": 0.2762, + "step": 3224 + }, + { + "epoch": 1.13, + "grad_norm": 2.5866873264312744, + "learning_rate": 0.00019911932522947157, + "loss": 0.8733, + "step": 3225 + }, + { + "epoch": 1.13, + "grad_norm": 1.6467078924179077, + "learning_rate": 0.00019908211361944925, + "loss": 0.1229, + "step": 3226 + }, + { + "epoch": 1.13, + "grad_norm": 7.527315139770508, + "learning_rate": 0.00019904490200942692, + "loss": 0.5953, + "step": 3227 + }, + { + "epoch": 1.13, + "grad_norm": 2.10206937789917, + "learning_rate": 0.0001990076903994046, + "loss": 0.4737, + "step": 3228 + }, + { + "epoch": 1.13, + "grad_norm": 1.532116174697876, + "learning_rate": 0.00019897047878938225, + "loss": 0.1381, + "step": 3229 + }, + { + "epoch": 1.13, + "grad_norm": 1.7178963422775269, + "learning_rate": 0.00019893326717935995, + "loss": 0.6589, + "step": 3230 + }, + { + "epoch": 1.13, + "grad_norm": 1.9948376417160034, + "learning_rate": 0.00019889605556933763, + "loss": 0.4115, + "step": 3231 + }, + { + "epoch": 1.13, + "grad_norm": 1.7607015371322632, + "learning_rate": 0.00019885884395931527, + "loss": 0.3731, + "step": 3232 + }, + { + "epoch": 1.13, + "grad_norm": 1.7697004079818726, + "learning_rate": 0.00019882163234929298, + "loss": 0.2709, + "step": 3233 + }, + { + "epoch": 1.13, + "grad_norm": 1.821341633796692, + "learning_rate": 0.00019878442073927063, + "loss": 0.2842, + "step": 3234 + }, + { + "epoch": 1.13, + "grad_norm": 2.076326370239258, + "learning_rate": 0.0001987472091292483, + "loss": 0.3179, + "step": 3235 + }, + { + "epoch": 1.13, + "grad_norm": 1.9342252016067505, + "learning_rate": 0.000198709997519226, + "loss": 0.2932, + "step": 3236 + }, + { + "epoch": 1.13, + "grad_norm": 2.633678674697876, + "learning_rate": 0.00019867278590920365, + "loss": 0.5614, + "step": 3237 + }, + { + "epoch": 1.13, + "grad_norm": 2.345721483230591, + "learning_rate": 0.00019863557429918133, + "loss": 0.3905, + "step": 3238 + }, + { + "epoch": 1.13, + "grad_norm": 5.671786785125732, + "learning_rate": 0.00019859836268915898, + "loss": 0.373, + "step": 3239 + }, + { + "epoch": 1.14, + "grad_norm": 3.5498249530792236, + "learning_rate": 0.00019856115107913668, + "loss": 0.3899, + "step": 3240 + }, + { + "epoch": 1.14, + "grad_norm": 2.3076889514923096, + "learning_rate": 0.00019852393946911436, + "loss": 0.3927, + "step": 3241 + }, + { + "epoch": 1.14, + "grad_norm": 1.8654396533966064, + "learning_rate": 0.000198486727859092, + "loss": 0.4113, + "step": 3242 + }, + { + "epoch": 1.14, + "grad_norm": 3.3518593311309814, + "learning_rate": 0.0001984495162490697, + "loss": 0.4291, + "step": 3243 + }, + { + "epoch": 1.14, + "grad_norm": 3.1347527503967285, + "learning_rate": 0.00019841230463904738, + "loss": 0.5388, + "step": 3244 + }, + { + "epoch": 1.14, + "grad_norm": 1.9862194061279297, + "learning_rate": 0.00019837509302902503, + "loss": 0.3185, + "step": 3245 + }, + { + "epoch": 1.14, + "grad_norm": 3.0931460857391357, + "learning_rate": 0.0001983378814190027, + "loss": 0.2293, + "step": 3246 + }, + { + "epoch": 1.14, + "grad_norm": 3.0222811698913574, + "learning_rate": 0.00019830066980898038, + "loss": 0.675, + "step": 3247 + }, + { + "epoch": 1.14, + "grad_norm": 4.591670989990234, + "learning_rate": 0.00019826345819895806, + "loss": 1.6013, + "step": 3248 + }, + { + "epoch": 1.14, + "grad_norm": 2.511385679244995, + "learning_rate": 0.00019822624658893574, + "loss": 0.3192, + "step": 3249 + }, + { + "epoch": 1.14, + "grad_norm": 2.967205047607422, + "learning_rate": 0.00019818903497891338, + "loss": 0.2893, + "step": 3250 + }, + { + "epoch": 1.14, + "grad_norm": 1.8851898908615112, + "learning_rate": 0.0001981518233688911, + "loss": 0.221, + "step": 3251 + }, + { + "epoch": 1.14, + "grad_norm": 4.387383460998535, + "learning_rate": 0.00019811461175886874, + "loss": 0.3816, + "step": 3252 + }, + { + "epoch": 1.14, + "grad_norm": 1.66984224319458, + "learning_rate": 0.0001980774001488464, + "loss": 0.1179, + "step": 3253 + }, + { + "epoch": 1.14, + "grad_norm": 2.261944055557251, + "learning_rate": 0.00019804018853882411, + "loss": 0.23, + "step": 3254 + }, + { + "epoch": 1.14, + "grad_norm": 2.3139607906341553, + "learning_rate": 0.00019800297692880176, + "loss": 0.6849, + "step": 3255 + }, + { + "epoch": 1.14, + "grad_norm": 3.3277676105499268, + "learning_rate": 0.00019796576531877944, + "loss": 0.821, + "step": 3256 + }, + { + "epoch": 1.14, + "grad_norm": 1.5088117122650146, + "learning_rate": 0.0001979285537087571, + "loss": 0.3421, + "step": 3257 + }, + { + "epoch": 1.14, + "grad_norm": 26.52508544921875, + "learning_rate": 0.0001978913420987348, + "loss": 5.8404, + "step": 3258 + }, + { + "epoch": 1.14, + "grad_norm": 2.2505085468292236, + "learning_rate": 0.00019785413048871247, + "loss": 0.2756, + "step": 3259 + }, + { + "epoch": 1.14, + "grad_norm": 1.61557137966156, + "learning_rate": 0.00019781691887869012, + "loss": 0.2101, + "step": 3260 + }, + { + "epoch": 1.14, + "grad_norm": 1.9748115539550781, + "learning_rate": 0.00019777970726866782, + "loss": 0.3818, + "step": 3261 + }, + { + "epoch": 1.14, + "grad_norm": 1.2797274589538574, + "learning_rate": 0.0001977424956586455, + "loss": 0.2122, + "step": 3262 + }, + { + "epoch": 1.14, + "grad_norm": 2.0891008377075195, + "learning_rate": 0.00019770528404862314, + "loss": 0.3929, + "step": 3263 + }, + { + "epoch": 1.14, + "grad_norm": 1.7179592847824097, + "learning_rate": 0.00019766807243860085, + "loss": 0.2509, + "step": 3264 + }, + { + "epoch": 1.14, + "grad_norm": 2.773587703704834, + "learning_rate": 0.0001976308608285785, + "loss": 0.2744, + "step": 3265 + }, + { + "epoch": 1.14, + "grad_norm": 1.902330756187439, + "learning_rate": 0.00019759364921855617, + "loss": 0.3719, + "step": 3266 + }, + { + "epoch": 1.14, + "grad_norm": 3.543816566467285, + "learning_rate": 0.00019755643760853385, + "loss": 0.6176, + "step": 3267 + }, + { + "epoch": 1.15, + "grad_norm": 2.187521457672119, + "learning_rate": 0.00019751922599851152, + "loss": 0.2295, + "step": 3268 + }, + { + "epoch": 1.15, + "grad_norm": 2.898231029510498, + "learning_rate": 0.0001974820143884892, + "loss": 0.3294, + "step": 3269 + }, + { + "epoch": 1.15, + "grad_norm": 1.8710620403289795, + "learning_rate": 0.00019744480277846685, + "loss": 0.2361, + "step": 3270 + }, + { + "epoch": 1.15, + "grad_norm": 1.4173840284347534, + "learning_rate": 0.00019740759116844452, + "loss": 0.2267, + "step": 3271 + }, + { + "epoch": 1.15, + "grad_norm": 3.5871973037719727, + "learning_rate": 0.00019737037955842222, + "loss": 0.6848, + "step": 3272 + }, + { + "epoch": 1.15, + "grad_norm": 3.1234724521636963, + "learning_rate": 0.00019733316794839987, + "loss": 0.3783, + "step": 3273 + }, + { + "epoch": 1.15, + "grad_norm": 1.742729902267456, + "learning_rate": 0.00019729595633837755, + "loss": 0.3094, + "step": 3274 + }, + { + "epoch": 1.15, + "grad_norm": 3.235287666320801, + "learning_rate": 0.00019725874472835525, + "loss": 0.5631, + "step": 3275 + }, + { + "epoch": 1.15, + "grad_norm": 5.217351913452148, + "learning_rate": 0.0001972215331183329, + "loss": 0.5498, + "step": 3276 + }, + { + "epoch": 1.15, + "grad_norm": 1.3385629653930664, + "learning_rate": 0.00019718432150831058, + "loss": 0.066, + "step": 3277 + }, + { + "epoch": 1.15, + "grad_norm": 5.248405933380127, + "learning_rate": 0.00019714710989828823, + "loss": 0.769, + "step": 3278 + }, + { + "epoch": 1.15, + "grad_norm": 3.6078615188598633, + "learning_rate": 0.00019710989828826593, + "loss": 1.5544, + "step": 3279 + }, + { + "epoch": 1.15, + "grad_norm": 3.464466094970703, + "learning_rate": 0.0001970726866782436, + "loss": 0.6935, + "step": 3280 + }, + { + "epoch": 1.15, + "grad_norm": 1.388893723487854, + "learning_rate": 0.00019703547506822125, + "loss": 0.196, + "step": 3281 + }, + { + "epoch": 1.15, + "grad_norm": 2.4973855018615723, + "learning_rate": 0.00019699826345819896, + "loss": 1.0041, + "step": 3282 + }, + { + "epoch": 1.15, + "grad_norm": 1.7962448596954346, + "learning_rate": 0.0001969610518481766, + "loss": 0.3729, + "step": 3283 + }, + { + "epoch": 1.15, + "grad_norm": 2.4133572578430176, + "learning_rate": 0.00019692384023815428, + "loss": 0.4041, + "step": 3284 + }, + { + "epoch": 1.15, + "grad_norm": 1.40854012966156, + "learning_rate": 0.00019688662862813198, + "loss": 0.2944, + "step": 3285 + }, + { + "epoch": 1.15, + "grad_norm": 2.9127371311187744, + "learning_rate": 0.00019684941701810963, + "loss": 0.1197, + "step": 3286 + }, + { + "epoch": 1.15, + "grad_norm": 1.9602265357971191, + "learning_rate": 0.0001968122054080873, + "loss": 0.3084, + "step": 3287 + }, + { + "epoch": 1.15, + "grad_norm": 3.038182258605957, + "learning_rate": 0.00019677499379806498, + "loss": 0.7004, + "step": 3288 + }, + { + "epoch": 1.15, + "grad_norm": 1.9776606559753418, + "learning_rate": 0.00019673778218804266, + "loss": 0.5882, + "step": 3289 + }, + { + "epoch": 1.15, + "grad_norm": 2.051262855529785, + "learning_rate": 0.00019670057057802033, + "loss": 0.3358, + "step": 3290 + }, + { + "epoch": 1.15, + "grad_norm": 2.713244676589966, + "learning_rate": 0.00019666335896799798, + "loss": 0.6035, + "step": 3291 + }, + { + "epoch": 1.15, + "grad_norm": 1.6439521312713623, + "learning_rate": 0.00019662614735797566, + "loss": 0.1498, + "step": 3292 + }, + { + "epoch": 1.15, + "grad_norm": 2.486056327819824, + "learning_rate": 0.00019658893574795336, + "loss": 0.5642, + "step": 3293 + }, + { + "epoch": 1.15, + "grad_norm": 1.6842329502105713, + "learning_rate": 0.000196551724137931, + "loss": 0.3027, + "step": 3294 + }, + { + "epoch": 1.15, + "grad_norm": 5.799333095550537, + "learning_rate": 0.0001965145125279087, + "loss": 0.771, + "step": 3295 + }, + { + "epoch": 1.15, + "grad_norm": 3.132808208465576, + "learning_rate": 0.00019647730091788636, + "loss": 0.5863, + "step": 3296 + }, + { + "epoch": 1.16, + "grad_norm": 1.502746343612671, + "learning_rate": 0.00019644008930786404, + "loss": 0.1146, + "step": 3297 + }, + { + "epoch": 1.16, + "grad_norm": 2.2371695041656494, + "learning_rate": 0.00019640287769784171, + "loss": 0.1923, + "step": 3298 + }, + { + "epoch": 1.16, + "grad_norm": 1.7083606719970703, + "learning_rate": 0.00019636566608781936, + "loss": 0.2605, + "step": 3299 + }, + { + "epoch": 1.16, + "grad_norm": 1.663743019104004, + "learning_rate": 0.00019632845447779707, + "loss": 0.2404, + "step": 3300 + }, + { + "epoch": 1.16, + "eval_loss": 0.53449547290802, + "eval_runtime": 51.0865, + "eval_samples_per_second": 42.438, + "eval_steps_per_second": 10.609, + "eval_wer": 0.48806434872859367, + "step": 3300 + }, + { + "epoch": 1.16, + "grad_norm": 2.146204710006714, + "learning_rate": 0.00019629124286777474, + "loss": 0.3883, + "step": 3301 + }, + { + "epoch": 1.16, + "grad_norm": 4.3042311668396, + "learning_rate": 0.0001962540312577524, + "loss": 0.5085, + "step": 3302 + }, + { + "epoch": 1.16, + "grad_norm": 1.9416069984436035, + "learning_rate": 0.0001962168196477301, + "loss": 0.1635, + "step": 3303 + }, + { + "epoch": 1.16, + "grad_norm": 1.1905418634414673, + "learning_rate": 0.00019617960803770774, + "loss": 0.0755, + "step": 3304 + }, + { + "epoch": 1.16, + "grad_norm": 2.9481916427612305, + "learning_rate": 0.00019614239642768542, + "loss": 1.2842, + "step": 3305 + }, + { + "epoch": 1.16, + "grad_norm": 4.018986701965332, + "learning_rate": 0.00019610518481766312, + "loss": 1.239, + "step": 3306 + }, + { + "epoch": 1.16, + "grad_norm": 2.8045568466186523, + "learning_rate": 0.00019606797320764077, + "loss": 0.7349, + "step": 3307 + }, + { + "epoch": 1.16, + "grad_norm": 2.780566692352295, + "learning_rate": 0.00019603076159761844, + "loss": 0.6553, + "step": 3308 + }, + { + "epoch": 1.16, + "grad_norm": 1.8182471990585327, + "learning_rate": 0.0001959935499875961, + "loss": 0.3462, + "step": 3309 + }, + { + "epoch": 1.16, + "grad_norm": 2.2723684310913086, + "learning_rate": 0.0001959563383775738, + "loss": 0.3089, + "step": 3310 + }, + { + "epoch": 1.16, + "grad_norm": 2.788958787918091, + "learning_rate": 0.00019591912676755147, + "loss": 0.439, + "step": 3311 + }, + { + "epoch": 1.16, + "grad_norm": 1.5581282377243042, + "learning_rate": 0.00019588191515752912, + "loss": 0.1266, + "step": 3312 + }, + { + "epoch": 1.16, + "grad_norm": 2.770595073699951, + "learning_rate": 0.0001958447035475068, + "loss": 0.5998, + "step": 3313 + }, + { + "epoch": 1.16, + "grad_norm": 2.167032480239868, + "learning_rate": 0.00019580749193748447, + "loss": 0.4174, + "step": 3314 + }, + { + "epoch": 1.16, + "grad_norm": 3.6844260692596436, + "learning_rate": 0.00019577028032746215, + "loss": 0.574, + "step": 3315 + }, + { + "epoch": 1.16, + "grad_norm": 2.024935722351074, + "learning_rate": 0.00019573306871743982, + "loss": 0.2714, + "step": 3316 + }, + { + "epoch": 1.16, + "grad_norm": 3.656796932220459, + "learning_rate": 0.0001956958571074175, + "loss": 0.7618, + "step": 3317 + }, + { + "epoch": 1.16, + "grad_norm": 3.2108490467071533, + "learning_rate": 0.00019565864549739518, + "loss": 0.4265, + "step": 3318 + }, + { + "epoch": 1.16, + "grad_norm": 1.514299750328064, + "learning_rate": 0.00019562143388737285, + "loss": 0.1267, + "step": 3319 + }, + { + "epoch": 1.16, + "grad_norm": 4.003231048583984, + "learning_rate": 0.0001955842222773505, + "loss": 0.4855, + "step": 3320 + }, + { + "epoch": 1.16, + "grad_norm": 2.6469032764434814, + "learning_rate": 0.0001955470106673282, + "loss": 0.4387, + "step": 3321 + }, + { + "epoch": 1.16, + "grad_norm": 1.6862777471542358, + "learning_rate": 0.00019550979905730585, + "loss": 0.2019, + "step": 3322 + }, + { + "epoch": 1.16, + "grad_norm": 3.7266769409179688, + "learning_rate": 0.00019547258744728353, + "loss": 0.6177, + "step": 3323 + }, + { + "epoch": 1.16, + "grad_norm": 2.3731110095977783, + "learning_rate": 0.00019543537583726123, + "loss": 0.2896, + "step": 3324 + }, + { + "epoch": 1.17, + "grad_norm": 3.063096761703491, + "learning_rate": 0.00019539816422723888, + "loss": 0.3411, + "step": 3325 + }, + { + "epoch": 1.17, + "grad_norm": 2.101358413696289, + "learning_rate": 0.00019536095261721655, + "loss": 0.3822, + "step": 3326 + }, + { + "epoch": 1.17, + "grad_norm": 4.259075164794922, + "learning_rate": 0.0001953237410071942, + "loss": 1.7024, + "step": 3327 + }, + { + "epoch": 1.17, + "grad_norm": 4.619799613952637, + "learning_rate": 0.0001952865293971719, + "loss": 0.7057, + "step": 3328 + }, + { + "epoch": 1.17, + "grad_norm": 1.7613863945007324, + "learning_rate": 0.00019524931778714958, + "loss": 0.1737, + "step": 3329 + }, + { + "epoch": 1.17, + "grad_norm": 2.4438929557800293, + "learning_rate": 0.00019521210617712723, + "loss": 0.759, + "step": 3330 + }, + { + "epoch": 1.17, + "grad_norm": 2.7754616737365723, + "learning_rate": 0.00019517489456710493, + "loss": 0.7182, + "step": 3331 + }, + { + "epoch": 1.17, + "grad_norm": 2.5432474613189697, + "learning_rate": 0.0001951376829570826, + "loss": 0.3178, + "step": 3332 + }, + { + "epoch": 1.17, + "grad_norm": 1.662652850151062, + "learning_rate": 0.00019510047134706026, + "loss": 0.268, + "step": 3333 + }, + { + "epoch": 1.17, + "grad_norm": 1.8840018510818481, + "learning_rate": 0.00019506325973703793, + "loss": 0.5437, + "step": 3334 + }, + { + "epoch": 1.17, + "grad_norm": 2.192028760910034, + "learning_rate": 0.0001950260481270156, + "loss": 0.2108, + "step": 3335 + }, + { + "epoch": 1.17, + "grad_norm": 1.9820222854614258, + "learning_rate": 0.00019498883651699329, + "loss": 0.2483, + "step": 3336 + }, + { + "epoch": 1.17, + "grad_norm": 3.4582228660583496, + "learning_rate": 0.00019495162490697096, + "loss": 0.6822, + "step": 3337 + }, + { + "epoch": 1.17, + "grad_norm": 1.7434332370758057, + "learning_rate": 0.00019491441329694864, + "loss": 0.2123, + "step": 3338 + }, + { + "epoch": 1.17, + "grad_norm": 2.4500560760498047, + "learning_rate": 0.0001948772016869263, + "loss": 0.482, + "step": 3339 + }, + { + "epoch": 1.17, + "grad_norm": 2.2425453662872314, + "learning_rate": 0.00019483999007690396, + "loss": 0.3816, + "step": 3340 + }, + { + "epoch": 1.17, + "grad_norm": 3.76282000541687, + "learning_rate": 0.00019480277846688164, + "loss": 0.3388, + "step": 3341 + }, + { + "epoch": 1.17, + "grad_norm": 1.8158988952636719, + "learning_rate": 0.00019476556685685934, + "loss": 0.2258, + "step": 3342 + }, + { + "epoch": 1.17, + "grad_norm": 1.5334137678146362, + "learning_rate": 0.000194728355246837, + "loss": 0.217, + "step": 3343 + }, + { + "epoch": 1.17, + "grad_norm": 3.0049142837524414, + "learning_rate": 0.00019469114363681466, + "loss": 0.3094, + "step": 3344 + }, + { + "epoch": 1.17, + "grad_norm": 0.7647264003753662, + "learning_rate": 0.00019465393202679237, + "loss": 0.0804, + "step": 3345 + }, + { + "epoch": 1.17, + "grad_norm": 2.254091739654541, + "learning_rate": 0.00019461672041677002, + "loss": 0.2055, + "step": 3346 + }, + { + "epoch": 1.17, + "grad_norm": 3.365161657333374, + "learning_rate": 0.0001945795088067477, + "loss": 0.6031, + "step": 3347 + }, + { + "epoch": 1.17, + "grad_norm": 2.869905471801758, + "learning_rate": 0.00019454229719672534, + "loss": 0.2699, + "step": 3348 + }, + { + "epoch": 1.17, + "grad_norm": 1.7060647010803223, + "learning_rate": 0.00019450508558670304, + "loss": 0.1983, + "step": 3349 + }, + { + "epoch": 1.17, + "grad_norm": 3.431352376937866, + "learning_rate": 0.00019446787397668072, + "loss": 0.2818, + "step": 3350 + }, + { + "epoch": 1.17, + "grad_norm": 3.3158788681030273, + "learning_rate": 0.00019443066236665837, + "loss": 0.6713, + "step": 3351 + }, + { + "epoch": 1.17, + "grad_norm": 2.5465288162231445, + "learning_rate": 0.00019439345075663607, + "loss": 0.1836, + "step": 3352 + }, + { + "epoch": 1.17, + "grad_norm": 7.218588352203369, + "learning_rate": 0.00019435623914661372, + "loss": 0.5528, + "step": 3353 + }, + { + "epoch": 1.18, + "grad_norm": 3.608271360397339, + "learning_rate": 0.0001943190275365914, + "loss": 0.2997, + "step": 3354 + }, + { + "epoch": 1.18, + "grad_norm": 2.334040403366089, + "learning_rate": 0.00019428181592656907, + "loss": 0.8903, + "step": 3355 + }, + { + "epoch": 1.18, + "grad_norm": 3.217416524887085, + "learning_rate": 0.00019424460431654675, + "loss": 0.8583, + "step": 3356 + }, + { + "epoch": 1.18, + "grad_norm": 2.663219928741455, + "learning_rate": 0.00019420739270652442, + "loss": 1.0071, + "step": 3357 + }, + { + "epoch": 1.18, + "grad_norm": 2.4265856742858887, + "learning_rate": 0.00019417018109650207, + "loss": 0.5831, + "step": 3358 + }, + { + "epoch": 1.18, + "grad_norm": 3.747819185256958, + "learning_rate": 0.00019413296948647977, + "loss": 0.9428, + "step": 3359 + }, + { + "epoch": 1.18, + "grad_norm": 1.7516096830368042, + "learning_rate": 0.00019409575787645745, + "loss": 0.3003, + "step": 3360 + }, + { + "epoch": 1.18, + "grad_norm": 2.066317558288574, + "learning_rate": 0.0001940585462664351, + "loss": 0.4669, + "step": 3361 + }, + { + "epoch": 1.18, + "grad_norm": 1.9735511541366577, + "learning_rate": 0.00019402133465641277, + "loss": 0.5892, + "step": 3362 + }, + { + "epoch": 1.18, + "grad_norm": 3.3901679515838623, + "learning_rate": 0.00019398412304639048, + "loss": 0.6881, + "step": 3363 + }, + { + "epoch": 1.18, + "grad_norm": 6.7699785232543945, + "learning_rate": 0.00019394691143636813, + "loss": 0.9592, + "step": 3364 + }, + { + "epoch": 1.18, + "grad_norm": 3.4974114894866943, + "learning_rate": 0.0001939096998263458, + "loss": 0.4567, + "step": 3365 + }, + { + "epoch": 1.18, + "grad_norm": 1.7345564365386963, + "learning_rate": 0.00019387248821632345, + "loss": 0.2935, + "step": 3366 + }, + { + "epoch": 1.18, + "grad_norm": 2.886187791824341, + "learning_rate": 0.00019383527660630115, + "loss": 0.3215, + "step": 3367 + }, + { + "epoch": 1.18, + "grad_norm": 2.7109858989715576, + "learning_rate": 0.00019379806499627883, + "loss": 0.6187, + "step": 3368 + }, + { + "epoch": 1.18, + "grad_norm": 3.9955689907073975, + "learning_rate": 0.00019376085338625648, + "loss": 0.9804, + "step": 3369 + }, + { + "epoch": 1.18, + "grad_norm": 2.5214648246765137, + "learning_rate": 0.00019372364177623418, + "loss": 0.509, + "step": 3370 + }, + { + "epoch": 1.18, + "grad_norm": 1.90560781955719, + "learning_rate": 0.00019368643016621183, + "loss": 0.1936, + "step": 3371 + }, + { + "epoch": 1.18, + "grad_norm": 5.796487331390381, + "learning_rate": 0.0001936492185561895, + "loss": 1.6628, + "step": 3372 + }, + { + "epoch": 1.18, + "grad_norm": 2.390986919403076, + "learning_rate": 0.0001936120069461672, + "loss": 0.1252, + "step": 3373 + }, + { + "epoch": 1.18, + "grad_norm": 1.6030224561691284, + "learning_rate": 0.00019357479533614486, + "loss": 0.1826, + "step": 3374 + }, + { + "epoch": 1.18, + "grad_norm": 1.6612952947616577, + "learning_rate": 0.00019353758372612253, + "loss": 0.3292, + "step": 3375 + }, + { + "epoch": 1.18, + "grad_norm": 4.129511833190918, + "learning_rate": 0.0001935003721161002, + "loss": 0.5278, + "step": 3376 + }, + { + "epoch": 1.18, + "grad_norm": 3.8274450302124023, + "learning_rate": 0.00019346316050607788, + "loss": 0.537, + "step": 3377 + }, + { + "epoch": 1.18, + "grad_norm": 1.8161903619766235, + "learning_rate": 0.00019342594889605556, + "loss": 0.2818, + "step": 3378 + }, + { + "epoch": 1.18, + "grad_norm": 3.630079746246338, + "learning_rate": 0.0001933887372860332, + "loss": 0.1704, + "step": 3379 + }, + { + "epoch": 1.18, + "grad_norm": 3.014974594116211, + "learning_rate": 0.0001933515256760109, + "loss": 1.1119, + "step": 3380 + }, + { + "epoch": 1.18, + "grad_norm": 6.352705478668213, + "learning_rate": 0.0001933143140659886, + "loss": 1.7637, + "step": 3381 + }, + { + "epoch": 1.19, + "grad_norm": 1.94754159450531, + "learning_rate": 0.00019327710245596624, + "loss": 0.5617, + "step": 3382 + }, + { + "epoch": 1.19, + "grad_norm": 1.7361313104629517, + "learning_rate": 0.0001932398908459439, + "loss": 0.4192, + "step": 3383 + }, + { + "epoch": 1.19, + "grad_norm": 1.9336076974868774, + "learning_rate": 0.0001932026792359216, + "loss": 0.5007, + "step": 3384 + }, + { + "epoch": 1.19, + "grad_norm": 1.9910989999771118, + "learning_rate": 0.00019316546762589926, + "loss": 0.6072, + "step": 3385 + }, + { + "epoch": 1.19, + "grad_norm": 1.9290117025375366, + "learning_rate": 0.00019312825601587694, + "loss": 0.3207, + "step": 3386 + }, + { + "epoch": 1.19, + "grad_norm": 2.000905990600586, + "learning_rate": 0.0001930910444058546, + "loss": 0.6482, + "step": 3387 + }, + { + "epoch": 1.19, + "grad_norm": 2.0836875438690186, + "learning_rate": 0.0001930538327958323, + "loss": 0.3071, + "step": 3388 + }, + { + "epoch": 1.19, + "grad_norm": 1.299101710319519, + "learning_rate": 0.00019301662118580997, + "loss": 0.2281, + "step": 3389 + }, + { + "epoch": 1.19, + "grad_norm": 2.1644299030303955, + "learning_rate": 0.00019297940957578762, + "loss": 0.4635, + "step": 3390 + }, + { + "epoch": 1.19, + "grad_norm": 1.7756919860839844, + "learning_rate": 0.00019294219796576532, + "loss": 0.3381, + "step": 3391 + }, + { + "epoch": 1.19, + "grad_norm": 1.1819500923156738, + "learning_rate": 0.00019290498635574297, + "loss": 0.0619, + "step": 3392 + }, + { + "epoch": 1.19, + "grad_norm": 2.2042393684387207, + "learning_rate": 0.00019286777474572064, + "loss": 0.3169, + "step": 3393 + }, + { + "epoch": 1.19, + "grad_norm": 1.324364423751831, + "learning_rate": 0.00019283056313569835, + "loss": 0.0623, + "step": 3394 + }, + { + "epoch": 1.19, + "grad_norm": 3.631133556365967, + "learning_rate": 0.000192793351525676, + "loss": 0.4106, + "step": 3395 + }, + { + "epoch": 1.19, + "grad_norm": 4.347462177276611, + "learning_rate": 0.00019275613991565367, + "loss": 0.4728, + "step": 3396 + }, + { + "epoch": 1.19, + "grad_norm": 3.753833770751953, + "learning_rate": 0.00019271892830563132, + "loss": 0.6239, + "step": 3397 + }, + { + "epoch": 1.19, + "grad_norm": 3.2457504272460938, + "learning_rate": 0.00019268171669560902, + "loss": 0.4568, + "step": 3398 + }, + { + "epoch": 1.19, + "grad_norm": 3.2867891788482666, + "learning_rate": 0.0001926445050855867, + "loss": 0.8455, + "step": 3399 + }, + { + "epoch": 1.19, + "grad_norm": 2.0727713108062744, + "learning_rate": 0.00019260729347556435, + "loss": 0.2855, + "step": 3400 + }, + { + "epoch": 1.19, + "eval_loss": 0.5019727349281311, + "eval_runtime": 51.094, + "eval_samples_per_second": 42.432, + "eval_steps_per_second": 10.608, + "eval_wer": 0.48131811105345096, + "step": 3400 + }, + { + "epoch": 1.19, + "grad_norm": 0.6999847292900085, + "learning_rate": 0.00019257008186554205, + "loss": 0.0542, + "step": 3401 + }, + { + "epoch": 1.19, + "grad_norm": 2.255366563796997, + "learning_rate": 0.0001925328702555197, + "loss": 0.2321, + "step": 3402 + }, + { + "epoch": 1.19, + "grad_norm": 3.2004432678222656, + "learning_rate": 0.00019249565864549737, + "loss": 0.5873, + "step": 3403 + }, + { + "epoch": 1.19, + "grad_norm": 7.720293998718262, + "learning_rate": 0.00019245844703547505, + "loss": 1.3688, + "step": 3404 + }, + { + "epoch": 1.19, + "grad_norm": 2.9082274436950684, + "learning_rate": 0.00019242123542545273, + "loss": 0.8094, + "step": 3405 + }, + { + "epoch": 1.19, + "grad_norm": 24.3272762298584, + "learning_rate": 0.0001923840238154304, + "loss": 5.8724, + "step": 3406 + }, + { + "epoch": 1.19, + "grad_norm": 1.7688156366348267, + "learning_rate": 0.00019234681220540808, + "loss": 0.2914, + "step": 3407 + }, + { + "epoch": 1.19, + "grad_norm": 2.372715950012207, + "learning_rate": 0.00019230960059538573, + "loss": 0.2986, + "step": 3408 + }, + { + "epoch": 1.19, + "grad_norm": 2.354346990585327, + "learning_rate": 0.00019227238898536343, + "loss": 0.4223, + "step": 3409 + }, + { + "epoch": 1.19, + "grad_norm": 1.976027250289917, + "learning_rate": 0.00019223517737534108, + "loss": 0.2763, + "step": 3410 + }, + { + "epoch": 1.2, + "grad_norm": 1.1134535074234009, + "learning_rate": 0.00019219796576531875, + "loss": 0.1037, + "step": 3411 + }, + { + "epoch": 1.2, + "grad_norm": 2.9392096996307373, + "learning_rate": 0.00019216075415529646, + "loss": 0.4426, + "step": 3412 + }, + { + "epoch": 1.2, + "grad_norm": 1.128270149230957, + "learning_rate": 0.0001921235425452741, + "loss": 0.1267, + "step": 3413 + }, + { + "epoch": 1.2, + "grad_norm": 0.8088712692260742, + "learning_rate": 0.00019208633093525178, + "loss": 0.0562, + "step": 3414 + }, + { + "epoch": 1.2, + "grad_norm": NaN, + "learning_rate": 0.00019208633093525178, + "loss": 0.4185, + "step": 3415 + }, + { + "epoch": 1.2, + "grad_norm": 4.43418550491333, + "learning_rate": 0.00019204911932522943, + "loss": 0.8585, + "step": 3416 + }, + { + "epoch": 1.2, + "grad_norm": 2.2035839557647705, + "learning_rate": 0.00019201190771520713, + "loss": 0.2187, + "step": 3417 + }, + { + "epoch": 1.2, + "grad_norm": 2.9955644607543945, + "learning_rate": 0.0001919746961051848, + "loss": 0.2888, + "step": 3418 + }, + { + "epoch": 1.2, + "grad_norm": 3.610004186630249, + "learning_rate": 0.00019193748449516246, + "loss": 0.8794, + "step": 3419 + }, + { + "epoch": 1.2, + "grad_norm": 5.0635809898376465, + "learning_rate": 0.00019190027288514016, + "loss": 0.5365, + "step": 3420 + }, + { + "epoch": 1.2, + "grad_norm": 2.6513617038726807, + "learning_rate": 0.00019186306127511784, + "loss": 0.2151, + "step": 3421 + }, + { + "epoch": 1.2, + "grad_norm": 3.7692182064056396, + "learning_rate": 0.00019182584966509548, + "loss": 0.5967, + "step": 3422 + }, + { + "epoch": 1.2, + "grad_norm": 4.396376609802246, + "learning_rate": 0.0001917886380550732, + "loss": 0.6936, + "step": 3423 + }, + { + "epoch": 1.2, + "grad_norm": 5.046158313751221, + "learning_rate": 0.00019175142644505084, + "loss": 0.7558, + "step": 3424 + }, + { + "epoch": 1.2, + "grad_norm": 2.674578905105591, + "learning_rate": 0.0001917142148350285, + "loss": 0.2655, + "step": 3425 + }, + { + "epoch": 1.2, + "grad_norm": 3.748619556427002, + "learning_rate": 0.0001916770032250062, + "loss": 0.7096, + "step": 3426 + }, + { + "epoch": 1.2, + "grad_norm": 4.511143684387207, + "learning_rate": 0.00019163979161498386, + "loss": 0.3599, + "step": 3427 + }, + { + "epoch": 1.2, + "grad_norm": 7.0941481590271, + "learning_rate": 0.00019160258000496154, + "loss": 0.2173, + "step": 3428 + }, + { + "epoch": 1.2, + "grad_norm": NaN, + "learning_rate": 0.00019160258000496154, + "loss": 0.0813, + "step": 3429 + }, + { + "epoch": 1.2, + "grad_norm": 2.511932134628296, + "learning_rate": 0.0001915653683949392, + "loss": 0.896, + "step": 3430 + }, + { + "epoch": 1.2, + "grad_norm": 1.8741555213928223, + "learning_rate": 0.00019152815678491686, + "loss": 0.4229, + "step": 3431 + }, + { + "epoch": 1.2, + "grad_norm": 1.6796114444732666, + "learning_rate": 0.00019149094517489457, + "loss": 0.4073, + "step": 3432 + }, + { + "epoch": 1.2, + "grad_norm": 2.2420263290405273, + "learning_rate": 0.00019145373356487222, + "loss": 0.2323, + "step": 3433 + }, + { + "epoch": 1.2, + "grad_norm": 1.184206485748291, + "learning_rate": 0.0001914165219548499, + "loss": 0.1977, + "step": 3434 + }, + { + "epoch": 1.2, + "grad_norm": 1.924786925315857, + "learning_rate": 0.0001913793103448276, + "loss": 0.3857, + "step": 3435 + }, + { + "epoch": 1.2, + "grad_norm": 2.2253284454345703, + "learning_rate": 0.00019134209873480524, + "loss": 0.3105, + "step": 3436 + }, + { + "epoch": 1.2, + "grad_norm": 2.1328001022338867, + "learning_rate": 0.00019130488712478292, + "loss": 0.3734, + "step": 3437 + }, + { + "epoch": 1.2, + "grad_norm": 2.7778384685516357, + "learning_rate": 0.00019126767551476057, + "loss": 0.2954, + "step": 3438 + }, + { + "epoch": 1.2, + "grad_norm": 2.7506439685821533, + "learning_rate": 0.00019123046390473827, + "loss": 0.2154, + "step": 3439 + }, + { + "epoch": 1.21, + "grad_norm": 1.9881768226623535, + "learning_rate": 0.00019119325229471595, + "loss": 0.2684, + "step": 3440 + }, + { + "epoch": 1.21, + "grad_norm": 2.7313220500946045, + "learning_rate": 0.0001911560406846936, + "loss": 0.2496, + "step": 3441 + }, + { + "epoch": 1.21, + "grad_norm": 2.945786237716675, + "learning_rate": 0.0001911188290746713, + "loss": 0.7178, + "step": 3442 + }, + { + "epoch": 1.21, + "grad_norm": 1.2393323183059692, + "learning_rate": 0.00019108161746464895, + "loss": 0.1755, + "step": 3443 + }, + { + "epoch": 1.21, + "grad_norm": 2.6016125679016113, + "learning_rate": 0.00019104440585462662, + "loss": 0.4143, + "step": 3444 + }, + { + "epoch": 1.21, + "grad_norm": 4.074510097503662, + "learning_rate": 0.00019100719424460432, + "loss": 0.4698, + "step": 3445 + }, + { + "epoch": 1.21, + "grad_norm": 1.408915400505066, + "learning_rate": 0.00019096998263458197, + "loss": 0.1266, + "step": 3446 + }, + { + "epoch": 1.21, + "grad_norm": 2.3694286346435547, + "learning_rate": 0.00019093277102455965, + "loss": 0.3139, + "step": 3447 + }, + { + "epoch": 1.21, + "grad_norm": 2.525958776473999, + "learning_rate": 0.0001908955594145373, + "loss": 0.4195, + "step": 3448 + }, + { + "epoch": 1.21, + "grad_norm": 2.69555401802063, + "learning_rate": 0.000190858347804515, + "loss": 0.3823, + "step": 3449 + }, + { + "epoch": 1.21, + "grad_norm": 1.620243787765503, + "learning_rate": 0.00019082113619449268, + "loss": 0.1779, + "step": 3450 + }, + { + "epoch": 1.21, + "grad_norm": 4.35145902633667, + "learning_rate": 0.00019078392458447033, + "loss": 1.6517, + "step": 3451 + }, + { + "epoch": 1.21, + "grad_norm": 3.985114336013794, + "learning_rate": 0.000190746712974448, + "loss": 1.7988, + "step": 3452 + }, + { + "epoch": 1.21, + "grad_norm": 4.146234512329102, + "learning_rate": 0.0001907095013644257, + "loss": 0.2545, + "step": 3453 + }, + { + "epoch": 1.21, + "grad_norm": 7.112207412719727, + "learning_rate": 0.00019067228975440335, + "loss": 0.6047, + "step": 3454 + }, + { + "epoch": 1.21, + "grad_norm": 2.409411668777466, + "learning_rate": 0.00019063507814438103, + "loss": 0.9475, + "step": 3455 + }, + { + "epoch": 1.21, + "grad_norm": 3.0187294483184814, + "learning_rate": 0.00019059786653435868, + "loss": 0.7817, + "step": 3456 + }, + { + "epoch": 1.21, + "grad_norm": 1.8108159303665161, + "learning_rate": 0.00019056065492433638, + "loss": 0.5218, + "step": 3457 + }, + { + "epoch": 1.21, + "grad_norm": 1.8068140745162964, + "learning_rate": 0.00019052344331431406, + "loss": 0.4357, + "step": 3458 + }, + { + "epoch": 1.21, + "grad_norm": 2.0581214427948, + "learning_rate": 0.0001904862317042917, + "loss": 0.4965, + "step": 3459 + }, + { + "epoch": 1.21, + "grad_norm": 2.0165770053863525, + "learning_rate": 0.0001904490200942694, + "loss": 0.3724, + "step": 3460 + }, + { + "epoch": 1.21, + "grad_norm": 1.243851900100708, + "learning_rate": 0.00019041180848424706, + "loss": 0.2379, + "step": 3461 + }, + { + "epoch": 1.21, + "grad_norm": 2.6917412281036377, + "learning_rate": 0.00019037459687422473, + "loss": 0.7907, + "step": 3462 + }, + { + "epoch": 1.21, + "grad_norm": 1.7246170043945312, + "learning_rate": 0.00019033738526420243, + "loss": 0.4098, + "step": 3463 + }, + { + "epoch": 1.21, + "grad_norm": 1.3708009719848633, + "learning_rate": 0.00019030017365418008, + "loss": 0.182, + "step": 3464 + }, + { + "epoch": 1.21, + "grad_norm": 1.1372557878494263, + "learning_rate": 0.00019026296204415776, + "loss": 0.2405, + "step": 3465 + }, + { + "epoch": 1.21, + "grad_norm": 2.3582069873809814, + "learning_rate": 0.00019022575043413546, + "loss": 0.3401, + "step": 3466 + }, + { + "epoch": 1.21, + "grad_norm": 5.7276835441589355, + "learning_rate": 0.0001901885388241131, + "loss": 0.4506, + "step": 3467 + }, + { + "epoch": 1.22, + "grad_norm": 1.6258736848831177, + "learning_rate": 0.0001901513272140908, + "loss": 0.2368, + "step": 3468 + }, + { + "epoch": 1.22, + "grad_norm": 3.5020952224731445, + "learning_rate": 0.00019011411560406844, + "loss": 0.7847, + "step": 3469 + }, + { + "epoch": 1.22, + "grad_norm": 4.4619526863098145, + "learning_rate": 0.00019007690399404614, + "loss": 0.3962, + "step": 3470 + }, + { + "epoch": 1.22, + "grad_norm": 1.9173682928085327, + "learning_rate": 0.00019003969238402381, + "loss": 0.194, + "step": 3471 + }, + { + "epoch": 1.22, + "grad_norm": 2.3916375637054443, + "learning_rate": 0.00019000248077400146, + "loss": 0.3269, + "step": 3472 + }, + { + "epoch": 1.22, + "grad_norm": 1.9122830629348755, + "learning_rate": 0.00018996526916397914, + "loss": 0.0976, + "step": 3473 + }, + { + "epoch": 1.22, + "grad_norm": 1.4700841903686523, + "learning_rate": 0.00018992805755395681, + "loss": 0.0848, + "step": 3474 + }, + { + "epoch": 1.22, + "grad_norm": 5.40512228012085, + "learning_rate": 0.0001898908459439345, + "loss": 0.3826, + "step": 3475 + }, + { + "epoch": 1.22, + "grad_norm": 9.25135612487793, + "learning_rate": 0.00018985363433391217, + "loss": 2.3233, + "step": 3476 + }, + { + "epoch": 1.22, + "grad_norm": 4.043542861938477, + "learning_rate": 0.00018981642272388981, + "loss": 0.726, + "step": 3477 + }, + { + "epoch": 1.22, + "grad_norm": 0.8716025948524475, + "learning_rate": 0.00018977921111386752, + "loss": 0.0542, + "step": 3478 + }, + { + "epoch": 1.22, + "grad_norm": 1.9265203475952148, + "learning_rate": 0.0001897419995038452, + "loss": 0.2385, + "step": 3479 + }, + { + "epoch": 1.22, + "grad_norm": 3.297703981399536, + "learning_rate": 0.00018970478789382284, + "loss": 0.9332, + "step": 3480 + }, + { + "epoch": 1.22, + "grad_norm": 4.390806674957275, + "learning_rate": 0.00018966757628380054, + "loss": 0.9762, + "step": 3481 + }, + { + "epoch": 1.22, + "grad_norm": 2.7843103408813477, + "learning_rate": 0.0001896303646737782, + "loss": 0.3961, + "step": 3482 + }, + { + "epoch": 1.22, + "grad_norm": 2.2936036586761475, + "learning_rate": 0.00018959315306375587, + "loss": 0.515, + "step": 3483 + }, + { + "epoch": 1.22, + "grad_norm": 1.33071768283844, + "learning_rate": 0.00018955594145373357, + "loss": 0.1201, + "step": 3484 + }, + { + "epoch": 1.22, + "grad_norm": 1.876550555229187, + "learning_rate": 0.00018951872984371122, + "loss": 0.391, + "step": 3485 + }, + { + "epoch": 1.22, + "grad_norm": 1.878441333770752, + "learning_rate": 0.0001894815182336889, + "loss": 0.2876, + "step": 3486 + }, + { + "epoch": 1.22, + "grad_norm": 2.9820950031280518, + "learning_rate": 0.00018944430662366655, + "loss": 0.5285, + "step": 3487 + }, + { + "epoch": 1.22, + "grad_norm": 2.3062541484832764, + "learning_rate": 0.00018940709501364425, + "loss": 0.3399, + "step": 3488 + }, + { + "epoch": 1.22, + "grad_norm": 2.2896065711975098, + "learning_rate": 0.00018936988340362192, + "loss": 0.3287, + "step": 3489 + }, + { + "epoch": 1.22, + "grad_norm": 2.6173689365386963, + "learning_rate": 0.00018933267179359957, + "loss": 0.7003, + "step": 3490 + }, + { + "epoch": 1.22, + "grad_norm": 1.3826167583465576, + "learning_rate": 0.00018929546018357728, + "loss": 0.1447, + "step": 3491 + }, + { + "epoch": 1.22, + "grad_norm": 1.945378303527832, + "learning_rate": 0.00018925824857355495, + "loss": 0.1798, + "step": 3492 + }, + { + "epoch": 1.22, + "grad_norm": 3.251554489135742, + "learning_rate": 0.0001892210369635326, + "loss": 0.4669, + "step": 3493 + }, + { + "epoch": 1.22, + "grad_norm": 3.7647817134857178, + "learning_rate": 0.00018918382535351028, + "loss": 0.3134, + "step": 3494 + }, + { + "epoch": 1.22, + "grad_norm": 1.6382604837417603, + "learning_rate": 0.00018914661374348795, + "loss": 0.168, + "step": 3495 + }, + { + "epoch": 1.22, + "grad_norm": 4.437539100646973, + "learning_rate": 0.00018910940213346563, + "loss": 0.4604, + "step": 3496 + }, + { + "epoch": 1.23, + "grad_norm": 19.861299514770508, + "learning_rate": 0.0001890721905234433, + "loss": 1.9396, + "step": 3497 + }, + { + "epoch": 1.23, + "grad_norm": 1.6166614294052124, + "learning_rate": 0.00018903497891342095, + "loss": 0.0805, + "step": 3498 + }, + { + "epoch": 1.23, + "grad_norm": 1.6526031494140625, + "learning_rate": 0.00018899776730339865, + "loss": 0.2469, + "step": 3499 + }, + { + "epoch": 1.23, + "grad_norm": 3.7113420963287354, + "learning_rate": 0.0001889605556933763, + "loss": 0.3856, + "step": 3500 + }, + { + "epoch": 1.23, + "eval_loss": 0.48135173320770264, + "eval_runtime": 51.3793, + "eval_samples_per_second": 42.196, + "eval_steps_per_second": 10.549, + "eval_wer": 0.4328835841549905, + "step": 3500 + }, + { + "epoch": 1.23, + "grad_norm": 6.976255893707275, + "learning_rate": 0.00018892334408335398, + "loss": 2.395, + "step": 3501 + }, + { + "epoch": 1.23, + "grad_norm": 3.2706868648529053, + "learning_rate": 0.00018888613247333168, + "loss": 0.5762, + "step": 3502 + }, + { + "epoch": 1.23, + "grad_norm": 2.8764538764953613, + "learning_rate": 0.00018884892086330933, + "loss": 0.2334, + "step": 3503 + }, + { + "epoch": 1.23, + "grad_norm": 4.566110610961914, + "learning_rate": 0.000188811709253287, + "loss": 1.624, + "step": 3504 + }, + { + "epoch": 1.23, + "grad_norm": 3.793048620223999, + "learning_rate": 0.00018877449764326466, + "loss": 1.0498, + "step": 3505 + }, + { + "epoch": 1.23, + "grad_norm": 2.507688283920288, + "learning_rate": 0.00018873728603324236, + "loss": 0.4346, + "step": 3506 + }, + { + "epoch": 1.23, + "grad_norm": 2.692643404006958, + "learning_rate": 0.00018870007442322003, + "loss": 0.4612, + "step": 3507 + }, + { + "epoch": 1.23, + "grad_norm": 1.4150710105895996, + "learning_rate": 0.00018866286281319768, + "loss": 0.2225, + "step": 3508 + }, + { + "epoch": 1.23, + "grad_norm": 2.229931116104126, + "learning_rate": 0.00018862565120317539, + "loss": 0.386, + "step": 3509 + }, + { + "epoch": 1.23, + "grad_norm": 1.482277750968933, + "learning_rate": 0.00018858843959315306, + "loss": 0.4351, + "step": 3510 + }, + { + "epoch": 1.23, + "grad_norm": 2.131032943725586, + "learning_rate": 0.0001885512279831307, + "loss": 0.2765, + "step": 3511 + }, + { + "epoch": 1.23, + "grad_norm": 1.5943416357040405, + "learning_rate": 0.0001885140163731084, + "loss": 0.3301, + "step": 3512 + }, + { + "epoch": 1.23, + "grad_norm": 1.9223726987838745, + "learning_rate": 0.00018847680476308606, + "loss": 0.3836, + "step": 3513 + }, + { + "epoch": 1.23, + "grad_norm": 1.1175265312194824, + "learning_rate": 0.00018843959315306374, + "loss": 0.208, + "step": 3514 + }, + { + "epoch": 1.23, + "grad_norm": 2.3698058128356934, + "learning_rate": 0.0001884023815430414, + "loss": 0.422, + "step": 3515 + }, + { + "epoch": 1.23, + "grad_norm": 3.7978971004486084, + "learning_rate": 0.0001883651699330191, + "loss": 1.4153, + "step": 3516 + }, + { + "epoch": 1.23, + "grad_norm": 2.1298134326934814, + "learning_rate": 0.00018832795832299676, + "loss": 0.5128, + "step": 3517 + }, + { + "epoch": 1.23, + "grad_norm": 1.7395635843276978, + "learning_rate": 0.00018829074671297441, + "loss": 0.1592, + "step": 3518 + }, + { + "epoch": 1.23, + "grad_norm": 1.4399739503860474, + "learning_rate": 0.0001882535351029521, + "loss": 0.0962, + "step": 3519 + }, + { + "epoch": 1.23, + "grad_norm": 2.0876457691192627, + "learning_rate": 0.0001882163234929298, + "loss": 0.5794, + "step": 3520 + }, + { + "epoch": 1.23, + "grad_norm": 2.5795669555664062, + "learning_rate": 0.00018817911188290744, + "loss": 0.45, + "step": 3521 + }, + { + "epoch": 1.23, + "grad_norm": 0.8200573921203613, + "learning_rate": 0.00018814190027288512, + "loss": 0.0697, + "step": 3522 + }, + { + "epoch": 1.23, + "grad_norm": 1.8458431959152222, + "learning_rate": 0.00018810468866286282, + "loss": 0.1935, + "step": 3523 + }, + { + "epoch": 1.23, + "grad_norm": 1.1191134452819824, + "learning_rate": 0.00018806747705284047, + "loss": 0.0881, + "step": 3524 + }, + { + "epoch": 1.24, + "grad_norm": 4.453530311584473, + "learning_rate": 0.00018803026544281814, + "loss": 0.5468, + "step": 3525 + }, + { + "epoch": 1.24, + "grad_norm": 1.0060765743255615, + "learning_rate": 0.0001879930538327958, + "loss": 0.122, + "step": 3526 + }, + { + "epoch": 1.24, + "grad_norm": 2.2647805213928223, + "learning_rate": 0.0001879558422227735, + "loss": 0.1266, + "step": 3527 + }, + { + "epoch": 1.24, + "grad_norm": 3.9961869716644287, + "learning_rate": 0.00018791863061275117, + "loss": 0.118, + "step": 3528 + }, + { + "epoch": 1.24, + "grad_norm": 0.9355319142341614, + "learning_rate": 0.00018788141900272882, + "loss": 0.0574, + "step": 3529 + }, + { + "epoch": 1.24, + "grad_norm": 3.0905239582061768, + "learning_rate": 0.00018784420739270652, + "loss": 1.0581, + "step": 3530 + }, + { + "epoch": 1.24, + "grad_norm": 2.1791467666625977, + "learning_rate": 0.00018780699578268417, + "loss": 0.6661, + "step": 3531 + }, + { + "epoch": 1.24, + "grad_norm": 2.5521254539489746, + "learning_rate": 0.00018776978417266185, + "loss": 0.4533, + "step": 3532 + }, + { + "epoch": 1.24, + "grad_norm": 2.4842238426208496, + "learning_rate": 0.00018773257256263955, + "loss": 0.6154, + "step": 3533 + }, + { + "epoch": 1.24, + "grad_norm": 1.9282567501068115, + "learning_rate": 0.0001876953609526172, + "loss": 0.3092, + "step": 3534 + }, + { + "epoch": 1.24, + "grad_norm": 2.4563050270080566, + "learning_rate": 0.00018765814934259488, + "loss": 0.3415, + "step": 3535 + }, + { + "epoch": 1.24, + "grad_norm": 1.9763050079345703, + "learning_rate": 0.00018762093773257255, + "loss": 0.4392, + "step": 3536 + }, + { + "epoch": 1.24, + "grad_norm": 3.0104451179504395, + "learning_rate": 0.00018758372612255023, + "loss": 0.3869, + "step": 3537 + }, + { + "epoch": 1.24, + "grad_norm": 2.270270824432373, + "learning_rate": 0.0001875465145125279, + "loss": 0.2269, + "step": 3538 + }, + { + "epoch": 1.24, + "grad_norm": 4.0361008644104, + "learning_rate": 0.00018750930290250555, + "loss": 0.2886, + "step": 3539 + }, + { + "epoch": 1.24, + "grad_norm": 2.7986207008361816, + "learning_rate": 0.00018747209129248323, + "loss": 0.4975, + "step": 3540 + }, + { + "epoch": 1.24, + "grad_norm": 2.5085690021514893, + "learning_rate": 0.00018743487968246093, + "loss": 0.3265, + "step": 3541 + }, + { + "epoch": 1.24, + "grad_norm": 2.8387558460235596, + "learning_rate": 0.00018739766807243858, + "loss": 0.2783, + "step": 3542 + }, + { + "epoch": 1.24, + "grad_norm": 3.8573365211486816, + "learning_rate": 0.00018736045646241625, + "loss": 0.6503, + "step": 3543 + }, + { + "epoch": 1.24, + "grad_norm": 4.418932914733887, + "learning_rate": 0.00018732324485239393, + "loss": 0.5577, + "step": 3544 + }, + { + "epoch": 1.24, + "grad_norm": 1.9382532835006714, + "learning_rate": 0.0001872860332423716, + "loss": 0.4881, + "step": 3545 + }, + { + "epoch": 1.24, + "grad_norm": 2.3090128898620605, + "learning_rate": 0.00018724882163234928, + "loss": 0.4069, + "step": 3546 + }, + { + "epoch": 1.24, + "grad_norm": 2.0739097595214844, + "learning_rate": 0.00018721161002232693, + "loss": 0.3043, + "step": 3547 + }, + { + "epoch": 1.24, + "grad_norm": 2.7553725242614746, + "learning_rate": 0.00018717439841230463, + "loss": 0.299, + "step": 3548 + }, + { + "epoch": 1.24, + "grad_norm": 4.5599212646484375, + "learning_rate": 0.00018713718680228228, + "loss": 1.5577, + "step": 3549 + }, + { + "epoch": 1.24, + "grad_norm": 4.312692642211914, + "learning_rate": 0.00018709997519225996, + "loss": 0.4333, + "step": 3550 + }, + { + "epoch": 1.24, + "grad_norm": 2.080211877822876, + "learning_rate": 0.00018706276358223766, + "loss": 0.1648, + "step": 3551 + }, + { + "epoch": 1.24, + "grad_norm": 4.01967191696167, + "learning_rate": 0.0001870255519722153, + "loss": 0.4049, + "step": 3552 + }, + { + "epoch": 1.24, + "grad_norm": 2.141727924346924, + "learning_rate": 0.00018698834036219299, + "loss": 0.265, + "step": 3553 + }, + { + "epoch": 1.25, + "grad_norm": 1.7477151155471802, + "learning_rate": 0.0001869511287521707, + "loss": 0.2846, + "step": 3554 + }, + { + "epoch": 1.25, + "grad_norm": 2.3710789680480957, + "learning_rate": 0.00018691391714214834, + "loss": 0.4719, + "step": 3555 + }, + { + "epoch": 1.25, + "grad_norm": 1.525882601737976, + "learning_rate": 0.000186876705532126, + "loss": 0.4626, + "step": 3556 + }, + { + "epoch": 1.25, + "grad_norm": 1.5245758295059204, + "learning_rate": 0.00018683949392210366, + "loss": 0.367, + "step": 3557 + }, + { + "epoch": 1.25, + "grad_norm": 3.189377784729004, + "learning_rate": 0.00018680228231208136, + "loss": 0.8119, + "step": 3558 + }, + { + "epoch": 1.25, + "grad_norm": 1.608099102973938, + "learning_rate": 0.00018676507070205904, + "loss": 0.3054, + "step": 3559 + }, + { + "epoch": 1.25, + "grad_norm": 1.2353789806365967, + "learning_rate": 0.0001867278590920367, + "loss": 0.1744, + "step": 3560 + }, + { + "epoch": 1.25, + "grad_norm": 1.7148106098175049, + "learning_rate": 0.00018669064748201436, + "loss": 0.4698, + "step": 3561 + }, + { + "epoch": 1.25, + "grad_norm": 2.018219470977783, + "learning_rate": 0.00018665343587199204, + "loss": 0.2835, + "step": 3562 + }, + { + "epoch": 1.25, + "grad_norm": 1.603143572807312, + "learning_rate": 0.00018661622426196972, + "loss": 0.2637, + "step": 3563 + }, + { + "epoch": 1.25, + "grad_norm": 1.5139524936676025, + "learning_rate": 0.0001865790126519474, + "loss": 0.3385, + "step": 3564 + }, + { + "epoch": 1.25, + "grad_norm": 2.398815393447876, + "learning_rate": 0.00018654180104192507, + "loss": 0.195, + "step": 3565 + }, + { + "epoch": 1.25, + "grad_norm": 2.6348488330841064, + "learning_rate": 0.00018650458943190274, + "loss": 0.8485, + "step": 3566 + }, + { + "epoch": 1.25, + "grad_norm": 3.205677032470703, + "learning_rate": 0.00018646737782188042, + "loss": 0.6398, + "step": 3567 + }, + { + "epoch": 1.25, + "grad_norm": 1.6468408107757568, + "learning_rate": 0.00018643016621185807, + "loss": 0.2491, + "step": 3568 + }, + { + "epoch": 1.25, + "grad_norm": 1.7327412366867065, + "learning_rate": 0.00018639295460183577, + "loss": 0.2545, + "step": 3569 + }, + { + "epoch": 1.25, + "grad_norm": 2.0780012607574463, + "learning_rate": 0.00018635574299181342, + "loss": 0.3401, + "step": 3570 + }, + { + "epoch": 1.25, + "grad_norm": 1.9040396213531494, + "learning_rate": 0.0001863185313817911, + "loss": 0.2863, + "step": 3571 + }, + { + "epoch": 1.25, + "grad_norm": 1.8664028644561768, + "learning_rate": 0.0001862813197717688, + "loss": 0.1769, + "step": 3572 + }, + { + "epoch": 1.25, + "grad_norm": 2.1530826091766357, + "learning_rate": 0.00018624410816174645, + "loss": 0.2482, + "step": 3573 + }, + { + "epoch": 1.25, + "grad_norm": 4.735560417175293, + "learning_rate": 0.00018620689655172412, + "loss": 1.8875, + "step": 3574 + }, + { + "epoch": 1.25, + "grad_norm": 0.7484613656997681, + "learning_rate": 0.00018616968494170177, + "loss": 0.0572, + "step": 3575 + }, + { + "epoch": 1.25, + "grad_norm": 2.2754640579223633, + "learning_rate": 0.00018613247333167947, + "loss": 0.12, + "step": 3576 + }, + { + "epoch": 1.25, + "grad_norm": 2.0318729877471924, + "learning_rate": 0.00018609526172165715, + "loss": 0.0707, + "step": 3577 + }, + { + "epoch": 1.25, + "grad_norm": 4.152217388153076, + "learning_rate": 0.0001860580501116348, + "loss": 1.4776, + "step": 3578 + }, + { + "epoch": 1.25, + "grad_norm": 6.380149841308594, + "learning_rate": 0.0001860208385016125, + "loss": 0.3457, + "step": 3579 + }, + { + "epoch": 1.25, + "grad_norm": 2.7456815242767334, + "learning_rate": 0.00018598362689159018, + "loss": 0.8919, + "step": 3580 + }, + { + "epoch": 1.25, + "grad_norm": 1.7026749849319458, + "learning_rate": 0.00018594641528156783, + "loss": 0.424, + "step": 3581 + }, + { + "epoch": 1.26, + "grad_norm": 1.725861668586731, + "learning_rate": 0.0001859092036715455, + "loss": 0.3761, + "step": 3582 + }, + { + "epoch": 1.26, + "grad_norm": 1.5452537536621094, + "learning_rate": 0.00018587199206152318, + "loss": 0.3575, + "step": 3583 + }, + { + "epoch": 1.26, + "grad_norm": 2.4589481353759766, + "learning_rate": 0.00018583478045150085, + "loss": 0.4449, + "step": 3584 + }, + { + "epoch": 1.26, + "grad_norm": 1.2494957447052002, + "learning_rate": 0.00018579756884147853, + "loss": 0.2364, + "step": 3585 + }, + { + "epoch": 1.26, + "grad_norm": 1.3550739288330078, + "learning_rate": 0.0001857603572314562, + "loss": 0.2196, + "step": 3586 + }, + { + "epoch": 1.26, + "grad_norm": 2.515165090560913, + "learning_rate": 0.00018572314562143388, + "loss": 0.3064, + "step": 3587 + }, + { + "epoch": 1.26, + "grad_norm": 1.4528539180755615, + "learning_rate": 0.00018568593401141153, + "loss": 0.2979, + "step": 3588 + }, + { + "epoch": 1.26, + "grad_norm": 2.131699800491333, + "learning_rate": 0.0001856487224013892, + "loss": 0.3577, + "step": 3589 + }, + { + "epoch": 1.26, + "grad_norm": 2.56373929977417, + "learning_rate": 0.0001856115107913669, + "loss": 0.6694, + "step": 3590 + }, + { + "epoch": 1.26, + "grad_norm": 3.1758902072906494, + "learning_rate": 0.00018557429918134456, + "loss": 0.7342, + "step": 3591 + }, + { + "epoch": 1.26, + "grad_norm": 1.6790735721588135, + "learning_rate": 0.00018553708757132223, + "loss": 0.2676, + "step": 3592 + }, + { + "epoch": 1.26, + "grad_norm": 3.171738624572754, + "learning_rate": 0.00018549987596129988, + "loss": 0.6191, + "step": 3593 + }, + { + "epoch": 1.26, + "grad_norm": 3.72245454788208, + "learning_rate": 0.00018546266435127758, + "loss": 0.9003, + "step": 3594 + }, + { + "epoch": 1.26, + "grad_norm": 3.158806085586548, + "learning_rate": 0.00018542545274125526, + "loss": 0.5151, + "step": 3595 + }, + { + "epoch": 1.26, + "grad_norm": 2.433006763458252, + "learning_rate": 0.0001853882411312329, + "loss": 0.3028, + "step": 3596 + }, + { + "epoch": 1.26, + "grad_norm": 2.634091377258301, + "learning_rate": 0.0001853510295212106, + "loss": 0.3578, + "step": 3597 + }, + { + "epoch": 1.26, + "grad_norm": 1.5862232446670532, + "learning_rate": 0.0001853138179111883, + "loss": 0.2275, + "step": 3598 + }, + { + "epoch": 1.26, + "grad_norm": 3.65346097946167, + "learning_rate": 0.00018527660630116594, + "loss": 0.4811, + "step": 3599 + }, + { + "epoch": 1.26, + "grad_norm": 2.1591145992279053, + "learning_rate": 0.00018523939469114364, + "loss": 0.3709, + "step": 3600 + }, + { + "epoch": 1.26, + "eval_loss": 0.5617594122886658, + "eval_runtime": 51.0249, + "eval_samples_per_second": 42.489, + "eval_steps_per_second": 10.622, + "eval_wer": 0.5672029060716139, + "step": 3600 + }, + { + "epoch": 1.26, + "grad_norm": 4.291797161102295, + "learning_rate": 0.0001852021830811213, + "loss": 0.7502, + "step": 3601 + }, + { + "epoch": 1.26, + "grad_norm": 2.1709883213043213, + "learning_rate": 0.00018516497147109896, + "loss": 0.6292, + "step": 3602 + }, + { + "epoch": 1.26, + "grad_norm": 2.368156909942627, + "learning_rate": 0.00018512775986107664, + "loss": 0.3489, + "step": 3603 + }, + { + "epoch": 1.26, + "grad_norm": 4.229372024536133, + "learning_rate": 0.00018509054825105432, + "loss": 1.2459, + "step": 3604 + }, + { + "epoch": 1.26, + "grad_norm": 2.126002788543701, + "learning_rate": 0.000185053336641032, + "loss": 0.7528, + "step": 3605 + }, + { + "epoch": 1.26, + "grad_norm": 2.2286715507507324, + "learning_rate": 0.00018501612503100964, + "loss": 0.6335, + "step": 3606 + }, + { + "epoch": 1.26, + "grad_norm": 2.8708016872406006, + "learning_rate": 0.00018497891342098734, + "loss": 0.6489, + "step": 3607 + }, + { + "epoch": 1.26, + "grad_norm": 1.7687532901763916, + "learning_rate": 0.00018494170181096502, + "loss": 0.6581, + "step": 3608 + }, + { + "epoch": 1.26, + "grad_norm": 1.8210864067077637, + "learning_rate": 0.00018490449020094267, + "loss": 0.4258, + "step": 3609 + }, + { + "epoch": 1.26, + "grad_norm": 1.6010420322418213, + "learning_rate": 0.00018486727859092034, + "loss": 0.2422, + "step": 3610 + }, + { + "epoch": 1.27, + "grad_norm": 1.8562211990356445, + "learning_rate": 0.00018483006698089805, + "loss": 0.3653, + "step": 3611 + }, + { + "epoch": 1.27, + "grad_norm": 2.263174533843994, + "learning_rate": 0.0001847928553708757, + "loss": 0.5212, + "step": 3612 + }, + { + "epoch": 1.27, + "grad_norm": 2.848961591720581, + "learning_rate": 0.00018475564376085337, + "loss": 0.383, + "step": 3613 + }, + { + "epoch": 1.27, + "grad_norm": 4.881875514984131, + "learning_rate": 0.00018471843215083102, + "loss": 0.6873, + "step": 3614 + }, + { + "epoch": 1.27, + "grad_norm": 3.0648579597473145, + "learning_rate": 0.00018468122054080872, + "loss": 0.8545, + "step": 3615 + }, + { + "epoch": 1.27, + "grad_norm": 2.5609729290008545, + "learning_rate": 0.0001846440089307864, + "loss": 0.3276, + "step": 3616 + }, + { + "epoch": 1.27, + "grad_norm": 2.5366320610046387, + "learning_rate": 0.00018460679732076405, + "loss": 0.3974, + "step": 3617 + }, + { + "epoch": 1.27, + "grad_norm": 1.6741987466812134, + "learning_rate": 0.00018456958571074175, + "loss": 0.3688, + "step": 3618 + }, + { + "epoch": 1.27, + "grad_norm": 2.0672824382781982, + "learning_rate": 0.0001845323741007194, + "loss": 0.2186, + "step": 3619 + }, + { + "epoch": 1.27, + "grad_norm": 3.1901285648345947, + "learning_rate": 0.00018449516249069707, + "loss": 0.3451, + "step": 3620 + }, + { + "epoch": 1.27, + "grad_norm": 2.1494998931884766, + "learning_rate": 0.00018445795088067478, + "loss": 0.2902, + "step": 3621 + }, + { + "epoch": 1.27, + "grad_norm": 5.410013675689697, + "learning_rate": 0.00018442073927065243, + "loss": 0.2924, + "step": 3622 + }, + { + "epoch": 1.27, + "grad_norm": 1.7226850986480713, + "learning_rate": 0.0001843835276606301, + "loss": 0.2274, + "step": 3623 + }, + { + "epoch": 1.27, + "grad_norm": 1.2159602642059326, + "learning_rate": 0.00018434631605060778, + "loss": 0.0521, + "step": 3624 + }, + { + "epoch": 1.27, + "grad_norm": 6.059643268585205, + "learning_rate": 0.00018430910444058545, + "loss": 0.7462, + "step": 3625 + }, + { + "epoch": 1.27, + "grad_norm": 1.91728937625885, + "learning_rate": 0.00018427189283056313, + "loss": 0.1399, + "step": 3626 + }, + { + "epoch": 1.27, + "grad_norm": 2.763751745223999, + "learning_rate": 0.00018423468122054078, + "loss": 0.9764, + "step": 3627 + }, + { + "epoch": 1.27, + "grad_norm": 3.757922887802124, + "learning_rate": 0.00018419746961051848, + "loss": 0.4835, + "step": 3628 + }, + { + "epoch": 1.27, + "grad_norm": 4.211630821228027, + "learning_rate": 0.00018416025800049616, + "loss": 0.4143, + "step": 3629 + }, + { + "epoch": 1.27, + "grad_norm": 4.662005424499512, + "learning_rate": 0.0001841230463904738, + "loss": 0.9232, + "step": 3630 + }, + { + "epoch": 1.27, + "grad_norm": 1.8224451541900635, + "learning_rate": 0.00018408583478045148, + "loss": 0.2985, + "step": 3631 + }, + { + "epoch": 1.27, + "grad_norm": 1.7688192129135132, + "learning_rate": 0.00018404862317042916, + "loss": 0.2334, + "step": 3632 + }, + { + "epoch": 1.27, + "grad_norm": 1.983191967010498, + "learning_rate": 0.00018401141156040683, + "loss": 0.3987, + "step": 3633 + }, + { + "epoch": 1.27, + "grad_norm": 2.6247076988220215, + "learning_rate": 0.0001839741999503845, + "loss": 0.5811, + "step": 3634 + }, + { + "epoch": 1.27, + "grad_norm": 1.944804310798645, + "learning_rate": 0.00018393698834036216, + "loss": 0.5739, + "step": 3635 + }, + { + "epoch": 1.27, + "grad_norm": 1.3238880634307861, + "learning_rate": 0.00018389977673033986, + "loss": 0.2322, + "step": 3636 + }, + { + "epoch": 1.27, + "grad_norm": 2.190138339996338, + "learning_rate": 0.0001838625651203175, + "loss": 0.4987, + "step": 3637 + }, + { + "epoch": 1.27, + "grad_norm": 1.9091345071792603, + "learning_rate": 0.00018382535351029518, + "loss": 0.3517, + "step": 3638 + }, + { + "epoch": 1.28, + "grad_norm": 1.6717495918273926, + "learning_rate": 0.0001837881419002729, + "loss": 0.1578, + "step": 3639 + }, + { + "epoch": 1.28, + "grad_norm": 2.4669203758239746, + "learning_rate": 0.00018375093029025054, + "loss": 0.3245, + "step": 3640 + }, + { + "epoch": 1.28, + "grad_norm": 1.4179534912109375, + "learning_rate": 0.0001837137186802282, + "loss": 0.2772, + "step": 3641 + }, + { + "epoch": 1.28, + "grad_norm": 2.291625738143921, + "learning_rate": 0.00018367650707020591, + "loss": 0.7771, + "step": 3642 + }, + { + "epoch": 1.28, + "grad_norm": 2.39475417137146, + "learning_rate": 0.00018363929546018356, + "loss": 0.3678, + "step": 3643 + }, + { + "epoch": 1.28, + "grad_norm": 1.2829241752624512, + "learning_rate": 0.00018360208385016124, + "loss": 0.3058, + "step": 3644 + }, + { + "epoch": 1.28, + "grad_norm": 1.9139297008514404, + "learning_rate": 0.0001835648722401389, + "loss": 0.1881, + "step": 3645 + }, + { + "epoch": 1.28, + "grad_norm": 4.035134792327881, + "learning_rate": 0.0001835276606301166, + "loss": 0.2359, + "step": 3646 + }, + { + "epoch": 1.28, + "grad_norm": 3.865679979324341, + "learning_rate": 0.00018349044902009427, + "loss": 0.4176, + "step": 3647 + }, + { + "epoch": 1.28, + "grad_norm": 7.729085445404053, + "learning_rate": 0.00018345323741007191, + "loss": 1.612, + "step": 3648 + }, + { + "epoch": 1.28, + "grad_norm": 1.2197033166885376, + "learning_rate": 0.00018341602580004962, + "loss": 0.1282, + "step": 3649 + }, + { + "epoch": 1.28, + "grad_norm": 1.467267394065857, + "learning_rate": 0.00018337881419002727, + "loss": 0.1326, + "step": 3650 + }, + { + "epoch": 1.28, + "grad_norm": 4.615586757659912, + "learning_rate": 0.00018334160258000494, + "loss": 0.674, + "step": 3651 + }, + { + "epoch": 1.28, + "grad_norm": 1.724073052406311, + "learning_rate": 0.00018330439096998262, + "loss": 0.268, + "step": 3652 + }, + { + "epoch": 1.28, + "grad_norm": 3.465282440185547, + "learning_rate": 0.0001832671793599603, + "loss": 0.5606, + "step": 3653 + }, + { + "epoch": 1.28, + "grad_norm": 2.1217899322509766, + "learning_rate": 0.00018322996774993797, + "loss": 0.1697, + "step": 3654 + }, + { + "epoch": 1.28, + "grad_norm": 1.2871822118759155, + "learning_rate": 0.00018319275613991565, + "loss": 0.4938, + "step": 3655 + }, + { + "epoch": 1.28, + "grad_norm": 2.0756819248199463, + "learning_rate": 0.0001831555445298933, + "loss": 0.5783, + "step": 3656 + }, + { + "epoch": 1.28, + "grad_norm": 3.0986368656158447, + "learning_rate": 0.000183118332919871, + "loss": 0.5325, + "step": 3657 + }, + { + "epoch": 1.28, + "grad_norm": 1.4617551565170288, + "learning_rate": 0.00018308112130984865, + "loss": 0.3135, + "step": 3658 + }, + { + "epoch": 1.28, + "grad_norm": 1.108564019203186, + "learning_rate": 0.00018304390969982632, + "loss": 0.1062, + "step": 3659 + }, + { + "epoch": 1.28, + "grad_norm": 3.5125279426574707, + "learning_rate": 0.00018300669808980402, + "loss": 0.6693, + "step": 3660 + }, + { + "epoch": 1.28, + "grad_norm": 2.014686346054077, + "learning_rate": 0.00018296948647978167, + "loss": 0.4059, + "step": 3661 + }, + { + "epoch": 1.28, + "grad_norm": 1.8392510414123535, + "learning_rate": 0.00018293227486975935, + "loss": 0.4116, + "step": 3662 + }, + { + "epoch": 1.28, + "grad_norm": 2.073396921157837, + "learning_rate": 0.000182895063259737, + "loss": 0.3753, + "step": 3663 + }, + { + "epoch": 1.28, + "grad_norm": 2.183483362197876, + "learning_rate": 0.0001828578516497147, + "loss": 0.5035, + "step": 3664 + }, + { + "epoch": 1.28, + "grad_norm": 2.8279621601104736, + "learning_rate": 0.00018282064003969238, + "loss": 0.3977, + "step": 3665 + }, + { + "epoch": 1.28, + "grad_norm": 2.0163662433624268, + "learning_rate": 0.00018278342842967002, + "loss": 0.2057, + "step": 3666 + }, + { + "epoch": 1.28, + "grad_norm": 2.163269519805908, + "learning_rate": 0.00018274621681964773, + "loss": 0.5138, + "step": 3667 + }, + { + "epoch": 1.29, + "grad_norm": 1.980743169784546, + "learning_rate": 0.0001827090052096254, + "loss": 0.3655, + "step": 3668 + }, + { + "epoch": 1.29, + "grad_norm": 2.201828956604004, + "learning_rate": 0.00018267179359960305, + "loss": 0.6443, + "step": 3669 + }, + { + "epoch": 1.29, + "grad_norm": 1.6139568090438843, + "learning_rate": 0.00018263458198958075, + "loss": 0.1735, + "step": 3670 + }, + { + "epoch": 1.29, + "grad_norm": 3.5710642337799072, + "learning_rate": 0.0001825973703795584, + "loss": 0.6014, + "step": 3671 + }, + { + "epoch": 1.29, + "grad_norm": 1.6693230867385864, + "learning_rate": 0.00018256015876953608, + "loss": 0.3088, + "step": 3672 + }, + { + "epoch": 1.29, + "grad_norm": 2.734638214111328, + "learning_rate": 0.00018252294715951376, + "loss": 0.4733, + "step": 3673 + }, + { + "epoch": 1.29, + "grad_norm": 3.7495346069335938, + "learning_rate": 0.00018248573554949143, + "loss": 0.6562, + "step": 3674 + }, + { + "epoch": 1.29, + "grad_norm": 0.6632982492446899, + "learning_rate": 0.0001824485239394691, + "loss": 0.0649, + "step": 3675 + }, + { + "epoch": 1.29, + "grad_norm": 2.975598096847534, + "learning_rate": 0.00018241131232944676, + "loss": 0.3478, + "step": 3676 + }, + { + "epoch": 1.29, + "grad_norm": 1.9812325239181519, + "learning_rate": 0.00018237410071942443, + "loss": 0.3126, + "step": 3677 + }, + { + "epoch": 1.29, + "grad_norm": 4.1589274406433105, + "learning_rate": 0.00018233688910940213, + "loss": 0.3591, + "step": 3678 + }, + { + "epoch": 1.29, + "grad_norm": 1.129032015800476, + "learning_rate": 0.00018229967749937978, + "loss": 0.0812, + "step": 3679 + }, + { + "epoch": 1.29, + "grad_norm": 1.9920796155929565, + "learning_rate": 0.00018226246588935746, + "loss": 1.0188, + "step": 3680 + }, + { + "epoch": 1.29, + "grad_norm": 2.480633020401001, + "learning_rate": 0.0001822252542793351, + "loss": 0.8319, + "step": 3681 + }, + { + "epoch": 1.29, + "grad_norm": 1.5719470977783203, + "learning_rate": 0.0001821880426693128, + "loss": 0.446, + "step": 3682 + }, + { + "epoch": 1.29, + "grad_norm": 2.4275617599487305, + "learning_rate": 0.00018215083105929049, + "loss": 0.601, + "step": 3683 + }, + { + "epoch": 1.29, + "grad_norm": 1.5678902864456177, + "learning_rate": 0.00018211361944926813, + "loss": 0.326, + "step": 3684 + }, + { + "epoch": 1.29, + "grad_norm": 1.8893762826919556, + "learning_rate": 0.00018207640783924584, + "loss": 0.3836, + "step": 3685 + }, + { + "epoch": 1.29, + "grad_norm": 1.8527387380599976, + "learning_rate": 0.0001820391962292235, + "loss": 0.3873, + "step": 3686 + }, + { + "epoch": 1.29, + "grad_norm": 2.947082757949829, + "learning_rate": 0.00018200198461920116, + "loss": 0.2627, + "step": 3687 + }, + { + "epoch": 1.29, + "grad_norm": 1.0232598781585693, + "learning_rate": 0.00018196477300917886, + "loss": 0.1291, + "step": 3688 + }, + { + "epoch": 1.29, + "grad_norm": 1.9064313173294067, + "learning_rate": 0.00018192756139915651, + "loss": 0.2368, + "step": 3689 + }, + { + "epoch": 1.29, + "grad_norm": 1.997897982597351, + "learning_rate": 0.0001818903497891342, + "loss": 0.17, + "step": 3690 + }, + { + "epoch": 1.29, + "grad_norm": 8.157362937927246, + "learning_rate": 0.0001818531381791119, + "loss": 1.9281, + "step": 3691 + }, + { + "epoch": 1.29, + "grad_norm": 4.2168660163879395, + "learning_rate": 0.00018181592656908954, + "loss": 0.4399, + "step": 3692 + }, + { + "epoch": 1.29, + "grad_norm": 2.208404064178467, + "learning_rate": 0.00018177871495906722, + "loss": 0.1804, + "step": 3693 + }, + { + "epoch": 1.29, + "grad_norm": 2.2321760654449463, + "learning_rate": 0.00018174150334904487, + "loss": 0.2453, + "step": 3694 + }, + { + "epoch": 1.29, + "grad_norm": 1.9782792329788208, + "learning_rate": 0.00018170429173902257, + "loss": 0.1628, + "step": 3695 + }, + { + "epoch": 1.3, + "grad_norm": 2.479062795639038, + "learning_rate": 0.00018166708012900024, + "loss": 0.3915, + "step": 3696 + }, + { + "epoch": 1.3, + "grad_norm": 1.956723928451538, + "learning_rate": 0.0001816298685189779, + "loss": 0.2848, + "step": 3697 + }, + { + "epoch": 1.3, + "grad_norm": 4.0798211097717285, + "learning_rate": 0.00018159265690895557, + "loss": 1.0078, + "step": 3698 + }, + { + "epoch": 1.3, + "grad_norm": 2.6179561614990234, + "learning_rate": 0.00018155544529893327, + "loss": 0.3553, + "step": 3699 + }, + { + "epoch": 1.3, + "grad_norm": 2.044512987136841, + "learning_rate": 0.00018151823368891092, + "loss": 0.1905, + "step": 3700 + }, + { + "epoch": 1.3, + "eval_loss": 0.46060672402381897, + "eval_runtime": 51.537, + "eval_samples_per_second": 42.067, + "eval_steps_per_second": 10.517, + "eval_wer": 0.44300294066770457, + "step": 3700 + }, + { + "epoch": 1.3, + "grad_norm": 3.8095672130584717, + "learning_rate": 0.0001814810220788886, + "loss": 1.5776, + "step": 3701 + }, + { + "epoch": 1.3, + "grad_norm": 4.374634265899658, + "learning_rate": 0.00018144381046886624, + "loss": 1.4929, + "step": 3702 + }, + { + "epoch": 1.3, + "grad_norm": 1.16568922996521, + "learning_rate": 0.00018140659885884395, + "loss": 0.1051, + "step": 3703 + }, + { + "epoch": 1.3, + "grad_norm": 1.5767412185668945, + "learning_rate": 0.00018136938724882162, + "loss": 0.1765, + "step": 3704 + }, + { + "epoch": 1.3, + "grad_norm": 4.894369125366211, + "learning_rate": 0.00018133217563879927, + "loss": 0.9859, + "step": 3705 + }, + { + "epoch": 1.3, + "grad_norm": 3.5991573333740234, + "learning_rate": 0.00018129496402877698, + "loss": 0.5492, + "step": 3706 + }, + { + "epoch": 1.3, + "grad_norm": 4.314338684082031, + "learning_rate": 0.00018125775241875462, + "loss": 0.9102, + "step": 3707 + }, + { + "epoch": 1.3, + "grad_norm": 2.0932729244232178, + "learning_rate": 0.0001812205408087323, + "loss": 0.5376, + "step": 3708 + }, + { + "epoch": 1.3, + "grad_norm": 1.41013765335083, + "learning_rate": 0.00018118332919871, + "loss": 0.2054, + "step": 3709 + }, + { + "epoch": 1.3, + "grad_norm": 2.404813528060913, + "learning_rate": 0.00018114611758868765, + "loss": 0.2452, + "step": 3710 + }, + { + "epoch": 1.3, + "grad_norm": 1.5884853601455688, + "learning_rate": 0.00018110890597866533, + "loss": 0.233, + "step": 3711 + }, + { + "epoch": 1.3, + "grad_norm": 1.3731337785720825, + "learning_rate": 0.00018107169436864303, + "loss": 0.1386, + "step": 3712 + }, + { + "epoch": 1.3, + "grad_norm": 3.145470142364502, + "learning_rate": 0.00018103448275862068, + "loss": 0.6139, + "step": 3713 + }, + { + "epoch": 1.3, + "grad_norm": 1.8240619897842407, + "learning_rate": 0.00018099727114859835, + "loss": 0.4299, + "step": 3714 + }, + { + "epoch": 1.3, + "grad_norm": 1.6262075901031494, + "learning_rate": 0.000180960059538576, + "loss": 0.2562, + "step": 3715 + }, + { + "epoch": 1.3, + "grad_norm": 2.166367769241333, + "learning_rate": 0.0001809228479285537, + "loss": 0.3563, + "step": 3716 + }, + { + "epoch": 1.3, + "grad_norm": 2.9389724731445312, + "learning_rate": 0.00018088563631853138, + "loss": 1.4989, + "step": 3717 + }, + { + "epoch": 1.3, + "grad_norm": 1.0228006839752197, + "learning_rate": 0.00018084842470850903, + "loss": 0.0534, + "step": 3718 + }, + { + "epoch": 1.3, + "grad_norm": 3.372701406478882, + "learning_rate": 0.0001808112130984867, + "loss": 0.4041, + "step": 3719 + }, + { + "epoch": 1.3, + "grad_norm": 2.5104494094848633, + "learning_rate": 0.00018077400148846438, + "loss": 0.3649, + "step": 3720 + }, + { + "epoch": 1.3, + "grad_norm": 3.1328682899475098, + "learning_rate": 0.00018073678987844206, + "loss": 0.5192, + "step": 3721 + }, + { + "epoch": 1.3, + "grad_norm": 1.5079821348190308, + "learning_rate": 0.00018069957826841973, + "loss": 0.1187, + "step": 3722 + }, + { + "epoch": 1.3, + "grad_norm": 2.341155529022217, + "learning_rate": 0.00018066236665839738, + "loss": 0.2129, + "step": 3723 + }, + { + "epoch": 1.3, + "grad_norm": 3.211752414703369, + "learning_rate": 0.00018062515504837509, + "loss": 0.3416, + "step": 3724 + }, + { + "epoch": 1.31, + "grad_norm": 1.5088387727737427, + "learning_rate": 0.00018058794343835276, + "loss": 0.2176, + "step": 3725 + }, + { + "epoch": 1.31, + "grad_norm": 2.9890217781066895, + "learning_rate": 0.0001805507318283304, + "loss": 0.437, + "step": 3726 + }, + { + "epoch": 1.31, + "grad_norm": 1.218353271484375, + "learning_rate": 0.0001805135202183081, + "loss": 0.1558, + "step": 3727 + }, + { + "epoch": 1.31, + "grad_norm": 2.0937466621398926, + "learning_rate": 0.00018047630860828576, + "loss": 0.3612, + "step": 3728 + }, + { + "epoch": 1.31, + "grad_norm": 2.505051612854004, + "learning_rate": 0.00018043909699826344, + "loss": 0.4007, + "step": 3729 + }, + { + "epoch": 1.31, + "grad_norm": 2.501373529434204, + "learning_rate": 0.00018040188538824114, + "loss": 0.8542, + "step": 3730 + }, + { + "epoch": 1.31, + "grad_norm": 1.9108080863952637, + "learning_rate": 0.0001803646737782188, + "loss": 0.6732, + "step": 3731 + }, + { + "epoch": 1.31, + "grad_norm": 1.495678424835205, + "learning_rate": 0.00018032746216819646, + "loss": 0.2758, + "step": 3732 + }, + { + "epoch": 1.31, + "grad_norm": 1.6871167421340942, + "learning_rate": 0.0001802902505581741, + "loss": 0.2974, + "step": 3733 + }, + { + "epoch": 1.31, + "grad_norm": 1.7430815696716309, + "learning_rate": 0.00018025303894815182, + "loss": 0.3686, + "step": 3734 + }, + { + "epoch": 1.31, + "grad_norm": 3.15686297416687, + "learning_rate": 0.0001802158273381295, + "loss": 0.4121, + "step": 3735 + }, + { + "epoch": 1.31, + "grad_norm": 0.959276556968689, + "learning_rate": 0.00018017861572810714, + "loss": 0.0906, + "step": 3736 + }, + { + "epoch": 1.31, + "grad_norm": 1.973922610282898, + "learning_rate": 0.00018014140411808484, + "loss": 0.3729, + "step": 3737 + }, + { + "epoch": 1.31, + "grad_norm": 1.6108390092849731, + "learning_rate": 0.0001801041925080625, + "loss": 0.2135, + "step": 3738 + }, + { + "epoch": 1.31, + "grad_norm": 1.7713210582733154, + "learning_rate": 0.00018006698089804017, + "loss": 0.447, + "step": 3739 + }, + { + "epoch": 1.31, + "grad_norm": 2.696568727493286, + "learning_rate": 0.00018002976928801784, + "loss": 0.3003, + "step": 3740 + }, + { + "epoch": 1.31, + "grad_norm": 1.4664280414581299, + "learning_rate": 0.00017999255767799552, + "loss": 0.174, + "step": 3741 + }, + { + "epoch": 1.31, + "grad_norm": 3.218578815460205, + "learning_rate": 0.0001799553460679732, + "loss": 0.2536, + "step": 3742 + }, + { + "epoch": 1.31, + "grad_norm": 3.3548967838287354, + "learning_rate": 0.00017991813445795087, + "loss": 0.3933, + "step": 3743 + }, + { + "epoch": 1.31, + "grad_norm": 3.9141340255737305, + "learning_rate": 0.00017988092284792852, + "loss": 0.6092, + "step": 3744 + }, + { + "epoch": 1.31, + "grad_norm": 2.043869972229004, + "learning_rate": 0.00017984371123790622, + "loss": 0.3214, + "step": 3745 + }, + { + "epoch": 1.31, + "grad_norm": 2.997690200805664, + "learning_rate": 0.00017980649962788387, + "loss": 1.3812, + "step": 3746 + }, + { + "epoch": 1.31, + "grad_norm": 1.9347894191741943, + "learning_rate": 0.00017976928801786155, + "loss": 0.185, + "step": 3747 + }, + { + "epoch": 1.31, + "grad_norm": 2.9342029094696045, + "learning_rate": 0.00017973207640783925, + "loss": 0.5739, + "step": 3748 + }, + { + "epoch": 1.31, + "grad_norm": 2.7313919067382812, + "learning_rate": 0.0001796948647978169, + "loss": 0.3957, + "step": 3749 + }, + { + "epoch": 1.31, + "grad_norm": 3.2632312774658203, + "learning_rate": 0.00017965765318779457, + "loss": 0.1384, + "step": 3750 + }, + { + "epoch": 1.31, + "grad_norm": 2.596008539199829, + "learning_rate": 0.00017962044157777222, + "loss": 0.4888, + "step": 3751 + }, + { + "epoch": 1.31, + "grad_norm": 2.564819574356079, + "learning_rate": 0.00017958322996774993, + "loss": 0.2528, + "step": 3752 + }, + { + "epoch": 1.31, + "grad_norm": 2.4152514934539795, + "learning_rate": 0.0001795460183577276, + "loss": 0.2479, + "step": 3753 + }, + { + "epoch": 1.32, + "grad_norm": 1.620201587677002, + "learning_rate": 0.00017950880674770525, + "loss": 0.1761, + "step": 3754 + }, + { + "epoch": 1.32, + "grad_norm": 2.0693604946136475, + "learning_rate": 0.00017947159513768295, + "loss": 0.9116, + "step": 3755 + }, + { + "epoch": 1.32, + "grad_norm": 1.889093041419983, + "learning_rate": 0.00017943438352766063, + "loss": 0.417, + "step": 3756 + }, + { + "epoch": 1.32, + "grad_norm": 2.0303893089294434, + "learning_rate": 0.00017939717191763828, + "loss": 0.3572, + "step": 3757 + }, + { + "epoch": 1.32, + "grad_norm": 1.2891769409179688, + "learning_rate": 0.00017935996030761598, + "loss": 0.1608, + "step": 3758 + }, + { + "epoch": 1.32, + "grad_norm": 1.4122023582458496, + "learning_rate": 0.00017932274869759363, + "loss": 0.2832, + "step": 3759 + }, + { + "epoch": 1.32, + "grad_norm": 2.285872459411621, + "learning_rate": 0.0001792855370875713, + "loss": 0.4338, + "step": 3760 + }, + { + "epoch": 1.32, + "grad_norm": 1.5522655248641968, + "learning_rate": 0.00017924832547754898, + "loss": 0.3917, + "step": 3761 + }, + { + "epoch": 1.32, + "grad_norm": 5.722376823425293, + "learning_rate": 0.00017921111386752666, + "loss": 0.201, + "step": 3762 + }, + { + "epoch": 1.32, + "grad_norm": 2.5768380165100098, + "learning_rate": 0.00017917390225750433, + "loss": 0.2258, + "step": 3763 + }, + { + "epoch": 1.32, + "grad_norm": 3.3789002895355225, + "learning_rate": 0.00017913669064748198, + "loss": 0.6568, + "step": 3764 + }, + { + "epoch": 1.32, + "grad_norm": 2.571653127670288, + "learning_rate": 0.00017909947903745966, + "loss": 0.3319, + "step": 3765 + }, + { + "epoch": 1.32, + "grad_norm": 2.023444414138794, + "learning_rate": 0.00017906226742743736, + "loss": 0.2127, + "step": 3766 + }, + { + "epoch": 1.32, + "grad_norm": 2.665778875350952, + "learning_rate": 0.000179025055817415, + "loss": 0.3803, + "step": 3767 + }, + { + "epoch": 1.32, + "grad_norm": 2.8000216484069824, + "learning_rate": 0.00017898784420739268, + "loss": 0.3656, + "step": 3768 + }, + { + "epoch": 1.32, + "grad_norm": 5.576625347137451, + "learning_rate": 0.0001789506325973704, + "loss": 2.0108, + "step": 3769 + }, + { + "epoch": 1.32, + "grad_norm": 4.2713165283203125, + "learning_rate": 0.00017891342098734804, + "loss": 0.5785, + "step": 3770 + }, + { + "epoch": 1.32, + "grad_norm": 2.837256908416748, + "learning_rate": 0.0001788762093773257, + "loss": 0.744, + "step": 3771 + }, + { + "epoch": 1.32, + "grad_norm": 2.32977557182312, + "learning_rate": 0.00017883899776730336, + "loss": 0.1912, + "step": 3772 + }, + { + "epoch": 1.32, + "grad_norm": 1.8392449617385864, + "learning_rate": 0.00017880178615728106, + "loss": 0.4553, + "step": 3773 + }, + { + "epoch": 1.32, + "grad_norm": 2.949434757232666, + "learning_rate": 0.00017876457454725874, + "loss": 0.2178, + "step": 3774 + }, + { + "epoch": 1.32, + "grad_norm": 1.4645886421203613, + "learning_rate": 0.0001787273629372364, + "loss": 0.1202, + "step": 3775 + }, + { + "epoch": 1.32, + "grad_norm": 1.1854076385498047, + "learning_rate": 0.0001786901513272141, + "loss": 0.1576, + "step": 3776 + }, + { + "epoch": 1.32, + "grad_norm": 1.7854406833648682, + "learning_rate": 0.00017865293971719174, + "loss": 0.2579, + "step": 3777 + }, + { + "epoch": 1.32, + "grad_norm": 3.848768472671509, + "learning_rate": 0.00017861572810716942, + "loss": 1.4119, + "step": 3778 + }, + { + "epoch": 1.32, + "grad_norm": 2.103977918624878, + "learning_rate": 0.00017857851649714712, + "loss": 0.2911, + "step": 3779 + }, + { + "epoch": 1.32, + "grad_norm": 2.866246461868286, + "learning_rate": 0.00017854130488712477, + "loss": 0.9159, + "step": 3780 + }, + { + "epoch": 1.32, + "grad_norm": 1.2970139980316162, + "learning_rate": 0.00017850409327710244, + "loss": 0.4276, + "step": 3781 + }, + { + "epoch": 1.33, + "grad_norm": 1.2081961631774902, + "learning_rate": 0.0001784668816670801, + "loss": 0.3139, + "step": 3782 + }, + { + "epoch": 1.33, + "grad_norm": 2.1453487873077393, + "learning_rate": 0.0001784296700570578, + "loss": 0.7616, + "step": 3783 + }, + { + "epoch": 1.33, + "grad_norm": 1.9024728536605835, + "learning_rate": 0.00017839245844703547, + "loss": 0.2492, + "step": 3784 + }, + { + "epoch": 1.33, + "grad_norm": 1.2727264165878296, + "learning_rate": 0.00017835524683701312, + "loss": 0.2043, + "step": 3785 + }, + { + "epoch": 1.33, + "grad_norm": 1.4936034679412842, + "learning_rate": 0.0001783180352269908, + "loss": 0.3883, + "step": 3786 + }, + { + "epoch": 1.33, + "grad_norm": 1.377806544303894, + "learning_rate": 0.0001782808236169685, + "loss": 0.273, + "step": 3787 + }, + { + "epoch": 1.33, + "grad_norm": 2.399864435195923, + "learning_rate": 0.00017824361200694615, + "loss": 0.3119, + "step": 3788 + }, + { + "epoch": 1.33, + "grad_norm": 1.5656849145889282, + "learning_rate": 0.00017820640039692382, + "loss": 0.2332, + "step": 3789 + }, + { + "epoch": 1.33, + "grad_norm": 15.384991645812988, + "learning_rate": 0.0001781691887869015, + "loss": 5.3366, + "step": 3790 + }, + { + "epoch": 1.33, + "grad_norm": 3.5615835189819336, + "learning_rate": 0.00017813197717687917, + "loss": 0.8114, + "step": 3791 + }, + { + "epoch": 1.33, + "grad_norm": 2.1784439086914062, + "learning_rate": 0.00017809476556685685, + "loss": 0.5916, + "step": 3792 + }, + { + "epoch": 1.33, + "grad_norm": 1.5509488582611084, + "learning_rate": 0.0001780575539568345, + "loss": 0.2626, + "step": 3793 + }, + { + "epoch": 1.33, + "grad_norm": 0.8482944369316101, + "learning_rate": 0.0001780203423468122, + "loss": 0.066, + "step": 3794 + }, + { + "epoch": 1.33, + "grad_norm": 1.3398417234420776, + "learning_rate": 0.00017798313073678985, + "loss": 0.1493, + "step": 3795 + }, + { + "epoch": 1.33, + "grad_norm": 1.738661766052246, + "learning_rate": 0.00017794591912676753, + "loss": 0.198, + "step": 3796 + }, + { + "epoch": 1.33, + "grad_norm": 4.550568580627441, + "learning_rate": 0.00017790870751674523, + "loss": 1.3028, + "step": 3797 + }, + { + "epoch": 1.33, + "grad_norm": 2.300915002822876, + "learning_rate": 0.00017787149590672288, + "loss": 0.2393, + "step": 3798 + }, + { + "epoch": 1.33, + "grad_norm": 1.3625463247299194, + "learning_rate": 0.00017783428429670055, + "loss": 0.1016, + "step": 3799 + }, + { + "epoch": 1.33, + "grad_norm": 2.7032511234283447, + "learning_rate": 0.00017779707268667826, + "loss": 0.4724, + "step": 3800 + }, + { + "epoch": 1.33, + "eval_loss": 0.49983254075050354, + "eval_runtime": 51.5096, + "eval_samples_per_second": 42.089, + "eval_steps_per_second": 10.522, + "eval_wer": 0.4850371907974399, + "step": 3800 + }, + { + "epoch": 1.33, + "grad_norm": 3.71159029006958, + "learning_rate": 0.0001777598610766559, + "loss": 0.169, + "step": 3801 + }, + { + "epoch": 1.33, + "grad_norm": 2.478241205215454, + "learning_rate": 0.00017772264946663358, + "loss": 0.2778, + "step": 3802 + }, + { + "epoch": 1.33, + "grad_norm": 3.812366247177124, + "learning_rate": 0.00017768543785661123, + "loss": 0.4634, + "step": 3803 + }, + { + "epoch": 1.33, + "grad_norm": 1.6730952262878418, + "learning_rate": 0.00017764822624658893, + "loss": 0.0989, + "step": 3804 + }, + { + "epoch": 1.33, + "grad_norm": 3.086575508117676, + "learning_rate": 0.0001776110146365666, + "loss": 0.9503, + "step": 3805 + }, + { + "epoch": 1.33, + "grad_norm": 1.9270522594451904, + "learning_rate": 0.00017757380302654426, + "loss": 0.7548, + "step": 3806 + }, + { + "epoch": 1.33, + "grad_norm": 1.6181902885437012, + "learning_rate": 0.00017753659141652193, + "loss": 0.3449, + "step": 3807 + }, + { + "epoch": 1.33, + "grad_norm": 2.112922430038452, + "learning_rate": 0.0001774993798064996, + "loss": 0.4004, + "step": 3808 + }, + { + "epoch": 1.33, + "grad_norm": 2.618039846420288, + "learning_rate": 0.00017746216819647728, + "loss": 0.4503, + "step": 3809 + }, + { + "epoch": 1.33, + "grad_norm": 2.6346006393432617, + "learning_rate": 0.00017742495658645496, + "loss": 0.6283, + "step": 3810 + }, + { + "epoch": 1.34, + "grad_norm": 2.166271448135376, + "learning_rate": 0.00017738774497643264, + "loss": 0.21, + "step": 3811 + }, + { + "epoch": 1.34, + "grad_norm": 1.9964041709899902, + "learning_rate": 0.0001773505333664103, + "loss": 0.1659, + "step": 3812 + }, + { + "epoch": 1.34, + "grad_norm": 1.5653916597366333, + "learning_rate": 0.000177313321756388, + "loss": 0.2786, + "step": 3813 + }, + { + "epoch": 1.34, + "grad_norm": 3.1856682300567627, + "learning_rate": 0.00017727611014636564, + "loss": 0.3468, + "step": 3814 + }, + { + "epoch": 1.34, + "grad_norm": 1.068045973777771, + "learning_rate": 0.00017723889853634334, + "loss": 0.1729, + "step": 3815 + }, + { + "epoch": 1.34, + "grad_norm": 2.467663526535034, + "learning_rate": 0.000177201686926321, + "loss": 0.3148, + "step": 3816 + }, + { + "epoch": 1.34, + "grad_norm": 2.52803373336792, + "learning_rate": 0.00017716447531629866, + "loss": 0.6002, + "step": 3817 + }, + { + "epoch": 1.34, + "grad_norm": 1.5784509181976318, + "learning_rate": 0.00017712726370627637, + "loss": 0.1783, + "step": 3818 + }, + { + "epoch": 1.34, + "grad_norm": 5.923018455505371, + "learning_rate": 0.00017709005209625401, + "loss": 0.2747, + "step": 3819 + }, + { + "epoch": 1.34, + "grad_norm": 1.4094946384429932, + "learning_rate": 0.0001770528404862317, + "loss": 0.0858, + "step": 3820 + }, + { + "epoch": 1.34, + "grad_norm": 1.2953375577926636, + "learning_rate": 0.00017701562887620934, + "loss": 0.0638, + "step": 3821 + }, + { + "epoch": 1.34, + "grad_norm": 4.134175777435303, + "learning_rate": 0.00017697841726618704, + "loss": 0.4833, + "step": 3822 + }, + { + "epoch": 1.34, + "grad_norm": 4.836775302886963, + "learning_rate": 0.00017694120565616472, + "loss": 0.8205, + "step": 3823 + }, + { + "epoch": 1.34, + "grad_norm": 3.950936794281006, + "learning_rate": 0.00017690399404614237, + "loss": 0.4628, + "step": 3824 + }, + { + "epoch": 1.34, + "grad_norm": 2.5039966106414795, + "learning_rate": 0.00017686678243612007, + "loss": 0.2418, + "step": 3825 + }, + { + "epoch": 1.34, + "grad_norm": 1.359667420387268, + "learning_rate": 0.00017682957082609772, + "loss": 0.1673, + "step": 3826 + }, + { + "epoch": 1.34, + "grad_norm": 3.17059326171875, + "learning_rate": 0.0001767923592160754, + "loss": 0.2301, + "step": 3827 + }, + { + "epoch": 1.34, + "grad_norm": 3.165222406387329, + "learning_rate": 0.00017675514760605307, + "loss": 0.2882, + "step": 3828 + }, + { + "epoch": 1.34, + "grad_norm": 2.6665470600128174, + "learning_rate": 0.00017671793599603075, + "loss": 0.1528, + "step": 3829 + }, + { + "epoch": 1.34, + "grad_norm": 3.1193861961364746, + "learning_rate": 0.00017668072438600842, + "loss": 0.8316, + "step": 3830 + }, + { + "epoch": 1.34, + "grad_norm": 3.0916197299957275, + "learning_rate": 0.0001766435127759861, + "loss": 0.7043, + "step": 3831 + }, + { + "epoch": 1.34, + "grad_norm": 2.5814852714538574, + "learning_rate": 0.00017660630116596377, + "loss": 0.8129, + "step": 3832 + }, + { + "epoch": 1.34, + "grad_norm": 2.543747663497925, + "learning_rate": 0.00017656908955594145, + "loss": 0.3409, + "step": 3833 + }, + { + "epoch": 1.34, + "grad_norm": 4.633474826812744, + "learning_rate": 0.0001765318779459191, + "loss": 1.6709, + "step": 3834 + }, + { + "epoch": 1.34, + "grad_norm": 1.9206253290176392, + "learning_rate": 0.00017649466633589677, + "loss": 0.4245, + "step": 3835 + }, + { + "epoch": 1.34, + "grad_norm": 2.735995054244995, + "learning_rate": 0.00017645745472587448, + "loss": 0.3843, + "step": 3836 + }, + { + "epoch": 1.34, + "grad_norm": 1.561367392539978, + "learning_rate": 0.00017642024311585212, + "loss": 0.3513, + "step": 3837 + }, + { + "epoch": 1.34, + "grad_norm": 2.5527138710021973, + "learning_rate": 0.0001763830315058298, + "loss": 0.2287, + "step": 3838 + }, + { + "epoch": 1.35, + "grad_norm": 2.1322858333587646, + "learning_rate": 0.00017634581989580745, + "loss": 0.465, + "step": 3839 + }, + { + "epoch": 1.35, + "grad_norm": 3.6257131099700928, + "learning_rate": 0.00017630860828578515, + "loss": 0.6081, + "step": 3840 + }, + { + "epoch": 1.35, + "grad_norm": 2.3494338989257812, + "learning_rate": 0.00017627139667576283, + "loss": 0.5918, + "step": 3841 + }, + { + "epoch": 1.35, + "grad_norm": 1.8620758056640625, + "learning_rate": 0.00017623418506574048, + "loss": 0.3641, + "step": 3842 + }, + { + "epoch": 1.35, + "grad_norm": 1.5785250663757324, + "learning_rate": 0.00017619697345571818, + "loss": 0.1248, + "step": 3843 + }, + { + "epoch": 1.35, + "grad_norm": 1.321878433227539, + "learning_rate": 0.00017615976184569586, + "loss": 0.1409, + "step": 3844 + }, + { + "epoch": 1.35, + "grad_norm": 1.8503745794296265, + "learning_rate": 0.0001761225502356735, + "loss": 0.2243, + "step": 3845 + }, + { + "epoch": 1.35, + "grad_norm": 0.8740498423576355, + "learning_rate": 0.0001760853386256512, + "loss": 0.0779, + "step": 3846 + }, + { + "epoch": 1.35, + "grad_norm": 2.1911394596099854, + "learning_rate": 0.00017604812701562886, + "loss": 0.2616, + "step": 3847 + }, + { + "epoch": 1.35, + "grad_norm": 1.2762500047683716, + "learning_rate": 0.00017601091540560653, + "loss": 0.0597, + "step": 3848 + }, + { + "epoch": 1.35, + "grad_norm": 3.6884820461273193, + "learning_rate": 0.0001759737037955842, + "loss": 0.4311, + "step": 3849 + }, + { + "epoch": 1.35, + "grad_norm": 1.9794673919677734, + "learning_rate": 0.00017593649218556188, + "loss": 0.1456, + "step": 3850 + }, + { + "epoch": 1.35, + "grad_norm": 18.375457763671875, + "learning_rate": 0.00017589928057553956, + "loss": 0.7519, + "step": 3851 + }, + { + "epoch": 1.35, + "grad_norm": 9.318000793457031, + "learning_rate": 0.0001758620689655172, + "loss": 1.2229, + "step": 3852 + }, + { + "epoch": 1.35, + "grad_norm": 0.8230860233306885, + "learning_rate": 0.0001758248573554949, + "loss": 0.0594, + "step": 3853 + }, + { + "epoch": 1.35, + "grad_norm": 3.9834702014923096, + "learning_rate": 0.00017578764574547259, + "loss": 1.0047, + "step": 3854 + }, + { + "epoch": 1.35, + "grad_norm": 2.5470404624938965, + "learning_rate": 0.00017575043413545023, + "loss": 0.7612, + "step": 3855 + }, + { + "epoch": 1.35, + "grad_norm": 2.304075241088867, + "learning_rate": 0.0001757132225254279, + "loss": 0.4069, + "step": 3856 + }, + { + "epoch": 1.35, + "grad_norm": 2.3937854766845703, + "learning_rate": 0.0001756760109154056, + "loss": 0.4114, + "step": 3857 + }, + { + "epoch": 1.35, + "grad_norm": 1.7787152528762817, + "learning_rate": 0.00017563879930538326, + "loss": 0.3035, + "step": 3858 + }, + { + "epoch": 1.35, + "grad_norm": 1.8002721071243286, + "learning_rate": 0.00017560158769536094, + "loss": 0.6163, + "step": 3859 + }, + { + "epoch": 1.35, + "grad_norm": 1.9406236410140991, + "learning_rate": 0.0001755643760853386, + "loss": 0.2896, + "step": 3860 + }, + { + "epoch": 1.35, + "grad_norm": 2.4129421710968018, + "learning_rate": 0.0001755271644753163, + "loss": 0.6502, + "step": 3861 + }, + { + "epoch": 1.35, + "grad_norm": 2.7634332180023193, + "learning_rate": 0.00017548995286529397, + "loss": 0.6027, + "step": 3862 + }, + { + "epoch": 1.35, + "grad_norm": 1.0977592468261719, + "learning_rate": 0.00017545274125527161, + "loss": 0.1788, + "step": 3863 + }, + { + "epoch": 1.35, + "grad_norm": 3.4504082202911377, + "learning_rate": 0.00017541552964524932, + "loss": 0.3887, + "step": 3864 + }, + { + "epoch": 1.35, + "grad_norm": 1.857100009918213, + "learning_rate": 0.00017537831803522697, + "loss": 0.2107, + "step": 3865 + }, + { + "epoch": 1.35, + "grad_norm": 4.293605327606201, + "learning_rate": 0.00017534110642520464, + "loss": 1.4237, + "step": 3866 + }, + { + "epoch": 1.35, + "grad_norm": 2.1752376556396484, + "learning_rate": 0.00017530389481518234, + "loss": 0.3186, + "step": 3867 + }, + { + "epoch": 1.36, + "grad_norm": 2.9012653827667236, + "learning_rate": 0.00017526668320516, + "loss": 0.3952, + "step": 3868 + }, + { + "epoch": 1.36, + "grad_norm": 1.6721316576004028, + "learning_rate": 0.00017522947159513767, + "loss": 0.1556, + "step": 3869 + }, + { + "epoch": 1.36, + "grad_norm": 3.3207669258117676, + "learning_rate": 0.00017519225998511532, + "loss": 0.1144, + "step": 3870 + }, + { + "epoch": 1.36, + "grad_norm": 2.309567928314209, + "learning_rate": 0.00017515504837509302, + "loss": 0.2904, + "step": 3871 + }, + { + "epoch": 1.36, + "grad_norm": 3.172945261001587, + "learning_rate": 0.0001751178367650707, + "loss": 0.4218, + "step": 3872 + }, + { + "epoch": 1.36, + "grad_norm": 1.9167137145996094, + "learning_rate": 0.00017508062515504834, + "loss": 0.1754, + "step": 3873 + }, + { + "epoch": 1.36, + "grad_norm": 0.7879188656806946, + "learning_rate": 0.00017504341354502605, + "loss": 0.0516, + "step": 3874 + }, + { + "epoch": 1.36, + "grad_norm": 10.358311653137207, + "learning_rate": 0.00017500620193500372, + "loss": 0.6033, + "step": 3875 + }, + { + "epoch": 1.36, + "grad_norm": 3.172363758087158, + "learning_rate": 0.00017496899032498137, + "loss": 0.2935, + "step": 3876 + }, + { + "epoch": 1.36, + "grad_norm": 3.7427735328674316, + "learning_rate": 0.00017493177871495905, + "loss": 0.2327, + "step": 3877 + }, + { + "epoch": 1.36, + "grad_norm": 5.786536693572998, + "learning_rate": 0.00017489456710493672, + "loss": 1.0866, + "step": 3878 + }, + { + "epoch": 1.36, + "grad_norm": 5.266422748565674, + "learning_rate": 0.0001748573554949144, + "loss": 1.0328, + "step": 3879 + }, + { + "epoch": 1.36, + "grad_norm": 1.993481993675232, + "learning_rate": 0.00017482014388489208, + "loss": 0.6055, + "step": 3880 + }, + { + "epoch": 1.36, + "grad_norm": 2.0876619815826416, + "learning_rate": 0.00017478293227486972, + "loss": 0.6218, + "step": 3881 + }, + { + "epoch": 1.36, + "grad_norm": 2.1874656677246094, + "learning_rate": 0.00017474572066484743, + "loss": 0.3442, + "step": 3882 + }, + { + "epoch": 1.36, + "grad_norm": 1.9037868976593018, + "learning_rate": 0.00017470850905482508, + "loss": 0.1977, + "step": 3883 + }, + { + "epoch": 1.36, + "grad_norm": 2.1550540924072266, + "learning_rate": 0.00017467129744480275, + "loss": 0.3056, + "step": 3884 + }, + { + "epoch": 1.36, + "grad_norm": 2.475119113922119, + "learning_rate": 0.00017463408583478045, + "loss": 0.4085, + "step": 3885 + }, + { + "epoch": 1.36, + "grad_norm": 2.8540899753570557, + "learning_rate": 0.0001745968742247581, + "loss": 0.2703, + "step": 3886 + }, + { + "epoch": 1.36, + "grad_norm": 1.23556649684906, + "learning_rate": 0.00017455966261473578, + "loss": 0.1271, + "step": 3887 + }, + { + "epoch": 1.36, + "grad_norm": 3.1399030685424805, + "learning_rate": 0.00017452245100471348, + "loss": 0.3749, + "step": 3888 + }, + { + "epoch": 1.36, + "grad_norm": 2.0435614585876465, + "learning_rate": 0.00017448523939469113, + "loss": 0.3553, + "step": 3889 + }, + { + "epoch": 1.36, + "grad_norm": 2.5815916061401367, + "learning_rate": 0.0001744480277846688, + "loss": 0.3135, + "step": 3890 + }, + { + "epoch": 1.36, + "grad_norm": 2.8995308876037598, + "learning_rate": 0.00017441081617464645, + "loss": 0.3687, + "step": 3891 + }, + { + "epoch": 1.36, + "grad_norm": 3.48101544380188, + "learning_rate": 0.00017437360456462416, + "loss": 0.5593, + "step": 3892 + }, + { + "epoch": 1.36, + "grad_norm": 2.921421766281128, + "learning_rate": 0.00017433639295460183, + "loss": 0.3693, + "step": 3893 + }, + { + "epoch": 1.36, + "grad_norm": 1.2923214435577393, + "learning_rate": 0.00017429918134457948, + "loss": 0.0802, + "step": 3894 + }, + { + "epoch": 1.36, + "grad_norm": 2.212675094604492, + "learning_rate": 0.00017426196973455719, + "loss": 0.3009, + "step": 3895 + }, + { + "epoch": 1.37, + "grad_norm": 4.238613128662109, + "learning_rate": 0.00017422475812453483, + "loss": 0.2686, + "step": 3896 + }, + { + "epoch": 1.37, + "grad_norm": 2.2763991355895996, + "learning_rate": 0.0001741875465145125, + "loss": 0.234, + "step": 3897 + }, + { + "epoch": 1.37, + "grad_norm": 2.234372615814209, + "learning_rate": 0.00017415033490449019, + "loss": 0.3564, + "step": 3898 + }, + { + "epoch": 1.37, + "grad_norm": 4.635880470275879, + "learning_rate": 0.00017411312329446786, + "loss": 0.1816, + "step": 3899 + }, + { + "epoch": 1.37, + "grad_norm": 2.752310037612915, + "learning_rate": 0.00017407591168444554, + "loss": 0.2585, + "step": 3900 + }, + { + "epoch": 1.37, + "eval_loss": 0.5356029272079468, + "eval_runtime": 51.7497, + "eval_samples_per_second": 41.894, + "eval_steps_per_second": 10.473, + "eval_wer": 0.4876318975955717, + "step": 3900 + }, + { + "epoch": 1.37, + "grad_norm": 2.9280731678009033, + "learning_rate": 0.0001740387000744232, + "loss": 0.4685, + "step": 3901 + }, + { + "epoch": 1.37, + "grad_norm": 2.367094039916992, + "learning_rate": 0.00017400148846440086, + "loss": 0.1438, + "step": 3902 + }, + { + "epoch": 1.37, + "grad_norm": 2.750795602798462, + "learning_rate": 0.00017396427685437856, + "loss": 0.3067, + "step": 3903 + }, + { + "epoch": 1.37, + "grad_norm": 2.7507309913635254, + "learning_rate": 0.0001739270652443562, + "loss": 0.4385, + "step": 3904 + }, + { + "epoch": 1.37, + "grad_norm": 1.9318255186080933, + "learning_rate": 0.0001738898536343339, + "loss": 0.5685, + "step": 3905 + }, + { + "epoch": 1.37, + "grad_norm": 1.8872981071472168, + "learning_rate": 0.0001738526420243116, + "loss": 0.5847, + "step": 3906 + }, + { + "epoch": 1.37, + "grad_norm": 1.6942192316055298, + "learning_rate": 0.00017381543041428924, + "loss": 0.3749, + "step": 3907 + }, + { + "epoch": 1.37, + "grad_norm": 1.610882043838501, + "learning_rate": 0.00017377821880426692, + "loss": 0.3559, + "step": 3908 + }, + { + "epoch": 1.37, + "grad_norm": 1.635751485824585, + "learning_rate": 0.00017374100719424456, + "loss": 0.1369, + "step": 3909 + }, + { + "epoch": 1.37, + "grad_norm": 2.099339723587036, + "learning_rate": 0.00017370379558422227, + "loss": 0.6477, + "step": 3910 + }, + { + "epoch": 1.37, + "grad_norm": 1.2585214376449585, + "learning_rate": 0.00017366658397419994, + "loss": 0.272, + "step": 3911 + }, + { + "epoch": 1.37, + "grad_norm": 3.2351205348968506, + "learning_rate": 0.0001736293723641776, + "loss": 0.5484, + "step": 3912 + }, + { + "epoch": 1.37, + "grad_norm": 2.1207034587860107, + "learning_rate": 0.0001735921607541553, + "loss": 0.3723, + "step": 3913 + }, + { + "epoch": 1.37, + "grad_norm": 2.07908034324646, + "learning_rate": 0.00017355494914413297, + "loss": 0.314, + "step": 3914 + }, + { + "epoch": 1.37, + "grad_norm": 2.0071959495544434, + "learning_rate": 0.00017351773753411062, + "loss": 0.6595, + "step": 3915 + }, + { + "epoch": 1.37, + "grad_norm": 2.149886131286621, + "learning_rate": 0.00017348052592408832, + "loss": 0.6534, + "step": 3916 + }, + { + "epoch": 1.37, + "grad_norm": 1.7911567687988281, + "learning_rate": 0.00017344331431406597, + "loss": 0.2316, + "step": 3917 + }, + { + "epoch": 1.37, + "grad_norm": 3.381746292114258, + "learning_rate": 0.00017340610270404365, + "loss": 1.408, + "step": 3918 + }, + { + "epoch": 1.37, + "grad_norm": 4.066563606262207, + "learning_rate": 0.00017336889109402132, + "loss": 0.5341, + "step": 3919 + }, + { + "epoch": 1.37, + "grad_norm": 2.2210659980773926, + "learning_rate": 0.000173331679483999, + "loss": 0.2397, + "step": 3920 + }, + { + "epoch": 1.37, + "grad_norm": 2.28897762298584, + "learning_rate": 0.00017329446787397667, + "loss": 0.277, + "step": 3921 + }, + { + "epoch": 1.37, + "grad_norm": 2.897461175918579, + "learning_rate": 0.00017325725626395432, + "loss": 0.5583, + "step": 3922 + }, + { + "epoch": 1.37, + "grad_norm": 2.140532970428467, + "learning_rate": 0.000173220044653932, + "loss": 0.4073, + "step": 3923 + }, + { + "epoch": 1.37, + "grad_norm": 3.024677276611328, + "learning_rate": 0.0001731828330439097, + "loss": 0.5048, + "step": 3924 + }, + { + "epoch": 1.38, + "grad_norm": 2.735630750656128, + "learning_rate": 0.00017314562143388735, + "loss": 0.374, + "step": 3925 + }, + { + "epoch": 1.38, + "grad_norm": 5.665718078613281, + "learning_rate": 0.00017310840982386503, + "loss": 0.7075, + "step": 3926 + }, + { + "epoch": 1.38, + "grad_norm": 4.23260498046875, + "learning_rate": 0.00017307119821384268, + "loss": 1.46, + "step": 3927 + }, + { + "epoch": 1.38, + "grad_norm": 1.9788540601730347, + "learning_rate": 0.00017303398660382038, + "loss": 0.3469, + "step": 3928 + }, + { + "epoch": 1.38, + "grad_norm": 1.4005393981933594, + "learning_rate": 0.00017299677499379805, + "loss": 0.1381, + "step": 3929 + }, + { + "epoch": 1.38, + "grad_norm": 1.7616915702819824, + "learning_rate": 0.0001729595633837757, + "loss": 0.7645, + "step": 3930 + }, + { + "epoch": 1.38, + "grad_norm": 2.4172654151916504, + "learning_rate": 0.0001729223517737534, + "loss": 0.5808, + "step": 3931 + }, + { + "epoch": 1.38, + "grad_norm": 1.8812202215194702, + "learning_rate": 0.00017288514016373108, + "loss": 0.3962, + "step": 3932 + }, + { + "epoch": 1.38, + "grad_norm": 2.0839879512786865, + "learning_rate": 0.00017284792855370873, + "loss": 0.3107, + "step": 3933 + }, + { + "epoch": 1.38, + "grad_norm": 1.7547316551208496, + "learning_rate": 0.00017281071694368643, + "loss": 0.4774, + "step": 3934 + }, + { + "epoch": 1.38, + "grad_norm": 1.7336246967315674, + "learning_rate": 0.00017277350533366408, + "loss": 0.297, + "step": 3935 + }, + { + "epoch": 1.38, + "grad_norm": 1.464845895767212, + "learning_rate": 0.00017273629372364176, + "loss": 0.2388, + "step": 3936 + }, + { + "epoch": 1.38, + "grad_norm": 1.2136765718460083, + "learning_rate": 0.00017269908211361946, + "loss": 0.0873, + "step": 3937 + }, + { + "epoch": 1.38, + "grad_norm": 1.4514065980911255, + "learning_rate": 0.0001726618705035971, + "loss": 0.1335, + "step": 3938 + }, + { + "epoch": 1.38, + "grad_norm": 3.2778918743133545, + "learning_rate": 0.00017262465889357478, + "loss": 0.3828, + "step": 3939 + }, + { + "epoch": 1.38, + "grad_norm": 1.513120174407959, + "learning_rate": 0.00017258744728355243, + "loss": 0.1283, + "step": 3940 + }, + { + "epoch": 1.38, + "grad_norm": 1.7113932371139526, + "learning_rate": 0.00017255023567353014, + "loss": 0.1633, + "step": 3941 + }, + { + "epoch": 1.38, + "grad_norm": 2.0901389122009277, + "learning_rate": 0.0001725130240635078, + "loss": 0.2138, + "step": 3942 + }, + { + "epoch": 1.38, + "grad_norm": 4.5583720207214355, + "learning_rate": 0.00017247581245348546, + "loss": 0.7136, + "step": 3943 + }, + { + "epoch": 1.38, + "grad_norm": 1.8743005990982056, + "learning_rate": 0.00017243860084346314, + "loss": 0.2668, + "step": 3944 + }, + { + "epoch": 1.38, + "grad_norm": 3.3022148609161377, + "learning_rate": 0.00017240138923344084, + "loss": 1.001, + "step": 3945 + }, + { + "epoch": 1.38, + "grad_norm": 3.1593058109283447, + "learning_rate": 0.0001723641776234185, + "loss": 0.358, + "step": 3946 + }, + { + "epoch": 1.38, + "grad_norm": 4.342871189117432, + "learning_rate": 0.00017232696601339616, + "loss": 0.8455, + "step": 3947 + }, + { + "epoch": 1.38, + "grad_norm": 4.121707439422607, + "learning_rate": 0.0001722897544033738, + "loss": 0.511, + "step": 3948 + }, + { + "epoch": 1.38, + "grad_norm": 2.737363815307617, + "learning_rate": 0.00017225254279335152, + "loss": 0.2577, + "step": 3949 + }, + { + "epoch": 1.38, + "grad_norm": 4.477611541748047, + "learning_rate": 0.0001722153311833292, + "loss": 1.0024, + "step": 3950 + }, + { + "epoch": 1.38, + "grad_norm": 2.959623336791992, + "learning_rate": 0.00017217811957330684, + "loss": 0.4979, + "step": 3951 + }, + { + "epoch": 1.38, + "grad_norm": 3.6896047592163086, + "learning_rate": 0.00017214090796328454, + "loss": 0.502, + "step": 3952 + }, + { + "epoch": 1.39, + "grad_norm": 2.5401012897491455, + "learning_rate": 0.0001721036963532622, + "loss": 0.4048, + "step": 3953 + }, + { + "epoch": 1.39, + "grad_norm": 2.559720039367676, + "learning_rate": 0.00017206648474323987, + "loss": 0.201, + "step": 3954 + }, + { + "epoch": 1.39, + "grad_norm": 1.9754453897476196, + "learning_rate": 0.00017202927313321757, + "loss": 0.6484, + "step": 3955 + }, + { + "epoch": 1.39, + "grad_norm": 1.5338460206985474, + "learning_rate": 0.00017199206152319522, + "loss": 0.3546, + "step": 3956 + }, + { + "epoch": 1.39, + "grad_norm": 1.5986213684082031, + "learning_rate": 0.0001719548499131729, + "loss": 0.3799, + "step": 3957 + }, + { + "epoch": 1.39, + "grad_norm": 2.644622802734375, + "learning_rate": 0.0001719176383031506, + "loss": 0.5743, + "step": 3958 + }, + { + "epoch": 1.39, + "grad_norm": 2.5025296211242676, + "learning_rate": 0.00017188042669312825, + "loss": 0.3493, + "step": 3959 + }, + { + "epoch": 1.39, + "grad_norm": 1.100515604019165, + "learning_rate": 0.00017184321508310592, + "loss": 0.0687, + "step": 3960 + }, + { + "epoch": 1.39, + "grad_norm": 1.8527333736419678, + "learning_rate": 0.00017180600347308357, + "loss": 0.172, + "step": 3961 + }, + { + "epoch": 1.39, + "grad_norm": 2.1646475791931152, + "learning_rate": 0.00017176879186306127, + "loss": 0.3352, + "step": 3962 + }, + { + "epoch": 1.39, + "grad_norm": 1.9406778812408447, + "learning_rate": 0.00017173158025303895, + "loss": 0.2927, + "step": 3963 + }, + { + "epoch": 1.39, + "grad_norm": 1.5747565031051636, + "learning_rate": 0.0001716943686430166, + "loss": 0.2809, + "step": 3964 + }, + { + "epoch": 1.39, + "grad_norm": 2.524975299835205, + "learning_rate": 0.00017165715703299427, + "loss": 0.3335, + "step": 3965 + }, + { + "epoch": 1.39, + "grad_norm": 4.1112823486328125, + "learning_rate": 0.00017161994542297195, + "loss": 0.3868, + "step": 3966 + }, + { + "epoch": 1.39, + "grad_norm": 2.326080799102783, + "learning_rate": 0.00017158273381294963, + "loss": 0.4065, + "step": 3967 + }, + { + "epoch": 1.39, + "grad_norm": 1.6009414196014404, + "learning_rate": 0.0001715455222029273, + "loss": 0.1864, + "step": 3968 + }, + { + "epoch": 1.39, + "grad_norm": 14.051066398620605, + "learning_rate": 0.00017150831059290495, + "loss": 2.8598, + "step": 3969 + }, + { + "epoch": 1.39, + "grad_norm": 3.3767621517181396, + "learning_rate": 0.00017147109898288265, + "loss": 0.192, + "step": 3970 + }, + { + "epoch": 1.39, + "grad_norm": 4.84522008895874, + "learning_rate": 0.0001714338873728603, + "loss": 0.5361, + "step": 3971 + }, + { + "epoch": 1.39, + "grad_norm": 3.2274253368377686, + "learning_rate": 0.00017139667576283798, + "loss": 0.5027, + "step": 3972 + }, + { + "epoch": 1.39, + "grad_norm": 1.2869607210159302, + "learning_rate": 0.00017135946415281568, + "loss": 0.1095, + "step": 3973 + }, + { + "epoch": 1.39, + "grad_norm": 2.0447194576263428, + "learning_rate": 0.00017132225254279333, + "loss": 0.2067, + "step": 3974 + }, + { + "epoch": 1.39, + "grad_norm": 4.841538906097412, + "learning_rate": 0.000171285040932771, + "loss": 0.6107, + "step": 3975 + }, + { + "epoch": 1.39, + "grad_norm": 1.8944703340530396, + "learning_rate": 0.0001712478293227487, + "loss": 0.1473, + "step": 3976 + }, + { + "epoch": 1.39, + "grad_norm": 4.2880659103393555, + "learning_rate": 0.00017121061771272636, + "loss": 1.5288, + "step": 3977 + }, + { + "epoch": 1.39, + "grad_norm": 3.7833964824676514, + "learning_rate": 0.00017117340610270403, + "loss": 0.4495, + "step": 3978 + }, + { + "epoch": 1.39, + "grad_norm": 5.308502674102783, + "learning_rate": 0.00017113619449268168, + "loss": 1.1916, + "step": 3979 + }, + { + "epoch": 1.39, + "grad_norm": 1.6576392650604248, + "learning_rate": 0.00017109898288265938, + "loss": 0.5451, + "step": 3980 + }, + { + "epoch": 1.39, + "grad_norm": 1.7152270078659058, + "learning_rate": 0.00017106177127263706, + "loss": 0.4996, + "step": 3981 + }, + { + "epoch": 1.4, + "grad_norm": 2.689882278442383, + "learning_rate": 0.0001710245596626147, + "loss": 0.8473, + "step": 3982 + }, + { + "epoch": 1.4, + "grad_norm": 2.522495746612549, + "learning_rate": 0.0001709873480525924, + "loss": 0.6978, + "step": 3983 + }, + { + "epoch": 1.4, + "grad_norm": 1.8638933897018433, + "learning_rate": 0.00017095013644257006, + "loss": 0.3669, + "step": 3984 + }, + { + "epoch": 1.4, + "grad_norm": 1.6137725114822388, + "learning_rate": 0.00017091292483254774, + "loss": 0.2279, + "step": 3985 + }, + { + "epoch": 1.4, + "grad_norm": 3.917705774307251, + "learning_rate": 0.0001708757132225254, + "loss": 1.6021, + "step": 3986 + }, + { + "epoch": 1.4, + "grad_norm": 3.742889165878296, + "learning_rate": 0.0001708385016125031, + "loss": 0.6855, + "step": 3987 + }, + { + "epoch": 1.4, + "grad_norm": 2.922379493713379, + "learning_rate": 0.00017080129000248076, + "loss": 0.2816, + "step": 3988 + }, + { + "epoch": 1.4, + "grad_norm": 1.8310586214065552, + "learning_rate": 0.00017076407839245844, + "loss": 0.3215, + "step": 3989 + }, + { + "epoch": 1.4, + "grad_norm": 1.7077147960662842, + "learning_rate": 0.0001707268667824361, + "loss": 0.4342, + "step": 3990 + }, + { + "epoch": 1.4, + "grad_norm": 1.127012014389038, + "learning_rate": 0.0001706896551724138, + "loss": 0.1672, + "step": 3991 + }, + { + "epoch": 1.4, + "grad_norm": 2.1023213863372803, + "learning_rate": 0.00017065244356239144, + "loss": 0.4928, + "step": 3992 + }, + { + "epoch": 1.4, + "grad_norm": 1.650435209274292, + "learning_rate": 0.00017061523195236911, + "loss": 0.3392, + "step": 3993 + }, + { + "epoch": 1.4, + "grad_norm": 1.628867268562317, + "learning_rate": 0.00017057802034234682, + "loss": 0.1326, + "step": 3994 + }, + { + "epoch": 1.4, + "grad_norm": 3.0410878658294678, + "learning_rate": 0.00017054080873232447, + "loss": 0.3564, + "step": 3995 + }, + { + "epoch": 1.4, + "grad_norm": 4.636041164398193, + "learning_rate": 0.00017050359712230214, + "loss": 0.5428, + "step": 3996 + }, + { + "epoch": 1.4, + "grad_norm": 2.0328681468963623, + "learning_rate": 0.0001704663855122798, + "loss": 0.1978, + "step": 3997 + }, + { + "epoch": 1.4, + "grad_norm": 2.5405378341674805, + "learning_rate": 0.0001704291739022575, + "loss": 0.341, + "step": 3998 + }, + { + "epoch": 1.4, + "grad_norm": 2.484320640563965, + "learning_rate": 0.00017039196229223517, + "loss": 0.3324, + "step": 3999 + }, + { + "epoch": 1.4, + "grad_norm": 2.7092721462249756, + "learning_rate": 0.00017035475068221282, + "loss": 0.3742, + "step": 4000 + }, + { + "epoch": 1.4, + "eval_loss": 0.4177427291870117, + "eval_runtime": 50.8405, + "eval_samples_per_second": 42.643, + "eval_steps_per_second": 10.661, + "eval_wer": 0.3851409790693652, + "step": 4000 + }, + { + "epoch": 1.4, + "grad_norm": 1.0885396003723145, + "learning_rate": 0.00017031753907219052, + "loss": 0.0431, + "step": 4001 + }, + { + "epoch": 1.4, + "grad_norm": 3.04752254486084, + "learning_rate": 0.0001702803274621682, + "loss": 0.2688, + "step": 4002 + }, + { + "epoch": 1.4, + "grad_norm": 2.453718900680542, + "learning_rate": 0.00017024311585214585, + "loss": 0.1588, + "step": 4003 + }, + { + "epoch": 1.4, + "grad_norm": 2.611881732940674, + "learning_rate": 0.00017020590424212355, + "loss": 0.1296, + "step": 4004 + }, + { + "epoch": 1.4, + "grad_norm": 1.520546555519104, + "learning_rate": 0.0001701686926321012, + "loss": 0.4584, + "step": 4005 + }, + { + "epoch": 1.4, + "grad_norm": 1.637783408164978, + "learning_rate": 0.00017013148102207887, + "loss": 0.4303, + "step": 4006 + }, + { + "epoch": 1.4, + "grad_norm": 1.7446013689041138, + "learning_rate": 0.00017009426941205655, + "loss": 0.2818, + "step": 4007 + }, + { + "epoch": 1.4, + "grad_norm": 1.9278056621551514, + "learning_rate": 0.00017005705780203422, + "loss": 0.1499, + "step": 4008 + }, + { + "epoch": 1.4, + "grad_norm": 1.8733441829681396, + "learning_rate": 0.0001700198461920119, + "loss": 0.3069, + "step": 4009 + }, + { + "epoch": 1.41, + "grad_norm": 1.9108270406723022, + "learning_rate": 0.00016998263458198955, + "loss": 0.3054, + "step": 4010 + }, + { + "epoch": 1.41, + "grad_norm": 1.8484750986099243, + "learning_rate": 0.00016994542297196722, + "loss": 0.2867, + "step": 4011 + }, + { + "epoch": 1.41, + "grad_norm": 0.9013972878456116, + "learning_rate": 0.00016990821136194493, + "loss": 0.129, + "step": 4012 + }, + { + "epoch": 1.41, + "grad_norm": 1.9117753505706787, + "learning_rate": 0.00016987099975192258, + "loss": 0.2353, + "step": 4013 + }, + { + "epoch": 1.41, + "grad_norm": 1.5956077575683594, + "learning_rate": 0.00016983378814190025, + "loss": 0.1636, + "step": 4014 + }, + { + "epoch": 1.41, + "grad_norm": 3.0155153274536133, + "learning_rate": 0.00016979657653187793, + "loss": 1.5728, + "step": 4015 + }, + { + "epoch": 1.41, + "grad_norm": 1.7413406372070312, + "learning_rate": 0.0001697593649218556, + "loss": 0.2977, + "step": 4016 + }, + { + "epoch": 1.41, + "grad_norm": 3.3247792720794678, + "learning_rate": 0.00016972215331183328, + "loss": 0.3402, + "step": 4017 + }, + { + "epoch": 1.41, + "grad_norm": 1.7041586637496948, + "learning_rate": 0.00016968494170181093, + "loss": 0.1643, + "step": 4018 + }, + { + "epoch": 1.41, + "grad_norm": 2.3175644874572754, + "learning_rate": 0.00016964773009178863, + "loss": 0.2752, + "step": 4019 + }, + { + "epoch": 1.41, + "grad_norm": 2.561286211013794, + "learning_rate": 0.0001696105184817663, + "loss": 0.3803, + "step": 4020 + }, + { + "epoch": 1.41, + "grad_norm": 1.3216911554336548, + "learning_rate": 0.00016957330687174396, + "loss": 0.1224, + "step": 4021 + }, + { + "epoch": 1.41, + "grad_norm": 2.120414972305298, + "learning_rate": 0.00016953609526172166, + "loss": 0.2079, + "step": 4022 + }, + { + "epoch": 1.41, + "grad_norm": 2.5523006916046143, + "learning_rate": 0.0001694988836516993, + "loss": 0.3881, + "step": 4023 + }, + { + "epoch": 1.41, + "grad_norm": 2.9489386081695557, + "learning_rate": 0.00016946167204167698, + "loss": 0.3729, + "step": 4024 + }, + { + "epoch": 1.41, + "grad_norm": 2.491358757019043, + "learning_rate": 0.00016942446043165469, + "loss": 0.4037, + "step": 4025 + }, + { + "epoch": 1.41, + "grad_norm": 0.5692845582962036, + "learning_rate": 0.00016938724882163233, + "loss": 0.044, + "step": 4026 + }, + { + "epoch": 1.41, + "grad_norm": 5.157808780670166, + "learning_rate": 0.00016935003721161, + "loss": 0.2437, + "step": 4027 + }, + { + "epoch": 1.41, + "grad_norm": 3.8554892539978027, + "learning_rate": 0.00016931282560158766, + "loss": 1.2543, + "step": 4028 + }, + { + "epoch": 1.41, + "grad_norm": 4.396365642547607, + "learning_rate": 0.00016927561399156536, + "loss": 0.8959, + "step": 4029 + }, + { + "epoch": 1.41, + "grad_norm": 5.370511054992676, + "learning_rate": 0.00016923840238154304, + "loss": 1.3415, + "step": 4030 + }, + { + "epoch": 1.41, + "grad_norm": 2.6273608207702637, + "learning_rate": 0.0001692011907715207, + "loss": 0.7875, + "step": 4031 + }, + { + "epoch": 1.41, + "grad_norm": 1.6899088621139526, + "learning_rate": 0.00016916397916149836, + "loss": 0.4165, + "step": 4032 + }, + { + "epoch": 1.41, + "grad_norm": 2.6857593059539795, + "learning_rate": 0.00016912676755147607, + "loss": 0.552, + "step": 4033 + }, + { + "epoch": 1.41, + "grad_norm": 3.3171510696411133, + "learning_rate": 0.00016908955594145371, + "loss": 0.2746, + "step": 4034 + }, + { + "epoch": 1.41, + "grad_norm": 3.1243948936462402, + "learning_rate": 0.0001690523443314314, + "loss": 1.4537, + "step": 4035 + }, + { + "epoch": 1.41, + "grad_norm": 2.551290273666382, + "learning_rate": 0.00016901513272140907, + "loss": 0.7787, + "step": 4036 + }, + { + "epoch": 1.41, + "grad_norm": 1.3610817193984985, + "learning_rate": 0.00016897792111138674, + "loss": 0.2018, + "step": 4037 + }, + { + "epoch": 1.41, + "grad_norm": 2.2377007007598877, + "learning_rate": 0.00016894070950136442, + "loss": 0.6747, + "step": 4038 + }, + { + "epoch": 1.42, + "grad_norm": 3.3656833171844482, + "learning_rate": 0.00016890349789134207, + "loss": 1.3106, + "step": 4039 + }, + { + "epoch": 1.42, + "grad_norm": 1.9152419567108154, + "learning_rate": 0.00016886628628131977, + "loss": 0.4056, + "step": 4040 + }, + { + "epoch": 1.42, + "grad_norm": 2.9426541328430176, + "learning_rate": 0.00016882907467129742, + "loss": 0.7818, + "step": 4041 + }, + { + "epoch": 1.42, + "grad_norm": 1.2641545534133911, + "learning_rate": 0.0001687918630612751, + "loss": 0.218, + "step": 4042 + }, + { + "epoch": 1.42, + "grad_norm": 1.596787691116333, + "learning_rate": 0.0001687546514512528, + "loss": 0.2985, + "step": 4043 + }, + { + "epoch": 1.42, + "grad_norm": 1.177599549293518, + "learning_rate": 0.00016871743984123044, + "loss": 0.1941, + "step": 4044 + }, + { + "epoch": 1.42, + "grad_norm": 2.4323856830596924, + "learning_rate": 0.00016868022823120812, + "loss": 0.4303, + "step": 4045 + }, + { + "epoch": 1.42, + "grad_norm": 1.3808494806289673, + "learning_rate": 0.00016864301662118582, + "loss": 0.1963, + "step": 4046 + }, + { + "epoch": 1.42, + "grad_norm": 3.3002772331237793, + "learning_rate": 0.00016860580501116347, + "loss": 0.4449, + "step": 4047 + }, + { + "epoch": 1.42, + "grad_norm": 3.7071423530578613, + "learning_rate": 0.00016856859340114115, + "loss": 1.0698, + "step": 4048 + }, + { + "epoch": 1.42, + "grad_norm": 3.5901901721954346, + "learning_rate": 0.0001685313817911188, + "loss": 0.4192, + "step": 4049 + }, + { + "epoch": 1.42, + "grad_norm": 1.7595155239105225, + "learning_rate": 0.0001684941701810965, + "loss": 0.1192, + "step": 4050 + }, + { + "epoch": 1.42, + "grad_norm": 2.3339569568634033, + "learning_rate": 0.00016845695857107418, + "loss": 0.2746, + "step": 4051 + }, + { + "epoch": 1.42, + "grad_norm": 2.930159568786621, + "learning_rate": 0.00016841974696105182, + "loss": 0.6843, + "step": 4052 + }, + { + "epoch": 1.42, + "grad_norm": 0.9335921406745911, + "learning_rate": 0.0001683825353510295, + "loss": 0.0758, + "step": 4053 + }, + { + "epoch": 1.42, + "grad_norm": 9.361223220825195, + "learning_rate": 0.00016834532374100718, + "loss": 0.8905, + "step": 4054 + }, + { + "epoch": 1.42, + "grad_norm": 2.8475027084350586, + "learning_rate": 0.00016830811213098485, + "loss": 0.7053, + "step": 4055 + }, + { + "epoch": 1.42, + "grad_norm": 3.661919593811035, + "learning_rate": 0.00016827090052096253, + "loss": 0.4094, + "step": 4056 + }, + { + "epoch": 1.42, + "grad_norm": 1.8695873022079468, + "learning_rate": 0.0001682336889109402, + "loss": 0.1678, + "step": 4057 + }, + { + "epoch": 1.42, + "grad_norm": 2.414778232574463, + "learning_rate": 0.00016819647730091788, + "loss": 0.5948, + "step": 4058 + }, + { + "epoch": 1.42, + "grad_norm": 2.0762922763824463, + "learning_rate": 0.00016815926569089553, + "loss": 0.2875, + "step": 4059 + }, + { + "epoch": 1.42, + "grad_norm": 2.1993393898010254, + "learning_rate": 0.0001681220540808732, + "loss": 0.5876, + "step": 4060 + }, + { + "epoch": 1.42, + "grad_norm": 3.719446897506714, + "learning_rate": 0.0001680848424708509, + "loss": 0.5155, + "step": 4061 + }, + { + "epoch": 1.42, + "grad_norm": 3.0953850746154785, + "learning_rate": 0.00016804763086082855, + "loss": 0.4639, + "step": 4062 + }, + { + "epoch": 1.42, + "grad_norm": 2.066967487335205, + "learning_rate": 0.00016801041925080623, + "loss": 0.3151, + "step": 4063 + }, + { + "epoch": 1.42, + "grad_norm": 2.6755409240722656, + "learning_rate": 0.00016797320764078393, + "loss": 0.4324, + "step": 4064 + }, + { + "epoch": 1.42, + "grad_norm": 1.722860336303711, + "learning_rate": 0.00016793599603076158, + "loss": 0.3173, + "step": 4065 + }, + { + "epoch": 1.42, + "grad_norm": 3.7338409423828125, + "learning_rate": 0.00016789878442073926, + "loss": 0.8751, + "step": 4066 + }, + { + "epoch": 1.43, + "grad_norm": 1.5438848733901978, + "learning_rate": 0.0001678615728107169, + "loss": 0.164, + "step": 4067 + }, + { + "epoch": 1.43, + "grad_norm": 2.704420328140259, + "learning_rate": 0.0001678243612006946, + "loss": 0.2626, + "step": 4068 + }, + { + "epoch": 1.43, + "grad_norm": 3.400644063949585, + "learning_rate": 0.00016778714959067229, + "loss": 0.5246, + "step": 4069 + }, + { + "epoch": 1.43, + "grad_norm": 2.035137414932251, + "learning_rate": 0.00016774993798064993, + "loss": 0.3621, + "step": 4070 + }, + { + "epoch": 1.43, + "grad_norm": 3.7264976501464844, + "learning_rate": 0.00016771272637062764, + "loss": 0.6366, + "step": 4071 + }, + { + "epoch": 1.43, + "grad_norm": 2.768700361251831, + "learning_rate": 0.00016767551476060529, + "loss": 0.8278, + "step": 4072 + }, + { + "epoch": 1.43, + "grad_norm": 2.690241813659668, + "learning_rate": 0.00016763830315058296, + "loss": 0.3648, + "step": 4073 + }, + { + "epoch": 1.43, + "grad_norm": 5.137421607971191, + "learning_rate": 0.00016760109154056064, + "loss": 0.3702, + "step": 4074 + }, + { + "epoch": 1.43, + "grad_norm": 0.9374441504478455, + "learning_rate": 0.0001675638799305383, + "loss": 0.0988, + "step": 4075 + }, + { + "epoch": 1.43, + "grad_norm": 1.983991265296936, + "learning_rate": 0.000167526668320516, + "loss": 0.1474, + "step": 4076 + }, + { + "epoch": 1.43, + "grad_norm": 1.5250822305679321, + "learning_rate": 0.00016748945671049366, + "loss": 0.1217, + "step": 4077 + }, + { + "epoch": 1.43, + "grad_norm": 1.7774677276611328, + "learning_rate": 0.00016745224510047134, + "loss": 0.1447, + "step": 4078 + }, + { + "epoch": 1.43, + "grad_norm": 6.5276994705200195, + "learning_rate": 0.00016741503349044902, + "loss": 0.3185, + "step": 4079 + }, + { + "epoch": 1.43, + "grad_norm": 2.6374850273132324, + "learning_rate": 0.00016737782188042666, + "loss": 1.071, + "step": 4080 + }, + { + "epoch": 1.43, + "grad_norm": 1.5644630193710327, + "learning_rate": 0.00016734061027040434, + "loss": 0.4552, + "step": 4081 + }, + { + "epoch": 1.43, + "grad_norm": 2.4700703620910645, + "learning_rate": 0.00016730339866038204, + "loss": 0.6185, + "step": 4082 + }, + { + "epoch": 1.43, + "grad_norm": 1.9310435056686401, + "learning_rate": 0.0001672661870503597, + "loss": 0.3552, + "step": 4083 + }, + { + "epoch": 1.43, + "grad_norm": 1.529681921005249, + "learning_rate": 0.00016722897544033737, + "loss": 0.2221, + "step": 4084 + }, + { + "epoch": 1.43, + "grad_norm": 2.560516357421875, + "learning_rate": 0.00016719176383031502, + "loss": 0.2798, + "step": 4085 + }, + { + "epoch": 1.43, + "grad_norm": 2.354257106781006, + "learning_rate": 0.00016715455222029272, + "loss": 0.2105, + "step": 4086 + }, + { + "epoch": 1.43, + "grad_norm": 2.342054843902588, + "learning_rate": 0.0001671173406102704, + "loss": 0.2796, + "step": 4087 + }, + { + "epoch": 1.43, + "grad_norm": 1.957754135131836, + "learning_rate": 0.00016708012900024804, + "loss": 0.2997, + "step": 4088 + }, + { + "epoch": 1.43, + "grad_norm": 2.821141004562378, + "learning_rate": 0.00016704291739022575, + "loss": 0.7008, + "step": 4089 + }, + { + "epoch": 1.43, + "grad_norm": 12.810015678405762, + "learning_rate": 0.00016700570578020342, + "loss": 3.4665, + "step": 4090 + }, + { + "epoch": 1.43, + "grad_norm": 2.314800500869751, + "learning_rate": 0.00016696849417018107, + "loss": 0.689, + "step": 4091 + }, + { + "epoch": 1.43, + "grad_norm": 3.096869468688965, + "learning_rate": 0.00016693128256015877, + "loss": 0.7017, + "step": 4092 + }, + { + "epoch": 1.43, + "grad_norm": 3.2854676246643066, + "learning_rate": 0.00016689407095013642, + "loss": 0.5166, + "step": 4093 + }, + { + "epoch": 1.43, + "grad_norm": 1.9120439291000366, + "learning_rate": 0.0001668568593401141, + "loss": 0.2128, + "step": 4094 + }, + { + "epoch": 1.43, + "grad_norm": 2.307483434677124, + "learning_rate": 0.00016681964773009177, + "loss": 0.2255, + "step": 4095 + }, + { + "epoch": 1.44, + "grad_norm": 3.0140573978424072, + "learning_rate": 0.00016678243612006945, + "loss": 0.3603, + "step": 4096 + }, + { + "epoch": 1.44, + "grad_norm": 1.1581931114196777, + "learning_rate": 0.00016674522451004713, + "loss": 0.1719, + "step": 4097 + }, + { + "epoch": 1.44, + "grad_norm": 1.792546033859253, + "learning_rate": 0.00016670801290002478, + "loss": 0.1403, + "step": 4098 + }, + { + "epoch": 1.44, + "grad_norm": 1.0819088220596313, + "learning_rate": 0.00016667080129000248, + "loss": 0.0878, + "step": 4099 + }, + { + "epoch": 1.44, + "grad_norm": 5.966635227203369, + "learning_rate": 0.00016663358967998015, + "loss": 0.6863, + "step": 4100 + }, + { + "epoch": 1.44, + "eval_loss": 0.4655866026878357, + "eval_runtime": 51.618, + "eval_samples_per_second": 42.001, + "eval_steps_per_second": 10.5, + "eval_wer": 0.4545061408060889, + "step": 4100 + }, + { + "epoch": 1.44, + "grad_norm": 1.8284623622894287, + "learning_rate": 0.0001665963780699578, + "loss": 0.0954, + "step": 4101 + }, + { + "epoch": 1.44, + "grad_norm": 1.2109270095825195, + "learning_rate": 0.00016655916645993548, + "loss": 0.0594, + "step": 4102 + }, + { + "epoch": 1.44, + "grad_norm": 2.24116587638855, + "learning_rate": 0.00016652195484991315, + "loss": 0.2425, + "step": 4103 + }, + { + "epoch": 1.44, + "grad_norm": 6.5993475914001465, + "learning_rate": 0.00016648474323989083, + "loss": 1.3286, + "step": 4104 + }, + { + "epoch": 1.44, + "grad_norm": 2.824436902999878, + "learning_rate": 0.0001664475316298685, + "loss": 0.8653, + "step": 4105 + }, + { + "epoch": 1.44, + "grad_norm": 2.3655171394348145, + "learning_rate": 0.00016641032001984615, + "loss": 0.6443, + "step": 4106 + }, + { + "epoch": 1.44, + "grad_norm": 2.7993006706237793, + "learning_rate": 0.00016637310840982386, + "loss": 0.4293, + "step": 4107 + }, + { + "epoch": 1.44, + "grad_norm": 1.109576940536499, + "learning_rate": 0.00016633589679980153, + "loss": 0.3577, + "step": 4108 + }, + { + "epoch": 1.44, + "grad_norm": 1.4774690866470337, + "learning_rate": 0.00016629868518977918, + "loss": 0.2693, + "step": 4109 + }, + { + "epoch": 1.44, + "grad_norm": 1.442692518234253, + "learning_rate": 0.00016626147357975688, + "loss": 0.3693, + "step": 4110 + }, + { + "epoch": 1.44, + "grad_norm": 1.9737093448638916, + "learning_rate": 0.00016622426196973453, + "loss": 0.2922, + "step": 4111 + }, + { + "epoch": 1.44, + "grad_norm": 3.433197498321533, + "learning_rate": 0.0001661870503597122, + "loss": 0.2324, + "step": 4112 + }, + { + "epoch": 1.44, + "grad_norm": 1.9645071029663086, + "learning_rate": 0.0001661498387496899, + "loss": 0.2494, + "step": 4113 + }, + { + "epoch": 1.44, + "grad_norm": 2.4682822227478027, + "learning_rate": 0.00016611262713966756, + "loss": 0.3718, + "step": 4114 + }, + { + "epoch": 1.44, + "grad_norm": 1.6847212314605713, + "learning_rate": 0.00016607541552964524, + "loss": 0.315, + "step": 4115 + }, + { + "epoch": 1.44, + "grad_norm": 1.7860573530197144, + "learning_rate": 0.00016603820391962289, + "loss": 0.4285, + "step": 4116 + }, + { + "epoch": 1.44, + "grad_norm": 2.275067090988159, + "learning_rate": 0.0001660009923096006, + "loss": 0.2831, + "step": 4117 + }, + { + "epoch": 1.44, + "grad_norm": 2.7724900245666504, + "learning_rate": 0.00016596378069957826, + "loss": 0.2465, + "step": 4118 + }, + { + "epoch": 1.44, + "grad_norm": 2.4586076736450195, + "learning_rate": 0.0001659265690895559, + "loss": 0.2474, + "step": 4119 + }, + { + "epoch": 1.44, + "grad_norm": 2.156881332397461, + "learning_rate": 0.00016588935747953362, + "loss": 0.1468, + "step": 4120 + }, + { + "epoch": 1.44, + "grad_norm": 2.4893946647644043, + "learning_rate": 0.0001658521458695113, + "loss": 0.2606, + "step": 4121 + }, + { + "epoch": 1.44, + "grad_norm": 1.4689332246780396, + "learning_rate": 0.00016581493425948894, + "loss": 0.0906, + "step": 4122 + }, + { + "epoch": 1.44, + "grad_norm": 4.48578405380249, + "learning_rate": 0.00016577772264946662, + "loss": 1.7996, + "step": 4123 + }, + { + "epoch": 1.44, + "grad_norm": 1.9185426235198975, + "learning_rate": 0.0001657405110394443, + "loss": 0.1204, + "step": 4124 + }, + { + "epoch": 1.45, + "grad_norm": 3.5373518466949463, + "learning_rate": 0.00016570329942942197, + "loss": 0.6004, + "step": 4125 + }, + { + "epoch": 1.45, + "grad_norm": 0.8519726991653442, + "learning_rate": 0.00016566608781939964, + "loss": 0.0576, + "step": 4126 + }, + { + "epoch": 1.45, + "grad_norm": 1.851152777671814, + "learning_rate": 0.0001656288762093773, + "loss": 0.2595, + "step": 4127 + }, + { + "epoch": 1.45, + "grad_norm": 1.1237139701843262, + "learning_rate": 0.000165591664599355, + "loss": 0.0662, + "step": 4128 + }, + { + "epoch": 1.45, + "grad_norm": 1.562714695930481, + "learning_rate": 0.00016555445298933264, + "loss": 0.1051, + "step": 4129 + }, + { + "epoch": 1.45, + "grad_norm": 1.522381067276001, + "learning_rate": 0.00016551724137931032, + "loss": 0.4156, + "step": 4130 + }, + { + "epoch": 1.45, + "grad_norm": 1.2748173475265503, + "learning_rate": 0.00016548002976928802, + "loss": 0.2003, + "step": 4131 + }, + { + "epoch": 1.45, + "grad_norm": 2.538360118865967, + "learning_rate": 0.00016544281815926567, + "loss": 0.963, + "step": 4132 + }, + { + "epoch": 1.45, + "grad_norm": 2.971334934234619, + "learning_rate": 0.00016540560654924335, + "loss": 0.6725, + "step": 4133 + }, + { + "epoch": 1.45, + "grad_norm": 2.554715633392334, + "learning_rate": 0.00016536839493922105, + "loss": 0.8091, + "step": 4134 + }, + { + "epoch": 1.45, + "grad_norm": 1.7067142724990845, + "learning_rate": 0.0001653311833291987, + "loss": 0.4307, + "step": 4135 + }, + { + "epoch": 1.45, + "grad_norm": 2.621626377105713, + "learning_rate": 0.00016529397171917637, + "loss": 0.2681, + "step": 4136 + }, + { + "epoch": 1.45, + "grad_norm": 4.470788478851318, + "learning_rate": 0.00016525676010915402, + "loss": 0.9887, + "step": 4137 + }, + { + "epoch": 1.45, + "grad_norm": 1.222482442855835, + "learning_rate": 0.00016521954849913173, + "loss": 0.1786, + "step": 4138 + }, + { + "epoch": 1.45, + "grad_norm": 2.5366249084472656, + "learning_rate": 0.0001651823368891094, + "loss": 0.4021, + "step": 4139 + }, + { + "epoch": 1.45, + "grad_norm": 1.220999836921692, + "learning_rate": 0.00016514512527908705, + "loss": 0.1173, + "step": 4140 + }, + { + "epoch": 1.45, + "grad_norm": 1.6525065898895264, + "learning_rate": 0.00016510791366906475, + "loss": 0.249, + "step": 4141 + }, + { + "epoch": 1.45, + "grad_norm": 3.0360629558563232, + "learning_rate": 0.0001650707020590424, + "loss": 1.3031, + "step": 4142 + }, + { + "epoch": 1.45, + "grad_norm": 2.506528854370117, + "learning_rate": 0.00016503349044902008, + "loss": 0.3216, + "step": 4143 + }, + { + "epoch": 1.45, + "grad_norm": 2.794973611831665, + "learning_rate": 0.00016499627883899775, + "loss": 0.3352, + "step": 4144 + }, + { + "epoch": 1.45, + "grad_norm": 1.864272117614746, + "learning_rate": 0.00016495906722897543, + "loss": 0.1704, + "step": 4145 + }, + { + "epoch": 1.45, + "grad_norm": 0.7695359587669373, + "learning_rate": 0.0001649218556189531, + "loss": 0.0561, + "step": 4146 + }, + { + "epoch": 1.45, + "grad_norm": 1.7436617612838745, + "learning_rate": 0.00016488464400893078, + "loss": 0.3065, + "step": 4147 + }, + { + "epoch": 1.45, + "grad_norm": 1.8266197443008423, + "learning_rate": 0.00016484743239890843, + "loss": 0.1391, + "step": 4148 + }, + { + "epoch": 1.45, + "grad_norm": 4.190832614898682, + "learning_rate": 0.00016481022078888613, + "loss": 0.4169, + "step": 4149 + }, + { + "epoch": 1.45, + "grad_norm": 3.211941719055176, + "learning_rate": 0.00016477300917886378, + "loss": 0.2127, + "step": 4150 + }, + { + "epoch": 1.45, + "grad_norm": 3.884977340698242, + "learning_rate": 0.00016473579756884146, + "loss": 1.0903, + "step": 4151 + }, + { + "epoch": 1.45, + "grad_norm": 2.969237804412842, + "learning_rate": 0.00016469858595881916, + "loss": 1.1619, + "step": 4152 + }, + { + "epoch": 1.46, + "grad_norm": 4.25085973739624, + "learning_rate": 0.0001646613743487968, + "loss": 0.3401, + "step": 4153 + }, + { + "epoch": 1.46, + "grad_norm": 1.1101423501968384, + "learning_rate": 0.00016462416273877448, + "loss": 0.0716, + "step": 4154 + }, + { + "epoch": 1.46, + "grad_norm": 1.9234617948532104, + "learning_rate": 0.00016458695112875213, + "loss": 0.8407, + "step": 4155 + }, + { + "epoch": 1.46, + "grad_norm": 2.631096601486206, + "learning_rate": 0.00016454973951872984, + "loss": 0.7553, + "step": 4156 + }, + { + "epoch": 1.46, + "grad_norm": 1.5557128190994263, + "learning_rate": 0.0001645125279087075, + "loss": 0.4707, + "step": 4157 + }, + { + "epoch": 1.46, + "grad_norm": 1.641202449798584, + "learning_rate": 0.00016447531629868516, + "loss": 0.3575, + "step": 4158 + }, + { + "epoch": 1.46, + "grad_norm": 2.3638205528259277, + "learning_rate": 0.00016443810468866286, + "loss": 0.3766, + "step": 4159 + }, + { + "epoch": 1.46, + "grad_norm": 2.019489288330078, + "learning_rate": 0.0001644008930786405, + "loss": 0.412, + "step": 4160 + }, + { + "epoch": 1.46, + "grad_norm": 1.5674275159835815, + "learning_rate": 0.0001643636814686182, + "loss": 0.1787, + "step": 4161 + }, + { + "epoch": 1.46, + "grad_norm": 1.2668960094451904, + "learning_rate": 0.0001643264698585959, + "loss": 0.1301, + "step": 4162 + }, + { + "epoch": 1.46, + "grad_norm": 1.8856401443481445, + "learning_rate": 0.00016428925824857354, + "loss": 0.3409, + "step": 4163 + }, + { + "epoch": 1.46, + "grad_norm": 2.119640350341797, + "learning_rate": 0.00016425204663855121, + "loss": 0.2735, + "step": 4164 + }, + { + "epoch": 1.46, + "grad_norm": 3.6939704418182373, + "learning_rate": 0.0001642148350285289, + "loss": 0.4351, + "step": 4165 + }, + { + "epoch": 1.46, + "grad_norm": 3.8741753101348877, + "learning_rate": 0.00016417762341850657, + "loss": 0.4203, + "step": 4166 + }, + { + "epoch": 1.46, + "grad_norm": 2.0240747928619385, + "learning_rate": 0.00016414041180848424, + "loss": 0.2846, + "step": 4167 + }, + { + "epoch": 1.46, + "grad_norm": 1.6168068647384644, + "learning_rate": 0.0001641032001984619, + "loss": 0.3109, + "step": 4168 + }, + { + "epoch": 1.46, + "grad_norm": 0.7785304188728333, + "learning_rate": 0.00016406598858843957, + "loss": 0.0582, + "step": 4169 + }, + { + "epoch": 1.46, + "grad_norm": 2.258286476135254, + "learning_rate": 0.00016402877697841727, + "loss": 0.1708, + "step": 4170 + }, + { + "epoch": 1.46, + "grad_norm": 1.3507694005966187, + "learning_rate": 0.00016399156536839492, + "loss": 0.1883, + "step": 4171 + }, + { + "epoch": 1.46, + "grad_norm": 3.4873201847076416, + "learning_rate": 0.0001639543537583726, + "loss": 0.3557, + "step": 4172 + }, + { + "epoch": 1.46, + "grad_norm": 2.767658233642578, + "learning_rate": 0.00016391714214835024, + "loss": 0.3405, + "step": 4173 + }, + { + "epoch": 1.46, + "grad_norm": 4.61103630065918, + "learning_rate": 0.00016387993053832795, + "loss": 0.5315, + "step": 4174 + }, + { + "epoch": 1.46, + "grad_norm": 4.691863536834717, + "learning_rate": 0.00016384271892830562, + "loss": 0.2525, + "step": 4175 + }, + { + "epoch": 1.46, + "grad_norm": 2.6262426376342773, + "learning_rate": 0.00016380550731828327, + "loss": 0.3521, + "step": 4176 + }, + { + "epoch": 1.46, + "grad_norm": 4.043594837188721, + "learning_rate": 0.00016376829570826097, + "loss": 0.7037, + "step": 4177 + }, + { + "epoch": 1.46, + "grad_norm": 3.2441458702087402, + "learning_rate": 0.00016373108409823865, + "loss": 0.4664, + "step": 4178 + }, + { + "epoch": 1.46, + "grad_norm": 1.617117166519165, + "learning_rate": 0.0001636938724882163, + "loss": 0.0479, + "step": 4179 + }, + { + "epoch": 1.46, + "grad_norm": 1.4325096607208252, + "learning_rate": 0.000163656660878194, + "loss": 0.3984, + "step": 4180 + }, + { + "epoch": 1.46, + "grad_norm": 2.373211622238159, + "learning_rate": 0.00016361944926817165, + "loss": 0.5353, + "step": 4181 + }, + { + "epoch": 1.47, + "grad_norm": 1.8118925094604492, + "learning_rate": 0.00016358223765814932, + "loss": 0.3167, + "step": 4182 + }, + { + "epoch": 1.47, + "grad_norm": 2.375030279159546, + "learning_rate": 0.00016354502604812703, + "loss": 0.173, + "step": 4183 + }, + { + "epoch": 1.47, + "grad_norm": 2.043342113494873, + "learning_rate": 0.00016350781443810468, + "loss": 0.5591, + "step": 4184 + }, + { + "epoch": 1.47, + "grad_norm": 2.062807321548462, + "learning_rate": 0.00016347060282808235, + "loss": 0.3204, + "step": 4185 + }, + { + "epoch": 1.47, + "grad_norm": 1.876054286956787, + "learning_rate": 0.00016343339121806, + "loss": 0.2098, + "step": 4186 + }, + { + "epoch": 1.47, + "grad_norm": 2.224905014038086, + "learning_rate": 0.0001633961796080377, + "loss": 0.5262, + "step": 4187 + }, + { + "epoch": 1.47, + "grad_norm": 2.370276689529419, + "learning_rate": 0.00016335896799801538, + "loss": 0.6072, + "step": 4188 + }, + { + "epoch": 1.47, + "grad_norm": 3.29032301902771, + "learning_rate": 0.00016332175638799303, + "loss": 0.7222, + "step": 4189 + }, + { + "epoch": 1.47, + "grad_norm": 2.3209869861602783, + "learning_rate": 0.0001632845447779707, + "loss": 0.4061, + "step": 4190 + }, + { + "epoch": 1.47, + "grad_norm": 2.168501853942871, + "learning_rate": 0.0001632473331679484, + "loss": 0.2453, + "step": 4191 + }, + { + "epoch": 1.47, + "grad_norm": 1.7172929048538208, + "learning_rate": 0.00016321012155792606, + "loss": 0.4018, + "step": 4192 + }, + { + "epoch": 1.47, + "grad_norm": 3.087212562561035, + "learning_rate": 0.00016317290994790373, + "loss": 0.2529, + "step": 4193 + }, + { + "epoch": 1.47, + "grad_norm": 2.2027931213378906, + "learning_rate": 0.00016313569833788138, + "loss": 0.3247, + "step": 4194 + }, + { + "epoch": 1.47, + "grad_norm": 1.7877609729766846, + "learning_rate": 0.00016309848672785908, + "loss": 0.1612, + "step": 4195 + }, + { + "epoch": 1.47, + "grad_norm": 2.64384126663208, + "learning_rate": 0.00016306127511783676, + "loss": 0.4328, + "step": 4196 + }, + { + "epoch": 1.47, + "grad_norm": 3.098459243774414, + "learning_rate": 0.0001630240635078144, + "loss": 0.4273, + "step": 4197 + }, + { + "epoch": 1.47, + "grad_norm": 2.489880084991455, + "learning_rate": 0.0001629868518977921, + "loss": 0.3342, + "step": 4198 + }, + { + "epoch": 1.47, + "grad_norm": 2.079407215118408, + "learning_rate": 0.00016294964028776976, + "loss": 0.1652, + "step": 4199 + }, + { + "epoch": 1.47, + "grad_norm": 9.008259773254395, + "learning_rate": 0.00016291242867774743, + "loss": 0.281, + "step": 4200 + }, + { + "epoch": 1.47, + "eval_loss": 0.4405984878540039, + "eval_runtime": 51.6554, + "eval_samples_per_second": 41.97, + "eval_steps_per_second": 10.493, + "eval_wer": 0.41567202906071615, + "step": 4200 + }, + { + "epoch": 1.47, + "grad_norm": 2.8630118370056152, + "learning_rate": 0.00016287521706772514, + "loss": 0.3071, + "step": 4201 + }, + { + "epoch": 1.47, + "grad_norm": 2.293118476867676, + "learning_rate": 0.0001628380054577028, + "loss": 0.2474, + "step": 4202 + }, + { + "epoch": 1.47, + "grad_norm": 4.106233596801758, + "learning_rate": 0.00016280079384768046, + "loss": 0.094, + "step": 4203 + }, + { + "epoch": 1.47, + "grad_norm": 1.9723445177078247, + "learning_rate": 0.0001627635822376581, + "loss": 0.1708, + "step": 4204 + }, + { + "epoch": 1.47, + "grad_norm": 1.9736557006835938, + "learning_rate": 0.00016272637062763581, + "loss": 0.8252, + "step": 4205 + }, + { + "epoch": 1.47, + "grad_norm": 1.89078688621521, + "learning_rate": 0.0001626891590176135, + "loss": 0.516, + "step": 4206 + }, + { + "epoch": 1.47, + "grad_norm": 2.3267147541046143, + "learning_rate": 0.00016265194740759114, + "loss": 0.7264, + "step": 4207 + }, + { + "epoch": 1.47, + "grad_norm": 1.9280672073364258, + "learning_rate": 0.00016261473579756884, + "loss": 0.462, + "step": 4208 + }, + { + "epoch": 1.47, + "grad_norm": 1.9770915508270264, + "learning_rate": 0.00016257752418754652, + "loss": 0.2252, + "step": 4209 + }, + { + "epoch": 1.48, + "grad_norm": 1.2686558961868286, + "learning_rate": 0.00016254031257752417, + "loss": 0.154, + "step": 4210 + }, + { + "epoch": 1.48, + "grad_norm": 2.1990997791290283, + "learning_rate": 0.00016250310096750184, + "loss": 0.1505, + "step": 4211 + }, + { + "epoch": 1.48, + "grad_norm": 1.4449461698532104, + "learning_rate": 0.00016246588935747952, + "loss": 0.218, + "step": 4212 + }, + { + "epoch": 1.48, + "grad_norm": 2.5558810234069824, + "learning_rate": 0.0001624286777474572, + "loss": 0.6625, + "step": 4213 + }, + { + "epoch": 1.48, + "grad_norm": 2.122541666030884, + "learning_rate": 0.00016239146613743487, + "loss": 0.3919, + "step": 4214 + }, + { + "epoch": 1.48, + "grad_norm": 2.2353134155273438, + "learning_rate": 0.00016235425452741252, + "loss": 0.4296, + "step": 4215 + }, + { + "epoch": 1.48, + "grad_norm": 1.7417476177215576, + "learning_rate": 0.00016231704291739022, + "loss": 0.4243, + "step": 4216 + }, + { + "epoch": 1.48, + "grad_norm": 1.6109976768493652, + "learning_rate": 0.00016227983130736787, + "loss": 0.169, + "step": 4217 + }, + { + "epoch": 1.48, + "grad_norm": 1.284348487854004, + "learning_rate": 0.00016224261969734555, + "loss": 0.2186, + "step": 4218 + }, + { + "epoch": 1.48, + "grad_norm": 1.6397535800933838, + "learning_rate": 0.00016220540808732325, + "loss": 0.3277, + "step": 4219 + }, + { + "epoch": 1.48, + "grad_norm": 3.001840114593506, + "learning_rate": 0.0001621681964773009, + "loss": 0.158, + "step": 4220 + }, + { + "epoch": 1.48, + "grad_norm": 1.503371000289917, + "learning_rate": 0.00016213098486727857, + "loss": 0.2988, + "step": 4221 + }, + { + "epoch": 1.48, + "grad_norm": 1.4237070083618164, + "learning_rate": 0.00016209377325725628, + "loss": 0.0898, + "step": 4222 + }, + { + "epoch": 1.48, + "grad_norm": 3.1129143238067627, + "learning_rate": 0.00016205656164723392, + "loss": 0.2174, + "step": 4223 + }, + { + "epoch": 1.48, + "grad_norm": 0.9691246747970581, + "learning_rate": 0.0001620193500372116, + "loss": 0.1042, + "step": 4224 + }, + { + "epoch": 1.48, + "grad_norm": 2.2304303646087646, + "learning_rate": 0.00016198213842718925, + "loss": 0.1921, + "step": 4225 + }, + { + "epoch": 1.48, + "grad_norm": 2.0455522537231445, + "learning_rate": 0.00016194492681716695, + "loss": 0.1548, + "step": 4226 + }, + { + "epoch": 1.48, + "grad_norm": 3.2553882598876953, + "learning_rate": 0.00016190771520714463, + "loss": 0.523, + "step": 4227 + }, + { + "epoch": 1.48, + "grad_norm": 1.433052897453308, + "learning_rate": 0.00016187050359712228, + "loss": 0.1224, + "step": 4228 + }, + { + "epoch": 1.48, + "grad_norm": 4.60850191116333, + "learning_rate": 0.00016183329198709998, + "loss": 0.7773, + "step": 4229 + }, + { + "epoch": 1.48, + "grad_norm": 3.2903518676757812, + "learning_rate": 0.00016179608037707763, + "loss": 0.5722, + "step": 4230 + }, + { + "epoch": 1.48, + "grad_norm": 3.1476831436157227, + "learning_rate": 0.0001617588687670553, + "loss": 0.5969, + "step": 4231 + }, + { + "epoch": 1.48, + "grad_norm": 1.1613272428512573, + "learning_rate": 0.00016172165715703298, + "loss": 0.1067, + "step": 4232 + }, + { + "epoch": 1.48, + "grad_norm": 1.8969131708145142, + "learning_rate": 0.00016168444554701065, + "loss": 0.4415, + "step": 4233 + }, + { + "epoch": 1.48, + "grad_norm": 2.0695786476135254, + "learning_rate": 0.00016164723393698833, + "loss": 0.3461, + "step": 4234 + }, + { + "epoch": 1.48, + "grad_norm": 4.001595973968506, + "learning_rate": 0.000161610022326966, + "loss": 0.687, + "step": 4235 + }, + { + "epoch": 1.48, + "grad_norm": 2.7482621669769287, + "learning_rate": 0.00016157281071694366, + "loss": 0.4221, + "step": 4236 + }, + { + "epoch": 1.48, + "grad_norm": 3.1002767086029053, + "learning_rate": 0.00016153559910692136, + "loss": 0.2666, + "step": 4237 + }, + { + "epoch": 1.48, + "grad_norm": 1.9085137844085693, + "learning_rate": 0.000161498387496899, + "loss": 0.4164, + "step": 4238 + }, + { + "epoch": 1.49, + "grad_norm": 2.563671112060547, + "learning_rate": 0.00016146117588687668, + "loss": 0.3175, + "step": 4239 + }, + { + "epoch": 1.49, + "grad_norm": 3.7906312942504883, + "learning_rate": 0.00016142396427685439, + "loss": 1.8977, + "step": 4240 + }, + { + "epoch": 1.49, + "grad_norm": 4.173704147338867, + "learning_rate": 0.00016138675266683203, + "loss": 0.4102, + "step": 4241 + }, + { + "epoch": 1.49, + "grad_norm": 4.604090690612793, + "learning_rate": 0.0001613495410568097, + "loss": 1.782, + "step": 4242 + }, + { + "epoch": 1.49, + "grad_norm": 2.0995211601257324, + "learning_rate": 0.00016131232944678736, + "loss": 0.2092, + "step": 4243 + }, + { + "epoch": 1.49, + "grad_norm": 3.1292269229888916, + "learning_rate": 0.00016127511783676506, + "loss": 0.341, + "step": 4244 + }, + { + "epoch": 1.49, + "grad_norm": 2.0066728591918945, + "learning_rate": 0.00016123790622674274, + "loss": 0.3798, + "step": 4245 + }, + { + "epoch": 1.49, + "grad_norm": 2.5838475227355957, + "learning_rate": 0.00016120069461672039, + "loss": 0.3337, + "step": 4246 + }, + { + "epoch": 1.49, + "grad_norm": 3.64768385887146, + "learning_rate": 0.0001611634830066981, + "loss": 0.5525, + "step": 4247 + }, + { + "epoch": 1.49, + "grad_norm": 5.899421215057373, + "learning_rate": 0.00016112627139667574, + "loss": 0.2683, + "step": 4248 + }, + { + "epoch": 1.49, + "grad_norm": 2.8585946559906006, + "learning_rate": 0.0001610890597866534, + "loss": 0.3204, + "step": 4249 + }, + { + "epoch": 1.49, + "grad_norm": 1.4531258344650269, + "learning_rate": 0.00016105184817663112, + "loss": 0.1442, + "step": 4250 + }, + { + "epoch": 1.49, + "grad_norm": 2.3642122745513916, + "learning_rate": 0.00016101463656660876, + "loss": 0.2424, + "step": 4251 + }, + { + "epoch": 1.49, + "grad_norm": 2.2466611862182617, + "learning_rate": 0.00016097742495658644, + "loss": 0.1768, + "step": 4252 + }, + { + "epoch": 1.49, + "grad_norm": 1.5451269149780273, + "learning_rate": 0.00016094021334656412, + "loss": 0.065, + "step": 4253 + }, + { + "epoch": 1.49, + "grad_norm": 3.8907992839813232, + "learning_rate": 0.0001609030017365418, + "loss": 0.3097, + "step": 4254 + }, + { + "epoch": 1.49, + "grad_norm": 2.250171422958374, + "learning_rate": 0.00016086579012651947, + "loss": 0.908, + "step": 4255 + }, + { + "epoch": 1.49, + "grad_norm": 5.258478164672852, + "learning_rate": 0.00016082857851649712, + "loss": 1.5232, + "step": 4256 + }, + { + "epoch": 1.49, + "grad_norm": 2.6386005878448486, + "learning_rate": 0.0001607913669064748, + "loss": 0.4984, + "step": 4257 + }, + { + "epoch": 1.49, + "grad_norm": 1.1076130867004395, + "learning_rate": 0.0001607541552964525, + "loss": 0.242, + "step": 4258 + }, + { + "epoch": 1.49, + "grad_norm": 1.5664863586425781, + "learning_rate": 0.00016071694368643014, + "loss": 0.1759, + "step": 4259 + }, + { + "epoch": 1.49, + "grad_norm": 2.852780342102051, + "learning_rate": 0.00016067973207640782, + "loss": 0.3636, + "step": 4260 + }, + { + "epoch": 1.49, + "grad_norm": 1.7188012599945068, + "learning_rate": 0.0001606425204663855, + "loss": 0.2137, + "step": 4261 + }, + { + "epoch": 1.49, + "grad_norm": 1.521380066871643, + "learning_rate": 0.00016060530885636317, + "loss": 0.2769, + "step": 4262 + }, + { + "epoch": 1.49, + "grad_norm": 1.657301902770996, + "learning_rate": 0.00016056809724634085, + "loss": 0.3082, + "step": 4263 + }, + { + "epoch": 1.49, + "grad_norm": 2.6540775299072266, + "learning_rate": 0.0001605308856363185, + "loss": 0.4584, + "step": 4264 + }, + { + "epoch": 1.49, + "grad_norm": 1.5712755918502808, + "learning_rate": 0.0001604936740262962, + "loss": 0.1983, + "step": 4265 + }, + { + "epoch": 1.49, + "grad_norm": 2.1187055110931396, + "learning_rate": 0.00016045646241627387, + "loss": 0.3515, + "step": 4266 + }, + { + "epoch": 1.5, + "grad_norm": 4.760385036468506, + "learning_rate": 0.00016041925080625152, + "loss": 2.0062, + "step": 4267 + }, + { + "epoch": 1.5, + "grad_norm": 12.318647384643555, + "learning_rate": 0.00016038203919622923, + "loss": 0.6059, + "step": 4268 + }, + { + "epoch": 1.5, + "grad_norm": 1.8167134523391724, + "learning_rate": 0.00016034482758620688, + "loss": 0.1795, + "step": 4269 + }, + { + "epoch": 1.5, + "grad_norm": 3.7228429317474365, + "learning_rate": 0.00016030761597618455, + "loss": 0.4189, + "step": 4270 + }, + { + "epoch": 1.5, + "grad_norm": 2.039379596710205, + "learning_rate": 0.00016027040436616225, + "loss": 0.2122, + "step": 4271 + }, + { + "epoch": 1.5, + "grad_norm": 3.154689073562622, + "learning_rate": 0.0001602331927561399, + "loss": 0.3161, + "step": 4272 + }, + { + "epoch": 1.5, + "grad_norm": 2.978641986846924, + "learning_rate": 0.00016019598114611758, + "loss": 0.4632, + "step": 4273 + }, + { + "epoch": 1.5, + "grad_norm": 1.5532993078231812, + "learning_rate": 0.00016015876953609523, + "loss": 0.2329, + "step": 4274 + }, + { + "epoch": 1.5, + "grad_norm": 1.4337552785873413, + "learning_rate": 0.00016012155792607293, + "loss": 0.2356, + "step": 4275 + }, + { + "epoch": 1.5, + "grad_norm": 2.861510992050171, + "learning_rate": 0.0001600843463160506, + "loss": 0.6937, + "step": 4276 + }, + { + "epoch": 1.5, + "grad_norm": 2.7515218257904053, + "learning_rate": 0.00016004713470602825, + "loss": 0.2255, + "step": 4277 + }, + { + "epoch": 1.5, + "grad_norm": 2.600706100463867, + "learning_rate": 0.00016000992309600593, + "loss": 0.2059, + "step": 4278 + }, + { + "epoch": 1.5, + "grad_norm": 5.621941566467285, + "learning_rate": 0.00015997271148598363, + "loss": 0.5214, + "step": 4279 + }, + { + "epoch": 1.5, + "grad_norm": 2.779236078262329, + "learning_rate": 0.00015993549987596128, + "loss": 0.9023, + "step": 4280 + }, + { + "epoch": 1.5, + "grad_norm": 2.4109227657318115, + "learning_rate": 0.00015989828826593896, + "loss": 0.2555, + "step": 4281 + }, + { + "epoch": 1.5, + "grad_norm": 1.9273884296417236, + "learning_rate": 0.00015986107665591663, + "loss": 0.3355, + "step": 4282 + }, + { + "epoch": 1.5, + "grad_norm": 1.6707803010940552, + "learning_rate": 0.0001598238650458943, + "loss": 0.295, + "step": 4283 + }, + { + "epoch": 1.5, + "grad_norm": 14.171672821044922, + "learning_rate": 0.00015978665343587198, + "loss": 4.537, + "step": 4284 + }, + { + "epoch": 1.5, + "grad_norm": 2.2641332149505615, + "learning_rate": 0.00015974944182584963, + "loss": 0.502, + "step": 4285 + }, + { + "epoch": 1.5, + "grad_norm": 1.5345269441604614, + "learning_rate": 0.00015971223021582734, + "loss": 0.4606, + "step": 4286 + }, + { + "epoch": 1.5, + "grad_norm": 1.9413623809814453, + "learning_rate": 0.00015967501860580499, + "loss": 0.3375, + "step": 4287 + }, + { + "epoch": 1.5, + "grad_norm": 1.7889741659164429, + "learning_rate": 0.00015963780699578266, + "loss": 0.2946, + "step": 4288 + }, + { + "epoch": 1.5, + "grad_norm": 3.114777088165283, + "learning_rate": 0.00015960059538576036, + "loss": 0.5715, + "step": 4289 + }, + { + "epoch": 1.5, + "grad_norm": 2.282841920852661, + "learning_rate": 0.000159563383775738, + "loss": 0.3345, + "step": 4290 + }, + { + "epoch": 1.5, + "grad_norm": 1.8913458585739136, + "learning_rate": 0.0001595261721657157, + "loss": 0.3601, + "step": 4291 + }, + { + "epoch": 1.5, + "grad_norm": 1.8621262311935425, + "learning_rate": 0.00015948896055569334, + "loss": 0.0981, + "step": 4292 + }, + { + "epoch": 1.5, + "grad_norm": 3.931448221206665, + "learning_rate": 0.00015945174894567104, + "loss": 0.4209, + "step": 4293 + }, + { + "epoch": 1.5, + "grad_norm": 1.0255603790283203, + "learning_rate": 0.00015941453733564872, + "loss": 0.1596, + "step": 4294 + }, + { + "epoch": 1.5, + "grad_norm": 2.5541460514068604, + "learning_rate": 0.00015937732572562636, + "loss": 0.5015, + "step": 4295 + }, + { + "epoch": 1.51, + "grad_norm": 1.8127018213272095, + "learning_rate": 0.00015934011411560407, + "loss": 0.1114, + "step": 4296 + }, + { + "epoch": 1.51, + "grad_norm": 1.7823874950408936, + "learning_rate": 0.00015930290250558174, + "loss": 0.1683, + "step": 4297 + }, + { + "epoch": 1.51, + "grad_norm": 3.2042083740234375, + "learning_rate": 0.0001592656908955594, + "loss": 0.4738, + "step": 4298 + }, + { + "epoch": 1.51, + "grad_norm": 1.9896401166915894, + "learning_rate": 0.00015922847928553707, + "loss": 0.0851, + "step": 4299 + }, + { + "epoch": 1.51, + "grad_norm": 1.8025238513946533, + "learning_rate": 0.00015919126767551474, + "loss": 0.733, + "step": 4300 + }, + { + "epoch": 1.51, + "eval_loss": 0.47069793939590454, + "eval_runtime": 51.3136, + "eval_samples_per_second": 42.25, + "eval_steps_per_second": 10.563, + "eval_wer": 0.44680851063829785, + "step": 4300 + }, + { + "epoch": 1.51, + "grad_norm": 2.523890495300293, + "learning_rate": 0.00015915405606549242, + "loss": 0.2174, + "step": 4301 + }, + { + "epoch": 1.51, + "grad_norm": 7.219611644744873, + "learning_rate": 0.0001591168444554701, + "loss": 1.3755, + "step": 4302 + }, + { + "epoch": 1.51, + "grad_norm": 2.2744252681732178, + "learning_rate": 0.00015907963284544777, + "loss": 0.2139, + "step": 4303 + }, + { + "epoch": 1.51, + "grad_norm": 5.458730220794678, + "learning_rate": 0.00015904242123542545, + "loss": 1.3734, + "step": 4304 + }, + { + "epoch": 1.51, + "grad_norm": 3.0790417194366455, + "learning_rate": 0.0001590052096254031, + "loss": 0.8824, + "step": 4305 + }, + { + "epoch": 1.51, + "grad_norm": 2.6385726928710938, + "learning_rate": 0.00015896799801538077, + "loss": 0.629, + "step": 4306 + }, + { + "epoch": 1.51, + "grad_norm": 3.4197399616241455, + "learning_rate": 0.00015893078640535847, + "loss": 0.6337, + "step": 4307 + }, + { + "epoch": 1.51, + "grad_norm": 1.8060104846954346, + "learning_rate": 0.00015889357479533612, + "loss": 0.2284, + "step": 4308 + }, + { + "epoch": 1.51, + "grad_norm": 1.9204670190811157, + "learning_rate": 0.0001588563631853138, + "loss": 0.2744, + "step": 4309 + }, + { + "epoch": 1.51, + "grad_norm": 1.3821399211883545, + "learning_rate": 0.0001588191515752915, + "loss": 0.2596, + "step": 4310 + }, + { + "epoch": 1.51, + "grad_norm": 2.8792121410369873, + "learning_rate": 0.00015878193996526915, + "loss": 0.8357, + "step": 4311 + }, + { + "epoch": 1.51, + "grad_norm": 1.4682904481887817, + "learning_rate": 0.00015874472835524683, + "loss": 0.3634, + "step": 4312 + }, + { + "epoch": 1.51, + "grad_norm": 2.5377604961395264, + "learning_rate": 0.00015870751674522447, + "loss": 0.1408, + "step": 4313 + }, + { + "epoch": 1.51, + "grad_norm": 2.061809539794922, + "learning_rate": 0.00015867030513520218, + "loss": 0.1885, + "step": 4314 + }, + { + "epoch": 1.51, + "grad_norm": 3.3205690383911133, + "learning_rate": 0.00015863309352517985, + "loss": 0.3503, + "step": 4315 + }, + { + "epoch": 1.51, + "grad_norm": 1.4646776914596558, + "learning_rate": 0.0001585958819151575, + "loss": 0.1579, + "step": 4316 + }, + { + "epoch": 1.51, + "grad_norm": 4.019704818725586, + "learning_rate": 0.0001585586703051352, + "loss": 0.4413, + "step": 4317 + }, + { + "epoch": 1.51, + "grad_norm": 2.0454628467559814, + "learning_rate": 0.00015852145869511285, + "loss": 0.179, + "step": 4318 + }, + { + "epoch": 1.51, + "grad_norm": 3.3551511764526367, + "learning_rate": 0.00015848424708509053, + "loss": 0.3165, + "step": 4319 + }, + { + "epoch": 1.51, + "grad_norm": 2.5540342330932617, + "learning_rate": 0.0001584470354750682, + "loss": 0.1543, + "step": 4320 + }, + { + "epoch": 1.51, + "grad_norm": 3.365748643875122, + "learning_rate": 0.00015840982386504588, + "loss": 0.3139, + "step": 4321 + }, + { + "epoch": 1.51, + "grad_norm": 1.9014160633087158, + "learning_rate": 0.00015837261225502356, + "loss": 0.3596, + "step": 4322 + }, + { + "epoch": 1.51, + "grad_norm": 2.817349910736084, + "learning_rate": 0.00015833540064500123, + "loss": 0.4239, + "step": 4323 + }, + { + "epoch": 1.52, + "grad_norm": 7.915454864501953, + "learning_rate": 0.0001582981890349789, + "loss": 0.356, + "step": 4324 + }, + { + "epoch": 1.52, + "grad_norm": 4.057788372039795, + "learning_rate": 0.00015826097742495658, + "loss": 0.2209, + "step": 4325 + }, + { + "epoch": 1.52, + "grad_norm": 3.6405909061431885, + "learning_rate": 0.00015822376581493423, + "loss": 0.1062, + "step": 4326 + }, + { + "epoch": 1.52, + "grad_norm": 2.823045015335083, + "learning_rate": 0.0001581865542049119, + "loss": 0.4549, + "step": 4327 + }, + { + "epoch": 1.52, + "grad_norm": 4.013254642486572, + "learning_rate": 0.0001581493425948896, + "loss": 0.5598, + "step": 4328 + }, + { + "epoch": 1.52, + "grad_norm": 2.840559244155884, + "learning_rate": 0.00015811213098486726, + "loss": 0.2415, + "step": 4329 + }, + { + "epoch": 1.52, + "grad_norm": 2.6936604976654053, + "learning_rate": 0.00015807491937484494, + "loss": 0.7174, + "step": 4330 + }, + { + "epoch": 1.52, + "grad_norm": 2.872133493423462, + "learning_rate": 0.00015803770776482258, + "loss": 0.8422, + "step": 4331 + }, + { + "epoch": 1.52, + "grad_norm": 1.364221453666687, + "learning_rate": 0.0001580004961548003, + "loss": 0.2798, + "step": 4332 + }, + { + "epoch": 1.52, + "grad_norm": 1.7011353969573975, + "learning_rate": 0.00015796328454477796, + "loss": 0.3753, + "step": 4333 + }, + { + "epoch": 1.52, + "grad_norm": 1.4171128273010254, + "learning_rate": 0.0001579260729347556, + "loss": 0.1875, + "step": 4334 + }, + { + "epoch": 1.52, + "grad_norm": 3.3720600605010986, + "learning_rate": 0.00015788886132473331, + "loss": 0.8102, + "step": 4335 + }, + { + "epoch": 1.52, + "grad_norm": 1.8144382238388062, + "learning_rate": 0.000157851649714711, + "loss": 0.2117, + "step": 4336 + }, + { + "epoch": 1.52, + "grad_norm": 2.3069920539855957, + "learning_rate": 0.00015781443810468864, + "loss": 0.8746, + "step": 4337 + }, + { + "epoch": 1.52, + "grad_norm": 1.95841646194458, + "learning_rate": 0.00015777722649466634, + "loss": 0.3763, + "step": 4338 + }, + { + "epoch": 1.52, + "grad_norm": 1.462180495262146, + "learning_rate": 0.000157740014884644, + "loss": 0.1606, + "step": 4339 + }, + { + "epoch": 1.52, + "grad_norm": 1.9129983186721802, + "learning_rate": 0.00015770280327462167, + "loss": 0.2706, + "step": 4340 + }, + { + "epoch": 1.52, + "grad_norm": 1.6017885208129883, + "learning_rate": 0.00015766559166459934, + "loss": 0.2079, + "step": 4341 + }, + { + "epoch": 1.52, + "grad_norm": 2.809229612350464, + "learning_rate": 0.00015762838005457702, + "loss": 0.5534, + "step": 4342 + }, + { + "epoch": 1.52, + "grad_norm": 1.4012563228607178, + "learning_rate": 0.0001575911684445547, + "loss": 0.1605, + "step": 4343 + }, + { + "epoch": 1.52, + "grad_norm": 1.8997706174850464, + "learning_rate": 0.00015755395683453234, + "loss": 0.1523, + "step": 4344 + }, + { + "epoch": 1.52, + "grad_norm": 5.734255313873291, + "learning_rate": 0.00015751674522451005, + "loss": 0.3817, + "step": 4345 + }, + { + "epoch": 1.52, + "grad_norm": 2.4937775135040283, + "learning_rate": 0.00015747953361448772, + "loss": 0.3117, + "step": 4346 + }, + { + "epoch": 1.52, + "grad_norm": NaN, + "learning_rate": 0.00015747953361448772, + "loss": 0.1006, + "step": 4347 + }, + { + "epoch": 1.52, + "grad_norm": 1.9749798774719238, + "learning_rate": 0.00015744232200446537, + "loss": 0.4598, + "step": 4348 + }, + { + "epoch": 1.52, + "grad_norm": 1.340526819229126, + "learning_rate": 0.00015740511039444305, + "loss": 0.0752, + "step": 4349 + }, + { + "epoch": 1.52, + "grad_norm": 2.0161750316619873, + "learning_rate": 0.00015736789878442072, + "loss": 0.2629, + "step": 4350 + }, + { + "epoch": 1.52, + "grad_norm": 2.0471723079681396, + "learning_rate": 0.0001573306871743984, + "loss": 0.3901, + "step": 4351 + }, + { + "epoch": 1.52, + "grad_norm": 3.22011661529541, + "learning_rate": 0.00015729347556437607, + "loss": 0.2667, + "step": 4352 + }, + { + "epoch": 1.53, + "grad_norm": 2.5319764614105225, + "learning_rate": 0.00015725626395435372, + "loss": 0.2311, + "step": 4353 + }, + { + "epoch": 1.53, + "grad_norm": 3.327425718307495, + "learning_rate": 0.00015721905234433142, + "loss": 0.2222, + "step": 4354 + }, + { + "epoch": 1.53, + "grad_norm": 1.8986114263534546, + "learning_rate": 0.0001571818407343091, + "loss": 0.5881, + "step": 4355 + }, + { + "epoch": 1.53, + "grad_norm": 1.7233716249465942, + "learning_rate": 0.00015714462912428675, + "loss": 0.6135, + "step": 4356 + }, + { + "epoch": 1.53, + "grad_norm": 1.6733200550079346, + "learning_rate": 0.00015710741751426445, + "loss": 0.2479, + "step": 4357 + }, + { + "epoch": 1.53, + "grad_norm": 0.7937635183334351, + "learning_rate": 0.0001570702059042421, + "loss": 0.0792, + "step": 4358 + }, + { + "epoch": 1.53, + "grad_norm": 2.3683009147644043, + "learning_rate": 0.00015703299429421978, + "loss": 0.2897, + "step": 4359 + }, + { + "epoch": 1.53, + "grad_norm": 3.6302194595336914, + "learning_rate": 0.00015699578268419748, + "loss": 0.5056, + "step": 4360 + }, + { + "epoch": 1.53, + "grad_norm": 2.038682460784912, + "learning_rate": 0.00015695857107417513, + "loss": 0.339, + "step": 4361 + }, + { + "epoch": 1.53, + "grad_norm": 2.3707022666931152, + "learning_rate": 0.0001569213594641528, + "loss": 0.5805, + "step": 4362 + }, + { + "epoch": 1.53, + "grad_norm": 11.967528343200684, + "learning_rate": 0.00015688414785413045, + "loss": 3.6802, + "step": 4363 + }, + { + "epoch": 1.53, + "grad_norm": 3.1059775352478027, + "learning_rate": 0.00015684693624410816, + "loss": 0.6931, + "step": 4364 + }, + { + "epoch": 1.53, + "grad_norm": 1.6652494668960571, + "learning_rate": 0.00015680972463408583, + "loss": 0.3522, + "step": 4365 + }, + { + "epoch": 1.53, + "grad_norm": 2.2658798694610596, + "learning_rate": 0.00015677251302406348, + "loss": 0.3224, + "step": 4366 + }, + { + "epoch": 1.53, + "grad_norm": 14.272518157958984, + "learning_rate": 0.00015673530141404118, + "loss": 3.4446, + "step": 4367 + }, + { + "epoch": 1.53, + "grad_norm": 1.9729360342025757, + "learning_rate": 0.00015669808980401886, + "loss": 0.5131, + "step": 4368 + }, + { + "epoch": 1.53, + "grad_norm": 2.555955648422241, + "learning_rate": 0.0001566608781939965, + "loss": 0.4208, + "step": 4369 + }, + { + "epoch": 1.53, + "grad_norm": 0.9813282489776611, + "learning_rate": 0.00015662366658397418, + "loss": 0.1237, + "step": 4370 + }, + { + "epoch": 1.53, + "grad_norm": 2.0302422046661377, + "learning_rate": 0.00015658645497395186, + "loss": 0.3017, + "step": 4371 + }, + { + "epoch": 1.53, + "grad_norm": 2.8068108558654785, + "learning_rate": 0.00015654924336392953, + "loss": 0.3008, + "step": 4372 + }, + { + "epoch": 1.53, + "grad_norm": 2.404326915740967, + "learning_rate": 0.0001565120317539072, + "loss": 0.4064, + "step": 4373 + }, + { + "epoch": 1.53, + "grad_norm": 1.2311917543411255, + "learning_rate": 0.00015647482014388486, + "loss": 0.1547, + "step": 4374 + }, + { + "epoch": 1.53, + "grad_norm": 2.6930012702941895, + "learning_rate": 0.00015643760853386256, + "loss": 0.3034, + "step": 4375 + }, + { + "epoch": 1.53, + "grad_norm": 1.130953073501587, + "learning_rate": 0.0001564003969238402, + "loss": 0.0701, + "step": 4376 + }, + { + "epoch": 1.53, + "grad_norm": 1.6812334060668945, + "learning_rate": 0.0001563631853138179, + "loss": 0.1034, + "step": 4377 + }, + { + "epoch": 1.53, + "grad_norm": 3.887232780456543, + "learning_rate": 0.0001563259737037956, + "loss": 0.8612, + "step": 4378 + }, + { + "epoch": 1.53, + "grad_norm": 4.974242687225342, + "learning_rate": 0.00015628876209377324, + "loss": 0.3702, + "step": 4379 + }, + { + "epoch": 1.53, + "grad_norm": 2.9421982765197754, + "learning_rate": 0.00015625155048375091, + "loss": 0.6489, + "step": 4380 + }, + { + "epoch": 1.54, + "grad_norm": 1.6786479949951172, + "learning_rate": 0.00015621433887372862, + "loss": 0.2916, + "step": 4381 + }, + { + "epoch": 1.54, + "grad_norm": 2.749591112136841, + "learning_rate": 0.00015617712726370627, + "loss": 0.5676, + "step": 4382 + }, + { + "epoch": 1.54, + "grad_norm": 1.2755649089813232, + "learning_rate": 0.00015613991565368394, + "loss": 0.1627, + "step": 4383 + }, + { + "epoch": 1.54, + "grad_norm": 1.326809048652649, + "learning_rate": 0.0001561027040436616, + "loss": 0.1395, + "step": 4384 + }, + { + "epoch": 1.54, + "grad_norm": 0.9218014478683472, + "learning_rate": 0.0001560654924336393, + "loss": 0.1224, + "step": 4385 + }, + { + "epoch": 1.54, + "grad_norm": 2.6354215145111084, + "learning_rate": 0.00015602828082361697, + "loss": 0.5855, + "step": 4386 + }, + { + "epoch": 1.54, + "grad_norm": 7.072910308837891, + "learning_rate": 0.00015599106921359462, + "loss": 1.493, + "step": 4387 + }, + { + "epoch": 1.54, + "grad_norm": 1.5853562355041504, + "learning_rate": 0.00015595385760357232, + "loss": 0.2689, + "step": 4388 + }, + { + "epoch": 1.54, + "grad_norm": 6.221762657165527, + "learning_rate": 0.00015591664599354997, + "loss": 0.4643, + "step": 4389 + }, + { + "epoch": 1.54, + "grad_norm": 3.7703330516815186, + "learning_rate": 0.00015587943438352765, + "loss": 0.4222, + "step": 4390 + }, + { + "epoch": 1.54, + "grad_norm": 2.02024245262146, + "learning_rate": 0.00015584222277350532, + "loss": 0.5177, + "step": 4391 + }, + { + "epoch": 1.54, + "grad_norm": 2.0577681064605713, + "learning_rate": 0.000155805011163483, + "loss": 0.2802, + "step": 4392 + }, + { + "epoch": 1.54, + "grad_norm": 2.6712489128112793, + "learning_rate": 0.00015576779955346067, + "loss": 0.1628, + "step": 4393 + }, + { + "epoch": 1.54, + "grad_norm": 2.0091423988342285, + "learning_rate": 0.00015573058794343832, + "loss": 0.3688, + "step": 4394 + }, + { + "epoch": 1.54, + "grad_norm": 5.743954181671143, + "learning_rate": 0.000155693376333416, + "loss": 0.5611, + "step": 4395 + }, + { + "epoch": 1.54, + "grad_norm": 2.4916129112243652, + "learning_rate": 0.0001556561647233937, + "loss": 0.286, + "step": 4396 + }, + { + "epoch": 1.54, + "grad_norm": 6.647517204284668, + "learning_rate": 0.00015561895311337135, + "loss": 0.7609, + "step": 4397 + }, + { + "epoch": 1.54, + "grad_norm": 1.73363196849823, + "learning_rate": 0.00015558174150334902, + "loss": 0.1074, + "step": 4398 + }, + { + "epoch": 1.54, + "grad_norm": 2.5416290760040283, + "learning_rate": 0.00015554452989332673, + "loss": 0.2281, + "step": 4399 + }, + { + "epoch": 1.54, + "grad_norm": 1.1465420722961426, + "learning_rate": 0.00015550731828330438, + "loss": 0.123, + "step": 4400 + }, + { + "epoch": 1.54, + "eval_loss": 0.3980734944343567, + "eval_runtime": 51.2139, + "eval_samples_per_second": 42.332, + "eval_steps_per_second": 10.583, + "eval_wer": 0.379173153433662, + "step": 4400 + }, + { + "epoch": 1.54, + "grad_norm": 1.1583079099655151, + "learning_rate": 0.00015547010667328205, + "loss": 0.0627, + "step": 4401 + }, + { + "epoch": 1.54, + "grad_norm": 1.6864182949066162, + "learning_rate": 0.0001554328950632597, + "loss": 0.1769, + "step": 4402 + }, + { + "epoch": 1.54, + "grad_norm": 2.6987173557281494, + "learning_rate": 0.0001553956834532374, + "loss": 0.1046, + "step": 4403 + }, + { + "epoch": 1.54, + "grad_norm": 4.390762805938721, + "learning_rate": 0.00015535847184321508, + "loss": 0.0657, + "step": 4404 + }, + { + "epoch": 1.54, + "grad_norm": 2.5673904418945312, + "learning_rate": 0.00015532126023319273, + "loss": 0.8568, + "step": 4405 + }, + { + "epoch": 1.54, + "grad_norm": 1.5236790180206299, + "learning_rate": 0.00015528404862317043, + "loss": 0.1907, + "step": 4406 + }, + { + "epoch": 1.54, + "grad_norm": 1.6562654972076416, + "learning_rate": 0.00015524683701314808, + "loss": 0.1583, + "step": 4407 + }, + { + "epoch": 1.54, + "grad_norm": 2.632766008377075, + "learning_rate": 0.00015520962540312576, + "loss": 0.5559, + "step": 4408 + }, + { + "epoch": 1.54, + "grad_norm": 2.8173253536224365, + "learning_rate": 0.00015517241379310346, + "loss": 0.3249, + "step": 4409 + }, + { + "epoch": 1.55, + "grad_norm": 3.325995922088623, + "learning_rate": 0.0001551352021830811, + "loss": 0.4845, + "step": 4410 + }, + { + "epoch": 1.55, + "grad_norm": 1.7911338806152344, + "learning_rate": 0.00015509799057305878, + "loss": 0.2434, + "step": 4411 + }, + { + "epoch": 1.55, + "grad_norm": 2.012998342514038, + "learning_rate": 0.00015506077896303646, + "loss": 0.2877, + "step": 4412 + }, + { + "epoch": 1.55, + "grad_norm": 1.2822898626327515, + "learning_rate": 0.00015502356735301413, + "loss": 0.2108, + "step": 4413 + }, + { + "epoch": 1.55, + "grad_norm": 1.760624885559082, + "learning_rate": 0.0001549863557429918, + "loss": 0.1786, + "step": 4414 + }, + { + "epoch": 1.55, + "grad_norm": 2.9734385013580322, + "learning_rate": 0.00015494914413296946, + "loss": 0.3499, + "step": 4415 + }, + { + "epoch": 1.55, + "grad_norm": 1.6788557767868042, + "learning_rate": 0.00015491193252294713, + "loss": 0.2782, + "step": 4416 + }, + { + "epoch": 1.55, + "grad_norm": 2.383653163909912, + "learning_rate": 0.00015487472091292484, + "loss": 0.4833, + "step": 4417 + }, + { + "epoch": 1.55, + "grad_norm": 4.293880939483643, + "learning_rate": 0.00015483750930290249, + "loss": 0.5878, + "step": 4418 + }, + { + "epoch": 1.55, + "grad_norm": 2.733428716659546, + "learning_rate": 0.00015480029769288016, + "loss": 0.2317, + "step": 4419 + }, + { + "epoch": 1.55, + "grad_norm": 1.5017122030258179, + "learning_rate": 0.0001547630860828578, + "loss": 0.0682, + "step": 4420 + }, + { + "epoch": 1.55, + "grad_norm": 3.120365619659424, + "learning_rate": 0.0001547258744728355, + "loss": 0.3306, + "step": 4421 + }, + { + "epoch": 1.55, + "grad_norm": 0.6654366254806519, + "learning_rate": 0.0001546886628628132, + "loss": 0.0215, + "step": 4422 + }, + { + "epoch": 1.55, + "grad_norm": 2.2988080978393555, + "learning_rate": 0.00015465145125279084, + "loss": 0.282, + "step": 4423 + }, + { + "epoch": 1.55, + "grad_norm": 3.34647536277771, + "learning_rate": 0.00015461423964276854, + "loss": 0.3574, + "step": 4424 + }, + { + "epoch": 1.55, + "grad_norm": 1.9335713386535645, + "learning_rate": 0.00015457702803274622, + "loss": 0.1248, + "step": 4425 + }, + { + "epoch": 1.55, + "grad_norm": 1.3534259796142578, + "learning_rate": 0.00015453981642272387, + "loss": 0.0529, + "step": 4426 + }, + { + "epoch": 1.55, + "grad_norm": 8.45742130279541, + "learning_rate": 0.00015450260481270157, + "loss": 1.6093, + "step": 4427 + }, + { + "epoch": 1.55, + "grad_norm": 0.5706729888916016, + "learning_rate": 0.00015446539320267922, + "loss": 0.0306, + "step": 4428 + }, + { + "epoch": 1.55, + "grad_norm": 6.064884662628174, + "learning_rate": 0.0001544281815926569, + "loss": 0.898, + "step": 4429 + }, + { + "epoch": 1.55, + "grad_norm": 2.807861328125, + "learning_rate": 0.0001543909699826346, + "loss": 1.1501, + "step": 4430 + }, + { + "epoch": 1.55, + "grad_norm": 1.6200414896011353, + "learning_rate": 0.00015435375837261224, + "loss": 0.4798, + "step": 4431 + }, + { + "epoch": 1.55, + "grad_norm": 2.61049485206604, + "learning_rate": 0.00015431654676258992, + "loss": 0.6317, + "step": 4432 + }, + { + "epoch": 1.55, + "grad_norm": 1.9506374597549438, + "learning_rate": 0.00015427933515256757, + "loss": 0.2907, + "step": 4433 + }, + { + "epoch": 1.55, + "grad_norm": 1.570434331893921, + "learning_rate": 0.00015424212354254527, + "loss": 0.2544, + "step": 4434 + }, + { + "epoch": 1.55, + "grad_norm": 2.3015689849853516, + "learning_rate": 0.00015420491193252295, + "loss": 0.2989, + "step": 4435 + }, + { + "epoch": 1.55, + "grad_norm": 1.8835831880569458, + "learning_rate": 0.0001541677003225006, + "loss": 0.3247, + "step": 4436 + }, + { + "epoch": 1.55, + "grad_norm": 2.0309693813323975, + "learning_rate": 0.00015413048871247827, + "loss": 0.3081, + "step": 4437 + }, + { + "epoch": 1.56, + "grad_norm": 1.200584888458252, + "learning_rate": 0.00015409327710245595, + "loss": 0.1762, + "step": 4438 + }, + { + "epoch": 1.56, + "grad_norm": 3.047585964202881, + "learning_rate": 0.00015405606549243362, + "loss": 0.3421, + "step": 4439 + }, + { + "epoch": 1.56, + "grad_norm": 2.6859099864959717, + "learning_rate": 0.0001540188538824113, + "loss": 0.2895, + "step": 4440 + }, + { + "epoch": 1.56, + "grad_norm": 3.2242605686187744, + "learning_rate": 0.00015398164227238895, + "loss": 0.5598, + "step": 4441 + }, + { + "epoch": 1.56, + "grad_norm": 3.5308659076690674, + "learning_rate": 0.00015394443066236665, + "loss": 0.581, + "step": 4442 + }, + { + "epoch": 1.56, + "grad_norm": 2.4729762077331543, + "learning_rate": 0.00015390721905234433, + "loss": 0.371, + "step": 4443 + }, + { + "epoch": 1.56, + "grad_norm": 1.6830202341079712, + "learning_rate": 0.00015387000744232198, + "loss": 0.2435, + "step": 4444 + }, + { + "epoch": 1.56, + "grad_norm": 2.3365495204925537, + "learning_rate": 0.00015383279583229968, + "loss": 0.3798, + "step": 4445 + }, + { + "epoch": 1.56, + "grad_norm": 2.603057384490967, + "learning_rate": 0.00015379558422227733, + "loss": 0.2935, + "step": 4446 + }, + { + "epoch": 1.56, + "grad_norm": 1.5318717956542969, + "learning_rate": 0.000153758372612255, + "loss": 0.1117, + "step": 4447 + }, + { + "epoch": 1.56, + "grad_norm": 3.2214760780334473, + "learning_rate": 0.0001537211610022327, + "loss": 0.5403, + "step": 4448 + }, + { + "epoch": 1.56, + "grad_norm": 1.5494588613510132, + "learning_rate": 0.00015368394939221035, + "loss": 0.1355, + "step": 4449 + }, + { + "epoch": 1.56, + "grad_norm": 2.5164105892181396, + "learning_rate": 0.00015364673778218803, + "loss": 0.4476, + "step": 4450 + }, + { + "epoch": 1.56, + "grad_norm": 2.3167803287506104, + "learning_rate": 0.00015360952617216568, + "loss": 0.1465, + "step": 4451 + }, + { + "epoch": 1.56, + "grad_norm": 1.139276146888733, + "learning_rate": 0.00015357231456214338, + "loss": 0.0653, + "step": 4452 + }, + { + "epoch": 1.56, + "grad_norm": 9.703216552734375, + "learning_rate": 0.00015353510295212106, + "loss": 0.5352, + "step": 4453 + }, + { + "epoch": 1.56, + "grad_norm": 2.554642915725708, + "learning_rate": 0.0001534978913420987, + "loss": 0.0762, + "step": 4454 + }, + { + "epoch": 1.56, + "grad_norm": 2.5939784049987793, + "learning_rate": 0.0001534606797320764, + "loss": 0.7377, + "step": 4455 + }, + { + "epoch": 1.56, + "grad_norm": 2.136479139328003, + "learning_rate": 0.00015342346812205408, + "loss": 0.8079, + "step": 4456 + }, + { + "epoch": 1.56, + "grad_norm": 1.0142302513122559, + "learning_rate": 0.00015338625651203173, + "loss": 0.2065, + "step": 4457 + }, + { + "epoch": 1.56, + "grad_norm": 1.6144118309020996, + "learning_rate": 0.0001533490449020094, + "loss": 0.3742, + "step": 4458 + }, + { + "epoch": 1.56, + "grad_norm": 1.8569650650024414, + "learning_rate": 0.00015331183329198709, + "loss": 0.2521, + "step": 4459 + }, + { + "epoch": 1.56, + "grad_norm": 1.7510970830917358, + "learning_rate": 0.00015327462168196476, + "loss": 0.2786, + "step": 4460 + }, + { + "epoch": 1.56, + "grad_norm": 2.130952835083008, + "learning_rate": 0.00015323741007194244, + "loss": 0.1905, + "step": 4461 + }, + { + "epoch": 1.56, + "grad_norm": 2.0437982082366943, + "learning_rate": 0.00015320019846192009, + "loss": 0.367, + "step": 4462 + }, + { + "epoch": 1.56, + "grad_norm": 2.792440891265869, + "learning_rate": 0.0001531629868518978, + "loss": 0.5759, + "step": 4463 + }, + { + "epoch": 1.56, + "grad_norm": 1.3819493055343628, + "learning_rate": 0.00015312577524187544, + "loss": 0.2252, + "step": 4464 + }, + { + "epoch": 1.56, + "grad_norm": 4.414833068847656, + "learning_rate": 0.0001530885636318531, + "loss": 0.4762, + "step": 4465 + }, + { + "epoch": 1.56, + "grad_norm": 2.5327303409576416, + "learning_rate": 0.00015305135202183082, + "loss": 0.4483, + "step": 4466 + }, + { + "epoch": 1.57, + "grad_norm": 2.1207656860351562, + "learning_rate": 0.00015301414041180846, + "loss": 0.2676, + "step": 4467 + }, + { + "epoch": 1.57, + "grad_norm": 2.1019747257232666, + "learning_rate": 0.00015297692880178614, + "loss": 0.3797, + "step": 4468 + }, + { + "epoch": 1.57, + "grad_norm": 2.4563004970550537, + "learning_rate": 0.00015293971719176384, + "loss": 0.2758, + "step": 4469 + }, + { + "epoch": 1.57, + "grad_norm": 3.147643804550171, + "learning_rate": 0.0001529025055817415, + "loss": 0.9069, + "step": 4470 + }, + { + "epoch": 1.57, + "grad_norm": 4.162635803222656, + "learning_rate": 0.00015286529397171917, + "loss": 0.6038, + "step": 4471 + }, + { + "epoch": 1.57, + "grad_norm": 4.253012657165527, + "learning_rate": 0.00015282808236169682, + "loss": 0.8356, + "step": 4472 + }, + { + "epoch": 1.57, + "grad_norm": 2.0794060230255127, + "learning_rate": 0.00015279087075167452, + "loss": 0.3112, + "step": 4473 + }, + { + "epoch": 1.57, + "grad_norm": 3.6223506927490234, + "learning_rate": 0.0001527536591416522, + "loss": 0.3925, + "step": 4474 + }, + { + "epoch": 1.57, + "grad_norm": 2.9207382202148438, + "learning_rate": 0.00015271644753162984, + "loss": 0.3373, + "step": 4475 + }, + { + "epoch": 1.57, + "grad_norm": 2.2720437049865723, + "learning_rate": 0.00015267923592160755, + "loss": 0.2101, + "step": 4476 + }, + { + "epoch": 1.57, + "grad_norm": 1.576826572418213, + "learning_rate": 0.0001526420243115852, + "loss": 0.2305, + "step": 4477 + }, + { + "epoch": 1.57, + "grad_norm": 0.4385606646537781, + "learning_rate": 0.00015260481270156287, + "loss": 0.0335, + "step": 4478 + }, + { + "epoch": 1.57, + "grad_norm": 2.5743887424468994, + "learning_rate": 0.00015256760109154055, + "loss": 0.8183, + "step": 4479 + }, + { + "epoch": 1.57, + "grad_norm": 1.7291278839111328, + "learning_rate": 0.00015253038948151822, + "loss": 0.334, + "step": 4480 + }, + { + "epoch": 1.57, + "grad_norm": 1.7327170372009277, + "learning_rate": 0.0001524931778714959, + "loss": 0.4015, + "step": 4481 + }, + { + "epoch": 1.57, + "grad_norm": 1.8238621950149536, + "learning_rate": 0.00015245596626147355, + "loss": 0.4654, + "step": 4482 + }, + { + "epoch": 1.57, + "grad_norm": 1.92159104347229, + "learning_rate": 0.00015241875465145122, + "loss": 0.4713, + "step": 4483 + }, + { + "epoch": 1.57, + "grad_norm": 6.046019077301025, + "learning_rate": 0.00015238154304142893, + "loss": 0.5623, + "step": 4484 + }, + { + "epoch": 1.57, + "grad_norm": 2.2049782276153564, + "learning_rate": 0.00015234433143140657, + "loss": 0.3545, + "step": 4485 + }, + { + "epoch": 1.57, + "grad_norm": 1.759090542793274, + "learning_rate": 0.00015230711982138425, + "loss": 0.2411, + "step": 4486 + }, + { + "epoch": 1.57, + "grad_norm": 2.1252946853637695, + "learning_rate": 0.00015226990821136195, + "loss": 0.2737, + "step": 4487 + }, + { + "epoch": 1.57, + "grad_norm": 1.3396649360656738, + "learning_rate": 0.0001522326966013396, + "loss": 0.1346, + "step": 4488 + }, + { + "epoch": 1.57, + "grad_norm": 1.8554527759552002, + "learning_rate": 0.00015219548499131728, + "loss": 0.3038, + "step": 4489 + }, + { + "epoch": 1.57, + "grad_norm": 1.3641473054885864, + "learning_rate": 0.00015215827338129493, + "loss": 0.1102, + "step": 4490 + }, + { + "epoch": 1.57, + "grad_norm": 3.5774457454681396, + "learning_rate": 0.00015212106177127263, + "loss": 0.4797, + "step": 4491 + }, + { + "epoch": 1.57, + "grad_norm": 4.142420768737793, + "learning_rate": 0.0001520838501612503, + "loss": 0.3904, + "step": 4492 + }, + { + "epoch": 1.57, + "grad_norm": 1.1774590015411377, + "learning_rate": 0.00015204663855122795, + "loss": 0.0748, + "step": 4493 + }, + { + "epoch": 1.57, + "grad_norm": 3.950049638748169, + "learning_rate": 0.00015200942694120566, + "loss": 0.3689, + "step": 4494 + }, + { + "epoch": 1.57, + "grad_norm": 0.5134997963905334, + "learning_rate": 0.0001519722153311833, + "loss": 0.0291, + "step": 4495 + }, + { + "epoch": 1.58, + "grad_norm": 6.9983296394348145, + "learning_rate": 0.00015193500372116098, + "loss": 2.1653, + "step": 4496 + }, + { + "epoch": 1.58, + "grad_norm": 4.325428485870361, + "learning_rate": 0.00015189779211113868, + "loss": 0.4339, + "step": 4497 + }, + { + "epoch": 1.58, + "grad_norm": 5.906067371368408, + "learning_rate": 0.00015186058050111633, + "loss": 0.5815, + "step": 4498 + }, + { + "epoch": 1.58, + "grad_norm": 6.516275882720947, + "learning_rate": 0.000151823368891094, + "loss": 0.2292, + "step": 4499 + }, + { + "epoch": 1.58, + "grad_norm": 5.414793491363525, + "learning_rate": 0.00015178615728107168, + "loss": 0.3685, + "step": 4500 + }, + { + "epoch": 1.58, + "eval_loss": 0.4531491696834564, + "eval_runtime": 51.0994, + "eval_samples_per_second": 42.427, + "eval_steps_per_second": 10.607, + "eval_wer": 0.3689673066943435, + "step": 4500 + }, + { + "epoch": 1.58, + "grad_norm": 5.927810192108154, + "learning_rate": 0.00015174894567104936, + "loss": 0.4957, + "step": 4501 + }, + { + "epoch": 1.58, + "grad_norm": 3.2968862056732178, + "learning_rate": 0.00015171173406102704, + "loss": 0.1763, + "step": 4502 + }, + { + "epoch": 1.58, + "grad_norm": 18.59258270263672, + "learning_rate": 0.00015167452245100468, + "loss": 0.3655, + "step": 4503 + }, + { + "epoch": 1.58, + "grad_norm": 9.49154281616211, + "learning_rate": 0.00015163731084098236, + "loss": 1.3948, + "step": 4504 + }, + { + "epoch": 1.58, + "grad_norm": 1.5831422805786133, + "learning_rate": 0.00015160009923096006, + "loss": 0.5639, + "step": 4505 + }, + { + "epoch": 1.58, + "grad_norm": 1.5149224996566772, + "learning_rate": 0.0001515628876209377, + "loss": 0.361, + "step": 4506 + }, + { + "epoch": 1.58, + "grad_norm": 1.7891191244125366, + "learning_rate": 0.0001515256760109154, + "loss": 0.4626, + "step": 4507 + }, + { + "epoch": 1.58, + "grad_norm": 1.3684577941894531, + "learning_rate": 0.00015148846440089306, + "loss": 0.2943, + "step": 4508 + }, + { + "epoch": 1.58, + "grad_norm": 1.371821641921997, + "learning_rate": 0.00015145125279087074, + "loss": 0.1162, + "step": 4509 + }, + { + "epoch": 1.58, + "grad_norm": 1.8998194932937622, + "learning_rate": 0.00015141404118084842, + "loss": 0.2725, + "step": 4510 + }, + { + "epoch": 1.58, + "grad_norm": 3.163402557373047, + "learning_rate": 0.00015137682957082606, + "loss": 0.475, + "step": 4511 + }, + { + "epoch": 1.58, + "grad_norm": 3.110438108444214, + "learning_rate": 0.00015133961796080377, + "loss": 0.4981, + "step": 4512 + }, + { + "epoch": 1.58, + "grad_norm": 2.5122923851013184, + "learning_rate": 0.00015130240635078144, + "loss": 0.4744, + "step": 4513 + }, + { + "epoch": 1.58, + "grad_norm": 5.760143280029297, + "learning_rate": 0.0001512651947407591, + "loss": 1.8897, + "step": 4514 + }, + { + "epoch": 1.58, + "grad_norm": 1.6696605682373047, + "learning_rate": 0.0001512279831307368, + "loss": 0.2554, + "step": 4515 + }, + { + "epoch": 1.58, + "grad_norm": 2.809109687805176, + "learning_rate": 0.00015119077152071444, + "loss": 0.406, + "step": 4516 + }, + { + "epoch": 1.58, + "grad_norm": 1.2222541570663452, + "learning_rate": 0.00015115355991069212, + "loss": 0.0803, + "step": 4517 + }, + { + "epoch": 1.58, + "grad_norm": 3.3390469551086426, + "learning_rate": 0.00015111634830066982, + "loss": 0.4206, + "step": 4518 + }, + { + "epoch": 1.58, + "grad_norm": 1.309799075126648, + "learning_rate": 0.00015107913669064747, + "loss": 0.1442, + "step": 4519 + }, + { + "epoch": 1.58, + "grad_norm": 3.854196310043335, + "learning_rate": 0.00015104192508062515, + "loss": 1.496, + "step": 4520 + }, + { + "epoch": 1.58, + "grad_norm": 2.176786422729492, + "learning_rate": 0.0001510047134706028, + "loss": 0.2341, + "step": 4521 + }, + { + "epoch": 1.58, + "grad_norm": 1.9741429090499878, + "learning_rate": 0.0001509675018605805, + "loss": 0.2108, + "step": 4522 + }, + { + "epoch": 1.58, + "grad_norm": 5.400284290313721, + "learning_rate": 0.00015093029025055817, + "loss": 2.0911, + "step": 4523 + }, + { + "epoch": 1.59, + "grad_norm": 3.421788215637207, + "learning_rate": 0.00015089307864053582, + "loss": 0.4519, + "step": 4524 + }, + { + "epoch": 1.59, + "grad_norm": 4.890323162078857, + "learning_rate": 0.0001508558670305135, + "loss": 0.5325, + "step": 4525 + }, + { + "epoch": 1.59, + "grad_norm": 3.7187812328338623, + "learning_rate": 0.00015081865542049117, + "loss": 0.392, + "step": 4526 + }, + { + "epoch": 1.59, + "grad_norm": 2.6591031551361084, + "learning_rate": 0.00015078144381046885, + "loss": 0.3085, + "step": 4527 + }, + { + "epoch": 1.59, + "grad_norm": 3.004119873046875, + "learning_rate": 0.00015074423220044653, + "loss": 0.3717, + "step": 4528 + }, + { + "epoch": 1.59, + "grad_norm": 1.017032265663147, + "learning_rate": 0.0001507070205904242, + "loss": 0.0585, + "step": 4529 + }, + { + "epoch": 1.59, + "grad_norm": 2.0729451179504395, + "learning_rate": 0.00015066980898040188, + "loss": 0.393, + "step": 4530 + }, + { + "epoch": 1.59, + "grad_norm": 1.5264289379119873, + "learning_rate": 0.00015063259737037955, + "loss": 0.3011, + "step": 4531 + }, + { + "epoch": 1.59, + "grad_norm": 1.535727858543396, + "learning_rate": 0.0001505953857603572, + "loss": 0.3631, + "step": 4532 + }, + { + "epoch": 1.59, + "grad_norm": 2.5578010082244873, + "learning_rate": 0.0001505581741503349, + "loss": 0.3524, + "step": 4533 + }, + { + "epoch": 1.59, + "grad_norm": 2.180941104888916, + "learning_rate": 0.00015052096254031255, + "loss": 0.3525, + "step": 4534 + }, + { + "epoch": 1.59, + "grad_norm": 1.2360469102859497, + "learning_rate": 0.00015048375093029023, + "loss": 0.1369, + "step": 4535 + }, + { + "epoch": 1.59, + "grad_norm": 1.1341615915298462, + "learning_rate": 0.00015044653932026793, + "loss": 0.1489, + "step": 4536 + }, + { + "epoch": 1.59, + "grad_norm": 1.9309412240982056, + "learning_rate": 0.00015040932771024558, + "loss": 0.2772, + "step": 4537 + }, + { + "epoch": 1.59, + "grad_norm": 1.6193166971206665, + "learning_rate": 0.00015037211610022326, + "loss": 0.3045, + "step": 4538 + }, + { + "epoch": 1.59, + "grad_norm": 1.2819677591323853, + "learning_rate": 0.0001503349044902009, + "loss": 0.2624, + "step": 4539 + }, + { + "epoch": 1.59, + "grad_norm": 2.1933364868164062, + "learning_rate": 0.0001502976928801786, + "loss": 0.2117, + "step": 4540 + }, + { + "epoch": 1.59, + "grad_norm": 2.030672073364258, + "learning_rate": 0.00015026048127015628, + "loss": 0.2636, + "step": 4541 + }, + { + "epoch": 1.59, + "grad_norm": 3.077730894088745, + "learning_rate": 0.00015022326966013393, + "loss": 0.4608, + "step": 4542 + }, + { + "epoch": 1.59, + "grad_norm": 1.8299646377563477, + "learning_rate": 0.00015018605805011163, + "loss": 0.1079, + "step": 4543 + }, + { + "epoch": 1.59, + "grad_norm": 2.263606309890747, + "learning_rate": 0.0001501488464400893, + "loss": 0.2057, + "step": 4544 + }, + { + "epoch": 1.59, + "grad_norm": 1.6388328075408936, + "learning_rate": 0.00015011163483006696, + "loss": 0.2149, + "step": 4545 + }, + { + "epoch": 1.59, + "grad_norm": 1.8084484338760376, + "learning_rate": 0.00015007442322004464, + "loss": 0.2344, + "step": 4546 + }, + { + "epoch": 1.59, + "grad_norm": 1.729175329208374, + "learning_rate": 0.0001500372116100223, + "loss": 0.1099, + "step": 4547 + }, + { + "epoch": 1.59, + "grad_norm": 2.0611491203308105, + "learning_rate": 0.00015, + "loss": 0.3877, + "step": 4548 + }, + { + "epoch": 1.59, + "grad_norm": 4.244816303253174, + "learning_rate": 0.00014996278838997766, + "loss": 0.4705, + "step": 4549 + }, + { + "epoch": 1.59, + "grad_norm": 3.561108112335205, + "learning_rate": 0.00014992557677995534, + "loss": 0.3884, + "step": 4550 + }, + { + "epoch": 1.59, + "grad_norm": 3.055769205093384, + "learning_rate": 0.00014988836516993301, + "loss": 0.234, + "step": 4551 + }, + { + "epoch": 1.59, + "grad_norm": 4.927545547485352, + "learning_rate": 0.0001498511535599107, + "loss": 1.9593, + "step": 4552 + }, + { + "epoch": 1.6, + "grad_norm": 5.328706741333008, + "learning_rate": 0.00014981394194988834, + "loss": 1.9694, + "step": 4553 + }, + { + "epoch": 1.6, + "grad_norm": 2.255901575088501, + "learning_rate": 0.00014977673033986601, + "loss": 0.1712, + "step": 4554 + }, + { + "epoch": 1.6, + "grad_norm": 2.4930551052093506, + "learning_rate": 0.00014973951872984372, + "loss": 0.6505, + "step": 4555 + }, + { + "epoch": 1.6, + "grad_norm": 2.2214691638946533, + "learning_rate": 0.00014970230711982137, + "loss": 0.6706, + "step": 4556 + }, + { + "epoch": 1.6, + "grad_norm": 2.260343313217163, + "learning_rate": 0.00014966509550979904, + "loss": 0.4399, + "step": 4557 + }, + { + "epoch": 1.6, + "grad_norm": 1.8949406147003174, + "learning_rate": 0.00014962788389977672, + "loss": 0.681, + "step": 4558 + }, + { + "epoch": 1.6, + "grad_norm": 1.0256386995315552, + "learning_rate": 0.0001495906722897544, + "loss": 0.1752, + "step": 4559 + }, + { + "epoch": 1.6, + "grad_norm": 2.9112942218780518, + "learning_rate": 0.00014955346067973207, + "loss": 0.2714, + "step": 4560 + }, + { + "epoch": 1.6, + "grad_norm": 1.3259639739990234, + "learning_rate": 0.00014951624906970975, + "loss": 0.1771, + "step": 4561 + }, + { + "epoch": 1.6, + "grad_norm": 2.3593876361846924, + "learning_rate": 0.0001494790374596874, + "loss": 0.51, + "step": 4562 + }, + { + "epoch": 1.6, + "grad_norm": 2.028107166290283, + "learning_rate": 0.00014944182584966507, + "loss": 0.6044, + "step": 4563 + }, + { + "epoch": 1.6, + "grad_norm": 2.4526591300964355, + "learning_rate": 0.00014940461423964277, + "loss": 0.3303, + "step": 4564 + }, + { + "epoch": 1.6, + "grad_norm": 3.8882853984832764, + "learning_rate": 0.00014936740262962042, + "loss": 0.7958, + "step": 4565 + }, + { + "epoch": 1.6, + "grad_norm": 1.1775007247924805, + "learning_rate": 0.0001493301910195981, + "loss": 0.128, + "step": 4566 + }, + { + "epoch": 1.6, + "grad_norm": 1.8732891082763672, + "learning_rate": 0.00014929297940957577, + "loss": 0.1814, + "step": 4567 + }, + { + "epoch": 1.6, + "grad_norm": 2.4956729412078857, + "learning_rate": 0.00014925576779955345, + "loss": 0.2284, + "step": 4568 + }, + { + "epoch": 1.6, + "grad_norm": 1.7072712182998657, + "learning_rate": 0.00014921855618953112, + "loss": 0.114, + "step": 4569 + }, + { + "epoch": 1.6, + "grad_norm": 1.2081888914108276, + "learning_rate": 0.0001491813445795088, + "loss": 0.0593, + "step": 4570 + }, + { + "epoch": 1.6, + "grad_norm": 3.359692335128784, + "learning_rate": 0.00014914413296948648, + "loss": 0.6738, + "step": 4571 + }, + { + "epoch": 1.6, + "grad_norm": 2.427903652191162, + "learning_rate": 0.00014910692135946412, + "loss": 0.2915, + "step": 4572 + }, + { + "epoch": 1.6, + "grad_norm": 1.3507580757141113, + "learning_rate": 0.00014906970974944183, + "loss": 0.2221, + "step": 4573 + }, + { + "epoch": 1.6, + "grad_norm": 1.6972339153289795, + "learning_rate": 0.00014903249813941948, + "loss": 0.1641, + "step": 4574 + }, + { + "epoch": 1.6, + "grad_norm": 4.0066328048706055, + "learning_rate": 0.00014899528652939715, + "loss": 0.6401, + "step": 4575 + }, + { + "epoch": 1.6, + "grad_norm": 2.1944432258605957, + "learning_rate": 0.00014895807491937483, + "loss": 0.2104, + "step": 4576 + }, + { + "epoch": 1.6, + "grad_norm": 3.7553157806396484, + "learning_rate": 0.0001489208633093525, + "loss": 0.3944, + "step": 4577 + }, + { + "epoch": 1.6, + "grad_norm": NaN, + "learning_rate": 0.0001489208633093525, + "loss": 1.2432, + "step": 4578 + }, + { + "epoch": 1.6, + "grad_norm": 1.1025826930999756, + "learning_rate": 0.00014888365169933018, + "loss": 0.0429, + "step": 4579 + }, + { + "epoch": 1.6, + "grad_norm": 3.72552752494812, + "learning_rate": 0.00014884644008930786, + "loss": 0.7761, + "step": 4580 + }, + { + "epoch": 1.61, + "grad_norm": 2.1023457050323486, + "learning_rate": 0.00014880922847928553, + "loss": 0.5812, + "step": 4581 + }, + { + "epoch": 1.61, + "grad_norm": 2.557485818862915, + "learning_rate": 0.0001487720168692632, + "loss": 0.7201, + "step": 4582 + }, + { + "epoch": 1.61, + "grad_norm": 1.0474027395248413, + "learning_rate": 0.00014873480525924088, + "loss": 0.174, + "step": 4583 + }, + { + "epoch": 1.61, + "grad_norm": 1.4817769527435303, + "learning_rate": 0.00014869759364921853, + "loss": 0.0965, + "step": 4584 + }, + { + "epoch": 1.61, + "grad_norm": 3.8702480792999268, + "learning_rate": 0.0001486603820391962, + "loss": 0.9629, + "step": 4585 + }, + { + "epoch": 1.61, + "grad_norm": 13.161911010742188, + "learning_rate": 0.00014862317042917388, + "loss": 2.8826, + "step": 4586 + }, + { + "epoch": 1.61, + "grad_norm": 3.8920679092407227, + "learning_rate": 0.00014858595881915156, + "loss": 0.6731, + "step": 4587 + }, + { + "epoch": 1.61, + "grad_norm": 1.4065778255462646, + "learning_rate": 0.00014854874720912923, + "loss": 0.2983, + "step": 4588 + }, + { + "epoch": 1.61, + "grad_norm": 3.574580430984497, + "learning_rate": 0.0001485115355991069, + "loss": 0.6195, + "step": 4589 + }, + { + "epoch": 1.61, + "grad_norm": 6.651939392089844, + "learning_rate": 0.00014847432398908459, + "loss": 0.2192, + "step": 4590 + }, + { + "epoch": 1.61, + "grad_norm": 2.2979915142059326, + "learning_rate": 0.00014843711237906226, + "loss": 0.51, + "step": 4591 + }, + { + "epoch": 1.61, + "grad_norm": 1.8823531866073608, + "learning_rate": 0.00014839990076903994, + "loss": 0.3496, + "step": 4592 + }, + { + "epoch": 1.61, + "grad_norm": 3.272880792617798, + "learning_rate": 0.0001483626891590176, + "loss": 0.3933, + "step": 4593 + }, + { + "epoch": 1.61, + "grad_norm": 2.0282230377197266, + "learning_rate": 0.00014832547754899526, + "loss": 0.5362, + "step": 4594 + }, + { + "epoch": 1.61, + "grad_norm": 2.4727609157562256, + "learning_rate": 0.00014828826593897294, + "loss": 0.3841, + "step": 4595 + }, + { + "epoch": 1.61, + "grad_norm": 1.0097224712371826, + "learning_rate": 0.00014825105432895061, + "loss": 0.1183, + "step": 4596 + }, + { + "epoch": 1.61, + "grad_norm": 2.555164337158203, + "learning_rate": 0.0001482138427189283, + "loss": 0.2854, + "step": 4597 + }, + { + "epoch": 1.61, + "grad_norm": 4.284493446350098, + "learning_rate": 0.00014817663110890597, + "loss": 0.3708, + "step": 4598 + }, + { + "epoch": 1.61, + "grad_norm": 3.0818679332733154, + "learning_rate": 0.00014813941949888364, + "loss": 0.2883, + "step": 4599 + }, + { + "epoch": 1.61, + "grad_norm": 5.769961833953857, + "learning_rate": 0.00014810220788886132, + "loss": 1.0105, + "step": 4600 + }, + { + "epoch": 1.61, + "eval_loss": 0.43010884523391724, + "eval_runtime": 51.2971, + "eval_samples_per_second": 42.264, + "eval_steps_per_second": 10.566, + "eval_wer": 0.40140114167099117, + "step": 4600 + }, + { + "epoch": 1.61, + "grad_norm": 3.1520931720733643, + "learning_rate": 0.000148064996278839, + "loss": 0.2645, + "step": 4601 + }, + { + "epoch": 1.61, + "grad_norm": 1.3779430389404297, + "learning_rate": 0.00014802778466881667, + "loss": 0.0886, + "step": 4602 + }, + { + "epoch": 1.61, + "grad_norm": 4.223806381225586, + "learning_rate": 0.00014799057305879432, + "loss": 0.5118, + "step": 4603 + }, + { + "epoch": 1.61, + "grad_norm": 2.133859157562256, + "learning_rate": 0.00014795336144877202, + "loss": 0.1417, + "step": 4604 + }, + { + "epoch": 1.61, + "grad_norm": 2.7136905193328857, + "learning_rate": 0.00014791614983874967, + "loss": 0.789, + "step": 4605 + }, + { + "epoch": 1.61, + "grad_norm": 1.8167808055877686, + "learning_rate": 0.00014787893822872734, + "loss": 0.3996, + "step": 4606 + }, + { + "epoch": 1.61, + "grad_norm": 1.4409061670303345, + "learning_rate": 0.00014784172661870502, + "loss": 0.3649, + "step": 4607 + }, + { + "epoch": 1.61, + "grad_norm": 2.616767406463623, + "learning_rate": 0.0001478045150086827, + "loss": 0.4231, + "step": 4608 + }, + { + "epoch": 1.61, + "grad_norm": 1.7175992727279663, + "learning_rate": 0.00014776730339866037, + "loss": 0.3993, + "step": 4609 + }, + { + "epoch": 1.62, + "grad_norm": 1.3235002756118774, + "learning_rate": 0.00014773009178863805, + "loss": 0.1547, + "step": 4610 + }, + { + "epoch": 1.62, + "grad_norm": 2.10302996635437, + "learning_rate": 0.00014769288017861572, + "loss": 0.3232, + "step": 4611 + }, + { + "epoch": 1.62, + "grad_norm": 1.746387004852295, + "learning_rate": 0.00014765566856859337, + "loss": 0.333, + "step": 4612 + }, + { + "epoch": 1.62, + "grad_norm": 2.292447805404663, + "learning_rate": 0.00014761845695857108, + "loss": 0.2756, + "step": 4613 + }, + { + "epoch": 1.62, + "grad_norm": 1.5536998510360718, + "learning_rate": 0.00014758124534854875, + "loss": 0.2888, + "step": 4614 + }, + { + "epoch": 1.62, + "grad_norm": 2.7711429595947266, + "learning_rate": 0.0001475440337385264, + "loss": 0.5742, + "step": 4615 + }, + { + "epoch": 1.62, + "grad_norm": 1.2211471796035767, + "learning_rate": 0.00014750682212850408, + "loss": 0.1224, + "step": 4616 + }, + { + "epoch": 1.62, + "grad_norm": 2.2878341674804688, + "learning_rate": 0.00014746961051848175, + "loss": 0.3532, + "step": 4617 + }, + { + "epoch": 1.62, + "grad_norm": 1.6021337509155273, + "learning_rate": 0.00014743239890845943, + "loss": 0.2547, + "step": 4618 + }, + { + "epoch": 1.62, + "grad_norm": 3.6017909049987793, + "learning_rate": 0.0001473951872984371, + "loss": 1.4935, + "step": 4619 + }, + { + "epoch": 1.62, + "grad_norm": 2.1262946128845215, + "learning_rate": 0.00014735797568841478, + "loss": 0.2398, + "step": 4620 + }, + { + "epoch": 1.62, + "grad_norm": 2.549982786178589, + "learning_rate": 0.00014732076407839243, + "loss": 0.7301, + "step": 4621 + }, + { + "epoch": 1.62, + "grad_norm": 0.8897354602813721, + "learning_rate": 0.00014728355246837013, + "loss": 0.0852, + "step": 4622 + }, + { + "epoch": 1.62, + "grad_norm": 2.3464910984039307, + "learning_rate": 0.0001472463408583478, + "loss": 0.3429, + "step": 4623 + }, + { + "epoch": 1.62, + "grad_norm": 2.2914788722991943, + "learning_rate": 0.00014720912924832545, + "loss": 0.2171, + "step": 4624 + }, + { + "epoch": 1.62, + "grad_norm": 5.431281566619873, + "learning_rate": 0.00014717191763830313, + "loss": 0.4327, + "step": 4625 + }, + { + "epoch": 1.62, + "grad_norm": 2.59971022605896, + "learning_rate": 0.0001471347060282808, + "loss": 0.2839, + "step": 4626 + }, + { + "epoch": 1.62, + "grad_norm": 2.3822665214538574, + "learning_rate": 0.00014709749441825848, + "loss": 0.1915, + "step": 4627 + }, + { + "epoch": 1.62, + "grad_norm": 1.2179664373397827, + "learning_rate": 0.00014706028280823616, + "loss": 0.1606, + "step": 4628 + }, + { + "epoch": 1.62, + "grad_norm": 3.38651180267334, + "learning_rate": 0.00014702307119821383, + "loss": 1.2565, + "step": 4629 + }, + { + "epoch": 1.62, + "grad_norm": 1.9841926097869873, + "learning_rate": 0.00014698585958819148, + "loss": 0.7275, + "step": 4630 + }, + { + "epoch": 1.62, + "grad_norm": 2.126986503601074, + "learning_rate": 0.00014694864797816919, + "loss": 0.8143, + "step": 4631 + }, + { + "epoch": 1.62, + "grad_norm": 1.2944574356079102, + "learning_rate": 0.00014691143636814686, + "loss": 0.2415, + "step": 4632 + }, + { + "epoch": 1.62, + "grad_norm": 1.2523384094238281, + "learning_rate": 0.0001468742247581245, + "loss": 0.3349, + "step": 4633 + }, + { + "epoch": 1.62, + "grad_norm": 2.1736972332000732, + "learning_rate": 0.00014683701314810219, + "loss": 0.2556, + "step": 4634 + }, + { + "epoch": 1.62, + "grad_norm": 2.1065425872802734, + "learning_rate": 0.0001467998015380799, + "loss": 0.2447, + "step": 4635 + }, + { + "epoch": 1.62, + "grad_norm": 3.6076436042785645, + "learning_rate": 0.00014676258992805754, + "loss": 0.7707, + "step": 4636 + }, + { + "epoch": 1.62, + "grad_norm": 2.045930862426758, + "learning_rate": 0.0001467253783180352, + "loss": 0.3137, + "step": 4637 + }, + { + "epoch": 1.63, + "grad_norm": 1.0164872407913208, + "learning_rate": 0.0001466881667080129, + "loss": 0.1478, + "step": 4638 + }, + { + "epoch": 1.63, + "grad_norm": 2.6273579597473145, + "learning_rate": 0.00014665095509799056, + "loss": 0.2894, + "step": 4639 + }, + { + "epoch": 1.63, + "grad_norm": 2.061847686767578, + "learning_rate": 0.00014661374348796824, + "loss": 0.109, + "step": 4640 + }, + { + "epoch": 1.63, + "grad_norm": 2.5759425163269043, + "learning_rate": 0.00014657653187794592, + "loss": 0.1666, + "step": 4641 + }, + { + "epoch": 1.63, + "grad_norm": 2.0799243450164795, + "learning_rate": 0.00014653932026792356, + "loss": 0.2198, + "step": 4642 + }, + { + "epoch": 1.63, + "grad_norm": 2.562469959259033, + "learning_rate": 0.00014650210865790124, + "loss": 0.3698, + "step": 4643 + }, + { + "epoch": 1.63, + "grad_norm": 5.489830493927002, + "learning_rate": 0.00014646489704787894, + "loss": 0.3143, + "step": 4644 + }, + { + "epoch": 1.63, + "grad_norm": 3.388197422027588, + "learning_rate": 0.0001464276854378566, + "loss": 0.2593, + "step": 4645 + }, + { + "epoch": 1.63, + "grad_norm": 3.1358988285064697, + "learning_rate": 0.00014639047382783427, + "loss": 0.2429, + "step": 4646 + }, + { + "epoch": 1.63, + "grad_norm": 2.042487859725952, + "learning_rate": 0.00014635326221781194, + "loss": 0.2667, + "step": 4647 + }, + { + "epoch": 1.63, + "grad_norm": 3.1954731941223145, + "learning_rate": 0.00014631605060778962, + "loss": 0.2772, + "step": 4648 + }, + { + "epoch": 1.63, + "grad_norm": 6.20961856842041, + "learning_rate": 0.0001462788389977673, + "loss": 0.8224, + "step": 4649 + }, + { + "epoch": 1.63, + "grad_norm": 2.716186046600342, + "learning_rate": 0.00014624162738774497, + "loss": 0.3826, + "step": 4650 + }, + { + "epoch": 1.63, + "grad_norm": 2.333148717880249, + "learning_rate": 0.00014620441577772265, + "loss": 0.2995, + "step": 4651 + }, + { + "epoch": 1.63, + "grad_norm": 9.35218620300293, + "learning_rate": 0.0001461672041677003, + "loss": 1.6333, + "step": 4652 + }, + { + "epoch": 1.63, + "grad_norm": 2.3607444763183594, + "learning_rate": 0.000146129992557678, + "loss": 0.213, + "step": 4653 + }, + { + "epoch": 1.63, + "grad_norm": 3.0793917179107666, + "learning_rate": 0.00014609278094765565, + "loss": 0.1334, + "step": 4654 + }, + { + "epoch": 1.63, + "grad_norm": 3.1427955627441406, + "learning_rate": 0.00014605556933763332, + "loss": 0.8882, + "step": 4655 + }, + { + "epoch": 1.63, + "grad_norm": 3.179497718811035, + "learning_rate": 0.000146018357727611, + "loss": 1.0056, + "step": 4656 + }, + { + "epoch": 1.63, + "grad_norm": 2.290648937225342, + "learning_rate": 0.00014598114611758867, + "loss": 0.439, + "step": 4657 + }, + { + "epoch": 1.63, + "grad_norm": 2.732677698135376, + "learning_rate": 0.00014594393450756635, + "loss": 0.7399, + "step": 4658 + }, + { + "epoch": 1.63, + "grad_norm": 1.9614787101745605, + "learning_rate": 0.00014590672289754403, + "loss": 0.3696, + "step": 4659 + }, + { + "epoch": 1.63, + "grad_norm": 1.658251404762268, + "learning_rate": 0.0001458695112875217, + "loss": 0.3531, + "step": 4660 + }, + { + "epoch": 1.63, + "grad_norm": 1.365149974822998, + "learning_rate": 0.00014583229967749935, + "loss": 0.1389, + "step": 4661 + }, + { + "epoch": 1.63, + "grad_norm": 1.6162738800048828, + "learning_rate": 0.00014579508806747705, + "loss": 0.2063, + "step": 4662 + }, + { + "epoch": 1.63, + "grad_norm": 1.5521570444107056, + "learning_rate": 0.0001457578764574547, + "loss": 0.1486, + "step": 4663 + }, + { + "epoch": 1.63, + "grad_norm": 1.7068825960159302, + "learning_rate": 0.00014572066484743238, + "loss": 0.4079, + "step": 4664 + }, + { + "epoch": 1.63, + "grad_norm": 3.820570230484009, + "learning_rate": 0.00014568345323741005, + "loss": 1.0572, + "step": 4665 + }, + { + "epoch": 1.63, + "grad_norm": 1.7748942375183105, + "learning_rate": 0.00014564624162738773, + "loss": 0.2345, + "step": 4666 + }, + { + "epoch": 1.64, + "grad_norm": 2.3004446029663086, + "learning_rate": 0.0001456090300173654, + "loss": 0.2676, + "step": 4667 + }, + { + "epoch": 1.64, + "grad_norm": 1.6133116483688354, + "learning_rate": 0.00014557181840734308, + "loss": 0.1579, + "step": 4668 + }, + { + "epoch": 1.64, + "grad_norm": 1.5437865257263184, + "learning_rate": 0.00014553460679732076, + "loss": 0.1428, + "step": 4669 + }, + { + "epoch": 1.64, + "grad_norm": 3.339294910430908, + "learning_rate": 0.00014549739518729843, + "loss": 0.1919, + "step": 4670 + }, + { + "epoch": 1.64, + "grad_norm": 2.862550973892212, + "learning_rate": 0.0001454601835772761, + "loss": 0.5847, + "step": 4671 + }, + { + "epoch": 1.64, + "grad_norm": 2.6982133388519287, + "learning_rate": 0.00014542297196725378, + "loss": 0.0781, + "step": 4672 + }, + { + "epoch": 1.64, + "grad_norm": 4.697220325469971, + "learning_rate": 0.00014538576035723143, + "loss": 0.4647, + "step": 4673 + }, + { + "epoch": 1.64, + "grad_norm": 2.356121778488159, + "learning_rate": 0.0001453485487472091, + "loss": 0.253, + "step": 4674 + }, + { + "epoch": 1.64, + "grad_norm": 3.0695745944976807, + "learning_rate": 0.00014531133713718678, + "loss": 0.1711, + "step": 4675 + }, + { + "epoch": 1.64, + "grad_norm": 2.4559316635131836, + "learning_rate": 0.00014527412552716446, + "loss": 0.1617, + "step": 4676 + }, + { + "epoch": 1.64, + "grad_norm": 5.379946708679199, + "learning_rate": 0.00014523691391714214, + "loss": 0.486, + "step": 4677 + }, + { + "epoch": 1.64, + "grad_norm": 4.076511859893799, + "learning_rate": 0.0001451997023071198, + "loss": 1.2421, + "step": 4678 + }, + { + "epoch": 1.64, + "grad_norm": 3.5741493701934814, + "learning_rate": 0.0001451624906970975, + "loss": 0.1734, + "step": 4679 + }, + { + "epoch": 1.64, + "grad_norm": 2.880136728286743, + "learning_rate": 0.00014512527908707516, + "loss": 0.8671, + "step": 4680 + }, + { + "epoch": 1.64, + "grad_norm": 2.4380300045013428, + "learning_rate": 0.00014508806747705284, + "loss": 0.3913, + "step": 4681 + }, + { + "epoch": 1.64, + "grad_norm": 1.858528733253479, + "learning_rate": 0.0001450508558670305, + "loss": 0.4003, + "step": 4682 + }, + { + "epoch": 1.64, + "grad_norm": 1.5311648845672607, + "learning_rate": 0.00014501364425700816, + "loss": 0.2132, + "step": 4683 + }, + { + "epoch": 1.64, + "grad_norm": 1.997759222984314, + "learning_rate": 0.00014497643264698584, + "loss": 0.4408, + "step": 4684 + }, + { + "epoch": 1.64, + "grad_norm": 2.679245710372925, + "learning_rate": 0.00014493922103696352, + "loss": 0.4171, + "step": 4685 + }, + { + "epoch": 1.64, + "grad_norm": 3.2098543643951416, + "learning_rate": 0.0001449020094269412, + "loss": 0.3754, + "step": 4686 + }, + { + "epoch": 1.64, + "grad_norm": 3.1209075450897217, + "learning_rate": 0.00014486479781691887, + "loss": 0.6859, + "step": 4687 + }, + { + "epoch": 1.64, + "grad_norm": 1.5616499185562134, + "learning_rate": 0.00014482758620689654, + "loss": 0.1983, + "step": 4688 + }, + { + "epoch": 1.64, + "grad_norm": 2.346381425857544, + "learning_rate": 0.00014479037459687422, + "loss": 0.4227, + "step": 4689 + }, + { + "epoch": 1.64, + "grad_norm": 2.0079967975616455, + "learning_rate": 0.0001447531629868519, + "loss": 0.2501, + "step": 4690 + }, + { + "epoch": 1.64, + "grad_norm": 1.4115216732025146, + "learning_rate": 0.00014471595137682954, + "loss": 0.1448, + "step": 4691 + }, + { + "epoch": 1.64, + "grad_norm": 1.5479143857955933, + "learning_rate": 0.00014467873976680725, + "loss": 0.0889, + "step": 4692 + }, + { + "epoch": 1.64, + "grad_norm": 2.8004372119903564, + "learning_rate": 0.00014464152815678492, + "loss": 0.2043, + "step": 4693 + }, + { + "epoch": 1.64, + "grad_norm": 3.007761240005493, + "learning_rate": 0.00014460431654676257, + "loss": 0.0748, + "step": 4694 + }, + { + "epoch": 1.65, + "grad_norm": 1.9818124771118164, + "learning_rate": 0.00014456710493674025, + "loss": 0.2978, + "step": 4695 + }, + { + "epoch": 1.65, + "grad_norm": 2.8312606811523438, + "learning_rate": 0.00014452989332671792, + "loss": 0.2482, + "step": 4696 + }, + { + "epoch": 1.65, + "grad_norm": 2.239459753036499, + "learning_rate": 0.0001444926817166956, + "loss": 0.1302, + "step": 4697 + }, + { + "epoch": 1.65, + "grad_norm": 2.9169397354125977, + "learning_rate": 0.00014445547010667327, + "loss": 0.2396, + "step": 4698 + }, + { + "epoch": 1.65, + "grad_norm": 1.8439327478408813, + "learning_rate": 0.00014441825849665095, + "loss": 0.1673, + "step": 4699 + }, + { + "epoch": 1.65, + "grad_norm": 4.448665618896484, + "learning_rate": 0.0001443810468866286, + "loss": 1.1154, + "step": 4700 + }, + { + "epoch": 1.65, + "eval_loss": 0.5081397891044617, + "eval_runtime": 51.1352, + "eval_samples_per_second": 42.397, + "eval_steps_per_second": 10.599, + "eval_wer": 0.4475869226777374, + "step": 4700 + }, + { + "epoch": 1.65, + "grad_norm": 2.9089102745056152, + "learning_rate": 0.0001443438352766063, + "loss": 0.3261, + "step": 4701 + }, + { + "epoch": 1.65, + "grad_norm": 2.1453356742858887, + "learning_rate": 0.00014430662366658398, + "loss": 0.2774, + "step": 4702 + }, + { + "epoch": 1.65, + "grad_norm": 3.1812984943389893, + "learning_rate": 0.00014426941205656163, + "loss": 0.1525, + "step": 4703 + }, + { + "epoch": 1.65, + "grad_norm": 1.623590350151062, + "learning_rate": 0.0001442322004465393, + "loss": 0.1587, + "step": 4704 + }, + { + "epoch": 1.65, + "grad_norm": 2.106051445007324, + "learning_rate": 0.00014419498883651698, + "loss": 0.3934, + "step": 4705 + }, + { + "epoch": 1.65, + "grad_norm": 2.303579807281494, + "learning_rate": 0.00014415777722649465, + "loss": 0.5234, + "step": 4706 + }, + { + "epoch": 1.65, + "grad_norm": 2.132972240447998, + "learning_rate": 0.00014412056561647233, + "loss": 0.4725, + "step": 4707 + }, + { + "epoch": 1.65, + "grad_norm": 1.8919379711151123, + "learning_rate": 0.00014408335400645, + "loss": 0.2028, + "step": 4708 + }, + { + "epoch": 1.65, + "grad_norm": 1.383896827697754, + "learning_rate": 0.00014404614239642765, + "loss": 0.1585, + "step": 4709 + }, + { + "epoch": 1.65, + "grad_norm": 2.1082210540771484, + "learning_rate": 0.00014400893078640536, + "loss": 0.5981, + "step": 4710 + }, + { + "epoch": 1.65, + "grad_norm": 1.662786841392517, + "learning_rate": 0.00014397171917638303, + "loss": 0.2439, + "step": 4711 + }, + { + "epoch": 1.65, + "grad_norm": 2.6457130908966064, + "learning_rate": 0.00014393450756636068, + "loss": 0.43, + "step": 4712 + }, + { + "epoch": 1.65, + "grad_norm": 2.411425828933716, + "learning_rate": 0.00014389729595633836, + "loss": 0.3565, + "step": 4713 + }, + { + "epoch": 1.65, + "grad_norm": 2.6090455055236816, + "learning_rate": 0.00014386008434631606, + "loss": 0.1975, + "step": 4714 + }, + { + "epoch": 1.65, + "grad_norm": 2.9632768630981445, + "learning_rate": 0.0001438228727362937, + "loss": 0.202, + "step": 4715 + }, + { + "epoch": 1.65, + "grad_norm": 2.053697109222412, + "learning_rate": 0.00014378566112627138, + "loss": 0.3983, + "step": 4716 + }, + { + "epoch": 1.65, + "grad_norm": 2.210340738296509, + "learning_rate": 0.00014374844951624906, + "loss": 0.3449, + "step": 4717 + }, + { + "epoch": 1.65, + "grad_norm": 1.4516555070877075, + "learning_rate": 0.00014371123790622674, + "loss": 0.2329, + "step": 4718 + }, + { + "epoch": 1.65, + "grad_norm": 1.6123175621032715, + "learning_rate": 0.0001436740262962044, + "loss": 0.2082, + "step": 4719 + }, + { + "epoch": 1.65, + "grad_norm": 1.6988117694854736, + "learning_rate": 0.0001436368146861821, + "loss": 0.1245, + "step": 4720 + }, + { + "epoch": 1.65, + "grad_norm": 1.9669654369354248, + "learning_rate": 0.00014359960307615974, + "loss": 0.2072, + "step": 4721 + }, + { + "epoch": 1.65, + "grad_norm": 2.498318672180176, + "learning_rate": 0.0001435623914661374, + "loss": 0.1746, + "step": 4722 + }, + { + "epoch": 1.65, + "grad_norm": 2.309772253036499, + "learning_rate": 0.00014352517985611511, + "loss": 0.3295, + "step": 4723 + }, + { + "epoch": 1.66, + "grad_norm": 1.7596651315689087, + "learning_rate": 0.00014348796824609276, + "loss": 0.1938, + "step": 4724 + }, + { + "epoch": 1.66, + "grad_norm": 2.1776697635650635, + "learning_rate": 0.00014345075663607044, + "loss": 0.124, + "step": 4725 + }, + { + "epoch": 1.66, + "grad_norm": 3.659687042236328, + "learning_rate": 0.00014341354502604811, + "loss": 0.4348, + "step": 4726 + }, + { + "epoch": 1.66, + "grad_norm": 2.932208776473999, + "learning_rate": 0.0001433763334160258, + "loss": 0.3146, + "step": 4727 + }, + { + "epoch": 1.66, + "grad_norm": 2.094736099243164, + "learning_rate": 0.00014333912180600347, + "loss": 0.1576, + "step": 4728 + }, + { + "epoch": 1.66, + "grad_norm": 1.049635887145996, + "learning_rate": 0.00014330191019598114, + "loss": 0.0841, + "step": 4729 + }, + { + "epoch": 1.66, + "grad_norm": 1.7156835794448853, + "learning_rate": 0.0001432646985859588, + "loss": 0.7529, + "step": 4730 + }, + { + "epoch": 1.66, + "grad_norm": 1.7752554416656494, + "learning_rate": 0.00014322748697593647, + "loss": 0.1068, + "step": 4731 + }, + { + "epoch": 1.66, + "grad_norm": 1.4473437070846558, + "learning_rate": 0.00014319027536591417, + "loss": 0.2724, + "step": 4732 + }, + { + "epoch": 1.66, + "grad_norm": 1.5380414724349976, + "learning_rate": 0.00014315306375589182, + "loss": 0.317, + "step": 4733 + }, + { + "epoch": 1.66, + "grad_norm": 3.0182528495788574, + "learning_rate": 0.0001431158521458695, + "loss": 0.4587, + "step": 4734 + }, + { + "epoch": 1.66, + "grad_norm": 1.3725048303604126, + "learning_rate": 0.00014307864053584717, + "loss": 0.2012, + "step": 4735 + }, + { + "epoch": 1.66, + "grad_norm": 5.923343658447266, + "learning_rate": 0.00014304142892582485, + "loss": 0.4233, + "step": 4736 + }, + { + "epoch": 1.66, + "grad_norm": 0.7124632596969604, + "learning_rate": 0.00014300421731580252, + "loss": 0.067, + "step": 4737 + }, + { + "epoch": 1.66, + "grad_norm": 3.3575642108917236, + "learning_rate": 0.0001429670057057802, + "loss": 0.8093, + "step": 4738 + }, + { + "epoch": 1.66, + "grad_norm": 2.0340492725372314, + "learning_rate": 0.00014292979409575787, + "loss": 0.3322, + "step": 4739 + }, + { + "epoch": 1.66, + "grad_norm": 0.9786810874938965, + "learning_rate": 0.00014289258248573552, + "loss": 0.0871, + "step": 4740 + }, + { + "epoch": 1.66, + "grad_norm": 1.9138987064361572, + "learning_rate": 0.00014285537087571322, + "loss": 0.3063, + "step": 4741 + }, + { + "epoch": 1.66, + "grad_norm": 1.8535488843917847, + "learning_rate": 0.00014281815926569087, + "loss": 0.3178, + "step": 4742 + }, + { + "epoch": 1.66, + "grad_norm": 1.4353394508361816, + "learning_rate": 0.00014278094765566855, + "loss": 0.1811, + "step": 4743 + }, + { + "epoch": 1.66, + "grad_norm": 3.0626792907714844, + "learning_rate": 0.00014274373604564622, + "loss": 0.4848, + "step": 4744 + }, + { + "epoch": 1.66, + "grad_norm": 2.1779870986938477, + "learning_rate": 0.0001427065244356239, + "loss": 0.2258, + "step": 4745 + }, + { + "epoch": 1.66, + "grad_norm": 7.890713214874268, + "learning_rate": 0.00014266931282560158, + "loss": 0.9487, + "step": 4746 + }, + { + "epoch": 1.66, + "grad_norm": 3.38159441947937, + "learning_rate": 0.00014263210121557925, + "loss": 0.2995, + "step": 4747 + }, + { + "epoch": 1.66, + "grad_norm": 2.917301654815674, + "learning_rate": 0.00014259488960555693, + "loss": 0.5792, + "step": 4748 + }, + { + "epoch": 1.66, + "grad_norm": 3.2175188064575195, + "learning_rate": 0.00014255767799553458, + "loss": 0.1773, + "step": 4749 + }, + { + "epoch": 1.66, + "grad_norm": 2.1095893383026123, + "learning_rate": 0.00014252046638551228, + "loss": 0.2296, + "step": 4750 + }, + { + "epoch": 1.66, + "grad_norm": 3.4463748931884766, + "learning_rate": 0.00014248325477548993, + "loss": 0.0579, + "step": 4751 + }, + { + "epoch": 1.67, + "grad_norm": 3.1098155975341797, + "learning_rate": 0.0001424460431654676, + "loss": 0.6259, + "step": 4752 + }, + { + "epoch": 1.67, + "grad_norm": 1.331878662109375, + "learning_rate": 0.00014240883155544528, + "loss": 0.0832, + "step": 4753 + }, + { + "epoch": 1.67, + "grad_norm": 2.0971672534942627, + "learning_rate": 0.00014237161994542296, + "loss": 0.1894, + "step": 4754 + }, + { + "epoch": 1.67, + "grad_norm": 9.917618751525879, + "learning_rate": 0.00014233440833540063, + "loss": 2.6677, + "step": 4755 + }, + { + "epoch": 1.67, + "grad_norm": 1.685046672821045, + "learning_rate": 0.0001422971967253783, + "loss": 0.4792, + "step": 4756 + }, + { + "epoch": 1.67, + "grad_norm": 3.0383598804473877, + "learning_rate": 0.00014225998511535598, + "loss": 0.6298, + "step": 4757 + }, + { + "epoch": 1.67, + "grad_norm": 1.575229525566101, + "learning_rate": 0.00014222277350533366, + "loss": 0.1554, + "step": 4758 + }, + { + "epoch": 1.67, + "grad_norm": 1.6466107368469238, + "learning_rate": 0.00014218556189531133, + "loss": 0.2602, + "step": 4759 + }, + { + "epoch": 1.67, + "grad_norm": 1.8354624509811401, + "learning_rate": 0.000142148350285289, + "loss": 0.258, + "step": 4760 + }, + { + "epoch": 1.67, + "grad_norm": 2.351383924484253, + "learning_rate": 0.00014211113867526666, + "loss": 0.3148, + "step": 4761 + }, + { + "epoch": 1.67, + "grad_norm": 4.153576374053955, + "learning_rate": 0.00014207392706524433, + "loss": 0.4399, + "step": 4762 + }, + { + "epoch": 1.67, + "grad_norm": 1.340376853942871, + "learning_rate": 0.000142036715455222, + "loss": 0.1725, + "step": 4763 + }, + { + "epoch": 1.67, + "grad_norm": 2.0633182525634766, + "learning_rate": 0.00014199950384519969, + "loss": 0.3566, + "step": 4764 + }, + { + "epoch": 1.67, + "grad_norm": 1.320162057876587, + "learning_rate": 0.00014196229223517736, + "loss": 0.2584, + "step": 4765 + }, + { + "epoch": 1.67, + "grad_norm": 1.1114542484283447, + "learning_rate": 0.00014192508062515504, + "loss": 0.0903, + "step": 4766 + }, + { + "epoch": 1.67, + "grad_norm": 3.655611276626587, + "learning_rate": 0.00014188786901513271, + "loss": 0.4427, + "step": 4767 + }, + { + "epoch": 1.67, + "grad_norm": 5.079207897186279, + "learning_rate": 0.0001418506574051104, + "loss": 0.547, + "step": 4768 + }, + { + "epoch": 1.67, + "grad_norm": 4.637149810791016, + "learning_rate": 0.00014181344579508807, + "loss": 0.2594, + "step": 4769 + }, + { + "epoch": 1.67, + "grad_norm": 1.4649180173873901, + "learning_rate": 0.00014177623418506571, + "loss": 0.0639, + "step": 4770 + }, + { + "epoch": 1.67, + "grad_norm": 2.595176935195923, + "learning_rate": 0.00014173902257504342, + "loss": 0.1618, + "step": 4771 + }, + { + "epoch": 1.67, + "grad_norm": 3.4233720302581787, + "learning_rate": 0.00014170181096502107, + "loss": 0.3967, + "step": 4772 + }, + { + "epoch": 1.67, + "grad_norm": 5.957653045654297, + "learning_rate": 0.00014166459935499874, + "loss": 1.8509, + "step": 4773 + }, + { + "epoch": 1.67, + "grad_norm": 1.967976450920105, + "learning_rate": 0.00014162738774497642, + "loss": 0.1728, + "step": 4774 + }, + { + "epoch": 1.67, + "grad_norm": 3.4824140071868896, + "learning_rate": 0.0001415901761349541, + "loss": 0.2681, + "step": 4775 + }, + { + "epoch": 1.67, + "grad_norm": 1.7179447412490845, + "learning_rate": 0.00014155296452493177, + "loss": 0.1949, + "step": 4776 + }, + { + "epoch": 1.67, + "grad_norm": 2.076540946960449, + "learning_rate": 0.00014151575291490944, + "loss": 0.248, + "step": 4777 + }, + { + "epoch": 1.67, + "grad_norm": 2.4143893718719482, + "learning_rate": 0.00014147854130488712, + "loss": 0.3014, + "step": 4778 + }, + { + "epoch": 1.67, + "grad_norm": 4.271050930023193, + "learning_rate": 0.00014144132969486477, + "loss": 0.3233, + "step": 4779 + }, + { + "epoch": 1.67, + "grad_norm": 4.913974761962891, + "learning_rate": 0.00014140411808484247, + "loss": 1.046, + "step": 4780 + }, + { + "epoch": 1.68, + "grad_norm": 3.808880090713501, + "learning_rate": 0.00014136690647482015, + "loss": 0.5579, + "step": 4781 + }, + { + "epoch": 1.68, + "grad_norm": 3.678056478500366, + "learning_rate": 0.0001413296948647978, + "loss": 0.5987, + "step": 4782 + }, + { + "epoch": 1.68, + "grad_norm": 1.150814175605774, + "learning_rate": 0.00014129248325477547, + "loss": 0.1279, + "step": 4783 + }, + { + "epoch": 1.68, + "grad_norm": 2.413187026977539, + "learning_rate": 0.00014125527164475315, + "loss": 0.4254, + "step": 4784 + }, + { + "epoch": 1.68, + "grad_norm": 1.675049066543579, + "learning_rate": 0.00014121806003473082, + "loss": 0.1614, + "step": 4785 + }, + { + "epoch": 1.68, + "grad_norm": 3.6439006328582764, + "learning_rate": 0.0001411808484247085, + "loss": 0.4455, + "step": 4786 + }, + { + "epoch": 1.68, + "grad_norm": 3.2664308547973633, + "learning_rate": 0.00014114363681468618, + "loss": 0.757, + "step": 4787 + }, + { + "epoch": 1.68, + "grad_norm": 2.9309678077697754, + "learning_rate": 0.00014110642520466382, + "loss": 0.3334, + "step": 4788 + }, + { + "epoch": 1.68, + "grad_norm": 2.2318620681762695, + "learning_rate": 0.00014106921359464153, + "loss": 0.3742, + "step": 4789 + }, + { + "epoch": 1.68, + "grad_norm": 2.364555597305298, + "learning_rate": 0.0001410320019846192, + "loss": 0.3565, + "step": 4790 + }, + { + "epoch": 1.68, + "grad_norm": 2.487501859664917, + "learning_rate": 0.00014099479037459685, + "loss": 0.2205, + "step": 4791 + }, + { + "epoch": 1.68, + "grad_norm": 1.8816657066345215, + "learning_rate": 0.00014095757876457453, + "loss": 0.1847, + "step": 4792 + }, + { + "epoch": 1.68, + "grad_norm": 2.2951865196228027, + "learning_rate": 0.0001409203671545522, + "loss": 0.258, + "step": 4793 + }, + { + "epoch": 1.68, + "grad_norm": 1.8484877347946167, + "learning_rate": 0.00014088315554452988, + "loss": 0.1441, + "step": 4794 + }, + { + "epoch": 1.68, + "grad_norm": 3.879554033279419, + "learning_rate": 0.00014084594393450755, + "loss": 0.5913, + "step": 4795 + }, + { + "epoch": 1.68, + "grad_norm": 8.260385513305664, + "learning_rate": 0.00014080873232448523, + "loss": 1.9975, + "step": 4796 + }, + { + "epoch": 1.68, + "grad_norm": 2.673550605773926, + "learning_rate": 0.0001407715207144629, + "loss": 0.6173, + "step": 4797 + }, + { + "epoch": 1.68, + "grad_norm": 2.3849501609802246, + "learning_rate": 0.00014073430910444058, + "loss": 0.2429, + "step": 4798 + }, + { + "epoch": 1.68, + "grad_norm": 4.342643737792969, + "learning_rate": 0.00014069709749441826, + "loss": 0.6903, + "step": 4799 + }, + { + "epoch": 1.68, + "grad_norm": 2.8125104904174805, + "learning_rate": 0.0001406598858843959, + "loss": 0.5278, + "step": 4800 + }, + { + "epoch": 1.68, + "eval_loss": 0.4198063313961029, + "eval_runtime": 51.395, + "eval_samples_per_second": 42.183, + "eval_steps_per_second": 10.546, + "eval_wer": 0.3914547656114859, + "step": 4800 + }, + { + "epoch": 1.68, + "grad_norm": 1.2338154315948486, + "learning_rate": 0.00014062267427437358, + "loss": 0.102, + "step": 4801 + }, + { + "epoch": 1.68, + "grad_norm": 1.0008563995361328, + "learning_rate": 0.00014058546266435129, + "loss": 0.0983, + "step": 4802 + }, + { + "epoch": 1.68, + "grad_norm": 1.7836005687713623, + "learning_rate": 0.00014054825105432893, + "loss": 0.0784, + "step": 4803 + }, + { + "epoch": 1.68, + "grad_norm": 5.8645453453063965, + "learning_rate": 0.0001405110394443066, + "loss": 0.5595, + "step": 4804 + }, + { + "epoch": 1.68, + "grad_norm": 6.060259819030762, + "learning_rate": 0.00014047382783428429, + "loss": 1.0248, + "step": 4805 + }, + { + "epoch": 1.68, + "grad_norm": 2.253270387649536, + "learning_rate": 0.00014043661622426196, + "loss": 0.417, + "step": 4806 + }, + { + "epoch": 1.68, + "grad_norm": 2.2081809043884277, + "learning_rate": 0.00014039940461423964, + "loss": 0.4441, + "step": 4807 + }, + { + "epoch": 1.68, + "grad_norm": 2.563767910003662, + "learning_rate": 0.0001403621930042173, + "loss": 0.5687, + "step": 4808 + }, + { + "epoch": 1.69, + "grad_norm": 1.858834147453308, + "learning_rate": 0.00014032498139419496, + "loss": 0.491, + "step": 4809 + }, + { + "epoch": 1.69, + "grad_norm": 2.5288004875183105, + "learning_rate": 0.00014028776978417264, + "loss": 0.4507, + "step": 4810 + }, + { + "epoch": 1.69, + "grad_norm": 1.2425075769424438, + "learning_rate": 0.00014025055817415034, + "loss": 0.2789, + "step": 4811 + }, + { + "epoch": 1.69, + "grad_norm": 3.400965929031372, + "learning_rate": 0.000140213346564128, + "loss": 0.785, + "step": 4812 + }, + { + "epoch": 1.69, + "grad_norm": 2.7308123111724854, + "learning_rate": 0.00014017613495410566, + "loss": 0.4628, + "step": 4813 + }, + { + "epoch": 1.69, + "grad_norm": 2.1761913299560547, + "learning_rate": 0.00014013892334408334, + "loss": 0.4261, + "step": 4814 + }, + { + "epoch": 1.69, + "grad_norm": 3.010317325592041, + "learning_rate": 0.00014010171173406102, + "loss": 0.2918, + "step": 4815 + }, + { + "epoch": 1.69, + "grad_norm": 2.6612460613250732, + "learning_rate": 0.0001400645001240387, + "loss": 0.4434, + "step": 4816 + }, + { + "epoch": 1.69, + "grad_norm": 1.3962461948394775, + "learning_rate": 0.00014002728851401637, + "loss": 0.182, + "step": 4817 + }, + { + "epoch": 1.69, + "grad_norm": 1.6107089519500732, + "learning_rate": 0.00013999007690399404, + "loss": 0.14, + "step": 4818 + }, + { + "epoch": 1.69, + "grad_norm": 0.7715272903442383, + "learning_rate": 0.0001399528652939717, + "loss": 0.0546, + "step": 4819 + }, + { + "epoch": 1.69, + "grad_norm": 1.2983365058898926, + "learning_rate": 0.0001399156536839494, + "loss": 0.0752, + "step": 4820 + }, + { + "epoch": 1.69, + "grad_norm": 1.6679033041000366, + "learning_rate": 0.00013987844207392704, + "loss": 0.1966, + "step": 4821 + }, + { + "epoch": 1.69, + "grad_norm": 1.6639858484268188, + "learning_rate": 0.00013984123046390472, + "loss": 0.0978, + "step": 4822 + }, + { + "epoch": 1.69, + "grad_norm": 0.8459872007369995, + "learning_rate": 0.0001398040188538824, + "loss": 0.0403, + "step": 4823 + }, + { + "epoch": 1.69, + "grad_norm": 8.972551345825195, + "learning_rate": 0.00013976680724386007, + "loss": 1.5492, + "step": 4824 + }, + { + "epoch": 1.69, + "grad_norm": 3.672152042388916, + "learning_rate": 0.00013972959563383775, + "loss": 0.5177, + "step": 4825 + }, + { + "epoch": 1.69, + "grad_norm": 4.917130947113037, + "learning_rate": 0.00013969238402381542, + "loss": 0.5867, + "step": 4826 + }, + { + "epoch": 1.69, + "grad_norm": 3.8166067600250244, + "learning_rate": 0.0001396551724137931, + "loss": 0.7695, + "step": 4827 + }, + { + "epoch": 1.69, + "grad_norm": NaN, + "learning_rate": 0.0001396551724137931, + "loss": 0.0208, + "step": 4828 + }, + { + "epoch": 1.69, + "grad_norm": 5.4278788566589355, + "learning_rate": 0.00013961796080377075, + "loss": 0.9888, + "step": 4829 + }, + { + "epoch": 1.69, + "grad_norm": 2.555227041244507, + "learning_rate": 0.00013958074919374845, + "loss": 0.4968, + "step": 4830 + }, + { + "epoch": 1.69, + "grad_norm": 1.814968228340149, + "learning_rate": 0.0001395435375837261, + "loss": 0.3631, + "step": 4831 + }, + { + "epoch": 1.69, + "grad_norm": 1.8207460641860962, + "learning_rate": 0.00013950632597370377, + "loss": 0.4976, + "step": 4832 + }, + { + "epoch": 1.69, + "grad_norm": 2.339345693588257, + "learning_rate": 0.00013946911436368145, + "loss": 1.0818, + "step": 4833 + }, + { + "epoch": 1.69, + "grad_norm": 2.1169614791870117, + "learning_rate": 0.00013943190275365913, + "loss": 0.2953, + "step": 4834 + }, + { + "epoch": 1.69, + "grad_norm": 1.9661637544631958, + "learning_rate": 0.0001393946911436368, + "loss": 0.4478, + "step": 4835 + }, + { + "epoch": 1.69, + "grad_norm": 1.695245623588562, + "learning_rate": 0.00013935747953361448, + "loss": 0.3099, + "step": 4836 + }, + { + "epoch": 1.69, + "grad_norm": 1.6964011192321777, + "learning_rate": 0.00013932026792359215, + "loss": 0.3012, + "step": 4837 + }, + { + "epoch": 1.7, + "grad_norm": 0.6117279529571533, + "learning_rate": 0.00013928305631356983, + "loss": 0.0769, + "step": 4838 + }, + { + "epoch": 1.7, + "grad_norm": 4.413363933563232, + "learning_rate": 0.0001392458447035475, + "loss": 0.5109, + "step": 4839 + }, + { + "epoch": 1.7, + "grad_norm": 5.733808994293213, + "learning_rate": 0.00013920863309352518, + "loss": 1.3301, + "step": 4840 + }, + { + "epoch": 1.7, + "grad_norm": 2.3918161392211914, + "learning_rate": 0.00013917142148350283, + "loss": 0.716, + "step": 4841 + }, + { + "epoch": 1.7, + "grad_norm": 1.3467110395431519, + "learning_rate": 0.0001391342098734805, + "loss": 0.186, + "step": 4842 + }, + { + "epoch": 1.7, + "grad_norm": 1.767811894416809, + "learning_rate": 0.00013909699826345818, + "loss": 0.2146, + "step": 4843 + }, + { + "epoch": 1.7, + "grad_norm": 2.754969835281372, + "learning_rate": 0.00013905978665343586, + "loss": 0.4884, + "step": 4844 + }, + { + "epoch": 1.7, + "grad_norm": 1.2362042665481567, + "learning_rate": 0.00013902257504341353, + "loss": 0.2557, + "step": 4845 + }, + { + "epoch": 1.7, + "grad_norm": 1.3661056756973267, + "learning_rate": 0.0001389853634333912, + "loss": 0.1672, + "step": 4846 + }, + { + "epoch": 1.7, + "grad_norm": 0.7961978912353516, + "learning_rate": 0.00013894815182336888, + "loss": 0.0539, + "step": 4847 + }, + { + "epoch": 1.7, + "grad_norm": 2.0398333072662354, + "learning_rate": 0.00013891094021334656, + "loss": 0.2883, + "step": 4848 + }, + { + "epoch": 1.7, + "grad_norm": 1.8911408185958862, + "learning_rate": 0.00013887372860332424, + "loss": 0.2, + "step": 4849 + }, + { + "epoch": 1.7, + "grad_norm": 3.2513821125030518, + "learning_rate": 0.00013883651699330188, + "loss": 0.5697, + "step": 4850 + }, + { + "epoch": 1.7, + "grad_norm": 1.9013601541519165, + "learning_rate": 0.00013879930538327956, + "loss": 0.1448, + "step": 4851 + }, + { + "epoch": 1.7, + "grad_norm": 3.6349408626556396, + "learning_rate": 0.00013876209377325724, + "loss": 0.4995, + "step": 4852 + }, + { + "epoch": 1.7, + "grad_norm": 1.0852819681167603, + "learning_rate": 0.0001387248821632349, + "loss": 0.0309, + "step": 4853 + }, + { + "epoch": 1.7, + "grad_norm": NaN, + "learning_rate": 0.0001387248821632349, + "loss": 0.0687, + "step": 4854 + }, + { + "epoch": 1.7, + "grad_norm": 2.3139288425445557, + "learning_rate": 0.0001386876705532126, + "loss": 0.572, + "step": 4855 + }, + { + "epoch": 1.7, + "grad_norm": 2.169691324234009, + "learning_rate": 0.00013865045894319026, + "loss": 0.265, + "step": 4856 + }, + { + "epoch": 1.7, + "grad_norm": 2.2995455265045166, + "learning_rate": 0.00013861324733316794, + "loss": 0.4024, + "step": 4857 + }, + { + "epoch": 1.7, + "grad_norm": 1.8771644830703735, + "learning_rate": 0.00013857603572314562, + "loss": 0.2272, + "step": 4858 + }, + { + "epoch": 1.7, + "grad_norm": 1.6588895320892334, + "learning_rate": 0.0001385388241131233, + "loss": 0.2656, + "step": 4859 + }, + { + "epoch": 1.7, + "grad_norm": 2.3973159790039062, + "learning_rate": 0.00013850161250310094, + "loss": 0.7651, + "step": 4860 + }, + { + "epoch": 1.7, + "grad_norm": 1.8839689493179321, + "learning_rate": 0.00013846440089307864, + "loss": 0.4598, + "step": 4861 + }, + { + "epoch": 1.7, + "grad_norm": 1.9465550184249878, + "learning_rate": 0.00013842718928305632, + "loss": 0.235, + "step": 4862 + }, + { + "epoch": 1.7, + "grad_norm": 2.5686044692993164, + "learning_rate": 0.00013838997767303397, + "loss": 0.7046, + "step": 4863 + }, + { + "epoch": 1.7, + "grad_norm": 2.570693254470825, + "learning_rate": 0.00013835276606301164, + "loss": 0.1989, + "step": 4864 + }, + { + "epoch": 1.7, + "grad_norm": 2.393869638442993, + "learning_rate": 0.00013831555445298932, + "loss": 0.3419, + "step": 4865 + }, + { + "epoch": 1.7, + "grad_norm": 2.3206074237823486, + "learning_rate": 0.000138278342842967, + "loss": 0.231, + "step": 4866 + }, + { + "epoch": 1.71, + "grad_norm": 1.7298521995544434, + "learning_rate": 0.00013824113123294467, + "loss": 0.2686, + "step": 4867 + }, + { + "epoch": 1.71, + "grad_norm": 1.511710286140442, + "learning_rate": 0.00013820391962292235, + "loss": 0.2347, + "step": 4868 + }, + { + "epoch": 1.71, + "grad_norm": 2.156994104385376, + "learning_rate": 0.0001381667080129, + "loss": 0.2402, + "step": 4869 + }, + { + "epoch": 1.71, + "grad_norm": 1.5252554416656494, + "learning_rate": 0.0001381294964028777, + "loss": 0.1506, + "step": 4870 + }, + { + "epoch": 1.71, + "grad_norm": 3.855454683303833, + "learning_rate": 0.00013809228479285537, + "loss": 0.3318, + "step": 4871 + }, + { + "epoch": 1.71, + "grad_norm": 3.8235645294189453, + "learning_rate": 0.00013805507318283302, + "loss": 0.6935, + "step": 4872 + }, + { + "epoch": 1.71, + "grad_norm": 3.7631542682647705, + "learning_rate": 0.0001380178615728107, + "loss": 0.6565, + "step": 4873 + }, + { + "epoch": 1.71, + "grad_norm": 4.347606658935547, + "learning_rate": 0.00013798064996278837, + "loss": 0.6065, + "step": 4874 + }, + { + "epoch": 1.71, + "grad_norm": 0.8318398594856262, + "learning_rate": 0.00013794343835276605, + "loss": 0.0503, + "step": 4875 + }, + { + "epoch": 1.71, + "grad_norm": 3.258185386657715, + "learning_rate": 0.00013790622674274373, + "loss": 0.6948, + "step": 4876 + }, + { + "epoch": 1.71, + "grad_norm": 1.6956919431686401, + "learning_rate": 0.0001378690151327214, + "loss": 0.1668, + "step": 4877 + }, + { + "epoch": 1.71, + "grad_norm": 5.9375224113464355, + "learning_rate": 0.00013783180352269908, + "loss": 0.6585, + "step": 4878 + }, + { + "epoch": 1.71, + "grad_norm": 4.867272853851318, + "learning_rate": 0.00013779459191267675, + "loss": 0.2866, + "step": 4879 + }, + { + "epoch": 1.71, + "grad_norm": 1.3362858295440674, + "learning_rate": 0.00013775738030265443, + "loss": 0.246, + "step": 4880 + }, + { + "epoch": 1.71, + "grad_norm": 2.0603818893432617, + "learning_rate": 0.00013772016869263208, + "loss": 0.4112, + "step": 4881 + }, + { + "epoch": 1.71, + "grad_norm": 2.5215094089508057, + "learning_rate": 0.00013768295708260975, + "loss": 0.47, + "step": 4882 + }, + { + "epoch": 1.71, + "grad_norm": 2.3979105949401855, + "learning_rate": 0.00013764574547258746, + "loss": 0.5573, + "step": 4883 + }, + { + "epoch": 1.71, + "grad_norm": 4.085318088531494, + "learning_rate": 0.0001376085338625651, + "loss": 0.4865, + "step": 4884 + }, + { + "epoch": 1.71, + "grad_norm": 1.9340821504592896, + "learning_rate": 0.00013757132225254278, + "loss": 0.222, + "step": 4885 + }, + { + "epoch": 1.71, + "grad_norm": 26.611042022705078, + "learning_rate": 0.00013753411064252046, + "loss": 4.7412, + "step": 4886 + }, + { + "epoch": 1.71, + "grad_norm": 2.0535519123077393, + "learning_rate": 0.00013749689903249813, + "loss": 0.2508, + "step": 4887 + }, + { + "epoch": 1.71, + "grad_norm": 3.884704828262329, + "learning_rate": 0.0001374596874224758, + "loss": 0.8941, + "step": 4888 + }, + { + "epoch": 1.71, + "grad_norm": 1.4614795446395874, + "learning_rate": 0.00013742247581245348, + "loss": 0.2868, + "step": 4889 + }, + { + "epoch": 1.71, + "grad_norm": 1.9116908311843872, + "learning_rate": 0.00013738526420243113, + "loss": 0.2023, + "step": 4890 + }, + { + "epoch": 1.71, + "grad_norm": 0.9487919807434082, + "learning_rate": 0.0001373480525924088, + "loss": 0.0826, + "step": 4891 + }, + { + "epoch": 1.71, + "grad_norm": 1.6531901359558105, + "learning_rate": 0.0001373108409823865, + "loss": 0.2198, + "step": 4892 + }, + { + "epoch": 1.71, + "grad_norm": 1.2997630834579468, + "learning_rate": 0.00013727362937236416, + "loss": 0.1561, + "step": 4893 + }, + { + "epoch": 1.71, + "grad_norm": 4.334744453430176, + "learning_rate": 0.00013723641776234184, + "loss": 0.5747, + "step": 4894 + }, + { + "epoch": 1.72, + "grad_norm": 6.257345676422119, + "learning_rate": 0.0001371992061523195, + "loss": 1.8452, + "step": 4895 + }, + { + "epoch": 1.72, + "grad_norm": 6.1517133712768555, + "learning_rate": 0.0001371619945422972, + "loss": 1.6441, + "step": 4896 + }, + { + "epoch": 1.72, + "grad_norm": 2.8486979007720947, + "learning_rate": 0.00013712478293227486, + "loss": 0.343, + "step": 4897 + }, + { + "epoch": 1.72, + "grad_norm": 2.724308490753174, + "learning_rate": 0.00013708757132225254, + "loss": 0.7712, + "step": 4898 + }, + { + "epoch": 1.72, + "grad_norm": 1.3378300666809082, + "learning_rate": 0.00013705035971223021, + "loss": 0.0844, + "step": 4899 + }, + { + "epoch": 1.72, + "grad_norm": 4.833279609680176, + "learning_rate": 0.00013701314810220786, + "loss": 0.1357, + "step": 4900 + }, + { + "epoch": 1.72, + "eval_loss": 0.3683335483074188, + "eval_runtime": 51.5895, + "eval_samples_per_second": 42.024, + "eval_steps_per_second": 10.506, + "eval_wer": 0.3474312402698495, + "step": 4900 + }, + { + "epoch": 1.72, + "grad_norm": 3.9212887287139893, + "learning_rate": 0.00013697593649218557, + "loss": 0.5343, + "step": 4901 + }, + { + "epoch": 1.72, + "grad_norm": 3.2143747806549072, + "learning_rate": 0.00013693872488216321, + "loss": 0.4345, + "step": 4902 + }, + { + "epoch": 1.72, + "grad_norm": 7.424620628356934, + "learning_rate": 0.0001369015132721409, + "loss": 0.5639, + "step": 4903 + }, + { + "epoch": 1.72, + "grad_norm": 2.1714751720428467, + "learning_rate": 0.00013686430166211857, + "loss": 0.2382, + "step": 4904 + }, + { + "epoch": 1.72, + "grad_norm": 2.139472007751465, + "learning_rate": 0.00013682709005209624, + "loss": 0.7981, + "step": 4905 + }, + { + "epoch": 1.72, + "grad_norm": 1.1302719116210938, + "learning_rate": 0.00013678987844207392, + "loss": 0.242, + "step": 4906 + }, + { + "epoch": 1.72, + "grad_norm": 1.639523983001709, + "learning_rate": 0.0001367526668320516, + "loss": 0.5281, + "step": 4907 + }, + { + "epoch": 1.72, + "grad_norm": 1.3213592767715454, + "learning_rate": 0.00013671545522202927, + "loss": 0.2546, + "step": 4908 + }, + { + "epoch": 1.72, + "grad_norm": 1.2504050731658936, + "learning_rate": 0.00013667824361200692, + "loss": 0.1545, + "step": 4909 + }, + { + "epoch": 1.72, + "grad_norm": 1.2312544584274292, + "learning_rate": 0.00013664103200198462, + "loss": 0.1631, + "step": 4910 + }, + { + "epoch": 1.72, + "grad_norm": 1.3905742168426514, + "learning_rate": 0.00013660382039196227, + "loss": 0.1149, + "step": 4911 + }, + { + "epoch": 1.72, + "grad_norm": 2.0212652683258057, + "learning_rate": 0.00013656660878193995, + "loss": 0.4504, + "step": 4912 + }, + { + "epoch": 1.72, + "grad_norm": 1.0436809062957764, + "learning_rate": 0.00013652939717191762, + "loss": 0.1652, + "step": 4913 + }, + { + "epoch": 1.72, + "grad_norm": 3.0124869346618652, + "learning_rate": 0.0001364921855618953, + "loss": 0.354, + "step": 4914 + }, + { + "epoch": 1.72, + "grad_norm": 1.9092917442321777, + "learning_rate": 0.00013645497395187297, + "loss": 0.2767, + "step": 4915 + }, + { + "epoch": 1.72, + "grad_norm": 1.9692587852478027, + "learning_rate": 0.00013641776234185065, + "loss": 0.3325, + "step": 4916 + }, + { + "epoch": 1.72, + "grad_norm": 2.2949063777923584, + "learning_rate": 0.00013638055073182832, + "loss": 0.3535, + "step": 4917 + }, + { + "epoch": 1.72, + "grad_norm": 0.9984784722328186, + "learning_rate": 0.00013634333912180597, + "loss": 0.1386, + "step": 4918 + }, + { + "epoch": 1.72, + "grad_norm": 2.6047825813293457, + "learning_rate": 0.00013630612751178368, + "loss": 0.2291, + "step": 4919 + }, + { + "epoch": 1.72, + "grad_norm": 2.1782517433166504, + "learning_rate": 0.00013626891590176135, + "loss": 0.261, + "step": 4920 + }, + { + "epoch": 1.72, + "grad_norm": 1.2911269664764404, + "learning_rate": 0.000136231704291739, + "loss": 0.0941, + "step": 4921 + }, + { + "epoch": 1.72, + "grad_norm": 2.6118998527526855, + "learning_rate": 0.00013619449268171668, + "loss": 0.2503, + "step": 4922 + }, + { + "epoch": 1.72, + "grad_norm": 1.126331090927124, + "learning_rate": 0.00013615728107169435, + "loss": 0.0635, + "step": 4923 + }, + { + "epoch": 1.73, + "grad_norm": 4.782196044921875, + "learning_rate": 0.00013612006946167203, + "loss": 0.4418, + "step": 4924 + }, + { + "epoch": 1.73, + "grad_norm": 1.8012721538543701, + "learning_rate": 0.0001360828578516497, + "loss": 0.1878, + "step": 4925 + }, + { + "epoch": 1.73, + "grad_norm": 3.5132253170013428, + "learning_rate": 0.00013604564624162738, + "loss": 0.1484, + "step": 4926 + }, + { + "epoch": 1.73, + "grad_norm": 3.3157382011413574, + "learning_rate": 0.00013600843463160506, + "loss": 0.2761, + "step": 4927 + }, + { + "epoch": 1.73, + "grad_norm": 1.4232193231582642, + "learning_rate": 0.00013597122302158273, + "loss": 0.0699, + "step": 4928 + }, + { + "epoch": 1.73, + "grad_norm": 1.8785415887832642, + "learning_rate": 0.0001359340114115604, + "loss": 0.0976, + "step": 4929 + }, + { + "epoch": 1.73, + "grad_norm": 2.8369486331939697, + "learning_rate": 0.00013589679980153806, + "loss": 0.5884, + "step": 4930 + }, + { + "epoch": 1.73, + "grad_norm": 1.4012984037399292, + "learning_rate": 0.00013585958819151573, + "loss": 0.3877, + "step": 4931 + }, + { + "epoch": 1.73, + "grad_norm": 1.8025954961776733, + "learning_rate": 0.0001358223765814934, + "loss": 0.1753, + "step": 4932 + }, + { + "epoch": 1.73, + "grad_norm": 1.4512267112731934, + "learning_rate": 0.00013578516497147108, + "loss": 0.379, + "step": 4933 + }, + { + "epoch": 1.73, + "grad_norm": 2.033318281173706, + "learning_rate": 0.00013574795336144876, + "loss": 0.2617, + "step": 4934 + }, + { + "epoch": 1.73, + "grad_norm": 3.8261144161224365, + "learning_rate": 0.00013571074175142643, + "loss": 0.4695, + "step": 4935 + }, + { + "epoch": 1.73, + "grad_norm": 1.4579124450683594, + "learning_rate": 0.0001356735301414041, + "loss": 0.2473, + "step": 4936 + }, + { + "epoch": 1.73, + "grad_norm": 1.6672676801681519, + "learning_rate": 0.00013563631853138179, + "loss": 0.2851, + "step": 4937 + }, + { + "epoch": 1.73, + "grad_norm": 0.9775683283805847, + "learning_rate": 0.00013559910692135946, + "loss": 0.0493, + "step": 4938 + }, + { + "epoch": 1.73, + "grad_norm": 3.237879514694214, + "learning_rate": 0.0001355618953113371, + "loss": 0.5164, + "step": 4939 + }, + { + "epoch": 1.73, + "grad_norm": 3.030714273452759, + "learning_rate": 0.0001355246837013148, + "loss": 0.1808, + "step": 4940 + }, + { + "epoch": 1.73, + "grad_norm": 2.291748285293579, + "learning_rate": 0.0001354874720912925, + "loss": 0.1526, + "step": 4941 + }, + { + "epoch": 1.73, + "grad_norm": 2.577086925506592, + "learning_rate": 0.00013545026048127014, + "loss": 0.1565, + "step": 4942 + }, + { + "epoch": 1.73, + "grad_norm": 3.0467169284820557, + "learning_rate": 0.00013541304887124781, + "loss": 0.3401, + "step": 4943 + }, + { + "epoch": 1.73, + "grad_norm": 2.7685546875, + "learning_rate": 0.0001353758372612255, + "loss": 0.1665, + "step": 4944 + }, + { + "epoch": 1.73, + "grad_norm": 2.6648311614990234, + "learning_rate": 0.00013533862565120317, + "loss": 0.1115, + "step": 4945 + }, + { + "epoch": 1.73, + "grad_norm": 4.279283046722412, + "learning_rate": 0.00013530141404118084, + "loss": 0.59, + "step": 4946 + }, + { + "epoch": 1.73, + "grad_norm": 3.774441719055176, + "learning_rate": 0.00013526420243115852, + "loss": 1.4274, + "step": 4947 + }, + { + "epoch": 1.73, + "grad_norm": 3.0308432579040527, + "learning_rate": 0.00013522699082113617, + "loss": 0.2772, + "step": 4948 + }, + { + "epoch": 1.73, + "grad_norm": 2.0633444786071777, + "learning_rate": 0.00013518977921111387, + "loss": 0.3336, + "step": 4949 + }, + { + "epoch": 1.73, + "grad_norm": 3.2152934074401855, + "learning_rate": 0.00013515256760109154, + "loss": 0.3653, + "step": 4950 + }, + { + "epoch": 1.73, + "grad_norm": 3.7646780014038086, + "learning_rate": 0.0001351153559910692, + "loss": 0.3, + "step": 4951 + }, + { + "epoch": 1.74, + "grad_norm": 0.37087032198905945, + "learning_rate": 0.00013507814438104687, + "loss": 0.0195, + "step": 4952 + }, + { + "epoch": 1.74, + "grad_norm": 1.66860830783844, + "learning_rate": 0.00013504093277102454, + "loss": 0.1063, + "step": 4953 + }, + { + "epoch": 1.74, + "grad_norm": 1.9260728359222412, + "learning_rate": 0.00013500372116100222, + "loss": 0.1283, + "step": 4954 + }, + { + "epoch": 1.74, + "grad_norm": 1.4697273969650269, + "learning_rate": 0.0001349665095509799, + "loss": 0.6385, + "step": 4955 + }, + { + "epoch": 1.74, + "grad_norm": 1.3390899896621704, + "learning_rate": 0.00013492929794095757, + "loss": 0.3823, + "step": 4956 + }, + { + "epoch": 1.74, + "grad_norm": 1.3227888345718384, + "learning_rate": 0.00013489208633093522, + "loss": 0.3421, + "step": 4957 + }, + { + "epoch": 1.74, + "grad_norm": 2.2112112045288086, + "learning_rate": 0.00013485487472091292, + "loss": 0.4267, + "step": 4958 + }, + { + "epoch": 1.74, + "grad_norm": 1.0806066989898682, + "learning_rate": 0.0001348176631108906, + "loss": 0.1819, + "step": 4959 + }, + { + "epoch": 1.74, + "grad_norm": 1.1106966733932495, + "learning_rate": 0.00013478045150086825, + "loss": 0.0838, + "step": 4960 + }, + { + "epoch": 1.74, + "grad_norm": 1.180200219154358, + "learning_rate": 0.00013474323989084592, + "loss": 0.1393, + "step": 4961 + }, + { + "epoch": 1.74, + "grad_norm": 1.237693190574646, + "learning_rate": 0.0001347060282808236, + "loss": 0.172, + "step": 4962 + }, + { + "epoch": 1.74, + "grad_norm": 2.4684200286865234, + "learning_rate": 0.00013466881667080128, + "loss": 0.3471, + "step": 4963 + }, + { + "epoch": 1.74, + "grad_norm": 1.569759488105774, + "learning_rate": 0.00013463160506077895, + "loss": 0.246, + "step": 4964 + }, + { + "epoch": 1.74, + "grad_norm": 2.071803092956543, + "learning_rate": 0.00013459439345075663, + "loss": 0.2071, + "step": 4965 + }, + { + "epoch": 1.74, + "grad_norm": 2.41078519821167, + "learning_rate": 0.0001345571818407343, + "loss": 0.7541, + "step": 4966 + }, + { + "epoch": 1.74, + "grad_norm": 1.8611916303634644, + "learning_rate": 0.00013451997023071198, + "loss": 0.5563, + "step": 4967 + }, + { + "epoch": 1.74, + "grad_norm": 1.8303391933441162, + "learning_rate": 0.00013448275862068965, + "loss": 0.2874, + "step": 4968 + }, + { + "epoch": 1.74, + "grad_norm": 1.8616209030151367, + "learning_rate": 0.0001344455470106673, + "loss": 0.1496, + "step": 4969 + }, + { + "epoch": 1.74, + "grad_norm": 2.0775692462921143, + "learning_rate": 0.00013440833540064498, + "loss": 0.3153, + "step": 4970 + }, + { + "epoch": 1.74, + "grad_norm": 2.2036242485046387, + "learning_rate": 0.00013437112379062268, + "loss": 0.3611, + "step": 4971 + }, + { + "epoch": 1.74, + "grad_norm": 2.958390235900879, + "learning_rate": 0.00013433391218060033, + "loss": 0.3149, + "step": 4972 + }, + { + "epoch": 1.74, + "grad_norm": 1.2696552276611328, + "learning_rate": 0.000134296700570578, + "loss": 0.0836, + "step": 4973 + }, + { + "epoch": 1.74, + "grad_norm": 0.8660387992858887, + "learning_rate": 0.00013425948896055568, + "loss": 0.0415, + "step": 4974 + }, + { + "epoch": 1.74, + "grad_norm": 3.5192763805389404, + "learning_rate": 0.00013422227735053336, + "loss": 0.3977, + "step": 4975 + }, + { + "epoch": 1.74, + "grad_norm": 1.378849744796753, + "learning_rate": 0.00013418506574051103, + "loss": 0.0728, + "step": 4976 + }, + { + "epoch": 1.74, + "grad_norm": 3.9449732303619385, + "learning_rate": 0.0001341478541304887, + "loss": 0.221, + "step": 4977 + }, + { + "epoch": 1.74, + "grad_norm": 5.472590446472168, + "learning_rate": 0.00013411064252046636, + "loss": 1.7522, + "step": 4978 + }, + { + "epoch": 1.74, + "grad_norm": NaN, + "learning_rate": 0.00013411064252046636, + "loss": 0.2462, + "step": 4979 + }, + { + "epoch": 1.74, + "grad_norm": 4.078400611877441, + "learning_rate": 0.00013407343091044403, + "loss": 0.7826, + "step": 4980 + }, + { + "epoch": 1.75, + "grad_norm": 1.8537403345108032, + "learning_rate": 0.00013403621930042174, + "loss": 0.4351, + "step": 4981 + }, + { + "epoch": 1.75, + "grad_norm": 1.9395902156829834, + "learning_rate": 0.00013399900769039939, + "loss": 0.201, + "step": 4982 + }, + { + "epoch": 1.75, + "grad_norm": 1.612997055053711, + "learning_rate": 0.00013396179608037706, + "loss": 0.2431, + "step": 4983 + }, + { + "epoch": 1.75, + "grad_norm": 1.5586113929748535, + "learning_rate": 0.00013392458447035474, + "loss": 0.2738, + "step": 4984 + }, + { + "epoch": 1.75, + "grad_norm": 1.7553973197937012, + "learning_rate": 0.0001338873728603324, + "loss": 0.3034, + "step": 4985 + }, + { + "epoch": 1.75, + "grad_norm": 1.0098979473114014, + "learning_rate": 0.0001338501612503101, + "loss": 0.0768, + "step": 4986 + }, + { + "epoch": 1.75, + "grad_norm": 3.2401552200317383, + "learning_rate": 0.00013381294964028776, + "loss": 1.3496, + "step": 4987 + }, + { + "epoch": 1.75, + "grad_norm": 1.5994211435317993, + "learning_rate": 0.00013377573803026544, + "loss": 0.1578, + "step": 4988 + }, + { + "epoch": 1.75, + "grad_norm": 2.8812813758850098, + "learning_rate": 0.0001337385264202431, + "loss": 0.6019, + "step": 4989 + }, + { + "epoch": 1.75, + "grad_norm": 1.9770814180374146, + "learning_rate": 0.0001337013148102208, + "loss": 0.2927, + "step": 4990 + }, + { + "epoch": 1.75, + "grad_norm": 3.3492937088012695, + "learning_rate": 0.00013366410320019844, + "loss": 0.8254, + "step": 4991 + }, + { + "epoch": 1.75, + "grad_norm": 0.7024646997451782, + "learning_rate": 0.00013362689159017612, + "loss": 0.0549, + "step": 4992 + }, + { + "epoch": 1.75, + "grad_norm": 1.3194102048873901, + "learning_rate": 0.0001335896799801538, + "loss": 0.094, + "step": 4993 + }, + { + "epoch": 1.75, + "grad_norm": 1.7211250066757202, + "learning_rate": 0.00013355246837013147, + "loss": 0.1947, + "step": 4994 + }, + { + "epoch": 1.75, + "grad_norm": 2.0496625900268555, + "learning_rate": 0.00013351525676010914, + "loss": 0.2378, + "step": 4995 + }, + { + "epoch": 1.75, + "grad_norm": 1.6302729845046997, + "learning_rate": 0.00013347804515008682, + "loss": 0.2008, + "step": 4996 + }, + { + "epoch": 1.75, + "grad_norm": 4.008142948150635, + "learning_rate": 0.0001334408335400645, + "loss": 0.5542, + "step": 4997 + }, + { + "epoch": 1.75, + "grad_norm": 2.101016044616699, + "learning_rate": 0.00013340362193004214, + "loss": 0.1599, + "step": 4998 + }, + { + "epoch": 1.75, + "grad_norm": 0.9622274041175842, + "learning_rate": 0.00013336641032001985, + "loss": 0.0923, + "step": 4999 + }, + { + "epoch": 1.75, + "grad_norm": 4.595578670501709, + "learning_rate": 0.0001333291987099975, + "loss": 0.4642, + "step": 5000 + }, + { + "epoch": 1.75, + "eval_loss": 0.3618814945220947, + "eval_runtime": 51.7136, + "eval_samples_per_second": 41.923, + "eval_steps_per_second": 10.481, + "eval_wer": 0.3420688462203771, + "step": 5000 + }, + { + "epoch": 1.75, + "grad_norm": 2.2924017906188965, + "learning_rate": 0.00013329198709997517, + "loss": 0.3131, + "step": 5001 + }, + { + "epoch": 1.75, + "grad_norm": 1.4063929319381714, + "learning_rate": 0.00013325477548995285, + "loss": 0.1032, + "step": 5002 + }, + { + "epoch": 1.75, + "grad_norm": 1.765910267829895, + "learning_rate": 0.00013321756387993052, + "loss": 0.2847, + "step": 5003 + }, + { + "epoch": 1.75, + "grad_norm": 2.635676383972168, + "learning_rate": 0.0001331803522699082, + "loss": 0.3925, + "step": 5004 + }, + { + "epoch": 1.75, + "grad_norm": 2.2191758155822754, + "learning_rate": 0.00013314314065988587, + "loss": 0.5075, + "step": 5005 + }, + { + "epoch": 1.75, + "grad_norm": 1.3144760131835938, + "learning_rate": 0.00013310592904986355, + "loss": 0.1606, + "step": 5006 + }, + { + "epoch": 1.75, + "grad_norm": 2.2465732097625732, + "learning_rate": 0.00013306871743984123, + "loss": 0.3372, + "step": 5007 + }, + { + "epoch": 1.75, + "grad_norm": 1.2884074449539185, + "learning_rate": 0.0001330315058298189, + "loss": 0.1578, + "step": 5008 + }, + { + "epoch": 1.76, + "grad_norm": 1.7158373594284058, + "learning_rate": 0.00013299429421979658, + "loss": 0.2009, + "step": 5009 + }, + { + "epoch": 1.76, + "grad_norm": 1.9673197269439697, + "learning_rate": 0.00013295708260977423, + "loss": 0.2124, + "step": 5010 + }, + { + "epoch": 1.76, + "grad_norm": 2.0683186054229736, + "learning_rate": 0.0001329198709997519, + "loss": 0.445, + "step": 5011 + }, + { + "epoch": 1.76, + "grad_norm": 2.1322107315063477, + "learning_rate": 0.00013288265938972958, + "loss": 0.1698, + "step": 5012 + }, + { + "epoch": 1.76, + "grad_norm": 1.6621609926223755, + "learning_rate": 0.00013284544777970725, + "loss": 0.1697, + "step": 5013 + }, + { + "epoch": 1.76, + "grad_norm": 2.388695240020752, + "learning_rate": 0.00013280823616968493, + "loss": 0.291, + "step": 5014 + }, + { + "epoch": 1.76, + "grad_norm": 1.7287172079086304, + "learning_rate": 0.0001327710245596626, + "loss": 0.2866, + "step": 5015 + }, + { + "epoch": 1.76, + "grad_norm": 2.0600686073303223, + "learning_rate": 0.00013273381294964028, + "loss": 0.2617, + "step": 5016 + }, + { + "epoch": 1.76, + "grad_norm": 3.1544766426086426, + "learning_rate": 0.00013269660133961796, + "loss": 0.3467, + "step": 5017 + }, + { + "epoch": 1.76, + "grad_norm": 3.4372010231018066, + "learning_rate": 0.00013265938972959563, + "loss": 0.3877, + "step": 5018 + }, + { + "epoch": 1.76, + "grad_norm": 2.706695079803467, + "learning_rate": 0.00013262217811957328, + "loss": 0.3513, + "step": 5019 + }, + { + "epoch": 1.76, + "grad_norm": 4.3857927322387695, + "learning_rate": 0.00013258496650955096, + "loss": 1.5684, + "step": 5020 + }, + { + "epoch": 1.76, + "grad_norm": 2.2888591289520264, + "learning_rate": 0.00013254775489952863, + "loss": 0.2338, + "step": 5021 + }, + { + "epoch": 1.76, + "grad_norm": 2.9611387252807617, + "learning_rate": 0.0001325105432895063, + "loss": 0.2578, + "step": 5022 + }, + { + "epoch": 1.76, + "grad_norm": 2.1191227436065674, + "learning_rate": 0.00013247333167948398, + "loss": 0.1561, + "step": 5023 + }, + { + "epoch": 1.76, + "grad_norm": 2.088348865509033, + "learning_rate": 0.00013243612006946166, + "loss": 0.1646, + "step": 5024 + }, + { + "epoch": 1.76, + "grad_norm": 1.8998678922653198, + "learning_rate": 0.00013239890845943934, + "loss": 0.2291, + "step": 5025 + }, + { + "epoch": 1.76, + "grad_norm": 2.2618954181671143, + "learning_rate": 0.000132361696849417, + "loss": 0.0624, + "step": 5026 + }, + { + "epoch": 1.76, + "grad_norm": 6.207477569580078, + "learning_rate": 0.0001323244852393947, + "loss": 0.563, + "step": 5027 + }, + { + "epoch": 1.76, + "grad_norm": 21.43695640563965, + "learning_rate": 0.00013228727362937234, + "loss": 2.0059, + "step": 5028 + }, + { + "epoch": 1.76, + "grad_norm": NaN, + "learning_rate": 0.00013228727362937234, + "loss": 1.7438, + "step": 5029 + }, + { + "epoch": 1.76, + "grad_norm": 5383.18994140625, + "learning_rate": 0.00013225006201935004, + "loss": 16.872, + "step": 5030 + }, + { + "epoch": 1.76, + "grad_norm": 23.968223571777344, + "learning_rate": 0.00013221285040932772, + "loss": 4.1902, + "step": 5031 + }, + { + "epoch": 1.76, + "grad_norm": 18.13584327697754, + "learning_rate": 0.00013217563879930536, + "loss": 3.597, + "step": 5032 + }, + { + "epoch": 1.76, + "grad_norm": 27.954679489135742, + "learning_rate": 0.00013213842718928304, + "loss": 3.8674, + "step": 5033 + }, + { + "epoch": 1.76, + "grad_norm": 12.132146835327148, + "learning_rate": 0.00013210121557926072, + "loss": 3.337, + "step": 5034 + }, + { + "epoch": 1.76, + "grad_norm": 9.336516380310059, + "learning_rate": 0.0001320640039692384, + "loss": 3.0708, + "step": 5035 + }, + { + "epoch": 1.76, + "grad_norm": 14.99378776550293, + "learning_rate": 0.00013202679235921607, + "loss": 3.4457, + "step": 5036 + }, + { + "epoch": 1.76, + "grad_norm": 16.54006004333496, + "learning_rate": 0.00013198958074919374, + "loss": 3.4698, + "step": 5037 + }, + { + "epoch": 1.77, + "grad_norm": 5.521266460418701, + "learning_rate": 0.0001319523691391714, + "loss": 3.0527, + "step": 5038 + }, + { + "epoch": 1.77, + "grad_norm": 13.792803764343262, + "learning_rate": 0.0001319151575291491, + "loss": 3.1825, + "step": 5039 + }, + { + "epoch": 1.77, + "grad_norm": 10.1255464553833, + "learning_rate": 0.00013187794591912677, + "loss": 3.3301, + "step": 5040 + }, + { + "epoch": 1.77, + "grad_norm": 6.728694915771484, + "learning_rate": 0.00013184073430910442, + "loss": 3.4529, + "step": 5041 + }, + { + "epoch": 1.77, + "grad_norm": 13.570353507995605, + "learning_rate": 0.0001318035226990821, + "loss": 3.1857, + "step": 5042 + }, + { + "epoch": 1.77, + "grad_norm": 9.944263458251953, + "learning_rate": 0.00013176631108905977, + "loss": 3.0247, + "step": 5043 + }, + { + "epoch": 1.77, + "grad_norm": 7.628637790679932, + "learning_rate": 0.00013172909947903745, + "loss": 3.0557, + "step": 5044 + }, + { + "epoch": 1.77, + "grad_norm": 8.50871753692627, + "learning_rate": 0.00013169188786901512, + "loss": 2.9197, + "step": 5045 + }, + { + "epoch": 1.77, + "grad_norm": 3.350416421890259, + "learning_rate": 0.0001316546762589928, + "loss": 2.7846, + "step": 5046 + }, + { + "epoch": 1.77, + "grad_norm": 8.267583847045898, + "learning_rate": 0.00013161746464897047, + "loss": 2.9316, + "step": 5047 + }, + { + "epoch": 1.77, + "grad_norm": 10.566850662231445, + "learning_rate": 0.00013158025303894815, + "loss": 3.5342, + "step": 5048 + }, + { + "epoch": 1.77, + "grad_norm": 7.871520042419434, + "learning_rate": 0.00013154304142892583, + "loss": 3.2533, + "step": 5049 + }, + { + "epoch": 1.77, + "grad_norm": 14.895259857177734, + "learning_rate": 0.00013150582981890347, + "loss": 3.3095, + "step": 5050 + }, + { + "epoch": 1.77, + "grad_norm": 6.724153995513916, + "learning_rate": 0.00013146861820888115, + "loss": 2.804, + "step": 5051 + }, + { + "epoch": 1.77, + "grad_norm": 10.182531356811523, + "learning_rate": 0.00013143140659885885, + "loss": 2.9485, + "step": 5052 + }, + { + "epoch": 1.77, + "grad_norm": 12.471945762634277, + "learning_rate": 0.0001313941949888365, + "loss": 3.0598, + "step": 5053 + }, + { + "epoch": 1.77, + "grad_norm": 4.7943267822265625, + "learning_rate": 0.00013135698337881418, + "loss": 2.6582, + "step": 5054 + }, + { + "epoch": 1.77, + "grad_norm": 3.8134336471557617, + "learning_rate": 0.00013131977176879185, + "loss": 3.2571, + "step": 5055 + }, + { + "epoch": 1.77, + "grad_norm": 2.2540090084075928, + "learning_rate": 0.00013128256015876953, + "loss": 3.1262, + "step": 5056 + }, + { + "epoch": 1.77, + "grad_norm": 3.4563486576080322, + "learning_rate": 0.0001312453485487472, + "loss": 2.8854, + "step": 5057 + }, + { + "epoch": 1.77, + "grad_norm": 4.2413249015808105, + "learning_rate": 0.00013120813693872488, + "loss": 2.9584, + "step": 5058 + }, + { + "epoch": 1.77, + "grad_norm": 12.829059600830078, + "learning_rate": 0.00013117092532870253, + "loss": 3.1434, + "step": 5059 + }, + { + "epoch": 1.77, + "grad_norm": 2.502635955810547, + "learning_rate": 0.0001311337137186802, + "loss": 3.0336, + "step": 5060 + }, + { + "epoch": 1.77, + "grad_norm": 2.319552421569824, + "learning_rate": 0.0001310965021086579, + "loss": 2.9348, + "step": 5061 + }, + { + "epoch": 1.77, + "grad_norm": 6.4483866691589355, + "learning_rate": 0.00013105929049863556, + "loss": 2.7653, + "step": 5062 + }, + { + "epoch": 1.77, + "grad_norm": 2.6617424488067627, + "learning_rate": 0.00013102207888861323, + "loss": 2.9234, + "step": 5063 + }, + { + "epoch": 1.77, + "grad_norm": 4.615228652954102, + "learning_rate": 0.0001309848672785909, + "loss": 2.7653, + "step": 5064 + }, + { + "epoch": 1.77, + "grad_norm": 6.482029438018799, + "learning_rate": 0.00013094765566856858, + "loss": 2.7954, + "step": 5065 + }, + { + "epoch": 1.78, + "grad_norm": 7.622623920440674, + "learning_rate": 0.00013091044405854626, + "loss": 3.0746, + "step": 5066 + }, + { + "epoch": 1.78, + "grad_norm": 8.83546257019043, + "learning_rate": 0.00013087323244852394, + "loss": 2.9216, + "step": 5067 + }, + { + "epoch": 1.78, + "grad_norm": 9.417671203613281, + "learning_rate": 0.0001308360208385016, + "loss": 2.9592, + "step": 5068 + }, + { + "epoch": 1.78, + "grad_norm": 3.7668020725250244, + "learning_rate": 0.00013079880922847926, + "loss": 2.7894, + "step": 5069 + }, + { + "epoch": 1.78, + "grad_norm": 8.39608097076416, + "learning_rate": 0.00013076159761845696, + "loss": 2.7767, + "step": 5070 + }, + { + "epoch": 1.78, + "grad_norm": 3.686868190765381, + "learning_rate": 0.0001307243860084346, + "loss": 3.019, + "step": 5071 + }, + { + "epoch": 1.78, + "grad_norm": 10.237363815307617, + "learning_rate": 0.0001306871743984123, + "loss": 3.0566, + "step": 5072 + }, + { + "epoch": 1.78, + "grad_norm": 4.589514255523682, + "learning_rate": 0.00013064996278838996, + "loss": 3.2709, + "step": 5073 + }, + { + "epoch": 1.78, + "grad_norm": 4.9313225746154785, + "learning_rate": 0.00013061275117836764, + "loss": 2.6611, + "step": 5074 + }, + { + "epoch": 1.78, + "grad_norm": 4.68777322769165, + "learning_rate": 0.00013057553956834531, + "loss": 2.9001, + "step": 5075 + }, + { + "epoch": 1.78, + "grad_norm": 3.2555599212646484, + "learning_rate": 0.000130538327958323, + "loss": 2.8576, + "step": 5076 + }, + { + "epoch": 1.78, + "grad_norm": 3.0508415699005127, + "learning_rate": 0.00013050111634830067, + "loss": 2.7034, + "step": 5077 + }, + { + "epoch": 1.78, + "grad_norm": 3.908602237701416, + "learning_rate": 0.00013046390473827832, + "loss": 2.8313, + "step": 5078 + }, + { + "epoch": 1.78, + "grad_norm": 2.733027696609497, + "learning_rate": 0.00013042669312825602, + "loss": 2.3422, + "step": 5079 + }, + { + "epoch": 1.78, + "grad_norm": 4.607684135437012, + "learning_rate": 0.00013038948151823367, + "loss": 3.3491, + "step": 5080 + }, + { + "epoch": 1.78, + "grad_norm": 3.3591644763946533, + "learning_rate": 0.00013035226990821134, + "loss": 2.9754, + "step": 5081 + }, + { + "epoch": 1.78, + "grad_norm": 3.9644618034362793, + "learning_rate": 0.00013031505829818902, + "loss": 3.053, + "step": 5082 + }, + { + "epoch": 1.78, + "grad_norm": 8.168998718261719, + "learning_rate": 0.0001302778466881667, + "loss": 2.9517, + "step": 5083 + }, + { + "epoch": 1.78, + "grad_norm": 3.216783046722412, + "learning_rate": 0.00013024063507814437, + "loss": 3.0665, + "step": 5084 + }, + { + "epoch": 1.78, + "grad_norm": 2.6006112098693848, + "learning_rate": 0.00013020342346812205, + "loss": 2.8623, + "step": 5085 + }, + { + "epoch": 1.78, + "grad_norm": 2.3029751777648926, + "learning_rate": 0.00013016621185809972, + "loss": 2.8781, + "step": 5086 + }, + { + "epoch": 1.78, + "grad_norm": 3.3542487621307373, + "learning_rate": 0.00013012900024807737, + "loss": 2.7165, + "step": 5087 + }, + { + "epoch": 1.78, + "grad_norm": 3.487226724624634, + "learning_rate": 0.00013009178863805507, + "loss": 2.8395, + "step": 5088 + }, + { + "epoch": 1.78, + "grad_norm": 10.391279220581055, + "learning_rate": 0.00013005457702803275, + "loss": 3.1138, + "step": 5089 + }, + { + "epoch": 1.78, + "grad_norm": 9.242776870727539, + "learning_rate": 0.0001300173654180104, + "loss": 3.0096, + "step": 5090 + }, + { + "epoch": 1.78, + "grad_norm": 4.211190223693848, + "learning_rate": 0.00012998015380798807, + "loss": 2.8559, + "step": 5091 + }, + { + "epoch": 1.78, + "grad_norm": 2.6914520263671875, + "learning_rate": 0.00012994294219796575, + "loss": 2.9511, + "step": 5092 + }, + { + "epoch": 1.78, + "grad_norm": 4.44136381149292, + "learning_rate": 0.00012990573058794342, + "loss": 2.7139, + "step": 5093 + }, + { + "epoch": 1.78, + "grad_norm": 3.7667293548583984, + "learning_rate": 0.0001298685189779211, + "loss": 2.7297, + "step": 5094 + }, + { + "epoch": 1.79, + "grad_norm": 3.589275360107422, + "learning_rate": 0.00012983130736789878, + "loss": 2.8284, + "step": 5095 + }, + { + "epoch": 1.79, + "grad_norm": 4.216124534606934, + "learning_rate": 0.00012979409575787645, + "loss": 2.9066, + "step": 5096 + }, + { + "epoch": 1.79, + "grad_norm": 10.489272117614746, + "learning_rate": 0.00012975688414785413, + "loss": 2.9559, + "step": 5097 + }, + { + "epoch": 1.79, + "grad_norm": 11.246708869934082, + "learning_rate": 0.0001297196725378318, + "loss": 2.9586, + "step": 5098 + }, + { + "epoch": 1.79, + "grad_norm": 10.041492462158203, + "learning_rate": 0.00012968246092780945, + "loss": 2.736, + "step": 5099 + }, + { + "epoch": 1.79, + "grad_norm": 5.19471549987793, + "learning_rate": 0.00012964524931778713, + "loss": 2.646, + "step": 5100 + }, + { + "epoch": 1.79, + "eval_loss": 2.873821258544922, + "eval_runtime": 51.453, + "eval_samples_per_second": 42.136, + "eval_steps_per_second": 10.534, + "eval_wer": 1.0, + "step": 5100 + }, + { + "epoch": 1.79, + "grad_norm": 9.86611557006836, + "learning_rate": 0.0001296080377077648, + "loss": 2.7976, + "step": 5101 + }, + { + "epoch": 1.79, + "grad_norm": 10.880091667175293, + "learning_rate": 0.00012957082609774248, + "loss": 3.1771, + "step": 5102 + }, + { + "epoch": 1.79, + "grad_norm": 6.943602085113525, + "learning_rate": 0.00012953361448772016, + "loss": 2.7736, + "step": 5103 + }, + { + "epoch": 1.79, + "grad_norm": 7.127685070037842, + "learning_rate": 0.00012949640287769783, + "loss": 3.0676, + "step": 5104 + }, + { + "epoch": 1.79, + "grad_norm": 18.559324264526367, + "learning_rate": 0.0001294591912676755, + "loss": 3.2199, + "step": 5105 + }, + { + "epoch": 1.79, + "grad_norm": 2.158505916595459, + "learning_rate": 0.00012942197965765318, + "loss": 2.756, + "step": 5106 + }, + { + "epoch": 1.79, + "grad_norm": 5.563910484313965, + "learning_rate": 0.00012938476804763086, + "loss": 2.8269, + "step": 5107 + }, + { + "epoch": 1.79, + "grad_norm": 7.628482818603516, + "learning_rate": 0.0001293475564376085, + "loss": 2.9859, + "step": 5108 + }, + { + "epoch": 1.79, + "grad_norm": 2.8963091373443604, + "learning_rate": 0.00012931034482758618, + "loss": 2.8255, + "step": 5109 + }, + { + "epoch": 1.79, + "grad_norm": 3.270557165145874, + "learning_rate": 0.00012927313321756389, + "loss": 2.7845, + "step": 5110 + }, + { + "epoch": 1.79, + "grad_norm": 6.280425548553467, + "learning_rate": 0.00012923592160754154, + "loss": 2.6916, + "step": 5111 + }, + { + "epoch": 1.79, + "grad_norm": 6.498332977294922, + "learning_rate": 0.0001291987099975192, + "loss": 2.8811, + "step": 5112 + }, + { + "epoch": 1.79, + "grad_norm": 5.853482723236084, + "learning_rate": 0.0001291614983874969, + "loss": 2.9358, + "step": 5113 + }, + { + "epoch": 1.79, + "grad_norm": 6.113433361053467, + "learning_rate": 0.00012912428677747456, + "loss": 3.4643, + "step": 5114 + }, + { + "epoch": 1.79, + "grad_norm": 4.190027236938477, + "learning_rate": 0.00012908707516745224, + "loss": 2.7947, + "step": 5115 + }, + { + "epoch": 1.79, + "grad_norm": 7.508610248565674, + "learning_rate": 0.00012904986355742991, + "loss": 2.9305, + "step": 5116 + }, + { + "epoch": 1.79, + "grad_norm": 7.606897830963135, + "learning_rate": 0.00012901265194740756, + "loss": 2.543, + "step": 5117 + }, + { + "epoch": 1.79, + "grad_norm": 3.0015764236450195, + "learning_rate": 0.00012897544033738527, + "loss": 2.9905, + "step": 5118 + }, + { + "epoch": 1.79, + "grad_norm": 3.0580461025238037, + "learning_rate": 0.00012893822872736294, + "loss": 3.1613, + "step": 5119 + }, + { + "epoch": 1.79, + "grad_norm": 3.363950729370117, + "learning_rate": 0.0001289010171173406, + "loss": 2.8196, + "step": 5120 + }, + { + "epoch": 1.79, + "grad_norm": 7.74603796005249, + "learning_rate": 0.00012886380550731827, + "loss": 2.9443, + "step": 5121 + }, + { + "epoch": 1.79, + "grad_norm": 5.362524032592773, + "learning_rate": 0.00012882659389729594, + "loss": 2.7295, + "step": 5122 + }, + { + "epoch": 1.8, + "grad_norm": 12.855659484863281, + "learning_rate": 0.00012878938228727362, + "loss": 3.0975, + "step": 5123 + }, + { + "epoch": 1.8, + "grad_norm": 8.280109405517578, + "learning_rate": 0.0001287521706772513, + "loss": 3.0533, + "step": 5124 + }, + { + "epoch": 1.8, + "grad_norm": 4.31693172454834, + "learning_rate": 0.00012871495906722897, + "loss": 2.5704, + "step": 5125 + }, + { + "epoch": 1.8, + "grad_norm": 3.7444570064544678, + "learning_rate": 0.00012867774745720664, + "loss": 2.609, + "step": 5126 + }, + { + "epoch": 1.8, + "grad_norm": 7.781627178192139, + "learning_rate": 0.00012864053584718432, + "loss": 2.6856, + "step": 5127 + }, + { + "epoch": 1.8, + "grad_norm": 5.853876113891602, + "learning_rate": 0.000128603324237162, + "loss": 2.6211, + "step": 5128 + }, + { + "epoch": 1.8, + "grad_norm": 4.829381942749023, + "learning_rate": 0.00012856611262713965, + "loss": 2.6558, + "step": 5129 + }, + { + "epoch": 1.8, + "grad_norm": 7.683784484863281, + "learning_rate": 0.00012852890101711732, + "loss": 3.1132, + "step": 5130 + }, + { + "epoch": 1.8, + "grad_norm": 7.247925758361816, + "learning_rate": 0.000128491689407095, + "loss": 2.9654, + "step": 5131 + }, + { + "epoch": 1.8, + "grad_norm": 7.606195449829102, + "learning_rate": 0.00012845447779707267, + "loss": 3.026, + "step": 5132 + }, + { + "epoch": 1.8, + "grad_norm": 7.540022373199463, + "learning_rate": 0.00012841726618705035, + "loss": 2.9083, + "step": 5133 + }, + { + "epoch": 1.8, + "grad_norm": 8.643896102905273, + "learning_rate": 0.00012838005457702802, + "loss": 3.2663, + "step": 5134 + }, + { + "epoch": 1.8, + "grad_norm": 5.016622543334961, + "learning_rate": 0.0001283428429670057, + "loss": 3.1068, + "step": 5135 + }, + { + "epoch": 1.8, + "grad_norm": 3.617004871368408, + "learning_rate": 0.00012830563135698338, + "loss": 2.6929, + "step": 5136 + }, + { + "epoch": 1.8, + "grad_norm": 2.4919373989105225, + "learning_rate": 0.00012826841974696105, + "loss": 2.7526, + "step": 5137 + }, + { + "epoch": 1.8, + "grad_norm": 2.7996411323547363, + "learning_rate": 0.0001282312081369387, + "loss": 3.0863, + "step": 5138 + }, + { + "epoch": 1.8, + "grad_norm": 14.32989501953125, + "learning_rate": 0.00012819399652691638, + "loss": 3.3443, + "step": 5139 + }, + { + "epoch": 1.8, + "grad_norm": 13.708571434020996, + "learning_rate": 0.00012815678491689408, + "loss": 3.1483, + "step": 5140 + }, + { + "epoch": 1.8, + "grad_norm": 10.349092483520508, + "learning_rate": 0.00012811957330687173, + "loss": 3.1138, + "step": 5141 + }, + { + "epoch": 1.8, + "grad_norm": 2.638105630874634, + "learning_rate": 0.0001280823616968494, + "loss": 2.9172, + "step": 5142 + }, + { + "epoch": 1.8, + "grad_norm": 3.8446462154388428, + "learning_rate": 0.00012804515008682708, + "loss": 3.0105, + "step": 5143 + }, + { + "epoch": 1.8, + "grad_norm": 2.970163106918335, + "learning_rate": 0.00012800793847680475, + "loss": 2.6563, + "step": 5144 + }, + { + "epoch": 1.8, + "grad_norm": 3.778756618499756, + "learning_rate": 0.00012797072686678243, + "loss": 2.5726, + "step": 5145 + }, + { + "epoch": 1.8, + "grad_norm": 4.075989723205566, + "learning_rate": 0.0001279335152567601, + "loss": 2.6662, + "step": 5146 + }, + { + "epoch": 1.8, + "grad_norm": 5.896014213562012, + "learning_rate": 0.00012789630364673778, + "loss": 2.9875, + "step": 5147 + }, + { + "epoch": 1.8, + "grad_norm": 4.679042816162109, + "learning_rate": 0.00012785909203671543, + "loss": 2.9295, + "step": 5148 + }, + { + "epoch": 1.8, + "grad_norm": 4.0041422843933105, + "learning_rate": 0.00012782188042669313, + "loss": 2.8009, + "step": 5149 + }, + { + "epoch": 1.8, + "grad_norm": 5.066998481750488, + "learning_rate": 0.00012778466881667078, + "loss": 2.7084, + "step": 5150 + }, + { + "epoch": 1.8, + "grad_norm": 4.032702445983887, + "learning_rate": 0.00012774745720664846, + "loss": 2.4773, + "step": 5151 + }, + { + "epoch": 1.81, + "grad_norm": 5.38712215423584, + "learning_rate": 0.00012771024559662613, + "loss": 2.7513, + "step": 5152 + }, + { + "epoch": 1.81, + "grad_norm": 9.350571632385254, + "learning_rate": 0.0001276730339866038, + "loss": 2.756, + "step": 5153 + }, + { + "epoch": 1.81, + "grad_norm": 3.612746238708496, + "learning_rate": 0.00012763582237658149, + "loss": 3.0026, + "step": 5154 + }, + { + "epoch": 1.81, + "grad_norm": 5.044943809509277, + "learning_rate": 0.00012759861076655916, + "loss": 3.0165, + "step": 5155 + }, + { + "epoch": 1.81, + "grad_norm": 5.973738193511963, + "learning_rate": 0.00012756139915653684, + "loss": 3.0884, + "step": 5156 + }, + { + "epoch": 1.81, + "grad_norm": 5.725467681884766, + "learning_rate": 0.00012752418754651449, + "loss": 2.9013, + "step": 5157 + }, + { + "epoch": 1.81, + "grad_norm": 2.828871488571167, + "learning_rate": 0.0001274869759364922, + "loss": 2.7721, + "step": 5158 + }, + { + "epoch": 1.81, + "grad_norm": 4.192399978637695, + "learning_rate": 0.00012744976432646984, + "loss": 2.9885, + "step": 5159 + }, + { + "epoch": 1.81, + "grad_norm": 2.731215476989746, + "learning_rate": 0.0001274125527164475, + "loss": 2.9879, + "step": 5160 + }, + { + "epoch": 1.81, + "grad_norm": 6.002933025360107, + "learning_rate": 0.0001273753411064252, + "loss": 2.9442, + "step": 5161 + }, + { + "epoch": 1.81, + "grad_norm": 2.4598872661590576, + "learning_rate": 0.00012733812949640286, + "loss": 2.7012, + "step": 5162 + }, + { + "epoch": 1.81, + "grad_norm": 6.382826328277588, + "learning_rate": 0.00012730091788638054, + "loss": 2.9316, + "step": 5163 + }, + { + "epoch": 1.81, + "grad_norm": 3.4656591415405273, + "learning_rate": 0.00012726370627635822, + "loss": 2.6503, + "step": 5164 + }, + { + "epoch": 1.81, + "grad_norm": 9.048042297363281, + "learning_rate": 0.0001272264946663359, + "loss": 2.9199, + "step": 5165 + }, + { + "epoch": 1.81, + "grad_norm": 4.229365825653076, + "learning_rate": 0.00012718928305631354, + "loss": 2.8474, + "step": 5166 + }, + { + "epoch": 1.81, + "grad_norm": 4.915844440460205, + "learning_rate": 0.00012715207144629124, + "loss": 2.8055, + "step": 5167 + }, + { + "epoch": 1.81, + "grad_norm": 4.766310214996338, + "learning_rate": 0.00012711485983626892, + "loss": 2.7214, + "step": 5168 + }, + { + "epoch": 1.81, + "grad_norm": 3.777912139892578, + "learning_rate": 0.00012707764822624657, + "loss": 2.8381, + "step": 5169 + }, + { + "epoch": 1.81, + "grad_norm": 3.8435094356536865, + "learning_rate": 0.00012704043661622424, + "loss": 2.7613, + "step": 5170 + }, + { + "epoch": 1.81, + "grad_norm": 3.1860551834106445, + "learning_rate": 0.00012700322500620192, + "loss": 2.8166, + "step": 5171 + }, + { + "epoch": 1.81, + "grad_norm": 3.1261537075042725, + "learning_rate": 0.0001269660133961796, + "loss": 2.7175, + "step": 5172 + }, + { + "epoch": 1.81, + "grad_norm": 13.234203338623047, + "learning_rate": 0.00012692880178615727, + "loss": 2.6832, + "step": 5173 + }, + { + "epoch": 1.81, + "grad_norm": 4.1125264167785645, + "learning_rate": 0.00012689159017613495, + "loss": 3.0747, + "step": 5174 + }, + { + "epoch": 1.81, + "grad_norm": 6.321409225463867, + "learning_rate": 0.0001268543785661126, + "loss": 2.6672, + "step": 5175 + }, + { + "epoch": 1.81, + "grad_norm": 5.741054058074951, + "learning_rate": 0.0001268171669560903, + "loss": 2.5869, + "step": 5176 + }, + { + "epoch": 1.81, + "grad_norm": 7.433377265930176, + "learning_rate": 0.00012677995534606797, + "loss": 2.5097, + "step": 5177 + }, + { + "epoch": 1.81, + "grad_norm": 10.795042991638184, + "learning_rate": 0.00012674274373604562, + "loss": 2.767, + "step": 5178 + }, + { + "epoch": 1.81, + "grad_norm": 3.698216199874878, + "learning_rate": 0.0001267055321260233, + "loss": 2.578, + "step": 5179 + }, + { + "epoch": 1.81, + "grad_norm": 5.1897149085998535, + "learning_rate": 0.00012666832051600098, + "loss": 3.1661, + "step": 5180 + }, + { + "epoch": 1.82, + "grad_norm": 13.520951271057129, + "learning_rate": 0.00012663110890597865, + "loss": 3.0335, + "step": 5181 + }, + { + "epoch": 1.82, + "grad_norm": 3.7349276542663574, + "learning_rate": 0.00012659389729595633, + "loss": 2.9884, + "step": 5182 + }, + { + "epoch": 1.82, + "grad_norm": 5.528043746948242, + "learning_rate": 0.000126556685685934, + "loss": 2.857, + "step": 5183 + }, + { + "epoch": 1.82, + "grad_norm": 12.178468704223633, + "learning_rate": 0.00012651947407591168, + "loss": 2.8438, + "step": 5184 + }, + { + "epoch": 1.82, + "grad_norm": 12.532594680786133, + "learning_rate": 0.00012648226246588935, + "loss": 2.8363, + "step": 5185 + }, + { + "epoch": 1.82, + "grad_norm": 12.38633918762207, + "learning_rate": 0.00012644505085586703, + "loss": 3.0286, + "step": 5186 + }, + { + "epoch": 1.82, + "grad_norm": 6.513081073760986, + "learning_rate": 0.00012640783924584468, + "loss": 2.9366, + "step": 5187 + }, + { + "epoch": 1.82, + "grad_norm": 6.644528865814209, + "learning_rate": 0.00012637062763582235, + "loss": 2.8487, + "step": 5188 + }, + { + "epoch": 1.82, + "grad_norm": 3.157069683074951, + "learning_rate": 0.00012633341602580006, + "loss": 2.9607, + "step": 5189 + }, + { + "epoch": 1.82, + "grad_norm": 4.591786861419678, + "learning_rate": 0.0001262962044157777, + "loss": 2.6186, + "step": 5190 + }, + { + "epoch": 1.82, + "grad_norm": 3.7421250343322754, + "learning_rate": 0.00012625899280575538, + "loss": 2.8948, + "step": 5191 + }, + { + "epoch": 1.82, + "grad_norm": 6.926886558532715, + "learning_rate": 0.00012622178119573306, + "loss": 2.989, + "step": 5192 + }, + { + "epoch": 1.82, + "grad_norm": 18.810298919677734, + "learning_rate": 0.00012618456958571073, + "loss": 3.0692, + "step": 5193 + }, + { + "epoch": 1.82, + "grad_norm": 6.420024871826172, + "learning_rate": 0.0001261473579756884, + "loss": 2.7154, + "step": 5194 + }, + { + "epoch": 1.82, + "grad_norm": 17.48590087890625, + "learning_rate": 0.00012611014636566608, + "loss": 2.7142, + "step": 5195 + }, + { + "epoch": 1.82, + "grad_norm": 5.477769374847412, + "learning_rate": 0.00012607293475564373, + "loss": 2.8943, + "step": 5196 + }, + { + "epoch": 1.82, + "grad_norm": 3.2958991527557373, + "learning_rate": 0.00012603572314562144, + "loss": 3.0727, + "step": 5197 + }, + { + "epoch": 1.82, + "grad_norm": 2.588256359100342, + "learning_rate": 0.0001259985115355991, + "loss": 2.6309, + "step": 5198 + }, + { + "epoch": 1.82, + "grad_norm": 4.680807113647461, + "learning_rate": 0.00012596129992557676, + "loss": 2.4823, + "step": 5199 + }, + { + "epoch": 1.82, + "grad_norm": 8.458367347717285, + "learning_rate": 0.00012592408831555444, + "loss": 2.6377, + "step": 5200 + }, + { + "epoch": 1.82, + "eval_loss": 2.794273853302002, + "eval_runtime": 51.4561, + "eval_samples_per_second": 42.133, + "eval_steps_per_second": 10.533, + "eval_wer": 1.0, + "step": 5200 + } + ], + "logging_steps": 1.0, + "max_steps": 8562, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 400, + "total_flos": 2.1187209341970678e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}