|
{
  "best_metric": 11.921277046203613,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 2.008298755186722,
  "eval_steps": 25,
  "global_step": 121,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016597510373443983,
      "grad_norm": 0.008619977161288261,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 11.9327,
      "step": 1
    },
    {
      "epoch": 0.016597510373443983,
      "eval_loss": 11.933255195617676,
      "eval_runtime": 0.0685,
      "eval_samples_per_second": 729.868,
      "eval_steps_per_second": 29.195,
      "step": 1
    },
    {
      "epoch": 0.03319502074688797,
      "grad_norm": 0.007508904207497835,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 11.9321,
      "step": 2
    },
    {
      "epoch": 0.04979253112033195,
      "grad_norm": 0.007870020344853401,
      "learning_rate": 8.999999999999999e-05,
      "loss": 11.9325,
      "step": 3
    },
    {
      "epoch": 0.06639004149377593,
      "grad_norm": 0.008674144744873047,
      "learning_rate": 0.00011999999999999999,
      "loss": 11.9323,
      "step": 4
    },
    {
      "epoch": 0.08298755186721991,
      "grad_norm": 0.00782778114080429,
      "learning_rate": 0.00015,
      "loss": 11.9326,
      "step": 5
    },
    {
      "epoch": 0.0995850622406639,
      "grad_norm": 0.007341116201132536,
      "learning_rate": 0.00017999999999999998,
      "loss": 11.9326,
      "step": 6
    },
    {
      "epoch": 0.11618257261410789,
      "grad_norm": 0.00769679993391037,
      "learning_rate": 0.00020999999999999998,
      "loss": 11.9325,
      "step": 7
    },
    {
      "epoch": 0.13278008298755187,
      "grad_norm": 0.008552188985049725,
      "learning_rate": 0.00023999999999999998,
      "loss": 11.9325,
      "step": 8
    },
    {
      "epoch": 0.14937759336099585,
      "grad_norm": 0.0071903131902217865,
      "learning_rate": 0.00027,
      "loss": 11.9329,
      "step": 9
    },
    {
      "epoch": 0.16597510373443983,
      "grad_norm": 0.006793375127017498,
      "learning_rate": 0.0003,
      "loss": 11.9332,
      "step": 10
    },
    {
      "epoch": 0.1825726141078838,
      "grad_norm": 0.006766727194190025,
      "learning_rate": 0.00029993992606774825,
      "loss": 11.9326,
      "step": 11
    },
    {
      "epoch": 0.1991701244813278,
      "grad_norm": 0.006954336538910866,
      "learning_rate": 0.00029975975238935744,
      "loss": 11.9319,
      "step": 12
    },
    {
      "epoch": 0.2157676348547718,
      "grad_norm": 0.008698610588908195,
      "learning_rate": 0.00029945962328137895,
      "loss": 11.9319,
      "step": 13
    },
    {
      "epoch": 0.23236514522821577,
      "grad_norm": 0.008457913063466549,
      "learning_rate": 0.0002990397791429554,
      "loss": 11.9322,
      "step": 14
    },
    {
      "epoch": 0.24896265560165975,
      "grad_norm": 0.008619188331067562,
      "learning_rate": 0.0002985005562632645,
      "loss": 11.9318,
      "step": 15
    },
    {
      "epoch": 0.26556016597510373,
      "grad_norm": 0.009035887196660042,
      "learning_rate": 0.00029784238655215626,
      "loss": 11.9322,
      "step": 16
    },
    {
      "epoch": 0.2821576763485477,
      "grad_norm": 0.010372682474553585,
      "learning_rate": 0.000297065797194199,
      "loss": 11.9315,
      "step": 17
    },
    {
      "epoch": 0.2987551867219917,
      "grad_norm": 0.009721986949443817,
      "learning_rate": 0.00029617141022641243,
      "loss": 11.932,
      "step": 18
    },
    {
      "epoch": 0.3153526970954357,
      "grad_norm": 0.011710022576153278,
      "learning_rate": 0.00029515994204002484,
      "loss": 11.9316,
      "step": 19
    },
    {
      "epoch": 0.33195020746887965,
      "grad_norm": 0.011379418894648552,
      "learning_rate": 0.00029403220280665337,
      "loss": 11.932,
      "step": 20
    },
    {
      "epoch": 0.34854771784232363,
      "grad_norm": 0.012264162302017212,
      "learning_rate": 0.0002927890958293689,
      "loss": 11.9319,
      "step": 21
    },
    {
      "epoch": 0.3651452282157676,
      "grad_norm": 0.012727621011435986,
      "learning_rate": 0.0002914316168191626,
      "loss": 11.9315,
      "step": 22
    },
    {
      "epoch": 0.3817427385892116,
      "grad_norm": 0.013410990126430988,
      "learning_rate": 0.0002899608530973956,
      "loss": 11.9308,
      "step": 23
    },
    {
      "epoch": 0.3983402489626556,
      "grad_norm": 0.014416859485208988,
      "learning_rate": 0.00028837798272487026,
      "loss": 11.932,
      "step": 24
    },
    {
      "epoch": 0.4149377593360996,
      "grad_norm": 0.01126072183251381,
      "learning_rate": 0.00028668427355822034,
      "loss": 11.9328,
      "step": 25
    },
    {
      "epoch": 0.4149377593360996,
      "eval_loss": 11.931723594665527,
      "eval_runtime": 0.0673,
      "eval_samples_per_second": 743.046,
      "eval_steps_per_second": 29.722,
      "step": 25
    },
    {
      "epoch": 0.4315352697095436,
      "grad_norm": 0.019695591181516647,
      "learning_rate": 0.0002848810822343755,
      "loss": 11.931,
      "step": 26
    },
    {
      "epoch": 0.44813278008298757,
      "grad_norm": 0.01740470714867115,
      "learning_rate": 0.00028296985308391476,
      "loss": 11.9307,
      "step": 27
    },
    {
      "epoch": 0.46473029045643155,
      "grad_norm": 0.021143503487110138,
      "learning_rate": 0.0002809521169741782,
      "loss": 11.9306,
      "step": 28
    },
    {
      "epoch": 0.48132780082987553,
      "grad_norm": 0.023337863385677338,
      "learning_rate": 0.0002788294900830639,
      "loss": 11.9307,
      "step": 29
    },
    {
      "epoch": 0.4979253112033195,
      "grad_norm": 0.026668556034564972,
      "learning_rate": 0.00027660367260449255,
      "loss": 11.9305,
      "step": 30
    },
    {
      "epoch": 0.5145228215767634,
      "grad_norm": 0.02838689088821411,
      "learning_rate": 0.0002742764473865763,
      "loss": 11.9296,
      "step": 31
    },
    {
      "epoch": 0.5311203319502075,
      "grad_norm": 0.027914777398109436,
      "learning_rate": 0.00027184967850358286,
      "loss": 11.9302,
      "step": 32
    },
    {
      "epoch": 0.5477178423236515,
      "grad_norm": 0.028979269787669182,
      "learning_rate": 0.0002693253097628385,
      "loss": 11.9297,
      "step": 33
    },
    {
      "epoch": 0.5643153526970954,
      "grad_norm": 0.03429955616593361,
      "learning_rate": 0.00026670536314776593,
      "loss": 11.9294,
      "step": 34
    },
    {
      "epoch": 0.5809128630705395,
      "grad_norm": 0.026081860065460205,
      "learning_rate": 0.00026399193719830457,
      "loss": 11.9296,
      "step": 35
    },
    {
      "epoch": 0.5975103734439834,
      "grad_norm": 0.026742985472083092,
      "learning_rate": 0.00026118720533001,
      "loss": 11.9289,
      "step": 36
    },
    {
      "epoch": 0.6141078838174274,
      "grad_norm": 0.03144499287009239,
      "learning_rate": 0.0002582934140931786,
      "loss": 11.929,
      "step": 37
    },
    {
      "epoch": 0.6307053941908713,
      "grad_norm": 0.04750339314341545,
      "learning_rate": 0.0002553128813733934,
      "loss": 11.9278,
      "step": 38
    },
    {
      "epoch": 0.6473029045643154,
      "grad_norm": 0.044398147612810135,
      "learning_rate": 0.0002522479945349299,
      "loss": 11.9275,
      "step": 39
    },
    {
      "epoch": 0.6639004149377593,
      "grad_norm": 0.04385744780302048,
      "learning_rate": 0.00024910120850851216,
      "loss": 11.9278,
      "step": 40
    },
    {
      "epoch": 0.6804979253112033,
      "grad_norm": 0.05074305832386017,
      "learning_rate": 0.00024587504382494774,
      "loss": 11.9262,
      "step": 41
    },
    {
      "epoch": 0.6970954356846473,
      "grad_norm": 0.047362346202135086,
      "learning_rate": 0.00024257208459621828,
      "loss": 11.9268,
      "step": 42
    },
    {
      "epoch": 0.7136929460580913,
      "grad_norm": 0.04583980143070221,
      "learning_rate": 0.00023919497644564298,
      "loss": 11.9258,
      "step": 43
    },
    {
      "epoch": 0.7302904564315352,
      "grad_norm": 0.049639176577329636,
      "learning_rate": 0.0002357464243887718,
      "loss": 11.9255,
      "step": 44
    },
    {
      "epoch": 0.7468879668049793,
      "grad_norm": 0.04268966615200043,
      "learning_rate": 0.00023222919066670647,
      "loss": 11.9256,
      "step": 45
    },
    {
      "epoch": 0.7634854771784232,
      "grad_norm": 0.04423046112060547,
      "learning_rate": 0.00022864609253358474,
      "loss": 11.9251,
      "step": 46
    },
    {
      "epoch": 0.7800829875518672,
      "grad_norm": 0.03876250982284546,
      "learning_rate": 0.000225,
      "loss": 11.9259,
      "step": 47
    },
    {
      "epoch": 0.7966804979253111,
      "grad_norm": 0.031338173896074295,
      "learning_rate": 0.00022129383353416347,
      "loss": 11.9259,
      "step": 48
    },
    {
      "epoch": 0.8132780082987552,
      "grad_norm": 0.031210312619805336,
      "learning_rate": 0.00021753056172265096,
      "loss": 11.9261,
      "step": 49
    },
    {
      "epoch": 0.8298755186721992,
      "grad_norm": 0.03140793740749359,
      "learning_rate": 0.00021371319889260717,
      "loss": 11.926,
      "step": 50
    },
    {
      "epoch": 0.8298755186721992,
      "eval_loss": 11.924413681030273,
      "eval_runtime": 0.0674,
      "eval_samples_per_second": 742.066,
      "eval_steps_per_second": 29.683,
      "step": 50
    },
    {
      "epoch": 0.8464730290456431,
      "grad_norm": 0.043788015842437744,
      "learning_rate": 0.00020984480269731242,
      "loss": 11.9232,
      "step": 51
    },
    {
      "epoch": 0.8630705394190872,
      "grad_norm": 0.03660878911614418,
      "learning_rate": 0.0002059284716670463,
      "loss": 11.9244,
      "step": 52
    },
    {
      "epoch": 0.8796680497925311,
      "grad_norm": 0.03714355081319809,
      "learning_rate": 0.00020196734272720854,
      "loss": 11.923,
      "step": 53
    },
    {
      "epoch": 0.8962655601659751,
      "grad_norm": 0.03057694435119629,
      "learning_rate": 0.00019796458868568678,
      "loss": 11.922,
      "step": 54
    },
    {
      "epoch": 0.9128630705394191,
      "grad_norm": 0.02990574575960636,
      "learning_rate": 0.00019392341569148252,
      "loss": 11.9218,
      "step": 55
    },
    {
      "epoch": 0.9294605809128631,
      "grad_norm": 0.025974757969379425,
      "learning_rate": 0.00018984706066663143,
      "loss": 11.9227,
      "step": 56
    },
    {
      "epoch": 0.946058091286307,
      "grad_norm": 0.02820601500570774,
      "learning_rate": 0.00018573878871347473,
      "loss": 11.9225,
      "step": 57
    },
    {
      "epoch": 0.9626556016597511,
      "grad_norm": 0.028666473925113678,
      "learning_rate": 0.00018160189049935892,
      "loss": 11.9233,
      "step": 58
    },
    {
      "epoch": 0.979253112033195,
      "grad_norm": 0.02322000451385975,
      "learning_rate": 0.00017743967962085798,
      "loss": 11.923,
      "step": 59
    },
    {
      "epoch": 0.995850622406639,
      "grad_norm": 0.028861528262495995,
      "learning_rate": 0.00017325548994962965,
      "loss": 11.924,
      "step": 60
    },
    {
      "epoch": 1.012448132780083,
      "grad_norm": 0.04327556490898132,
      "learning_rate": 0.0001690526729620318,
      "loss": 21.3788,
      "step": 61
    },
    {
      "epoch": 1.0290456431535269,
      "grad_norm": 0.02456553652882576,
      "learning_rate": 0.00016483459505463747,
      "loss": 11.6539,
      "step": 62
    },
    {
      "epoch": 1.045643153526971,
      "grad_norm": 0.02364276722073555,
      "learning_rate": 0.00016060463484779918,
      "loss": 11.83,
      "step": 63
    },
    {
      "epoch": 1.062240663900415,
      "grad_norm": 0.02070525474846363,
      "learning_rate": 0.00015636618047942222,
      "loss": 11.9144,
      "step": 64
    },
    {
      "epoch": 1.0788381742738589,
      "grad_norm": 0.0210164375603199,
      "learning_rate": 0.00015212262689111433,
      "loss": 11.9249,
      "step": 65
    },
    {
      "epoch": 1.095435684647303,
      "grad_norm": 0.02003309689462185,
      "learning_rate": 0.0001478773731088857,
      "loss": 11.9061,
      "step": 66
    },
    {
      "epoch": 1.112033195020747,
      "grad_norm": 0.01854197308421135,
      "learning_rate": 0.00014363381952057778,
      "loss": 11.938,
      "step": 67
    },
    {
      "epoch": 1.1286307053941909,
      "grad_norm": 0.022063063457608223,
      "learning_rate": 0.0001393953651522008,
      "loss": 11.9017,
      "step": 68
    },
    {
      "epoch": 1.1452282157676348,
      "grad_norm": 0.017270218580961227,
      "learning_rate": 0.00013516540494536253,
      "loss": 11.978,
      "step": 69
    },
    {
      "epoch": 1.161825726141079,
      "grad_norm": 0.01767859421670437,
      "learning_rate": 0.00013094732703796818,
      "loss": 11.9076,
      "step": 70
    },
    {
      "epoch": 1.1784232365145229,
      "grad_norm": 0.01846623234450817,
      "learning_rate": 0.0001267445100503703,
      "loss": 11.9928,
      "step": 71
    },
    {
      "epoch": 1.1950207468879668,
      "grad_norm": 0.02277253195643425,
      "learning_rate": 0.000122560320379142,
      "loss": 11.9957,
      "step": 72
    },
    {
      "epoch": 1.2116182572614107,
      "grad_norm": 0.017460819333791733,
      "learning_rate": 0.00011839810950064109,
      "loss": 9.1042,
      "step": 73
    },
    {
      "epoch": 1.2282157676348548,
      "grad_norm": 0.027226807549595833,
      "learning_rate": 0.00011426121128652526,
      "loss": 14.9039,
      "step": 74
    },
    {
      "epoch": 1.2448132780082988,
      "grad_norm": 0.02005860209465027,
      "learning_rate": 0.00011015293933336857,
      "loss": 11.642,
      "step": 75
    },
    {
      "epoch": 1.2448132780082988,
      "eval_loss": 11.92165756225586,
      "eval_runtime": 0.0678,
      "eval_samples_per_second": 737.673,
      "eval_steps_per_second": 29.507,
      "step": 75
    },
    {
      "epoch": 1.2614107883817427,
      "grad_norm": 0.01866528019309044,
      "learning_rate": 0.00010607658430851744,
      "loss": 11.8735,
      "step": 76
    },
    {
      "epoch": 1.2780082987551866,
      "grad_norm": 0.014573541469871998,
      "learning_rate": 0.0001020354113143132,
      "loss": 11.8891,
      "step": 77
    },
    {
      "epoch": 1.2946058091286308,
      "grad_norm": 0.014832578599452972,
      "learning_rate": 9.803265727279149e-05,
      "loss": 11.9601,
      "step": 78
    },
    {
      "epoch": 1.3112033195020747,
      "grad_norm": 0.010414165444672108,
      "learning_rate": 9.407152833295372e-05,
      "loss": 11.9166,
      "step": 79
    },
    {
      "epoch": 1.3278008298755186,
      "grad_norm": 0.014295806176960468,
      "learning_rate": 9.015519730268754e-05,
      "loss": 11.883,
      "step": 80
    },
    {
      "epoch": 1.3443983402489628,
      "grad_norm": 0.011363668367266655,
      "learning_rate": 8.62868011073928e-05,
      "loss": 11.9823,
      "step": 81
    },
    {
      "epoch": 1.3609958506224067,
      "grad_norm": 0.012058326043188572,
      "learning_rate": 8.246943827734897e-05,
      "loss": 11.9326,
      "step": 82
    },
    {
      "epoch": 1.3775933609958506,
      "grad_norm": 0.013995883986353874,
      "learning_rate": 7.870616646583648e-05,
      "loss": 11.9321,
      "step": 83
    },
    {
      "epoch": 1.3941908713692945,
      "grad_norm": 0.017102347686886787,
      "learning_rate": 7.500000000000002e-05,
      "loss": 11.9893,
      "step": 84
    },
    {
      "epoch": 1.4107883817427385,
      "grad_norm": 0.019754430279135704,
      "learning_rate": 7.135390746641526e-05,
      "loss": 12.3581,
      "step": 85
    },
    {
      "epoch": 1.4273858921161826,
      "grad_norm": 0.014634879305958748,
      "learning_rate": 6.777080933329354e-05,
      "loss": 11.5957,
      "step": 86
    },
    {
      "epoch": 1.4439834024896265,
      "grad_norm": 0.01592666283249855,
      "learning_rate": 6.425357561122819e-05,
      "loss": 11.8704,
      "step": 87
    },
    {
      "epoch": 1.4605809128630705,
      "grad_norm": 0.01606922596693039,
      "learning_rate": 6.080502355435701e-05,
      "loss": 11.7446,
      "step": 88
    },
    {
      "epoch": 1.4771784232365146,
      "grad_norm": 0.017506958916783333,
      "learning_rate": 5.742791540378175e-05,
      "loss": 11.9198,
      "step": 89
    },
    {
      "epoch": 1.4937759336099585,
      "grad_norm": 0.014228662475943565,
      "learning_rate": 5.4124956175052295e-05,
      "loss": 11.9523,
      "step": 90
    },
    {
      "epoch": 1.5103734439834025,
      "grad_norm": 0.01120895054191351,
      "learning_rate": 5.089879149148781e-05,
      "loss": 11.8631,
      "step": 91
    },
    {
      "epoch": 1.5269709543568464,
      "grad_norm": 0.01069362461566925,
      "learning_rate": 4.7752005465070094e-05,
      "loss": 11.9319,
      "step": 92
    },
    {
      "epoch": 1.5435684647302903,
      "grad_norm": 0.011584184132516384,
      "learning_rate": 4.468711862660662e-05,
      "loss": 11.9276,
      "step": 93
    },
    {
      "epoch": 1.5601659751037344,
      "grad_norm": 0.012647481635212898,
      "learning_rate": 4.1706585906821334e-05,
      "loss": 11.9491,
      "step": 94
    },
    {
      "epoch": 1.5767634854771784,
      "grad_norm": 0.012272904627025127,
      "learning_rate": 3.881279466999001e-05,
      "loss": 11.9568,
      "step": 95
    },
    {
      "epoch": 1.5933609958506225,
      "grad_norm": 0.016778666526079178,
      "learning_rate": 3.600806280169541e-05,
      "loss": 11.9439,
      "step": 96
    },
    {
      "epoch": 1.6099585062240664,
      "grad_norm": 0.01830066554248333,
      "learning_rate": 3.3294636852234105e-05,
      "loss": 12.0261,
      "step": 97
    },
    {
      "epoch": 1.6265560165975104,
      "grad_norm": 0.0076575614511966705,
      "learning_rate": 3.067469023716154e-05,
      "loss": 9.0989,
      "step": 98
    },
    {
      "epoch": 1.6431535269709543,
      "grad_norm": 0.020825980231165886,
      "learning_rate": 2.8150321496417135e-05,
      "loss": 14.9122,
      "step": 99
    },
    {
      "epoch": 1.6597510373443982,
      "grad_norm": 0.013375605456531048,
      "learning_rate": 2.5723552613423687e-05,
      "loss": 11.6177,
      "step": 100
    },
    {
      "epoch": 1.6597510373443982,
      "eval_loss": 11.921277046203613,
      "eval_runtime": 0.0676,
      "eval_samples_per_second": 739.82,
      "eval_steps_per_second": 29.593,
      "step": 100
    },
    {
      "epoch": 1.6763485477178424,
      "grad_norm": 0.01629016175866127,
      "learning_rate": 2.3396327395507448e-05,
      "loss": 11.9071,
      "step": 101
    },
    {
      "epoch": 1.6929460580912863,
      "grad_norm": 0.015467053279280663,
      "learning_rate": 2.117050991693609e-05,
      "loss": 11.8865,
      "step": 102
    },
    {
      "epoch": 1.7095435684647304,
      "grad_norm": 0.01504999864846468,
      "learning_rate": 1.9047883025821774e-05,
      "loss": 11.9293,
      "step": 103
    },
    {
      "epoch": 1.7261410788381744,
      "grad_norm": 0.01039854995906353,
      "learning_rate": 1.7030146916085185e-05,
      "loss": 11.8954,
      "step": 104
    },
    {
      "epoch": 1.7427385892116183,
      "grad_norm": 0.011301021091639996,
      "learning_rate": 1.5118917765624467e-05,
      "loss": 11.9575,
      "step": 105
    },
    {
      "epoch": 1.7593360995850622,
      "grad_norm": 0.009601314552128315,
      "learning_rate": 1.3315726441779629e-05,
      "loss": 11.9122,
      "step": 106
    },
    {
      "epoch": 1.7759336099585061,
      "grad_norm": 0.011440463364124298,
      "learning_rate": 1.1622017275129708e-05,
      "loss": 11.9329,
      "step": 107
    },
    {
      "epoch": 1.79253112033195,
      "grad_norm": 0.013274903409183025,
      "learning_rate": 1.00391469026044e-05,
      "loss": 11.9641,
      "step": 108
    },
    {
      "epoch": 1.8091286307053942,
      "grad_norm": 0.013196711428463459,
      "learning_rate": 8.568383180837368e-06,
      "loss": 12.013,
      "step": 109
    },
    {
      "epoch": 1.8257261410788381,
      "grad_norm": 0.022849783301353455,
      "learning_rate": 7.210904170631021e-06,
      "loss": 12.3974,
      "step": 110
    },
    {
      "epoch": 1.8423236514522823,
      "grad_norm": 0.015286913141608238,
      "learning_rate": 5.967797193346574e-06,
      "loss": 11.7548,
      "step": 111
    },
    {
      "epoch": 1.8589211618257262,
      "grad_norm": 0.012541989795863628,
      "learning_rate": 4.840057959975169e-06,
      "loss": 11.5806,
      "step": 112
    },
    {
      "epoch": 1.8755186721991701,
      "grad_norm": 0.013375689275562763,
      "learning_rate": 3.828589773587515e-06,
      "loss": 11.8641,
      "step": 113
    },
    {
      "epoch": 1.892116182572614,
      "grad_norm": 0.01497753243893385,
      "learning_rate": 2.934202805800989e-06,
      "loss": 11.8974,
      "step": 114
    },
    {
      "epoch": 1.908713692946058,
      "grad_norm": 0.010004348121583462,
      "learning_rate": 2.1576134478437313e-06,
      "loss": 11.8974,
      "step": 115
    },
    {
      "epoch": 1.9253112033195021,
      "grad_norm": 0.009814193472266197,
      "learning_rate": 1.4994437367354339e-06,
      "loss": 11.9167,
      "step": 116
    },
    {
      "epoch": 1.941908713692946,
      "grad_norm": 0.008285530842840672,
      "learning_rate": 9.602208570445636e-07,
      "loss": 12.0253,
      "step": 117
    },
    {
      "epoch": 1.9585062240663902,
      "grad_norm": 0.01668418012559414,
      "learning_rate": 5.403767186210218e-07,
      "loss": 11.8426,
      "step": 118
    },
    {
      "epoch": 1.9751037344398341,
      "grad_norm": 0.014265616424381733,
      "learning_rate": 2.402476106425466e-07,
      "loss": 12.0079,
      "step": 119
    },
    {
      "epoch": 1.991701244813278,
      "grad_norm": 0.016387728974223137,
      "learning_rate": 6.007393225176404e-08,
      "loss": 12.1605,
      "step": 120
    },
    {
      "epoch": 2.008298755186722,
      "grad_norm": 0.04243115335702896,
      "learning_rate": 0.0,
      "loss": 20.6424,
      "step": 121
    }
  ],
  "logging_steps": 1,
  "max_steps": 121,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 774412984320.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|