{
  "best_metric": 2.3818840980529785,
  "best_model_checkpoint": "/ssd1/geonmin.kim/shortened-llm/outputs/phi2_1.8b_alpaca_enzh_fullparam/checkpoint-5800",
  "epoch": 59.316075423457974,
  "eval_steps": 100,
  "global_step": 5800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010226909555768616,
      "grad_norm": 3.242844581604004,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 2.0435,
      "step": 1
    },
    {
      "epoch": 0.10226909555768617,
      "grad_norm": 2.2696735858917236,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.0397,
      "step": 10
    },
    {
      "epoch": 0.20453819111537233,
      "grad_norm": 1.283516764640808,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.7851,
      "step": 20
    },
    {
      "epoch": 0.3068072866730585,
      "grad_norm": 1.1238912343978882,
      "learning_rate": 6e-06,
      "loss": 1.6526,
      "step": 30
    },
    {
      "epoch": 0.40907638223074466,
      "grad_norm": 1.0196110010147095,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.5391,
      "step": 40
    },
    {
      "epoch": 0.5113454777884308,
      "grad_norm": 0.9775912761688232,
      "learning_rate": 1e-05,
      "loss": 1.4779,
      "step": 50
    },
    {
      "epoch": 0.613614573346117,
      "grad_norm": 0.9060685038566589,
      "learning_rate": 1.2e-05,
      "loss": 1.4218,
      "step": 60
    },
    {
      "epoch": 0.7158836689038032,
      "grad_norm": 0.9413829445838928,
      "learning_rate": 1.4e-05,
      "loss": 1.3978,
      "step": 70
    },
    {
      "epoch": 0.8181527644614893,
      "grad_norm": 1.007612705230713,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.3779,
      "step": 80
    },
    {
      "epoch": 0.9204218600191755,
      "grad_norm": 0.9302048087120056,
      "learning_rate": 1.8e-05,
      "loss": 1.3387,
      "step": 90
    },
    {
      "epoch": 1.0226909555768615,
      "grad_norm": 0.82750403881073,
      "learning_rate": 2e-05,
      "loss": 1.3056,
      "step": 100
    },
    {
      "epoch": 1.0226909555768615,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.201456069946289,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7448,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.41,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.051,
      "step": 100
    },
    {
      "epoch": 1.124960051134548,
      "grad_norm": 0.8751218318939209,
      "learning_rate": 1.9979166666666667e-05,
      "loss": 1.2376,
      "step": 110
    },
    {
      "epoch": 1.2272291466922338,
      "grad_norm": 0.8453890681266785,
      "learning_rate": 1.9958333333333335e-05,
      "loss": 1.2063,
      "step": 120
    },
    {
      "epoch": 1.3294982422499202,
      "grad_norm": 0.8156936764717102,
      "learning_rate": 1.99375e-05,
      "loss": 1.2022,
      "step": 130
    },
    {
      "epoch": 1.4317673378076063,
      "grad_norm": 0.8538714051246643,
      "learning_rate": 1.991666666666667e-05,
      "loss": 1.2052,
      "step": 140
    },
    {
      "epoch": 1.5340364333652925,
      "grad_norm": 0.8687230944633484,
      "learning_rate": 1.9895833333333334e-05,
      "loss": 1.1913,
      "step": 150
    },
    {
      "epoch": 1.6363055289229784,
      "grad_norm": 0.826032280921936,
      "learning_rate": 1.9875000000000002e-05,
      "loss": 1.1828,
      "step": 160
    },
    {
      "epoch": 1.7385746244806648,
      "grad_norm": 0.8092572093009949,
      "learning_rate": 1.9854166666666667e-05,
      "loss": 1.1746,
      "step": 170
    },
    {
      "epoch": 1.840843720038351,
      "grad_norm": 0.825042188167572,
      "learning_rate": 1.9833333333333335e-05,
      "loss": 1.169,
      "step": 180
    },
    {
      "epoch": 1.943112815596037,
      "grad_norm": 0.8175085186958313,
      "learning_rate": 1.98125e-05,
      "loss": 1.166,
      "step": 190
    },
    {
      "epoch": 2.045381911153723,
      "grad_norm": 0.8639950752258301,
      "learning_rate": 1.979166666666667e-05,
      "loss": 1.1158,
      "step": 200
    },
    {
      "epoch": 2.045381911153723,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.1383644342422485,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7181,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.534,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.067,
      "step": 200
    },
    {
      "epoch": 2.1476510067114094,
      "grad_norm": 0.7958649396896362,
      "learning_rate": 1.9770833333333334e-05,
      "loss": 1.0339,
      "step": 210
    },
    {
      "epoch": 2.249920102269096,
      "grad_norm": 0.8010162711143494,
      "learning_rate": 1.9750000000000002e-05,
      "loss": 1.0426,
      "step": 220
    },
    {
      "epoch": 2.3521891978267817,
      "grad_norm": 0.7885578274726868,
      "learning_rate": 1.9729166666666667e-05,
      "loss": 1.0432,
      "step": 230
    },
    {
      "epoch": 2.4544582933844676,
      "grad_norm": 0.8269402980804443,
      "learning_rate": 1.9708333333333336e-05,
      "loss": 1.0379,
      "step": 240
    },
    {
      "epoch": 2.556727388942154,
      "grad_norm": 0.8139219880104065,
      "learning_rate": 1.96875e-05,
      "loss": 1.0305,
      "step": 250
    },
    {
      "epoch": 2.6589964844998404,
      "grad_norm": 0.8240389227867126,
      "learning_rate": 1.9666666666666666e-05,
      "loss": 1.032,
      "step": 260
    },
    {
      "epoch": 2.7612655800575263,
      "grad_norm": 0.7902543544769287,
      "learning_rate": 1.9645833333333334e-05,
      "loss": 1.0203,
      "step": 270
    },
    {
      "epoch": 2.8635346756152127,
      "grad_norm": 0.7832587361335754,
      "learning_rate": 1.9625e-05,
      "loss": 1.0266,
      "step": 280
    },
    {
      "epoch": 2.9658037711728986,
      "grad_norm": 0.8064606189727783,
      "learning_rate": 1.9604166666666668e-05,
      "loss": 1.0292,
      "step": 290
    },
    {
      "epoch": 3.068072866730585,
      "grad_norm": 0.8083460927009583,
      "learning_rate": 1.9583333333333333e-05,
      "loss": 0.9461,
      "step": 300
    },
    {
      "epoch": 3.068072866730585,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.1164577007293701,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7799,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.247,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.031,
      "step": 300
    },
    {
      "epoch": 3.170341962288271,
      "grad_norm": 0.7945894598960876,
      "learning_rate": 1.95625e-05,
      "loss": 0.9071,
      "step": 310
    },
    {
      "epoch": 3.2726110578459573,
      "grad_norm": 0.8095683455467224,
      "learning_rate": 1.9541666666666666e-05,
      "loss": 0.9081,
      "step": 320
    },
    {
      "epoch": 3.3748801534036432,
      "grad_norm": 0.779909074306488,
      "learning_rate": 1.9520833333333335e-05,
      "loss": 0.9171,
      "step": 330
    },
    {
      "epoch": 3.4771492489613296,
      "grad_norm": 0.8173850774765015,
      "learning_rate": 1.95e-05,
      "loss": 0.9085,
      "step": 340
    },
    {
      "epoch": 3.5794183445190155,
      "grad_norm": 0.8204581141471863,
      "learning_rate": 1.9479166666666668e-05,
      "loss": 0.9073,
      "step": 350
    },
    {
      "epoch": 3.681687440076702,
      "grad_norm": 0.7985939383506775,
      "learning_rate": 1.9458333333333333e-05,
      "loss": 0.9181,
      "step": 360
    },
    {
      "epoch": 3.783956535634388,
      "grad_norm": 0.8140767812728882,
      "learning_rate": 1.94375e-05,
      "loss": 0.9193,
      "step": 370
    },
    {
      "epoch": 3.886225631192074,
      "grad_norm": 0.8102839589118958,
      "learning_rate": 1.9416666666666667e-05,
      "loss": 0.9157,
      "step": 380
    },
    {
      "epoch": 3.98849472674976,
      "grad_norm": 0.7886559963226318,
      "learning_rate": 1.9395833333333335e-05,
      "loss": 0.9191,
      "step": 390
    },
    {
      "epoch": 4.090763822307446,
      "grad_norm": 0.7990887761116028,
      "learning_rate": 1.9375e-05,
      "loss": 0.8203,
      "step": 400
    },
    {
      "epoch": 4.090763822307446,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.1279124021530151,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7222,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.515,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.064,
      "step": 400
    },
    {
      "epoch": 4.193032917865133,
      "grad_norm": 0.8134937286376953,
      "learning_rate": 1.935416666666667e-05,
      "loss": 0.81,
      "step": 410
    },
    {
      "epoch": 4.295302013422819,
      "grad_norm": 0.8099086284637451,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 0.8,
      "step": 420
    },
    {
      "epoch": 4.397571108980505,
      "grad_norm": 0.7907654047012329,
      "learning_rate": 1.9312500000000002e-05,
      "loss": 0.8025,
      "step": 430
    },
    {
      "epoch": 4.499840204538192,
      "grad_norm": 0.835167407989502,
      "learning_rate": 1.9291666666666667e-05,
      "loss": 0.804,
      "step": 440
    },
    {
      "epoch": 4.6021093000958775,
      "grad_norm": 0.8259727358818054,
      "learning_rate": 1.9270833333333335e-05,
      "loss": 0.8069,
      "step": 450
    },
    {
      "epoch": 4.704378395653563,
      "grad_norm": 0.8095329999923706,
      "learning_rate": 1.925e-05,
      "loss": 0.8099,
      "step": 460
    },
    {
      "epoch": 4.806647491211249,
      "grad_norm": 0.8239871859550476,
      "learning_rate": 1.922916666666667e-05,
      "loss": 0.8178,
      "step": 470
    },
    {
      "epoch": 4.908916586768935,
      "grad_norm": 0.8325999975204468,
      "learning_rate": 1.9208333333333337e-05,
      "loss": 0.8077,
      "step": 480
    },
    {
      "epoch": 5.011185682326622,
      "grad_norm": 0.813887357711792,
      "learning_rate": 1.9187500000000002e-05,
      "loss": 0.8061,
      "step": 490
    },
    {
      "epoch": 5.113454777884308,
      "grad_norm": 0.8288919925689697,
      "learning_rate": 1.916666666666667e-05,
      "loss": 0.7063,
      "step": 500
    },
    {
      "epoch": 5.113454777884308,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.1451531648635864,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7807,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.243,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.03,
      "step": 500
    },
    {
      "epoch": 5.215723873441994,
      "grad_norm": 0.8298630118370056,
      "learning_rate": 1.9145833333333336e-05,
      "loss": 0.7049,
      "step": 510
    },
    {
      "epoch": 5.317992968999681,
      "grad_norm": 0.8246304988861084,
      "learning_rate": 1.9125000000000004e-05,
      "loss": 0.7104,
      "step": 520
    },
    {
      "epoch": 5.420262064557367,
      "grad_norm": 0.7879628539085388,
      "learning_rate": 1.910416666666667e-05,
      "loss": 0.7147,
      "step": 530
    },
    {
      "epoch": 5.522531160115053,
      "grad_norm": 0.8137240409851074,
      "learning_rate": 1.9083333333333338e-05,
      "loss": 0.7079,
      "step": 540
    },
    {
      "epoch": 5.624800255672739,
      "grad_norm": 0.8201763033866882,
      "learning_rate": 1.9062500000000003e-05,
      "loss": 0.7162,
      "step": 550
    },
    {
      "epoch": 5.727069351230425,
      "grad_norm": 0.8055077791213989,
      "learning_rate": 1.9041666666666668e-05,
      "loss": 0.7241,
      "step": 560
    },
    {
      "epoch": 5.829338446788111,
      "grad_norm": 0.8504828810691833,
      "learning_rate": 1.9020833333333336e-05,
      "loss": 0.7241,
      "step": 570
    },
    {
      "epoch": 5.931607542345797,
      "grad_norm": 0.8135042190551758,
      "learning_rate": 1.9e-05,
      "loss": 0.7276,
      "step": 580
    },
    {
      "epoch": 6.033876637903483,
      "grad_norm": 0.907433271408081,
      "learning_rate": 1.897916666666667e-05,
      "loss": 0.6957,
      "step": 590
    },
    {
      "epoch": 6.13614573346117,
      "grad_norm": 0.8348938822746277,
      "learning_rate": 1.8958333333333334e-05,
      "loss": 0.6172,
      "step": 600
    },
    {
      "epoch": 6.13614573346117,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.171920657157898,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7497,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.387,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.048,
      "step": 600
    },
    {
      "epoch": 6.238414829018856,
      "grad_norm": 0.8306870460510254,
      "learning_rate": 1.8937500000000003e-05,
      "loss": 0.6255,
      "step": 610
    },
    {
      "epoch": 6.340683924576542,
      "grad_norm": 0.8185003399848938,
      "learning_rate": 1.8916666666666668e-05,
      "loss": 0.6218,
      "step": 620
    },
    {
      "epoch": 6.442953020134228,
      "grad_norm": 0.8396403789520264,
      "learning_rate": 1.8895833333333336e-05,
      "loss": 0.6282,
      "step": 630
    },
    {
      "epoch": 6.545222115691915,
      "grad_norm": 0.8167343139648438,
      "learning_rate": 1.8875e-05,
      "loss": 0.6305,
      "step": 640
    },
    {
      "epoch": 6.6474912112496005,
      "grad_norm": 0.8276931047439575,
      "learning_rate": 1.885416666666667e-05,
      "loss": 0.6378,
      "step": 650
    },
    {
      "epoch": 6.7497603068072864,
      "grad_norm": 0.8405306339263916,
      "learning_rate": 1.8833333333333335e-05,
      "loss": 0.6375,
      "step": 660
    },
    {
      "epoch": 6.852029402364973,
      "grad_norm": 0.8206018805503845,
      "learning_rate": 1.8812500000000003e-05,
      "loss": 0.6393,
      "step": 670
    },
    {
      "epoch": 6.954298497922659,
      "grad_norm": 0.8008025884628296,
      "learning_rate": 1.8791666666666668e-05,
      "loss": 0.6448,
      "step": 680
    },
    {
      "epoch": 7.056567593480345,
      "grad_norm": 0.8231362104415894,
      "learning_rate": 1.8770833333333337e-05,
      "loss": 0.5929,
      "step": 690
    },
    {
      "epoch": 7.158836689038031,
      "grad_norm": 0.8469756841659546,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.5392,
      "step": 700
    },
    {
      "epoch": 7.158836689038031,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.2256364822387695,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8506,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.92,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.99,
      "step": 700
    },
    {
      "epoch": 7.261105784595718,
      "grad_norm": 0.8122310042381287,
      "learning_rate": 1.8729166666666667e-05,
      "loss": 0.54,
      "step": 710
    },
    {
      "epoch": 7.363374880153404,
      "grad_norm": 0.8621034026145935,
      "learning_rate": 1.8708333333333335e-05,
      "loss": 0.5534,
      "step": 720
    },
    {
      "epoch": 7.46564397571109,
      "grad_norm": 0.8529708981513977,
      "learning_rate": 1.86875e-05,
      "loss": 0.5516,
      "step": 730
    },
    {
      "epoch": 7.567913071268776,
      "grad_norm": 0.8307532072067261,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.5531,
      "step": 740
    },
    {
      "epoch": 7.6701821668264625,
      "grad_norm": 0.8403590321540833,
      "learning_rate": 1.8645833333333334e-05,
      "loss": 0.5644,
      "step": 750
    },
    {
      "epoch": 7.772451262384148,
      "grad_norm": 0.8332897424697876,
      "learning_rate": 1.8625000000000002e-05,
      "loss": 0.5607,
      "step": 760
    },
    {
      "epoch": 7.874720357941834,
      "grad_norm": 0.866201639175415,
      "learning_rate": 1.8604166666666667e-05,
      "loss": 0.5658,
      "step": 770
    },
    {
      "epoch": 7.97698945349952,
      "grad_norm": 0.8362734913825989,
      "learning_rate": 1.8583333333333336e-05,
      "loss": 0.566,
      "step": 780
    },
    {
      "epoch": 8.079258549057206,
      "grad_norm": 0.8327781558036804,
      "learning_rate": 1.85625e-05,
      "loss": 0.4963,
      "step": 790
    },
    {
      "epoch": 8.181527644614892,
      "grad_norm": 0.827268123626709,
      "learning_rate": 1.854166666666667e-05,
      "loss": 0.4752,
      "step": 800
    },
    {
      "epoch": 8.181527644614892,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.2643427848815918,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.893,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.726,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.966,
      "step": 800
    },
    {
      "epoch": 8.28379674017258,
      "grad_norm": 0.7902690172195435,
      "learning_rate": 1.8520833333333334e-05,
      "loss": 0.4843,
      "step": 810
    },
    {
      "epoch": 8.386065835730266,
      "grad_norm": 0.8375966548919678,
      "learning_rate": 1.8500000000000002e-05,
      "loss": 0.4783,
      "step": 820
    },
    {
      "epoch": 8.488334931287952,
      "grad_norm": 0.8248066306114197,
      "learning_rate": 1.8479166666666667e-05,
      "loss": 0.4785,
      "step": 830
    },
    {
      "epoch": 8.590604026845638,
      "grad_norm": 0.8647124171257019,
      "learning_rate": 1.8458333333333336e-05,
      "loss": 0.4874,
      "step": 840
    },
    {
      "epoch": 8.692873122403324,
      "grad_norm": 0.8471198678016663,
      "learning_rate": 1.84375e-05,
      "loss": 0.4884,
      "step": 850
    },
    {
      "epoch": 8.79514221796101,
      "grad_norm": 0.8394653797149658,
      "learning_rate": 1.8416666666666666e-05,
      "loss": 0.497,
      "step": 860
    },
    {
      "epoch": 8.897411313518695,
      "grad_norm": 0.8621006011962891,
      "learning_rate": 1.8395833333333334e-05,
      "loss": 0.4923,
      "step": 870
    },
    {
      "epoch": 8.999680409076383,
      "grad_norm": 0.830193817615509,
      "learning_rate": 1.8375e-05,
      "loss": 0.4917,
      "step": 880
    },
    {
      "epoch": 9.101949504634069,
      "grad_norm": 0.8418065309524536,
      "learning_rate": 1.8354166666666668e-05,
      "loss": 0.4104,
      "step": 890
    },
    {
      "epoch": 9.204218600191755,
      "grad_norm": 0.8672240376472473,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.4165,
      "step": 900
    },
    {
      "epoch": 9.204218600191755,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.2995867729187012,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8906,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.737,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.967,
      "step": 900
    },
    {
      "epoch": 9.306487695749441,
      "grad_norm": 0.8118753433227539,
      "learning_rate": 1.83125e-05,
      "loss": 0.4181,
      "step": 910
    },
    {
      "epoch": 9.408756791307127,
      "grad_norm": 0.8521497845649719,
      "learning_rate": 1.8291666666666666e-05,
      "loss": 0.4202,
      "step": 920
    },
    {
      "epoch": 9.511025886864813,
      "grad_norm": 0.8396993279457092,
      "learning_rate": 1.8270833333333335e-05,
      "loss": 0.4247,
      "step": 930
    },
    {
      "epoch": 9.613294982422499,
      "grad_norm": 0.8380371332168579,
      "learning_rate": 1.825e-05,
      "loss": 0.427,
      "step": 940
    },
    {
      "epoch": 9.715564077980185,
      "grad_norm": 0.819240927696228,
      "learning_rate": 1.8229166666666668e-05,
      "loss": 0.4271,
      "step": 950
    },
    {
      "epoch": 9.817833173537872,
      "grad_norm": 0.8467490077018738,
      "learning_rate": 1.8208333333333333e-05,
      "loss": 0.4288,
      "step": 960
    },
    {
      "epoch": 9.920102269095558,
      "grad_norm": 0.8679558634757996,
      "learning_rate": 1.81875e-05,
      "loss": 0.4283,
      "step": 970
    },
    {
      "epoch": 10.022371364653244,
      "grad_norm": 0.8336887359619141,
      "learning_rate": 1.8166666666666667e-05,
      "loss": 0.4244,
      "step": 980
    },
    {
      "epoch": 10.12464046021093,
      "grad_norm": 0.8790706396102905,
      "learning_rate": 1.8145833333333335e-05,
      "loss": 0.3627,
      "step": 990
    },
    {
      "epoch": 10.226909555768616,
      "grad_norm": 0.8260719180107117,
      "learning_rate": 1.8125e-05,
      "loss": 0.3622,
      "step": 1000
    },
    {
      "epoch": 10.226909555768616,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.3518736362457275,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.891,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.735,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.967,
      "step": 1000
    },
    {
      "epoch": 10.329178651326302,
      "grad_norm": 0.8354102969169617,
      "learning_rate": 1.810416666666667e-05,
      "loss": 0.3654,
      "step": 1010
    },
    {
      "epoch": 10.431447746883988,
      "grad_norm": 0.833742618560791,
      "learning_rate": 1.8083333333333334e-05,
      "loss": 0.3675,
      "step": 1020
    },
    {
      "epoch": 10.533716842441674,
      "grad_norm": 0.814929723739624,
      "learning_rate": 1.8062500000000002e-05,
      "loss": 0.368,
      "step": 1030
    },
    {
      "epoch": 10.635985937999362,
      "grad_norm": 0.8500260710716248,
      "learning_rate": 1.8041666666666667e-05,
      "loss": 0.3704,
      "step": 1040
    },
    {
      "epoch": 10.738255033557047,
      "grad_norm": 0.8280666470527649,
      "learning_rate": 1.8020833333333335e-05,
      "loss": 0.3722,
      "step": 1050
    },
    {
      "epoch": 10.840524129114733,
      "grad_norm": 0.871941089630127,
      "learning_rate": 1.8e-05,
      "loss": 0.3768,
      "step": 1060
    },
    {
      "epoch": 10.94279322467242,
      "grad_norm": 0.8509662747383118,
      "learning_rate": 1.797916666666667e-05,
      "loss": 0.3829,
      "step": 1070
    },
    {
      "epoch": 11.045062320230105,
      "grad_norm": 0.8621676564216614,
      "learning_rate": 1.7958333333333334e-05,
      "loss": 0.3512,
      "step": 1080
    },
    {
      "epoch": 11.147331415787791,
      "grad_norm": 0.8415457010269165,
      "learning_rate": 1.7937500000000002e-05,
      "loss": 0.3092,
      "step": 1090
    },
    {
      "epoch": 11.249600511345477,
      "grad_norm": 0.8087013363838196,
      "learning_rate": 1.7916666666666667e-05,
      "loss": 0.3158,
      "step": 1100
    },
    {
      "epoch": 11.249600511345477,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.4024713039398193,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8756,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.805,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.976,
      "step": 1100
    },
    {
      "epoch": 11.351869606903165,
      "grad_norm": 0.8239210844039917,
      "learning_rate": 1.7895833333333336e-05,
      "loss": 0.3189,
      "step": 1110
    },
    {
      "epoch": 11.45413870246085,
      "grad_norm": 0.8607499003410339,
      "learning_rate": 1.7875e-05,
      "loss": 0.3222,
      "step": 1120
    },
    {
      "epoch": 11.556407798018537,
      "grad_norm": 0.8538540601730347,
      "learning_rate": 1.785416666666667e-05,
      "loss": 0.3207,
      "step": 1130
    },
    {
      "epoch": 11.658676893576223,
      "grad_norm": 0.8388919234275818,
      "learning_rate": 1.7833333333333334e-05,
      "loss": 0.3308,
      "step": 1140
    },
    {
      "epoch": 11.760945989133909,
      "grad_norm": 0.8096144795417786,
      "learning_rate": 1.7812500000000003e-05,
      "loss": 0.3256,
      "step": 1150
    },
    {
      "epoch": 11.863215084691594,
      "grad_norm": 0.8199524879455566,
      "learning_rate": 1.7791666666666668e-05,
      "loss": 0.3272,
      "step": 1160
    },
    {
      "epoch": 11.96548418024928,
      "grad_norm": 0.8503059148788452,
      "learning_rate": 1.7770833333333336e-05,
      "loss": 0.3313,
      "step": 1170
    },
    {
      "epoch": 12.067753275806966,
      "grad_norm": 0.8376160860061646,
      "learning_rate": 1.775e-05,
      "loss": 0.2907,
      "step": 1180
    },
    {
      "epoch": 12.170022371364654,
      "grad_norm": 5.976499557495117,
      "learning_rate": 1.772916666666667e-05,
      "loss": 0.2719,
      "step": 1190
    },
    {
      "epoch": 12.27229146692234,
      "grad_norm": 0.8097366094589233,
      "learning_rate": 1.7708333333333335e-05,
      "loss": 0.2782,
      "step": 1200
    },
    {
      "epoch": 12.27229146692234,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.4481781721115112,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.9199,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.603,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.95,
      "step": 1200
    },
    {
      "epoch": 12.374560562480026,
      "grad_norm": 0.7708520293235779,
      "learning_rate": 1.7687500000000003e-05,
      "loss": 0.2749,
      "step": 1210
    },
    {
      "epoch": 12.476829658037712,
      "grad_norm": 0.8598915338516235,
      "learning_rate": 1.7666666666666668e-05,
      "loss": 0.2784,
      "step": 1220
    },
    {
      "epoch": 12.579098753595398,
      "grad_norm": 0.8157161474227905,
      "learning_rate": 1.7645833333333336e-05,
      "loss": 0.2794,
      "step": 1230
    },
    {
      "epoch": 12.681367849153084,
      "grad_norm": 0.8496010899543762,
      "learning_rate": 1.7625e-05,
      "loss": 0.2825,
      "step": 1240
    },
    {
      "epoch": 12.78363694471077,
      "grad_norm": 0.815390944480896,
      "learning_rate": 1.760416666666667e-05,
      "loss": 0.2848,
      "step": 1250
    },
    {
      "epoch": 12.885906040268456,
      "grad_norm": 0.8204165697097778,
      "learning_rate": 1.7583333333333335e-05,
      "loss": 0.2883,
      "step": 1260
    },
    {
      "epoch": 12.988175135826143,
      "grad_norm": 0.8374896049499512,
      "learning_rate": 1.7562500000000003e-05,
      "loss": 0.2909,
      "step": 1270
    },
    {
      "epoch": 13.09044423138383,
      "grad_norm": 0.7642439007759094,
      "learning_rate": 1.754166666666667e-05,
      "loss": 0.2406,
      "step": 1280
    },
    {
      "epoch": 13.192713326941515,
      "grad_norm": 0.7860681414604187,
      "learning_rate": 1.7520833333333337e-05,
      "loss": 0.2358,
      "step": 1290
    },
    {
      "epoch": 13.294982422499201,
      "grad_norm": 0.8119321465492249,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 0.2357,
      "step": 1300
    },
    {
      "epoch": 13.294982422499201,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.4959287643432617,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8771,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.799,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.975,
      "step": 1300
    },
    {
      "epoch": 13.397251518056887,
      "grad_norm": 0.7898840308189392,
      "learning_rate": 1.7479166666666667e-05,
      "loss": 0.2409,
      "step": 1310
    },
    {
      "epoch": 13.499520613614573,
      "grad_norm": 0.8298600912094116,
      "learning_rate": 1.7458333333333335e-05,
      "loss": 0.2399,
      "step": 1320
    },
    {
      "epoch": 13.601789709172259,
      "grad_norm": 0.7994723916053772,
      "learning_rate": 1.74375e-05,
      "loss": 0.2468,
      "step": 1330
    },
    {
      "epoch": 13.704058804729947,
      "grad_norm": 0.822475790977478,
      "learning_rate": 1.741666666666667e-05,
      "loss": 0.2481,
      "step": 1340
    },
    {
      "epoch": 13.806327900287632,
      "grad_norm": 0.8012453317642212,
      "learning_rate": 1.7395833333333334e-05,
      "loss": 0.2473,
      "step": 1350
    },
    {
      "epoch": 13.908596995845318,
      "grad_norm": 0.8046063780784607,
      "learning_rate": 1.7375000000000002e-05,
      "loss": 0.2532,
      "step": 1360
    },
    {
      "epoch": 14.010866091403004,
      "grad_norm": 0.7180681228637695,
      "learning_rate": 1.7354166666666667e-05,
      "loss": 0.2451,
      "step": 1370
    },
    {
      "epoch": 14.11313518696069,
      "grad_norm": 0.7648767828941345,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 0.2026,
      "step": 1380
    },
    {
      "epoch": 14.215404282518376,
      "grad_norm": 0.782311201095581,
      "learning_rate": 1.73125e-05,
      "loss": 0.2062,
      "step": 1390
    },
    {
      "epoch": 14.317673378076062,
      "grad_norm": 0.7766838669776917,
      "learning_rate": 1.729166666666667e-05,
      "loss": 0.2079,
      "step": 1400
    },
    {
      "epoch": 14.317673378076062,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.535245418548584,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8983,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.701,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.963,
      "step": 1400
    },
    {
      "epoch": 14.419942473633748,
      "grad_norm": 0.7595117688179016,
      "learning_rate": 1.7270833333333334e-05,
      "loss": 0.2097,
      "step": 1410
    },
    {
      "epoch": 14.522211569191436,
      "grad_norm": 0.7640486359596252,
      "learning_rate": 1.7250000000000003e-05,
      "loss": 0.2107,
      "step": 1420
    },
    {
      "epoch": 14.624480664749122,
      "grad_norm": 0.8100217580795288,
      "learning_rate": 1.7229166666666668e-05,
      "loss": 0.2132,
      "step": 1430
    },
    {
      "epoch": 14.726749760306808,
      "grad_norm": 0.7824357748031616,
      "learning_rate": 1.7208333333333336e-05,
      "loss": 0.2124,
      "step": 1440
    },
    {
      "epoch": 14.829018855864494,
      "grad_norm": 0.8215783834457397,
      "learning_rate": 1.71875e-05,
      "loss": 0.2172,
      "step": 1450
    },
    {
      "epoch": 14.93128795142218,
      "grad_norm": 0.791244626045227,
      "learning_rate": 1.7166666666666666e-05,
      "loss": 0.2183,
      "step": 1460
    },
    {
      "epoch": 15.033557046979865,
      "grad_norm": 0.7542420625686646,
      "learning_rate": 1.7145833333333334e-05,
      "loss": 0.2083,
      "step": 1470
    },
    {
      "epoch": 15.135826142537551,
      "grad_norm": 0.7965226173400879,
      "learning_rate": 1.7125e-05,
      "loss": 0.1773,
      "step": 1480
    },
    {
      "epoch": 15.238095238095237,
      "grad_norm": 0.764574408531189,
      "learning_rate": 1.7104166666666668e-05,
      "loss": 0.1775,
      "step": 1490
    },
    {
      "epoch": 15.340364333652925,
      "grad_norm": 0.768020749092102,
      "learning_rate": 1.7083333333333333e-05,
      "loss": 0.181,
      "step": 1500
    },
    {
      "epoch": 15.340364333652925,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.57305908203125,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7396,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.434,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.054,
      "step": 1500
    },
    {
      "epoch": 15.44263342921061,
      "grad_norm": 0.7567213773727417,
      "learning_rate": 1.70625e-05,
      "loss": 0.181,
      "step": 1510
    },
    {
      "epoch": 15.544902524768297,
      "grad_norm": 0.7426446080207825,
      "learning_rate": 1.7041666666666666e-05,
      "loss": 0.1824,
      "step": 1520
    },
    {
      "epoch": 15.647171620325983,
      "grad_norm": 0.750170111656189,
      "learning_rate": 1.7020833333333335e-05,
      "loss": 0.1855,
      "step": 1530
    },
    {
      "epoch": 15.749440715883669,
      "grad_norm": 0.7680428624153137,
      "learning_rate": 1.7e-05,
      "loss": 0.1883,
      "step": 1540
    },
    {
      "epoch": 15.851709811441355,
      "grad_norm": 0.8081603646278381,
      "learning_rate": 1.6979166666666668e-05,
      "loss": 0.1892,
      "step": 1550
    },
    {
      "epoch": 15.95397890699904,
      "grad_norm": 0.7882938385009766,
      "learning_rate": 1.6958333333333333e-05,
      "loss": 0.1912,
      "step": 1560
    },
    {
      "epoch": 16.05624800255673,
      "grad_norm": 0.7245915532112122,
      "learning_rate": 1.6937500000000002e-05,
      "loss": 0.1717,
      "step": 1570
    },
    {
      "epoch": 16.158517098114412,
      "grad_norm": 0.7572883367538452,
      "learning_rate": 1.6916666666666667e-05,
      "loss": 0.1577,
      "step": 1580
    },
    {
      "epoch": 16.2607861936721,
      "grad_norm": 0.7576362490653992,
      "learning_rate": 1.6895833333333335e-05,
      "loss": 0.157,
      "step": 1590
    },
    {
      "epoch": 16.363055289229784,
      "grad_norm": 0.7182960510253906,
      "learning_rate": 1.6875e-05,
      "loss": 0.1555,
      "step": 1600
    },
    {
      "epoch": 16.363055289229784,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.6099534034729004,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8255,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.036,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.005,
      "step": 1600
    },
    {
      "epoch": 16.465324384787472,
      "grad_norm": 0.7401660084724426,
      "learning_rate": 1.685416666666667e-05,
      "loss": 0.1573,
      "step": 1610
    },
    {
      "epoch": 16.56759348034516,
      "grad_norm": 0.7596891522407532,
      "learning_rate": 1.6833333333333334e-05,
      "loss": 0.1589,
      "step": 1620
    },
    {
      "epoch": 16.669862575902844,
      "grad_norm": 0.767320990562439,
      "learning_rate": 1.6812500000000002e-05,
      "loss": 0.1613,
      "step": 1630
    },
    {
      "epoch": 16.77213167146053,
      "grad_norm": 0.7579568028450012,
      "learning_rate": 1.6791666666666667e-05,
      "loss": 0.1624,
      "step": 1640
    },
    {
      "epoch": 16.874400767018216,
      "grad_norm": 0.7488529682159424,
      "learning_rate": 1.6770833333333336e-05,
      "loss": 0.1646,
      "step": 1650
    },
    {
      "epoch": 16.976669862575903,
      "grad_norm": 0.7528676390647888,
      "learning_rate": 1.675e-05,
      "loss": 0.1686,
      "step": 1660
    },
    {
      "epoch": 17.078938958133588,
      "grad_norm": 0.6930709481239319,
      "learning_rate": 1.672916666666667e-05,
      "loss": 0.1429,
      "step": 1670
    },
    {
      "epoch": 17.181208053691275,
      "grad_norm": 0.7119250297546387,
      "learning_rate": 1.6708333333333334e-05,
      "loss": 0.1355,
      "step": 1680
    },
    {
      "epoch": 17.283477149248963,
      "grad_norm": 0.6774548292160034,
      "learning_rate": 1.6687500000000002e-05,
      "loss": 0.1348,
      "step": 1690
    },
    {
      "epoch": 17.385746244806647,
      "grad_norm": 0.7192063927650452,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.1368,
      "step": 1700
    },
    {
      "epoch": 17.385746244806647,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.6659198999404907,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8832,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.771,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.971,
      "step": 1700
    },
    {
      "epoch": 17.488015340364335,
      "grad_norm": 0.6981900930404663,
      "learning_rate": 1.6645833333333336e-05,
      "loss": 0.1379,
      "step": 1710
    },
    {
      "epoch": 17.59028443592202,
      "grad_norm": 0.7018482685089111,
      "learning_rate": 1.6625e-05,
      "loss": 0.1413,
      "step": 1720
    },
    {
      "epoch": 17.692553531479707,
      "grad_norm": 0.7282826900482178,
      "learning_rate": 1.660416666666667e-05,
      "loss": 0.1437,
      "step": 1730
    },
    {
      "epoch": 17.79482262703739,
      "grad_norm": 0.751104474067688,
      "learning_rate": 1.6583333333333334e-05,
      "loss": 0.142,
      "step": 1740
    },
    {
      "epoch": 17.89709172259508,
      "grad_norm": 0.7333133816719055,
      "learning_rate": 1.6562500000000003e-05,
      "loss": 0.1452,
      "step": 1750
    },
    {
      "epoch": 17.999360818152766,
      "grad_norm": 0.7490417957305908,
      "learning_rate": 1.6541666666666668e-05,
      "loss": 0.1445,
      "step": 1760
    },
    {
      "epoch": 18.10162991371045,
      "grad_norm": 0.7038053274154663,
      "learning_rate": 1.6520833333333336e-05,
      "loss": 0.1196,
      "step": 1770
    },
    {
      "epoch": 18.203899009268138,
      "grad_norm": 0.6416111588478088,
      "learning_rate": 1.65e-05,
      "loss": 0.1189,
      "step": 1780
    },
    {
      "epoch": 18.306168104825822,
      "grad_norm": 0.6799057126045227,
      "learning_rate": 1.647916666666667e-05,
      "loss": 0.1187,
      "step": 1790
    },
    {
      "epoch": 18.40843720038351,
      "grad_norm": 0.70688396692276,
      "learning_rate": 1.6458333333333335e-05,
      "loss": 0.1195,
      "step": 1800
    },
    {
      "epoch": 18.40843720038351,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.701189398765564,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8449,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.947,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.993,
      "step": 1800
    },
    {
      "epoch": 18.510706295941194,
      "grad_norm": 0.6970006823539734,
      "learning_rate": 1.6437500000000003e-05,
      "loss": 0.1223,
      "step": 1810
    },
    {
      "epoch": 18.612975391498882,
      "grad_norm": 0.7127917408943176,
      "learning_rate": 1.6416666666666668e-05,
      "loss": 0.1228,
      "step": 1820
    },
    {
      "epoch": 18.715244487056566,
      "grad_norm": 0.6875537633895874,
      "learning_rate": 1.6395833333333337e-05,
      "loss": 0.1234,
      "step": 1830
    },
    {
      "epoch": 18.817513582614254,
      "grad_norm": 0.7249884009361267,
      "learning_rate": 1.6375e-05,
      "loss": 0.126,
      "step": 1840
    },
    {
      "epoch": 18.91978267817194,
      "grad_norm": 0.7007323503494263,
      "learning_rate": 1.635416666666667e-05,
      "loss": 0.1243,
      "step": 1850
    },
    {
      "epoch": 19.022051773729626,
      "grad_norm": 0.6626154184341431,
      "learning_rate": 1.6333333333333335e-05,
      "loss": 0.1233,
      "step": 1860
    },
    {
      "epoch": 19.124320869287313,
      "grad_norm": 0.6599249243736267,
      "learning_rate": 1.6312500000000003e-05,
      "loss": 0.1034,
      "step": 1870
    },
    {
      "epoch": 19.226589964844997,
      "grad_norm": 0.6787338256835938,
      "learning_rate": 1.629166666666667e-05,
      "loss": 0.1031,
      "step": 1880
    },
    {
      "epoch": 19.328859060402685,
      "grad_norm": 0.6463894248008728,
      "learning_rate": 1.6270833333333337e-05,
      "loss": 0.106,
      "step": 1890
    },
    {
      "epoch": 19.43112815596037,
      "grad_norm": 0.6649991869926453,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 0.1063,
      "step": 1900
    },
    {
      "epoch": 19.43112815596037,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.7399890422821045,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8883,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.747,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.968,
      "step": 1900
    },
    {
      "epoch": 19.533397251518057,
      "grad_norm": 0.6542336940765381,
      "learning_rate": 1.6229166666666667e-05,
      "loss": 0.1059,
      "step": 1910
    },
    {
      "epoch": 19.635666347075745,
      "grad_norm": 0.6741412281990051,
      "learning_rate": 1.6208333333333335e-05,
      "loss": 0.1088,
      "step": 1920
    },
    {
      "epoch": 19.73793544263343,
      "grad_norm": 0.6898075938224792,
      "learning_rate": 1.61875e-05,
      "loss": 0.1087,
      "step": 1930
    },
    {
      "epoch": 19.840204538191117,
      "grad_norm": 0.6611754298210144,
      "learning_rate": 1.616666666666667e-05,
      "loss": 0.1098,
      "step": 1940
    },
    {
      "epoch": 19.9424736337488,
      "grad_norm": 0.6986993551254272,
      "learning_rate": 1.6145833333333334e-05,
      "loss": 0.1114,
      "step": 1950
    },
    {
      "epoch": 20.04474272930649,
      "grad_norm": 0.6218557953834534,
      "learning_rate": 1.6125000000000002e-05,
      "loss": 0.1028,
      "step": 1960
    },
    {
      "epoch": 20.147011824864173,
      "grad_norm": 0.6389386653900146,
      "learning_rate": 1.6104166666666667e-05,
      "loss": 0.0922,
      "step": 1970
    },
    {
      "epoch": 20.24928092042186,
      "grad_norm": 0.6259350776672363,
      "learning_rate": 1.6083333333333336e-05,
      "loss": 0.0924,
      "step": 1980
    },
    {
      "epoch": 20.351550015979548,
      "grad_norm": 0.6429017782211304,
      "learning_rate": 1.60625e-05,
      "loss": 0.0935,
      "step": 1990
    },
    {
      "epoch": 20.453819111537232,
      "grad_norm": 0.6409590244293213,
      "learning_rate": 1.604166666666667e-05,
      "loss": 0.0925,
      "step": 2000
    },
    {
      "epoch": 20.453819111537232,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.770869493484497,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8821,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.776,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.972,
      "step": 2000
    },
    {
      "epoch": 20.55608820709492,
      "grad_norm": 0.6365486979484558,
      "learning_rate": 1.6020833333333334e-05,
      "loss": 0.0945,
      "step": 2010
    },
    {
      "epoch": 20.658357302652604,
      "grad_norm": 0.6637431979179382,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0963,
      "step": 2020
    },
    {
      "epoch": 20.76062639821029,
      "grad_norm": 0.6256012916564941,
      "learning_rate": 1.5979166666666668e-05,
      "loss": 0.0981,
      "step": 2030
    },
    {
      "epoch": 20.862895493767976,
      "grad_norm": 0.6379542946815491,
      "learning_rate": 1.5958333333333336e-05,
      "loss": 0.0976,
      "step": 2040
    },
    {
      "epoch": 20.965164589325664,
      "grad_norm": 0.6680212020874023,
      "learning_rate": 1.59375e-05,
      "loss": 0.0982,
      "step": 2050
    },
    {
      "epoch": 21.067433684883348,
      "grad_norm": 0.6050080060958862,
      "learning_rate": 1.5916666666666666e-05,
      "loss": 0.0876,
      "step": 2060
    },
    {
      "epoch": 21.169702780441035,
      "grad_norm": 0.6315256953239441,
      "learning_rate": 1.5895833333333335e-05,
      "loss": 0.0824,
      "step": 2070
    },
    {
      "epoch": 21.271971875998723,
      "grad_norm": 0.6160369515419006,
      "learning_rate": 1.5875e-05,
      "loss": 0.0817,
      "step": 2080
    },
    {
      "epoch": 21.374240971556407,
      "grad_norm": 0.5967450737953186,
      "learning_rate": 1.5854166666666668e-05,
      "loss": 0.0828,
      "step": 2090
    },
    {
      "epoch": 21.476510067114095,
      "grad_norm": 0.6033092737197876,
      "learning_rate": 1.5833333333333333e-05,
      "loss": 0.0832,
      "step": 2100
    },
    {
      "epoch": 21.476510067114095,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.8033993244171143,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8632,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.862,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.983,
      "step": 2100
    },
    {
      "epoch": 21.57877916267178,
      "grad_norm": 0.6197848916053772,
      "learning_rate": 1.58125e-05,
      "loss": 0.0852,
      "step": 2110
    },
    {
      "epoch": 21.681048258229467,
      "grad_norm": 0.6332718133926392,
      "learning_rate": 1.5791666666666667e-05,
      "loss": 0.0853,
      "step": 2120
    },
    {
      "epoch": 21.78331735378715,
      "grad_norm": 0.6289674043655396,
      "learning_rate": 1.5770833333333335e-05,
      "loss": 0.0861,
      "step": 2130
    },
    {
      "epoch": 21.88558644934484,
      "grad_norm": 0.6466374397277832,
      "learning_rate": 1.575e-05,
      "loss": 0.0873,
      "step": 2140
    },
    {
      "epoch": 21.987855544902526,
      "grad_norm": 2.8282721042633057,
      "learning_rate": 1.5733333333333334e-05,
      "loss": 0.0888,
      "step": 2150
    },
    {
      "epoch": 22.09012464046021,
      "grad_norm": 0.5769690275192261,
      "learning_rate": 1.5712500000000002e-05,
      "loss": 0.0748,
      "step": 2160
    },
    {
      "epoch": 22.192393736017898,
      "grad_norm": 0.5819457173347473,
      "learning_rate": 1.5691666666666667e-05,
      "loss": 0.0737,
      "step": 2170
    },
    {
      "epoch": 22.294662831575582,
      "grad_norm": 0.6134530305862427,
      "learning_rate": 1.5670833333333336e-05,
      "loss": 0.0739,
      "step": 2180
    },
    {
      "epoch": 22.39693192713327,
      "grad_norm": 0.6075708866119385,
      "learning_rate": 1.565e-05,
      "loss": 0.0742,
      "step": 2190
    },
    {
      "epoch": 22.499201022690954,
      "grad_norm": 0.5924075245857239,
      "learning_rate": 1.562916666666667e-05,
      "loss": 0.0751,
      "step": 2200
    },
    {
      "epoch": 22.499201022690954,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.829688549041748,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8883,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.747,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.968,
      "step": 2200
    },
    {
      "epoch": 22.601470118248642,
      "grad_norm": 0.6079824566841125,
      "learning_rate": 1.5608333333333334e-05,
      "loss": 0.0762,
      "step": 2210
    },
    {
      "epoch": 22.70373921380633,
      "grad_norm": 0.6125743389129639,
      "learning_rate": 1.5587500000000003e-05,
      "loss": 0.0764,
      "step": 2220
    },
    {
      "epoch": 22.806008309364014,
      "grad_norm": 0.5798956751823425,
      "learning_rate": 1.5566666666666668e-05,
      "loss": 0.0764,
      "step": 2230
    },
    {
      "epoch": 22.9082774049217,
      "grad_norm": 0.6115660667419434,
      "learning_rate": 1.5545833333333336e-05,
      "loss": 0.0782,
      "step": 2240
    },
    {
      "epoch": 23.010546500479386,
      "grad_norm": 0.5898561477661133,
      "learning_rate": 1.5525e-05,
      "loss": 0.0785,
      "step": 2250
    },
    {
      "epoch": 23.112815596037073,
      "grad_norm": 0.5677723288536072,
      "learning_rate": 1.550416666666667e-05,
      "loss": 0.066,
      "step": 2260
    },
    {
      "epoch": 23.215084691594758,
      "grad_norm": 0.5659050345420837,
      "learning_rate": 1.5483333333333335e-05,
      "loss": 0.0659,
      "step": 2270
    },
    {
      "epoch": 23.317353787152445,
      "grad_norm": 0.5879548192024231,
      "learning_rate": 1.54625e-05,
      "loss": 0.0671,
      "step": 2280
    },
    {
      "epoch": 23.41962288271013,
      "grad_norm": 0.5900695323944092,
      "learning_rate": 1.5441666666666668e-05,
      "loss": 0.0668,
      "step": 2290
    },
    {
      "epoch": 23.521891978267817,
      "grad_norm": 0.5663672685623169,
      "learning_rate": 1.5420833333333333e-05,
      "loss": 0.0676,
      "step": 2300
    },
    {
      "epoch": 23.521891978267817,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.8641977310180664,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8705,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.829,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.979,
      "step": 2300
    },
    {
      "epoch": 23.624161073825505,
      "grad_norm": 0.6123341917991638,
      "learning_rate": 1.54e-05,
      "loss": 0.0686,
      "step": 2310
    },
    {
      "epoch": 23.72643016938319,
      "grad_norm": 0.5857561826705933,
      "learning_rate": 1.5379166666666667e-05,
      "loss": 0.0688,
      "step": 2320
    },
    {
      "epoch": 23.828699264940877,
      "grad_norm": 0.6261973977088928,
      "learning_rate": 1.5358333333333335e-05,
      "loss": 0.0694,
      "step": 2330
    },
    {
      "epoch": 23.93096836049856,
      "grad_norm": 0.5833300948143005,
      "learning_rate": 1.53375e-05,
      "loss": 0.0706,
      "step": 2340
    },
    {
      "epoch": 24.03323745605625,
      "grad_norm": 0.5474048256874084,
      "learning_rate": 1.531666666666667e-05,
      "loss": 0.0676,
      "step": 2350
    },
    {
      "epoch": 24.135506551613933,
      "grad_norm": 0.5484219193458557,
      "learning_rate": 1.5295833333333334e-05,
      "loss": 0.06,
      "step": 2360
    },
    {
      "epoch": 24.23777564717162,
      "grad_norm": 0.5514199137687683,
      "learning_rate": 1.5275000000000002e-05,
      "loss": 0.0608,
      "step": 2370
    },
    {
      "epoch": 24.340044742729308,
      "grad_norm": 0.5593263506889343,
      "learning_rate": 1.5254166666666667e-05,
      "loss": 0.0607,
      "step": 2380
    },
    {
      "epoch": 24.442313838286992,
      "grad_norm": 0.5621811151504517,
      "learning_rate": 1.5233333333333335e-05,
      "loss": 0.0612,
      "step": 2390
    },
    {
      "epoch": 24.54458293384468,
      "grad_norm": 0.5351541042327881,
      "learning_rate": 1.52125e-05,
      "loss": 0.0612,
      "step": 2400
    },
    {
      "epoch": 24.54458293384468,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.8911913633346558,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8888,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.745,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.968,
      "step": 2400
    },
    {
      "epoch": 24.646852029402364,
      "grad_norm": 0.6047305464744568,
      "learning_rate": 1.5191666666666669e-05,
      "loss": 0.0628,
      "step": 2410
    },
    {
      "epoch": 24.749121124960052,
      "grad_norm": 0.5636226534843445,
      "learning_rate": 1.5170833333333334e-05,
      "loss": 0.0624,
      "step": 2420
    },
    {
      "epoch": 24.851390220517736,
      "grad_norm": 0.5656375288963318,
      "learning_rate": 1.515e-05,
      "loss": 0.0635,
      "step": 2430
    },
    {
      "epoch": 24.953659316075424,
      "grad_norm": 0.5728236436843872,
      "learning_rate": 1.5129166666666667e-05,
      "loss": 0.0642,
      "step": 2440
    },
    {
      "epoch": 25.05592841163311,
      "grad_norm": 0.5383201837539673,
      "learning_rate": 1.5108333333333334e-05,
      "loss": 0.0585,
      "step": 2450
    },
    {
      "epoch": 25.158197507190796,
      "grad_norm": 0.5213799476623535,
      "learning_rate": 1.50875e-05,
      "loss": 0.0539,
      "step": 2460
    },
    {
      "epoch": 25.260466602748483,
      "grad_norm": 0.5278561115264893,
      "learning_rate": 1.5066666666666668e-05,
      "loss": 0.0557,
      "step": 2470
    },
    {
      "epoch": 25.362735698306167,
      "grad_norm": 0.5517110824584961,
      "learning_rate": 1.5045833333333334e-05,
      "loss": 0.054,
      "step": 2480
    },
    {
      "epoch": 25.465004793863855,
      "grad_norm": 0.5266678333282471,
      "learning_rate": 1.5025000000000001e-05,
      "loss": 0.055,
      "step": 2490
    },
    {
      "epoch": 25.56727388942154,
      "grad_norm": 0.5280548334121704,
      "learning_rate": 1.5004166666666668e-05,
      "loss": 0.056,
      "step": 2500
    },
    {
      "epoch": 25.56727388942154,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.9079244136810303,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8724,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.82,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.978,
      "step": 2500
    },
    {
      "epoch": 25.669542984979227,
      "grad_norm": 0.543380081653595,
      "learning_rate": 1.4983333333333334e-05,
      "loss": 0.0567,
      "step": 2510
    },
    {
      "epoch": 25.77181208053691,
      "grad_norm": 0.5638805031776428,
      "learning_rate": 1.4962500000000003e-05,
      "loss": 0.0576,
      "step": 2520
    },
    {
      "epoch": 25.8740811760946,
      "grad_norm": 0.5791529417037964,
      "learning_rate": 1.4941666666666668e-05,
      "loss": 0.0585,
      "step": 2530
    },
    {
      "epoch": 25.976350271652286,
      "grad_norm": 0.5550780892372131,
      "learning_rate": 1.4920833333333336e-05,
      "loss": 0.0583,
      "step": 2540
    },
    {
      "epoch": 26.07861936720997,
      "grad_norm": 0.5031822323799133,
      "learning_rate": 1.4900000000000001e-05,
      "loss": 0.0521,
      "step": 2550
    },
    {
      "epoch": 26.18088846276766,
      "grad_norm": 0.5042924880981445,
      "learning_rate": 1.487916666666667e-05,
      "loss": 0.0495,
      "step": 2560
    },
    {
      "epoch": 26.283157558325343,
      "grad_norm": 0.5483749508857727,
      "learning_rate": 1.4858333333333335e-05,
      "loss": 0.0504,
      "step": 2570
    },
    {
      "epoch": 26.38542665388303,
      "grad_norm": 0.508752703666687,
      "learning_rate": 1.48375e-05,
      "loss": 0.0509,
      "step": 2580
    },
    {
      "epoch": 26.487695749440714,
      "grad_norm": 0.4737156331539154,
      "learning_rate": 1.4816666666666668e-05,
      "loss": 0.0507,
      "step": 2590
    },
    {
      "epoch": 26.589964844998402,
      "grad_norm": 0.5402019023895264,
      "learning_rate": 1.4795833333333333e-05,
      "loss": 0.0513,
      "step": 2600
    },
    {
      "epoch": 26.589964844998402,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.9507412910461426,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8703,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.83,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.979,
      "step": 2600
    },
    {
      "epoch": 26.69223394055609,
      "grad_norm": 0.5165488123893738,
      "learning_rate": 1.4775000000000002e-05,
      "loss": 0.0519,
      "step": 2610
    },
    {
      "epoch": 26.794503036113774,
      "grad_norm": 0.5218144059181213,
      "learning_rate": 1.4754166666666667e-05,
      "loss": 0.0537,
      "step": 2620
    },
    {
      "epoch": 26.89677213167146,
      "grad_norm": 0.5503118634223938,
      "learning_rate": 1.4733333333333335e-05,
      "loss": 0.053,
      "step": 2630
    },
    {
      "epoch": 26.999041227229146,
      "grad_norm": 0.5653933882713318,
      "learning_rate": 1.47125e-05,
      "loss": 0.0533,
      "step": 2640
    },
    {
      "epoch": 27.101310322786833,
      "grad_norm": 0.5077876448631287,
      "learning_rate": 1.4691666666666669e-05,
      "loss": 0.0473,
      "step": 2650
    },
    {
      "epoch": 27.203579418344518,
      "grad_norm": 0.5198752880096436,
      "learning_rate": 1.4670833333333334e-05,
      "loss": 0.0472,
      "step": 2660
    },
    {
      "epoch": 27.305848513902205,
      "grad_norm": 0.49260616302490234,
      "learning_rate": 1.4650000000000002e-05,
      "loss": 0.047,
      "step": 2670
    },
    {
      "epoch": 27.408117609459893,
      "grad_norm": 0.5261008739471436,
      "learning_rate": 1.4629166666666667e-05,
      "loss": 0.0468,
      "step": 2680
    },
    {
      "epoch": 27.510386705017577,
      "grad_norm": 0.5464609265327454,
      "learning_rate": 1.4608333333333335e-05,
      "loss": 0.0476,
      "step": 2690
    },
    {
      "epoch": 27.612655800575265,
      "grad_norm": 0.526250422000885,
      "learning_rate": 1.45875e-05,
      "loss": 0.0488,
      "step": 2700
    },
    {
      "epoch": 27.612655800575265,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.963590145111084,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8709,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.827,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.978,
      "step": 2700
    },
    {
      "epoch": 27.71492489613295,
      "grad_norm": 0.49739035964012146,
      "learning_rate": 1.4566666666666669e-05,
      "loss": 0.0478,
      "step": 2710
    },
    {
      "epoch": 27.817193991690637,
      "grad_norm": 0.5307214260101318,
      "learning_rate": 1.4545833333333334e-05,
      "loss": 0.048,
      "step": 2720
    },
    {
      "epoch": 27.91946308724832,
      "grad_norm": 0.5277805328369141,
      "learning_rate": 1.4525e-05,
      "loss": 0.0489,
      "step": 2730
    },
    {
      "epoch": 28.02173218280601,
      "grad_norm": 0.490689218044281,
      "learning_rate": 1.4504166666666667e-05,
      "loss": 0.0488,
      "step": 2740
    },
    {
      "epoch": 28.124001278363693,
      "grad_norm": 0.5121079683303833,
      "learning_rate": 1.4483333333333334e-05,
      "loss": 0.0426,
      "step": 2750
    },
    {
      "epoch": 28.22627037392138,
      "grad_norm": 0.5197770595550537,
      "learning_rate": 1.4462500000000001e-05,
      "loss": 0.0433,
      "step": 2760
    },
    {
      "epoch": 28.328539469479068,
      "grad_norm": 0.5023459196090698,
      "learning_rate": 1.4441666666666668e-05,
      "loss": 0.0428,
      "step": 2770
    },
    {
      "epoch": 28.430808565036752,
      "grad_norm": 0.47794410586357117,
      "learning_rate": 1.4420833333333334e-05,
      "loss": 0.0431,
      "step": 2780
    },
    {
      "epoch": 28.53307766059444,
      "grad_norm": 0.4780057370662689,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.0439,
      "step": 2790
    },
    {
      "epoch": 28.635346756152124,
      "grad_norm": 0.5096541047096252,
      "learning_rate": 1.4379166666666668e-05,
      "loss": 0.0443,
      "step": 2800
    },
    {
      "epoch": 28.635346756152124,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 1.9908784627914429,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8556,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.897,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.987,
      "step": 2800
    },
    {
      "epoch": 28.737615851709812,
      "grad_norm": 0.49939730763435364,
      "learning_rate": 1.4358333333333334e-05,
      "loss": 0.045,
      "step": 2810
    },
    {
      "epoch": 28.839884947267496,
      "grad_norm": 0.5106296539306641,
      "learning_rate": 1.4337500000000001e-05,
      "loss": 0.045,
      "step": 2820
    },
    {
      "epoch": 28.942154042825184,
      "grad_norm": 0.4849531054496765,
      "learning_rate": 1.4316666666666668e-05,
      "loss": 0.0451,
      "step": 2830
    },
    {
      "epoch": 29.04442313838287,
      "grad_norm": 0.46746712923049927,
      "learning_rate": 1.4295833333333335e-05,
      "loss": 0.0429,
      "step": 2840
    },
    {
      "epoch": 29.146692233940556,
      "grad_norm": 0.4314974546432495,
      "learning_rate": 1.4275000000000001e-05,
      "loss": 0.0397,
      "step": 2850
    },
    {
      "epoch": 29.248961329498243,
      "grad_norm": 0.4507407546043396,
      "learning_rate": 1.4254166666666668e-05,
      "loss": 0.0394,
      "step": 2860
    },
    {
      "epoch": 29.351230425055927,
      "grad_norm": 0.4565179646015167,
      "learning_rate": 1.4233333333333335e-05,
      "loss": 0.0402,
      "step": 2870
    },
    {
      "epoch": 29.453499520613615,
      "grad_norm": 0.4820208251476288,
      "learning_rate": 1.42125e-05,
      "loss": 0.0403,
      "step": 2880
    },
    {
      "epoch": 29.5557686161713,
      "grad_norm": 0.47718533873558044,
      "learning_rate": 1.4191666666666668e-05,
      "loss": 0.0404,
      "step": 2890
    },
    {
      "epoch": 29.658037711728987,
      "grad_norm": 0.5102068185806274,
      "learning_rate": 1.4170833333333333e-05,
      "loss": 0.0418,
      "step": 2900
    },
    {
      "epoch": 29.658037711728987,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.013613700866699,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8531,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.909,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.989,
      "step": 2900
    },
    {
      "epoch": 29.760306807286675,
      "grad_norm": 0.49850982427597046,
      "learning_rate": 1.4150000000000002e-05,
      "loss": 0.041,
      "step": 2910
    },
    {
      "epoch": 29.86257590284436,
      "grad_norm": 0.5028281211853027,
      "learning_rate": 1.4129166666666667e-05,
      "loss": 0.0419,
      "step": 2920
    },
    {
      "epoch": 29.964844998402047,
      "grad_norm": 0.4753844141960144,
      "learning_rate": 1.4108333333333335e-05,
      "loss": 0.0421,
      "step": 2930
    },
    {
      "epoch": 30.06711409395973,
      "grad_norm": 0.45387542247772217,
      "learning_rate": 1.40875e-05,
      "loss": 0.0381,
      "step": 2940
    },
    {
      "epoch": 30.16938318951742,
      "grad_norm": 0.4576801359653473,
      "learning_rate": 1.4066666666666669e-05,
      "loss": 0.0365,
      "step": 2950
    },
    {
      "epoch": 30.271652285075103,
      "grad_norm": 1.5916332006454468,
      "learning_rate": 1.4045833333333334e-05,
      "loss": 0.0371,
      "step": 2960
    },
    {
      "epoch": 30.37392138063279,
      "grad_norm": 28.43450164794922,
      "learning_rate": 1.4029166666666668e-05,
      "loss": 0.0403,
      "step": 2970
    },
    {
      "epoch": 30.476190476190474,
      "grad_norm": 23.308544158935547,
      "learning_rate": 1.4010416666666669e-05,
      "loss": 0.3001,
      "step": 2980
    },
    {
      "epoch": 30.578459571748162,
      "grad_norm": 0.9334068298339844,
      "learning_rate": 1.3989583333333334e-05,
      "loss": 0.2114,
      "step": 2990
    },
    {
      "epoch": 30.68072866730585,
      "grad_norm": 0.5433509945869446,
      "learning_rate": 1.3968750000000002e-05,
      "loss": 0.0448,
      "step": 3000
    },
    {
      "epoch": 30.68072866730585,
      "eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.014845609664917,
      "eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8553,
      "eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.899,
      "eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.987,
      "step": 3000
    },
{ |
|
"epoch": 30.782997762863534, |
|
"grad_norm": 0.5084338188171387, |
|
"learning_rate": 1.3947916666666667e-05, |
|
"loss": 0.042, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 30.88526685842122, |
|
"grad_norm": 0.4836946725845337, |
|
"learning_rate": 1.3927083333333336e-05, |
|
"loss": 0.0396, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 30.987535953978906, |
|
"grad_norm": 0.4835638701915741, |
|
"learning_rate": 1.3906250000000001e-05, |
|
"loss": 0.0397, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 31.089805049536594, |
|
"grad_norm": 0.4484618008136749, |
|
"learning_rate": 1.3885416666666666e-05, |
|
"loss": 0.0359, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 31.192074145094278, |
|
"grad_norm": 0.4576917886734009, |
|
"learning_rate": 1.3864583333333334e-05, |
|
"loss": 0.0352, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 31.294343240651965, |
|
"grad_norm": 0.4618643820285797, |
|
"learning_rate": 1.3843750000000001e-05, |
|
"loss": 0.0351, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 31.396612336209653, |
|
"grad_norm": 0.4532334804534912, |
|
"learning_rate": 1.3822916666666668e-05, |
|
"loss": 0.0353, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 31.498881431767337, |
|
"grad_norm": 0.43523749709129333, |
|
"learning_rate": 1.3802083333333335e-05, |
|
"loss": 0.0357, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 31.601150527325025, |
|
"grad_norm": 0.4621034860610962, |
|
"learning_rate": 1.3781250000000001e-05, |
|
"loss": 0.036, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 31.70341962288271, |
|
"grad_norm": 0.4407444894313812, |
|
"learning_rate": 1.3760416666666668e-05, |
|
"loss": 0.0365, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 31.70341962288271, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.0446879863739014, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8565, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.893, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.987, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 31.805688718440397, |
|
"grad_norm": 0.46584275364875793, |
|
"learning_rate": 1.3739583333333335e-05, |
|
"loss": 0.0367, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 31.90795781399808, |
|
"grad_norm": 0.4602925181388855, |
|
"learning_rate": 1.3718750000000001e-05, |
|
"loss": 0.0368, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 32.01022690955577, |
|
"grad_norm": 0.4346022605895996, |
|
"learning_rate": 1.3697916666666668e-05, |
|
"loss": 0.0398, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 32.11249600511346, |
|
"grad_norm": 0.42625728249549866, |
|
"learning_rate": 1.3677083333333335e-05, |
|
"loss": 0.0327, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 32.214765100671144, |
|
"grad_norm": 0.4581526219844818, |
|
"learning_rate": 1.3656250000000002e-05, |
|
"loss": 0.0325, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 32.317034196228825, |
|
"grad_norm": 0.41788893938064575, |
|
"learning_rate": 1.3635416666666668e-05, |
|
"loss": 0.0323, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 32.41930329178651, |
|
"grad_norm": 0.39856305718421936, |
|
"learning_rate": 1.3614583333333335e-05, |
|
"loss": 0.0331, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 32.5215723873442, |
|
"grad_norm": 0.4417785704135895, |
|
"learning_rate": 1.3593750000000002e-05, |
|
"loss": 0.0336, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 32.62384148290189, |
|
"grad_norm": 0.45321398973464966, |
|
"learning_rate": 1.3572916666666667e-05, |
|
"loss": 0.0339, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 32.72611057845957, |
|
"grad_norm": 0.43776148557662964, |
|
"learning_rate": 1.3552083333333335e-05, |
|
"loss": 0.0337, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 32.72611057845957, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.0593974590301514, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8377, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.98, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.997, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 32.828379674017256, |
|
"grad_norm": 0.4577961564064026, |
|
"learning_rate": 1.353125e-05, |
|
"loss": 0.0342, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 32.930648769574944, |
|
"grad_norm": 0.43651047348976135, |
|
"learning_rate": 1.3510416666666669e-05, |
|
"loss": 0.0352, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 33.03291786513263, |
|
"grad_norm": 0.4300122559070587, |
|
"learning_rate": 1.3489583333333334e-05, |
|
"loss": 0.0331, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 33.13518696069032, |
|
"grad_norm": 0.43906641006469727, |
|
"learning_rate": 1.3468750000000002e-05, |
|
"loss": 0.0304, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 33.237456056248, |
|
"grad_norm": 0.40852677822113037, |
|
"learning_rate": 1.3447916666666667e-05, |
|
"loss": 0.0304, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 33.33972515180569, |
|
"grad_norm": 0.42338284850120544, |
|
"learning_rate": 1.3427083333333336e-05, |
|
"loss": 0.0307, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 33.441994247363375, |
|
"grad_norm": 0.4441679120063782, |
|
"learning_rate": 1.340625e-05, |
|
"loss": 0.031, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 33.54426334292106, |
|
"grad_norm": 0.431755393743515, |
|
"learning_rate": 1.3385416666666669e-05, |
|
"loss": 0.031, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 33.646532438478744, |
|
"grad_norm": 0.42783161997795105, |
|
"learning_rate": 1.3364583333333334e-05, |
|
"loss": 0.0316, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 33.74880153403643, |
|
"grad_norm": 0.4471502900123596, |
|
"learning_rate": 1.3343750000000002e-05, |
|
"loss": 0.0318, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 33.74880153403643, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.0793018341064453, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.9135, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.632, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.954, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 33.85107062959412, |
|
"grad_norm": 0.42543718218803406, |
|
"learning_rate": 1.3322916666666668e-05, |
|
"loss": 0.0317, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 33.95333972515181, |
|
"grad_norm": 0.4455837905406952, |
|
"learning_rate": 1.3302083333333336e-05, |
|
"loss": 0.0321, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 34.055608820709494, |
|
"grad_norm": 0.451027512550354, |
|
"learning_rate": 1.3281250000000001e-05, |
|
"loss": 0.0297, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 34.157877916267175, |
|
"grad_norm": 0.43288454413414, |
|
"learning_rate": 1.3260416666666666e-05, |
|
"loss": 0.0285, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 34.26014701182486, |
|
"grad_norm": 0.41506046056747437, |
|
"learning_rate": 1.3239583333333334e-05, |
|
"loss": 0.0285, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 34.36241610738255, |
|
"grad_norm": 0.4198153614997864, |
|
"learning_rate": 1.321875e-05, |
|
"loss": 0.0287, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 34.46468520294024, |
|
"grad_norm": 0.4576322138309479, |
|
"learning_rate": 1.3197916666666668e-05, |
|
"loss": 0.0293, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 34.566954298497926, |
|
"grad_norm": 0.4268178343772888, |
|
"learning_rate": 1.3177083333333333e-05, |
|
"loss": 0.0297, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 34.66922339405561, |
|
"grad_norm": 0.4406622648239136, |
|
"learning_rate": 1.3156250000000001e-05, |
|
"loss": 0.0298, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 34.771492489613294, |
|
"grad_norm": 0.44503384828567505, |
|
"learning_rate": 1.3135416666666666e-05, |
|
"loss": 0.0302, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 34.771492489613294, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.1051576137542725, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7904, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.198, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.025, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 34.87376158517098, |
|
"grad_norm": 0.4410327672958374, |
|
"learning_rate": 1.3114583333333335e-05, |
|
"loss": 0.0308, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 34.97603068072867, |
|
"grad_norm": 0.4527595639228821, |
|
"learning_rate": 1.309375e-05, |
|
"loss": 0.0307, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 35.07829977628635, |
|
"grad_norm": 0.3983278274536133, |
|
"learning_rate": 1.3072916666666668e-05, |
|
"loss": 0.0275, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 35.18056887184404, |
|
"grad_norm": 0.405274361371994, |
|
"learning_rate": 1.3052083333333335e-05, |
|
"loss": 0.027, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 35.282837967401726, |
|
"grad_norm": 0.4225080907344818, |
|
"learning_rate": 1.3031250000000002e-05, |
|
"loss": 0.0276, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 35.38510706295941, |
|
"grad_norm": 0.3988070487976074, |
|
"learning_rate": 1.3010416666666668e-05, |
|
"loss": 0.0271, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 35.4873761585171, |
|
"grad_norm": 0.613850474357605, |
|
"learning_rate": 1.2989583333333335e-05, |
|
"loss": 0.0271, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 35.58964525407478, |
|
"grad_norm": 0.42930155992507935, |
|
"learning_rate": 1.2968750000000002e-05, |
|
"loss": 0.0269, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 35.69191434963247, |
|
"grad_norm": 0.4209059178829193, |
|
"learning_rate": 1.2947916666666667e-05, |
|
"loss": 0.0281, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 35.79418344519016, |
|
"grad_norm": 0.4427293837070465, |
|
"learning_rate": 1.2927083333333335e-05, |
|
"loss": 0.0282, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 35.79418344519016, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.1160192489624023, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8212, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.056, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.007, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 35.896452540747845, |
|
"grad_norm": 0.42366185784339905, |
|
"learning_rate": 1.290625e-05, |
|
"loss": 0.0289, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 35.99872163630553, |
|
"grad_norm": 0.4120742380619049, |
|
"learning_rate": 1.2885416666666669e-05, |
|
"loss": 0.0287, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 36.10099073186321, |
|
"grad_norm": 0.38890495896339417, |
|
"learning_rate": 1.2864583333333334e-05, |
|
"loss": 0.0258, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 36.2032598274209, |
|
"grad_norm": 0.4076452851295471, |
|
"learning_rate": 1.2843750000000002e-05, |
|
"loss": 0.0258, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 36.30552892297859, |
|
"grad_norm": 0.3809320032596588, |
|
"learning_rate": 1.2822916666666667e-05, |
|
"loss": 0.0255, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 36.407798018536276, |
|
"grad_norm": 0.39650237560272217, |
|
"learning_rate": 1.2802083333333336e-05, |
|
"loss": 0.026, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 36.51006711409396, |
|
"grad_norm": 0.4009132385253906, |
|
"learning_rate": 1.278125e-05, |
|
"loss": 0.0265, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 36.612336209651644, |
|
"grad_norm": 0.42308393120765686, |
|
"learning_rate": 1.2760416666666669e-05, |
|
"loss": 0.0263, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 36.71460530520933, |
|
"grad_norm": 0.4044691324234009, |
|
"learning_rate": 1.2739583333333334e-05, |
|
"loss": 0.0265, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 36.81687440076702, |
|
"grad_norm": 0.4140937030315399, |
|
"learning_rate": 1.2718750000000003e-05, |
|
"loss": 0.0266, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 36.81687440076702, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.13891339302063, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8236, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.045, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.006, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 36.91914349632471, |
|
"grad_norm": 0.4160206615924835, |
|
"learning_rate": 1.2697916666666668e-05, |
|
"loss": 0.0269, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 37.02141259188239, |
|
"grad_norm": 0.39423078298568726, |
|
"learning_rate": 1.2677083333333336e-05, |
|
"loss": 0.026, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 37.123681687440076, |
|
"grad_norm": 0.3859294056892395, |
|
"learning_rate": 1.2656250000000001e-05, |
|
"loss": 0.0238, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 37.225950782997764, |
|
"grad_norm": 0.40413331985473633, |
|
"learning_rate": 1.2635416666666666e-05, |
|
"loss": 0.0244, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 37.32821987855545, |
|
"grad_norm": 0.39002159237861633, |
|
"learning_rate": 1.2614583333333334e-05, |
|
"loss": 0.0244, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 37.43048897411313, |
|
"grad_norm": 0.403145432472229, |
|
"learning_rate": 1.259375e-05, |
|
"loss": 0.0245, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 37.53275806967082, |
|
"grad_norm": 0.42878827452659607, |
|
"learning_rate": 1.2572916666666668e-05, |
|
"loss": 0.0249, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 37.63502716522851, |
|
"grad_norm": 0.4047834277153015, |
|
"learning_rate": 1.2552083333333333e-05, |
|
"loss": 0.0246, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 37.737296260786195, |
|
"grad_norm": 0.4242531955242157, |
|
"learning_rate": 1.2531250000000001e-05, |
|
"loss": 0.0254, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 37.83956535634388, |
|
"grad_norm": 0.3883196711540222, |
|
"learning_rate": 1.2510416666666666e-05, |
|
"loss": 0.025, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 37.83956535634388, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.135497570037842, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.828, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.024, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.003, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 37.94183445190156, |
|
"grad_norm": 0.40664049983024597, |
|
"learning_rate": 1.2489583333333335e-05, |
|
"loss": 0.0249, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 38.04410354745925, |
|
"grad_norm": 0.37315633893013, |
|
"learning_rate": 1.246875e-05, |
|
"loss": 0.0242, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 38.14637264301694, |
|
"grad_norm": 0.37339115142822266, |
|
"learning_rate": 1.2447916666666668e-05, |
|
"loss": 0.0229, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 38.24864173857463, |
|
"grad_norm": 0.38532310724258423, |
|
"learning_rate": 1.2427083333333333e-05, |
|
"loss": 0.0227, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 38.350910834132314, |
|
"grad_norm": 0.3730473220348358, |
|
"learning_rate": 1.2406250000000002e-05, |
|
"loss": 0.0227, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 38.453179929689995, |
|
"grad_norm": 0.37635692954063416, |
|
"learning_rate": 1.2385416666666667e-05, |
|
"loss": 0.0234, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 38.55544902524768, |
|
"grad_norm": 0.38997572660446167, |
|
"learning_rate": 1.2364583333333335e-05, |
|
"loss": 0.0233, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 38.65771812080537, |
|
"grad_norm": 0.38781270384788513, |
|
"learning_rate": 1.234375e-05, |
|
"loss": 0.0237, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 38.75998721636306, |
|
"grad_norm": 0.42525115609169006, |
|
"learning_rate": 1.2322916666666667e-05, |
|
"loss": 0.0237, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 38.86225631192074, |
|
"grad_norm": 0.3896453380584717, |
|
"learning_rate": 1.2302083333333335e-05, |
|
"loss": 0.0239, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 38.86225631192074, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.166919231414795, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8189, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.067, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.008, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 38.964525407478426, |
|
"grad_norm": 0.3976113498210907, |
|
"learning_rate": 1.228125e-05, |
|
"loss": 0.0242, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 39.066794503036114, |
|
"grad_norm": 0.37402892112731934, |
|
"learning_rate": 1.2260416666666669e-05, |
|
"loss": 0.0228, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 39.1690635985938, |
|
"grad_norm": 0.4018329679965973, |
|
"learning_rate": 1.2239583333333334e-05, |
|
"loss": 0.0219, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 39.27133269415149, |
|
"grad_norm": 0.3905806839466095, |
|
"learning_rate": 1.2218750000000002e-05, |
|
"loss": 0.0219, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 39.37360178970917, |
|
"grad_norm": 0.39368128776550293, |
|
"learning_rate": 1.2197916666666667e-05, |
|
"loss": 0.0223, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 39.47587088526686, |
|
"grad_norm": 0.40275242924690247, |
|
"learning_rate": 1.2177083333333336e-05, |
|
"loss": 0.0225, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 39.578139980824545, |
|
"grad_norm": 0.377655953168869, |
|
"learning_rate": 1.215625e-05, |
|
"loss": 0.0223, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 39.68040907638223, |
|
"grad_norm": 0.35910850763320923, |
|
"learning_rate": 1.2135416666666669e-05, |
|
"loss": 0.0222, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 39.782678171939914, |
|
"grad_norm": 0.40253758430480957, |
|
"learning_rate": 1.2114583333333334e-05, |
|
"loss": 0.0229, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 39.8849472674976, |
|
"grad_norm": 0.4119781255722046, |
|
"learning_rate": 1.2093750000000003e-05, |
|
"loss": 0.0231, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 39.8849472674976, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.1834068298339844, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8776, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.796, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.975, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 39.98721636305529, |
|
"grad_norm": 0.40584734082221985, |
|
"learning_rate": 1.2072916666666668e-05, |
|
"loss": 0.0229, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 40.08948545861298, |
|
"grad_norm": 0.3695628345012665, |
|
"learning_rate": 1.2052083333333336e-05, |
|
"loss": 0.0207, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 40.191754554170664, |
|
"grad_norm": 0.38678476214408875, |
|
"learning_rate": 1.2031250000000001e-05, |
|
"loss": 0.0205, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 40.294023649728345, |
|
"grad_norm": 0.349589079618454, |
|
"learning_rate": 1.2010416666666666e-05, |
|
"loss": 0.0208, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 40.39629274528603, |
|
"grad_norm": 0.3602575659751892, |
|
"learning_rate": 1.1989583333333335e-05, |
|
"loss": 0.0207, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 40.49856184084372, |
|
"grad_norm": 0.34940171241760254, |
|
"learning_rate": 1.196875e-05, |
|
"loss": 0.0208, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 40.60083093640141, |
|
"grad_norm": 0.42674344778060913, |
|
"learning_rate": 1.1947916666666668e-05, |
|
"loss": 0.0214, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 40.703100031959096, |
|
"grad_norm": 0.3709782361984253, |
|
"learning_rate": 1.1927083333333333e-05, |
|
"loss": 0.0215, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 40.80536912751678, |
|
"grad_norm": 0.37002718448638916, |
|
"learning_rate": 1.1906250000000001e-05, |
|
"loss": 0.0215, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 40.907638223074464, |
|
"grad_norm": 0.3808917999267578, |
|
"learning_rate": 1.1885416666666666e-05, |
|
"loss": 0.0216, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 40.907638223074464, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.173332452774048, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8356, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.99, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.999, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 41.00990731863215, |
|
"grad_norm": 0.5391054153442383, |
|
"learning_rate": 1.1864583333333335e-05, |
|
"loss": 0.0214, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 41.11217641418984, |
|
"grad_norm": 0.36861568689346313, |
|
"learning_rate": 1.184375e-05, |
|
"loss": 0.0203, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 41.21444550974752, |
|
"grad_norm": 0.3627229630947113, |
|
"learning_rate": 1.1822916666666668e-05, |
|
"loss": 0.0203, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 41.31671460530521, |
|
"grad_norm": 0.37450557947158813, |
|
"learning_rate": 1.1802083333333333e-05, |
|
"loss": 0.02, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 41.418983700862896, |
|
"grad_norm": 0.3615454435348511, |
|
"learning_rate": 1.1781250000000002e-05, |
|
"loss": 0.0204, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 41.52125279642058, |
|
"grad_norm": 0.3542068302631378, |
|
"learning_rate": 1.1760416666666667e-05, |
|
"loss": 0.0203, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 41.62352189197827, |
|
"grad_norm": 0.3920552134513855, |
|
"learning_rate": 1.1739583333333335e-05, |
|
"loss": 0.0204, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 41.72579098753595, |
|
"grad_norm": 0.3835306763648987, |
|
"learning_rate": 1.171875e-05, |
|
"loss": 0.0206, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 41.82806008309364, |
|
"grad_norm": 0.35131242871284485, |
|
"learning_rate": 1.1697916666666667e-05, |
|
"loss": 0.0208, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 41.93032917865133, |
|
"grad_norm": 0.3543274402618408, |
|
"learning_rate": 1.1677083333333334e-05, |
|
"loss": 0.0202, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 41.93032917865133, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.1988096237182617, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8148, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.086, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.011, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 42.032598274209015, |
|
"grad_norm": 0.33442866802215576, |
|
"learning_rate": 1.165625e-05, |
|
"loss": 0.0198, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 42.134867369766695, |
|
"grad_norm": 0.3324146866798401, |
|
"learning_rate": 1.1635416666666667e-05, |
|
"loss": 0.0192, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 42.23713646532438, |
|
"grad_norm": 0.3776349723339081, |
|
"learning_rate": 1.1614583333333334e-05, |
|
"loss": 0.0191, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 42.33940556088207, |
|
"grad_norm": 0.3571579158306122, |
|
"learning_rate": 1.159375e-05, |
|
"loss": 0.0192, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 42.44167465643976, |
|
"grad_norm": 0.37327298521995544, |
|
"learning_rate": 1.1572916666666667e-05, |
|
"loss": 0.0192, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 42.543943751997446, |
|
"grad_norm": 0.3787640631198883, |
|
"learning_rate": 1.1552083333333334e-05, |
|
"loss": 0.0196, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 42.64621284755513, |
|
"grad_norm": 0.36109158396720886, |
|
"learning_rate": 1.153125e-05, |
|
"loss": 0.0193, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 42.748481943112814, |
|
"grad_norm": 0.34873998165130615, |
|
"learning_rate": 1.151041666666667e-05, |
|
"loss": 0.0194, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 42.8507510386705, |
|
"grad_norm": 0.393928587436676, |
|
"learning_rate": 1.1489583333333334e-05, |
|
"loss": 0.0196, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 42.95302013422819, |
|
"grad_norm": 0.3687518537044525, |
|
"learning_rate": 1.1468750000000003e-05, |
|
"loss": 0.0198, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 42.95302013422819, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.2081563472747803, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7395, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.434, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.054, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 43.05528922978588, |
|
"grad_norm": 0.3610316514968872, |
|
"learning_rate": 1.1447916666666668e-05, |
|
"loss": 0.0187, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 43.15755832534356, |
|
"grad_norm": 0.3438417911529541, |
|
"learning_rate": 1.1427083333333336e-05, |
|
"loss": 0.0181, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 43.259827420901246, |
|
"grad_norm": 0.33607029914855957, |
|
"learning_rate": 1.1406250000000001e-05, |
|
"loss": 0.018, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 43.362096516458934, |
|
"grad_norm": 0.3655332326889038, |
|
"learning_rate": 1.1385416666666666e-05, |
|
"loss": 0.018, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 43.46436561201662, |
|
"grad_norm": 0.3450184762477875, |
|
"learning_rate": 1.1364583333333335e-05, |
|
"loss": 0.0182, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 43.5666347075743, |
|
"grad_norm": 0.34371188282966614, |
|
"learning_rate": 1.134375e-05, |
|
"loss": 0.0185, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 43.66890380313199, |
|
"grad_norm": 0.3620694875717163, |
|
"learning_rate": 1.1322916666666668e-05, |
|
"loss": 0.0188, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 43.77117289868968, |
|
"grad_norm": 0.34563344717025757, |
|
"learning_rate": 1.1302083333333333e-05, |
|
"loss": 0.0185, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 43.873441994247365, |
|
"grad_norm": 0.3630271255970001, |
|
"learning_rate": 1.1281250000000001e-05, |
|
"loss": 0.0189, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 43.97571108980505, |
|
"grad_norm": 0.3588694930076599, |
|
"learning_rate": 1.1260416666666666e-05, |
|
"loss": 0.0192, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 43.97571108980505, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.21140456199646, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7918, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.192, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.024, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 44.07798018536273, |
|
"grad_norm": 0.3162562847137451, |
|
"learning_rate": 1.1239583333333335e-05, |
|
"loss": 0.0172, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 44.18024928092042, |
|
"grad_norm": 0.366394966840744, |
|
"learning_rate": 1.121875e-05, |
|
"loss": 0.017, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 44.28251837647811, |
|
"grad_norm": 0.35083746910095215, |
|
"learning_rate": 1.1197916666666668e-05, |
|
"loss": 0.0178, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 44.384787472035796, |
|
"grad_norm": 0.35160747170448303, |
|
"learning_rate": 1.1177083333333333e-05, |
|
"loss": 0.0174, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 44.48705656759348, |
|
"grad_norm": 0.35368478298187256, |
|
"learning_rate": 1.1156250000000002e-05, |
|
"loss": 0.0175, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 44.589325663151165, |
|
"grad_norm": 0.3595126271247864, |
|
"learning_rate": 1.1135416666666667e-05, |
|
"loss": 0.0176, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 44.69159475870885, |
|
"grad_norm": 0.34251803159713745, |
|
"learning_rate": 1.1114583333333335e-05, |
|
"loss": 0.0179, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 44.79386385426654, |
|
"grad_norm": 0.3488711416721344, |
|
"learning_rate": 1.109375e-05, |
|
"loss": 0.018, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 44.89613294982423, |
|
"grad_norm": 0.35304006934165955, |
|
"learning_rate": 1.1072916666666667e-05, |
|
"loss": 0.018, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 44.99840204538191, |
|
"grad_norm": 0.3306570053100586, |
|
"learning_rate": 1.1052083333333334e-05, |
|
"loss": 0.018, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 44.99840204538191, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.226471424102783, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7401, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.432, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.054, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 45.100671140939596, |
|
"grad_norm": 0.3414161801338196, |
|
"learning_rate": 1.103125e-05, |
|
"loss": 0.0163, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 45.202940236497284, |
|
"grad_norm": 0.34503790736198425, |
|
"learning_rate": 1.1010416666666667e-05, |
|
"loss": 0.0167, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 45.30520933205497, |
|
"grad_norm": 0.3578576147556305, |
|
"learning_rate": 1.0989583333333334e-05, |
|
"loss": 0.0165, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 45.40747842761266, |
|
"grad_norm": 0.34795865416526794, |
|
"learning_rate": 1.096875e-05, |
|
"loss": 0.0165, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 45.50974752317034, |
|
"grad_norm": 0.35079431533813477, |
|
"learning_rate": 1.0947916666666667e-05, |
|
"loss": 0.0168, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 45.61201661872803, |
|
"grad_norm": 0.30789047479629517, |
|
"learning_rate": 1.0927083333333334e-05, |
|
"loss": 0.0164, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 45.714285714285715, |
|
"grad_norm": 0.370721697807312, |
|
"learning_rate": 1.090625e-05, |
|
"loss": 0.0174, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 45.8165548098434, |
|
"grad_norm": 0.34695321321487427, |
|
"learning_rate": 1.0885416666666668e-05, |
|
"loss": 0.0171, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 45.918823905401084, |
|
"grad_norm": 0.3586573600769043, |
|
"learning_rate": 1.0864583333333334e-05, |
|
"loss": 0.0171, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 46.02109300095877, |
|
"grad_norm": 0.3352271318435669, |
|
"learning_rate": 1.0843750000000001e-05, |
|
"loss": 0.0172, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 46.02109300095877, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.2414653301239014, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.694, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.646, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.081, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 46.12336209651646, |
|
"grad_norm": 0.3106061518192291, |
|
"learning_rate": 1.0822916666666668e-05, |
|
"loss": 0.0154, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 46.22563119207415, |
|
"grad_norm": 0.3251485824584961, |
|
"learning_rate": 1.0802083333333334e-05, |
|
"loss": 0.0159, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 46.327900287631834, |
|
"grad_norm": 0.3339298367500305, |
|
"learning_rate": 1.0781250000000001e-05, |
|
"loss": 0.0157, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 46.430169383189515, |
|
"grad_norm": 0.33735695481300354, |
|
"learning_rate": 1.0760416666666666e-05, |
|
"loss": 0.0161, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 46.5324384787472, |
|
"grad_norm": 0.3049178719520569, |
|
"learning_rate": 1.0739583333333335e-05, |
|
"loss": 0.0162, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 46.63470757430489, |
|
"grad_norm": 0.3524739742279053, |
|
"learning_rate": 1.071875e-05, |
|
"loss": 0.016, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 46.73697666986258, |
|
"grad_norm": 0.3321375250816345, |
|
"learning_rate": 1.0697916666666668e-05, |
|
"loss": 0.016, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 46.83924576542026, |
|
"grad_norm": 0.3346993029117584, |
|
"learning_rate": 1.0677083333333333e-05, |
|
"loss": 0.0165, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 46.94151486097795, |
|
"grad_norm": 0.3200349807739258, |
|
"learning_rate": 1.0656250000000002e-05, |
|
"loss": 0.0162, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 47.043783956535634, |
|
"grad_norm": 0.3281605541706085, |
|
"learning_rate": 1.0635416666666667e-05, |
|
"loss": 0.0158, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 47.043783956535634, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.256087303161621, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.744, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.413, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.052, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 47.14605305209332, |
|
"grad_norm": 0.35677453875541687, |
|
"learning_rate": 1.0614583333333335e-05, |
|
"loss": 0.0153, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 47.24832214765101, |
|
"grad_norm": 0.2933562099933624, |
|
"learning_rate": 1.059375e-05, |
|
"loss": 0.0151, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 47.35059124320869, |
|
"grad_norm": 0.3412613272666931, |
|
"learning_rate": 1.0572916666666668e-05, |
|
"loss": 0.0154, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 47.45286033876638, |
|
"grad_norm": 0.32954832911491394, |
|
"learning_rate": 1.0552083333333333e-05, |
|
"loss": 0.0154, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 47.555129434324066, |
|
"grad_norm": 0.3282068073749542, |
|
"learning_rate": 1.0531250000000002e-05, |
|
"loss": 0.0152, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 47.65739852988175, |
|
"grad_norm": 0.33588287234306335, |
|
"learning_rate": 1.0510416666666667e-05, |
|
"loss": 0.0156, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 47.75966762543944, |
|
"grad_norm": 0.33600950241088867, |
|
"learning_rate": 1.0489583333333335e-05, |
|
"loss": 0.0151, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 47.86193672099712, |
|
"grad_norm": 0.335001140832901, |
|
"learning_rate": 1.046875e-05, |
|
"loss": 0.0156, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 47.96420581655481, |
|
"grad_norm": 0.33093252778053284, |
|
"learning_rate": 1.0447916666666667e-05, |
|
"loss": 0.0158, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 48.0664749121125, |
|
"grad_norm": 0.32087039947509766, |
|
"learning_rate": 1.0427083333333334e-05, |
|
"loss": 0.0149, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 48.0664749121125, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.274885654449463, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8036, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.137, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.017, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 48.168744007670185, |
|
"grad_norm": 0.31192752718925476, |
|
"learning_rate": 1.040625e-05, |
|
"loss": 0.0145, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 48.271013103227865, |
|
"grad_norm": 0.2866131067276001, |
|
"learning_rate": 1.0385416666666667e-05, |
|
"loss": 0.0144, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 48.37328219878555, |
|
"grad_norm": 0.34419262409210205, |
|
"learning_rate": 1.0364583333333334e-05, |
|
"loss": 0.0145, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 48.47555129434324, |
|
"grad_norm": 0.33133459091186523, |
|
"learning_rate": 1.034375e-05, |
|
"loss": 0.0143, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 48.57782038990093, |
|
"grad_norm": 0.3273964524269104, |
|
"learning_rate": 1.0322916666666667e-05, |
|
"loss": 0.0146, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 48.680089485458616, |
|
"grad_norm": 0.3400149643421173, |
|
"learning_rate": 1.0302083333333334e-05, |
|
"loss": 0.0149, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 48.7823585810163, |
|
"grad_norm": 0.3198685348033905, |
|
"learning_rate": 1.0281250000000001e-05, |
|
"loss": 0.0148, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 48.884627676573984, |
|
"grad_norm": 0.3375503420829773, |
|
"learning_rate": 1.0260416666666668e-05, |
|
"loss": 0.0153, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 48.98689677213167, |
|
"grad_norm": 0.3139088749885559, |
|
"learning_rate": 1.0239583333333334e-05, |
|
"loss": 0.0152, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 49.08916586768936, |
|
"grad_norm": 0.32369062304496765, |
|
"learning_rate": 1.0218750000000001e-05, |
|
"loss": 0.0143, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 49.08916586768936, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.2769861221313477, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7984, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.161, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.02, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 49.19143496324704, |
|
"grad_norm": 0.3071221113204956, |
|
"learning_rate": 1.0197916666666668e-05, |
|
"loss": 0.0136, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 49.29370405880473, |
|
"grad_norm": 0.31288015842437744, |
|
"learning_rate": 1.0177083333333335e-05, |
|
"loss": 0.0139, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 49.395973154362416, |
|
"grad_norm": 0.31299805641174316, |
|
"learning_rate": 1.0156250000000001e-05, |
|
"loss": 0.0137, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 49.498242249920104, |
|
"grad_norm": 0.3377828001976013, |
|
"learning_rate": 1.0135416666666666e-05, |
|
"loss": 0.0141, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 49.60051134547779, |
|
"grad_norm": 0.31973740458488464, |
|
"learning_rate": 1.0114583333333335e-05, |
|
"loss": 0.0144, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 49.70278044103547, |
|
"grad_norm": 0.3018786907196045, |
|
"learning_rate": 1.009375e-05, |
|
"loss": 0.0144, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 49.80504953659316, |
|
"grad_norm": 0.3308105766773224, |
|
"learning_rate": 1.0072916666666668e-05, |
|
"loss": 0.0146, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 49.90731863215085, |
|
"grad_norm": 0.3090561032295227, |
|
"learning_rate": 1.0052083333333333e-05, |
|
"loss": 0.0145, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 50.009587727708535, |
|
"grad_norm": 0.33793970942497253, |
|
"learning_rate": 1.0031250000000002e-05, |
|
"loss": 0.0145, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 50.11185682326622, |
|
"grad_norm": 0.3269507884979248, |
|
"learning_rate": 1.0010416666666667e-05, |
|
"loss": 0.0133, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 50.11185682326622, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.2938148975372314, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7798, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.247, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.031, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 50.2141259188239, |
|
"grad_norm": 0.30052992701530457, |
|
"learning_rate": 9.989583333333333e-06, |
|
"loss": 0.0136, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 50.31639501438159, |
|
"grad_norm": 0.2977409362792969, |
|
"learning_rate": 9.96875e-06, |
|
"loss": 0.0134, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 50.41866410993928, |
|
"grad_norm": 0.3022303283214569, |
|
"learning_rate": 9.947916666666667e-06, |
|
"loss": 0.0136, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 50.520933205496966, |
|
"grad_norm": 0.2963425815105438, |
|
"learning_rate": 9.927083333333334e-06, |
|
"loss": 0.0137, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 50.62320230105465, |
|
"grad_norm": 0.2862411141395569, |
|
"learning_rate": 9.90625e-06, |
|
"loss": 0.0134, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 50.725471396612335, |
|
"grad_norm": 0.32539355754852295, |
|
"learning_rate": 9.885416666666667e-06, |
|
"loss": 0.0136, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 50.82774049217002, |
|
"grad_norm": 0.30780264735221863, |
|
"learning_rate": 9.864583333333334e-06, |
|
"loss": 0.0138, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 50.93000958772771, |
|
"grad_norm": 0.3183571994304657, |
|
"learning_rate": 9.84375e-06, |
|
"loss": 0.0136, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 51.0322786832854, |
|
"grad_norm": 0.28671231865882874, |
|
"learning_rate": 9.822916666666667e-06, |
|
"loss": 0.0136, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 51.13454777884308, |
|
"grad_norm": 0.2912745177745819, |
|
"learning_rate": 9.802083333333334e-06, |
|
"loss": 0.0126, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 51.13454777884308, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.312565326690674, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8065, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.124, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.015, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 51.236816874400766, |
|
"grad_norm": 0.29751163721084595, |
|
"learning_rate": 9.78125e-06, |
|
"loss": 0.013, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 51.339085969958454, |
|
"grad_norm": 0.30533960461616516, |
|
"learning_rate": 9.760416666666667e-06, |
|
"loss": 0.013, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 51.44135506551614, |
|
"grad_norm": 0.3039548695087433, |
|
"learning_rate": 9.739583333333334e-06, |
|
"loss": 0.0129, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 51.54362416107382, |
|
"grad_norm": 0.295386403799057, |
|
"learning_rate": 9.71875e-06, |
|
"loss": 0.0132, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 51.64589325663151, |
|
"grad_norm": 0.289328932762146, |
|
"learning_rate": 9.697916666666667e-06, |
|
"loss": 0.0133, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 51.7481623521892, |
|
"grad_norm": 0.3070317804813385, |
|
"learning_rate": 9.677083333333334e-06, |
|
"loss": 0.0131, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 51.850431447746885, |
|
"grad_norm": 0.28922444581985474, |
|
"learning_rate": 9.656250000000001e-06, |
|
"loss": 0.0131, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 51.95270054330457, |
|
"grad_norm": 0.3166629672050476, |
|
"learning_rate": 9.635416666666668e-06, |
|
"loss": 0.0132, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 52.054969638862254, |
|
"grad_norm": 0.3101074993610382, |
|
"learning_rate": 9.614583333333334e-06, |
|
"loss": 0.0126, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 52.15723873441994, |
|
"grad_norm": 0.318968266248703, |
|
"learning_rate": 9.593750000000001e-06, |
|
"loss": 0.0127, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 52.15723873441994, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.3045780658721924, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8544, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.903, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.988, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 52.25950782997763, |
|
"grad_norm": 0.30769291520118713, |
|
"learning_rate": 9.572916666666668e-06, |
|
"loss": 0.0122, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 52.36177692553532, |
|
"grad_norm": 0.28409647941589355, |
|
"learning_rate": 9.552083333333335e-06, |
|
"loss": 0.0124, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 52.464046021093004, |
|
"grad_norm": 0.3307039737701416, |
|
"learning_rate": 9.531250000000001e-06, |
|
"loss": 0.0128, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 52.566315116650685, |
|
"grad_norm": 0.292473703622818, |
|
"learning_rate": 9.510416666666668e-06, |
|
"loss": 0.0125, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 52.66858421220837, |
|
"grad_norm": 0.3042745888233185, |
|
"learning_rate": 9.489583333333335e-06, |
|
"loss": 0.0126, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 52.77085330776606, |
|
"grad_norm": 0.29861128330230713, |
|
"learning_rate": 9.468750000000001e-06, |
|
"loss": 0.0128, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 52.87312240332375, |
|
"grad_norm": 0.284404993057251, |
|
"learning_rate": 9.447916666666668e-06, |
|
"loss": 0.0129, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 52.97539149888143, |
|
"grad_norm": 0.30096396803855896, |
|
"learning_rate": 9.427083333333335e-06, |
|
"loss": 0.0131, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 53.077660594439116, |
|
"grad_norm": 0.3048815131187439, |
|
"learning_rate": 9.406250000000002e-06, |
|
"loss": 0.012, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 53.179929689996804, |
|
"grad_norm": 0.2931344509124756, |
|
"learning_rate": 9.385416666666668e-06, |
|
"loss": 0.0118, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 53.179929689996804, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.321913480758667, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.8669, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 95.845, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 11.981, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 53.28219878555449, |
|
"grad_norm": 0.29786059260368347, |
|
"learning_rate": 9.364583333333333e-06, |
|
"loss": 0.0116, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 53.38446788111218, |
|
"grad_norm": 0.2869616150856018, |
|
"learning_rate": 9.34375e-06, |
|
"loss": 0.0119, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 53.48673697666986, |
|
"grad_norm": 0.3017300069332123, |
|
"learning_rate": 9.322916666666667e-06, |
|
"loss": 0.0121, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 53.58900607222755, |
|
"grad_norm": 0.3326238691806793, |
|
"learning_rate": 9.302083333333334e-06, |
|
"loss": 0.0123, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 53.691275167785236, |
|
"grad_norm": 0.30569109320640564, |
|
"learning_rate": 9.28125e-06, |
|
"loss": 0.0125, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 53.79354426334292, |
|
"grad_norm": 0.3061717748641968, |
|
"learning_rate": 9.260416666666667e-06, |
|
"loss": 0.0123, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 53.895813358900604, |
|
"grad_norm": 0.281955748796463, |
|
"learning_rate": 9.239583333333334e-06, |
|
"loss": 0.0121, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 53.99808245445829, |
|
"grad_norm": 0.2920975983142853, |
|
"learning_rate": 9.21875e-06, |
|
"loss": 0.0124, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 54.10035155001598, |
|
"grad_norm": 0.31395605206489563, |
|
"learning_rate": 9.197916666666667e-06, |
|
"loss": 0.0116, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 54.20262064557367, |
|
"grad_norm": 0.3078666627407074, |
|
"learning_rate": 9.177083333333334e-06, |
|
"loss": 0.0117, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 54.20262064557367, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.3260598182678223, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7898, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.201, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.025, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 54.304889741131355, |
|
"grad_norm": 0.2747853994369507, |
|
"learning_rate": 9.15625e-06, |
|
"loss": 0.0113, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 54.407158836689035, |
|
"grad_norm": 0.3068675398826599, |
|
"learning_rate": 9.135416666666667e-06, |
|
"loss": 0.0115, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 54.50942793224672, |
|
"grad_norm": 0.29503345489501953, |
|
"learning_rate": 9.114583333333334e-06, |
|
"loss": 0.0116, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 54.61169702780441, |
|
"grad_norm": 0.27636656165122986, |
|
"learning_rate": 9.09375e-06, |
|
"loss": 0.0114, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 54.7139661233621, |
|
"grad_norm": 0.313203901052475, |
|
"learning_rate": 9.072916666666668e-06, |
|
"loss": 0.012, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 54.816235218919786, |
|
"grad_norm": 0.30984997749328613, |
|
"learning_rate": 9.052083333333334e-06, |
|
"loss": 0.012, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 54.91850431447747, |
|
"grad_norm": 0.27004456520080566, |
|
"learning_rate": 9.031250000000001e-06, |
|
"loss": 0.0118, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 55.020773410035154, |
|
"grad_norm": 0.278777152299881, |
|
"learning_rate": 9.010416666666668e-06, |
|
"loss": 0.0117, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 55.12304250559284, |
|
"grad_norm": 0.3070172965526581, |
|
"learning_rate": 8.989583333333334e-06, |
|
"loss": 0.011, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 55.22531160115053, |
|
"grad_norm": 0.2756708562374115, |
|
"learning_rate": 8.968750000000001e-06, |
|
"loss": 0.011, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 55.22531160115053, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.33424711227417, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7849, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.224, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.028, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 55.32758069670821, |
|
"grad_norm": 0.28067439794540405, |
|
"learning_rate": 8.947916666666668e-06, |
|
"loss": 0.011, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 55.4298497922659, |
|
"grad_norm": 0.2702157199382782, |
|
"learning_rate": 8.927083333333335e-06, |
|
"loss": 0.0112, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 55.532118887823586, |
|
"grad_norm": 0.30056026577949524, |
|
"learning_rate": 8.906250000000001e-06, |
|
"loss": 0.0115, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 55.634387983381274, |
|
"grad_norm": 0.29118791222572327, |
|
"learning_rate": 8.885416666666668e-06, |
|
"loss": 0.0113, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 55.73665707893896, |
|
"grad_norm": 0.3045700490474701, |
|
"learning_rate": 8.864583333333335e-06, |
|
"loss": 0.0115, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 55.83892617449664, |
|
"grad_norm": 0.28140169382095337, |
|
"learning_rate": 8.843750000000002e-06, |
|
"loss": 0.0113, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 55.94119527005433, |
|
"grad_norm": 0.2764737010002136, |
|
"learning_rate": 8.822916666666668e-06, |
|
"loss": 0.0118, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 56.04346436561202, |
|
"grad_norm": 0.28735798597335815, |
|
"learning_rate": 8.802083333333335e-06, |
|
"loss": 0.0114, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 56.145733461169705, |
|
"grad_norm": 0.28290146589279175, |
|
"learning_rate": 8.781250000000002e-06, |
|
"loss": 0.0107, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 56.248002556727386, |
|
"grad_norm": 0.2917637526988983, |
|
"learning_rate": 8.760416666666668e-06, |
|
"loss": 0.0108, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 56.248002556727386, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.3639163970947266, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7163, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.542, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.068, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 56.35027165228507, |
|
"grad_norm": 0.31157800555229187, |
|
"learning_rate": 8.739583333333333e-06, |
|
"loss": 0.0109, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 56.45254074784276, |
|
"grad_norm": 0.26494109630584717, |
|
"learning_rate": 8.71875e-06, |
|
"loss": 0.0108, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 56.55480984340045, |
|
"grad_norm": 0.2839730381965637, |
|
"learning_rate": 8.697916666666667e-06, |
|
"loss": 0.011, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 56.657078938958136, |
|
"grad_norm": 0.27168846130371094, |
|
"learning_rate": 8.677083333333334e-06, |
|
"loss": 0.0111, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 56.75934803451582, |
|
"grad_norm": 0.2758902311325073, |
|
"learning_rate": 8.65625e-06, |
|
"loss": 0.0109, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 56.861617130073505, |
|
"grad_norm": 0.29986515641212463, |
|
"learning_rate": 8.635416666666667e-06, |
|
"loss": 0.0113, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 56.96388622563119, |
|
"grad_norm": 0.2895634174346924, |
|
"learning_rate": 8.614583333333334e-06, |
|
"loss": 0.0112, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 57.06615532118888, |
|
"grad_norm": 0.26848530769348145, |
|
"learning_rate": 8.59375e-06, |
|
"loss": 0.0105, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 57.16842441674657, |
|
"grad_norm": 0.2801918685436249, |
|
"learning_rate": 8.572916666666667e-06, |
|
"loss": 0.0106, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 57.27069351230425, |
|
"grad_norm": 0.2772628366947174, |
|
"learning_rate": 8.552083333333334e-06, |
|
"loss": 0.0105, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 57.27069351230425, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.3490209579467773, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7134, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.556, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.069, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 57.372962607861936, |
|
"grad_norm": 0.26178139448165894, |
|
"learning_rate": 8.53125e-06, |
|
"loss": 0.0103, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 57.475231703419624, |
|
"grad_norm": 0.27127423882484436, |
|
"learning_rate": 8.510416666666667e-06, |
|
"loss": 0.0105, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 57.57750079897731, |
|
"grad_norm": 0.2728956639766693, |
|
"learning_rate": 8.489583333333334e-06, |
|
"loss": 0.0104, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 57.67976989453499, |
|
"grad_norm": 0.26934826374053955, |
|
"learning_rate": 8.468750000000001e-06, |
|
"loss": 0.0107, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 57.78203899009268, |
|
"grad_norm": 0.30065199732780457, |
|
"learning_rate": 8.447916666666668e-06, |
|
"loss": 0.0108, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 57.88430808565037, |
|
"grad_norm": 0.29894405603408813, |
|
"learning_rate": 8.427083333333334e-06, |
|
"loss": 0.0104, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 57.986577181208055, |
|
"grad_norm": 0.26421451568603516, |
|
"learning_rate": 8.406250000000001e-06, |
|
"loss": 0.0105, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 58.08884627676574, |
|
"grad_norm": 0.27239277958869934, |
|
"learning_rate": 8.385416666666668e-06, |
|
"loss": 0.0099, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 58.191115372323424, |
|
"grad_norm": 0.27293631434440613, |
|
"learning_rate": 8.364583333333334e-06, |
|
"loss": 0.01, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 58.29338446788111, |
|
"grad_norm": 0.251788467168808, |
|
"learning_rate": 8.343750000000001e-06, |
|
"loss": 0.0103, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 58.29338446788111, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.360133647918701, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7658, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.312, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.039, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 58.3956535634388, |
|
"grad_norm": 0.2742460072040558, |
|
"learning_rate": 8.322916666666668e-06, |
|
"loss": 0.01, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 58.49792265899649, |
|
"grad_norm": 0.2849868834018707, |
|
"learning_rate": 8.302083333333335e-06, |
|
"loss": 0.0101, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 58.60019175455417, |
|
"grad_norm": 0.2823048233985901, |
|
"learning_rate": 8.281250000000001e-06, |
|
"loss": 0.0101, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 58.702460850111855, |
|
"grad_norm": 0.2816413342952728, |
|
"learning_rate": 8.260416666666668e-06, |
|
"loss": 0.0102, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 58.80472994566954, |
|
"grad_norm": 0.3086257576942444, |
|
"learning_rate": 8.239583333333335e-06, |
|
"loss": 0.0102, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 58.90699904122723, |
|
"grad_norm": 0.31176137924194336, |
|
"learning_rate": 8.218750000000002e-06, |
|
"loss": 0.0102, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 59.00926813678492, |
|
"grad_norm": 0.30020809173583984, |
|
"learning_rate": 8.197916666666668e-06, |
|
"loss": 0.0103, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 59.1115372323426, |
|
"grad_norm": 0.27167460322380066, |
|
"learning_rate": 8.177083333333335e-06, |
|
"loss": 0.0096, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 59.213806327900286, |
|
"grad_norm": 0.29027628898620605, |
|
"learning_rate": 8.156250000000002e-06, |
|
"loss": 0.0099, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 59.316075423457974, |
|
"grad_norm": 0.2502238154411316, |
|
"learning_rate": 8.135416666666668e-06, |
|
"loss": 0.0097, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 59.316075423457974, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_loss": 2.3818840980529785, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_runtime": 20.7922, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_samples_per_second": 96.19, |
|
"eval_silk-road/alpaca-data-gpt4-chinese_steps_per_second": 12.024, |
|
"step": 5800 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 200, |
|
"total_flos": 7.772644061346693e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|