{ "best_metric": 0.8728268768516966, "best_model_checkpoint": "stool-condition-classification/checkpoint-500", "epoch": 10.0, "eval_steps": 100, "global_step": 1020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 7.584359169006348, "learning_rate": 0.00019803921568627454, "loss": 0.6059, "step": 10 }, { "epoch": 0.2, "grad_norm": 3.984663486480713, "learning_rate": 0.000196078431372549, "loss": 0.69, "step": 20 }, { "epoch": 0.29, "grad_norm": 5.750972270965576, "learning_rate": 0.00019411764705882354, "loss": 0.652, "step": 30 }, { "epoch": 0.39, "grad_norm": 5.540163993835449, "learning_rate": 0.00019215686274509807, "loss": 0.567, "step": 40 }, { "epoch": 0.49, "grad_norm": 12.890495300292969, "learning_rate": 0.00019019607843137254, "loss": 0.5255, "step": 50 }, { "epoch": 0.59, "grad_norm": 11.914591789245605, "learning_rate": 0.00018823529411764707, "loss": 0.5776, "step": 60 }, { "epoch": 0.69, "grad_norm": 8.995306015014648, "learning_rate": 0.00018627450980392157, "loss": 0.4868, "step": 70 }, { "epoch": 0.78, "grad_norm": 7.732036590576172, "learning_rate": 0.00018431372549019607, "loss": 0.6819, "step": 80 }, { "epoch": 0.88, "grad_norm": 9.876835823059082, "learning_rate": 0.0001823529411764706, "loss": 0.5809, "step": 90 }, { "epoch": 0.98, "grad_norm": 6.603707790374756, "learning_rate": 0.0001803921568627451, "loss": 0.5076, "step": 100 }, { "epoch": 0.98, "eval_accuracy": 0.7730870712401056, "eval_auroc": 0.8537928335848846, "eval_f1": 0.6906474820143884, "eval_loss": 0.5360854268074036, "eval_model_selection": 0.559226340209067, "eval_npv": 0.7060931899641577, "eval_ppv": 0.96, "eval_runtime": 21.3071, "eval_samples_per_second": 17.787, "eval_sensitivity": 0.5393258426966292, "eval_specificty": 0.9800995024875622, "eval_steps_per_second": 2.253, "step": 100 }, { "epoch": 1.08, "grad_norm": 2.871717929840088, "learning_rate": 0.00017843137254901963, "loss": 0.6088, "step": 110 }, { "epoch": 1.18, "grad_norm": 7.064380645751953, "learning_rate": 0.00017647058823529413, "loss": 0.5249, "step": 120 }, { "epoch": 1.27, "grad_norm": 4.083934307098389, "learning_rate": 0.00017450980392156863, "loss": 0.5367, "step": 130 }, { "epoch": 1.37, "grad_norm": 1.9436535835266113, "learning_rate": 0.00017254901960784316, "loss": 0.4674, "step": 140 }, { "epoch": 1.47, "grad_norm": 2.449876308441162, "learning_rate": 0.00017058823529411766, "loss": 0.5506, "step": 150 }, { "epoch": 1.57, "grad_norm": 2.2175521850585938, "learning_rate": 0.00016862745098039216, "loss": 0.4266, "step": 160 }, { "epoch": 1.67, "grad_norm": 1.2798587083816528, "learning_rate": 0.0001666666666666667, "loss": 0.483, "step": 170 }, { "epoch": 1.76, "grad_norm": 4.6635847091674805, "learning_rate": 0.0001647058823529412, "loss": 0.5022, "step": 180 }, { "epoch": 1.86, "grad_norm": 10.006440162658691, "learning_rate": 0.0001627450980392157, "loss": 0.5173, "step": 190 }, { "epoch": 1.96, "grad_norm": 43.053401947021484, "learning_rate": 0.00016078431372549022, "loss": 0.4086, "step": 200 }, { "epoch": 1.96, "eval_accuracy": 0.783641160949868, "eval_auroc": 0.8727989267147409, "eval_f1": 0.722972972972973, "eval_loss": 0.48566874861717224, "eval_model_selection": 0.655849963664822, "eval_npv": 0.7279693486590039, "eval_ppv": 0.9067796610169492, "eval_runtime": 20.6546, "eval_samples_per_second": 18.349, "eval_sensitivity": 0.601123595505618, "eval_specificty": 0.945273631840796, "eval_steps_per_second": 2.324, "step": 200 }, { "epoch": 2.06, "grad_norm": 6.759799957275391, "learning_rate": 0.0001588235294117647, "loss": 0.4347, "step": 210 }, { "epoch": 2.16, "grad_norm": 1.868139386177063, "learning_rate": 0.00015686274509803922, "loss": 0.4856, "step": 220 }, { "epoch": 2.25, "grad_norm": 6.924500465393066, "learning_rate": 0.00015490196078431375, "loss": 0.6559, "step": 230 }, { "epoch": 2.35, "grad_norm": 3.674975872039795, "learning_rate": 0.00015294117647058822, "loss": 0.4086, "step": 240 }, { "epoch": 2.45, "grad_norm": 3.160930633544922, "learning_rate": 0.00015098039215686275, "loss": 0.4172, "step": 250 }, { "epoch": 2.55, "grad_norm": 5.0961785316467285, "learning_rate": 0.00014901960784313728, "loss": 0.3948, "step": 260 }, { "epoch": 2.65, "grad_norm": 3.1396780014038086, "learning_rate": 0.00014705882352941178, "loss": 0.3086, "step": 270 }, { "epoch": 2.75, "grad_norm": 8.720426559448242, "learning_rate": 0.00014509803921568628, "loss": 0.5149, "step": 280 }, { "epoch": 2.84, "grad_norm": 2.7797322273254395, "learning_rate": 0.00014313725490196078, "loss": 0.6524, "step": 290 }, { "epoch": 2.94, "grad_norm": 8.662543296813965, "learning_rate": 0.0001411764705882353, "loss": 0.5208, "step": 300 }, { "epoch": 2.94, "eval_accuracy": 0.7598944591029023, "eval_auroc": 0.8058862988428643, "eval_f1": 0.7055016181229774, "eval_loss": 0.5108699798583984, "eval_model_selection": 0.7218122868802057, "eval_npv": 0.7217741935483871, "eval_ppv": 0.8320610687022901, "eval_runtime": 20.396, "eval_samples_per_second": 18.582, "eval_sensitivity": 0.6123595505617978, "eval_specificty": 0.8905472636815921, "eval_steps_per_second": 2.353, "step": 300 }, { "epoch": 3.04, "grad_norm": 5.801065444946289, "learning_rate": 0.0001392156862745098, "loss": 0.4872, "step": 310 }, { "epoch": 3.14, "grad_norm": 4.54748010635376, "learning_rate": 0.0001372549019607843, "loss": 0.4804, "step": 320 }, { "epoch": 3.24, "grad_norm": 6.328420639038086, "learning_rate": 0.00013529411764705884, "loss": 0.5523, "step": 330 }, { "epoch": 3.33, "grad_norm": 2.860734462738037, "learning_rate": 0.00013333333333333334, "loss": 0.4061, "step": 340 }, { "epoch": 3.43, "grad_norm": 5.824751377105713, "learning_rate": 0.00013137254901960784, "loss": 0.4895, "step": 350 }, { "epoch": 3.53, "grad_norm": 1.802435278892517, "learning_rate": 0.00012941176470588237, "loss": 0.4011, "step": 360 }, { "epoch": 3.63, "grad_norm": 2.1423304080963135, "learning_rate": 0.00012745098039215687, "loss": 0.3881, "step": 370 }, { "epoch": 3.73, "grad_norm": 2.6464078426361084, "learning_rate": 0.00012549019607843137, "loss": 0.4371, "step": 380 }, { "epoch": 3.82, "grad_norm": 1.5666183233261108, "learning_rate": 0.0001235294117647059, "loss": 0.4256, "step": 390 }, { "epoch": 3.92, "grad_norm": 7.859738349914551, "learning_rate": 0.00012156862745098039, "loss": 0.474, "step": 400 }, { "epoch": 3.92, "eval_accuracy": 0.7994722955145118, "eval_auroc": 0.8600536642629548, "eval_f1": 0.7432432432432432, "eval_loss": 0.5211557149887085, "eval_model_selection": 0.6577785231147633, "eval_npv": 0.7394636015325671, "eval_ppv": 0.9322033898305084, "eval_runtime": 20.1111, "eval_samples_per_second": 18.845, "eval_sensitivity": 0.6179775280898876, "eval_specificty": 0.9601990049751243, "eval_steps_per_second": 2.387, "step": 400 }, { "epoch": 4.02, "grad_norm": 5.231843948364258, "learning_rate": 0.0001196078431372549, "loss": 0.3969, "step": 410 }, { "epoch": 4.12, "grad_norm": 1.5469104051589966, "learning_rate": 0.00011764705882352942, "loss": 0.4781, "step": 420 }, { "epoch": 4.22, "grad_norm": 5.2077765464782715, "learning_rate": 0.00011568627450980394, "loss": 0.4261, "step": 430 }, { "epoch": 4.31, "grad_norm": 0.9990509748458862, "learning_rate": 0.00011372549019607843, "loss": 0.3773, "step": 440 }, { "epoch": 4.41, "grad_norm": 5.277149677276611, "learning_rate": 0.00011176470588235294, "loss": 0.4551, "step": 450 }, { "epoch": 4.51, "grad_norm": 1.3224270343780518, "learning_rate": 0.00010980392156862746, "loss": 0.3533, "step": 460 }, { "epoch": 4.61, "grad_norm": 2.360970973968506, "learning_rate": 0.00010784313725490196, "loss": 0.4317, "step": 470 }, { "epoch": 4.71, "grad_norm": 2.176283836364746, "learning_rate": 0.00010588235294117647, "loss": 0.4718, "step": 480 }, { "epoch": 4.8, "grad_norm": 1.7329970598220825, "learning_rate": 0.00010392156862745099, "loss": 0.426, "step": 490 }, { "epoch": 4.9, "grad_norm": 7.07577657699585, "learning_rate": 0.00010196078431372549, "loss": 0.4285, "step": 500 }, { "epoch": 4.9, "eval_accuracy": 0.7757255936675461, "eval_auroc": 0.8728268768516966, "eval_f1": 0.7578347578347578, "eval_loss": 0.4510786235332489, "eval_model_selection": 0.9461959863603332, "eval_npv": 0.7815533980582524, "eval_ppv": 0.7687861271676301, "eval_runtime": 20.6194, "eval_samples_per_second": 18.381, "eval_sensitivity": 0.7471910112359551, "eval_specificty": 0.8009950248756219, "eval_steps_per_second": 2.328, "step": 500 }, { "epoch": 5.0, "grad_norm": 6.841150760650635, "learning_rate": 0.0001, "loss": 0.3839, "step": 510 }, { "epoch": 5.1, "grad_norm": 2.587709903717041, "learning_rate": 9.80392156862745e-05, "loss": 0.396, "step": 520 }, { "epoch": 5.2, "grad_norm": 2.148972272872925, "learning_rate": 9.607843137254903e-05, "loss": 0.3475, "step": 530 }, { "epoch": 5.29, "grad_norm": 6.0918803215026855, "learning_rate": 9.411764705882353e-05, "loss": 0.3433, "step": 540 }, { "epoch": 5.39, "grad_norm": 1.9835032224655151, "learning_rate": 9.215686274509804e-05, "loss": 0.3289, "step": 550 }, { "epoch": 5.49, "grad_norm": 1.912084698677063, "learning_rate": 9.019607843137255e-05, "loss": 0.3887, "step": 560 }, { "epoch": 5.59, "grad_norm": 2.5347495079040527, "learning_rate": 8.823529411764706e-05, "loss": 0.4516, "step": 570 }, { "epoch": 5.69, "grad_norm": 3.123342990875244, "learning_rate": 8.627450980392158e-05, "loss": 0.3973, "step": 580 }, { "epoch": 5.78, "grad_norm": 6.096330165863037, "learning_rate": 8.431372549019608e-05, "loss": 0.3895, "step": 590 }, { "epoch": 5.88, "grad_norm": 1.6143405437469482, "learning_rate": 8.23529411764706e-05, "loss": 0.3506, "step": 600 }, { "epoch": 5.88, "eval_accuracy": 0.8047493403693932, "eval_auroc": 0.8691095086365922, "eval_f1": 0.7658227848101267, "eval_loss": 0.47164368629455566, "eval_model_selection": 0.764352395326737, "eval_npv": 0.7634854771784232, "eval_ppv": 0.8768115942028986, "eval_runtime": 20.5548, "eval_samples_per_second": 18.438, "eval_sensitivity": 0.6797752808988764, "eval_specificty": 0.9154228855721394, "eval_steps_per_second": 2.335, "step": 600 }, { "epoch": 5.98, "grad_norm": 2.3068525791168213, "learning_rate": 8.039215686274511e-05, "loss": 0.3566, "step": 610 }, { "epoch": 6.08, "grad_norm": 3.027449369430542, "learning_rate": 7.843137254901961e-05, "loss": 0.3662, "step": 620 }, { "epoch": 6.18, "grad_norm": 3.0861737728118896, "learning_rate": 7.647058823529411e-05, "loss": 0.4272, "step": 630 }, { "epoch": 6.27, "grad_norm": 2.3498377799987793, "learning_rate": 7.450980392156864e-05, "loss": 0.2825, "step": 640 }, { "epoch": 6.37, "grad_norm": 2.3852789402008057, "learning_rate": 7.254901960784314e-05, "loss": 0.361, "step": 650 }, { "epoch": 6.47, "grad_norm": 2.655799627304077, "learning_rate": 7.058823529411765e-05, "loss": 0.291, "step": 660 }, { "epoch": 6.57, "grad_norm": 4.058769226074219, "learning_rate": 6.862745098039216e-05, "loss": 0.3045, "step": 670 }, { "epoch": 6.67, "grad_norm": 4.20621919631958, "learning_rate": 6.666666666666667e-05, "loss": 0.3931, "step": 680 }, { "epoch": 6.76, "grad_norm": 4.316690444946289, "learning_rate": 6.470588235294118e-05, "loss": 0.2858, "step": 690 }, { "epoch": 6.86, "grad_norm": 3.3390917778015137, "learning_rate": 6.274509803921569e-05, "loss": 0.4239, "step": 700 }, { "epoch": 6.86, "eval_accuracy": 0.8100263852242744, "eval_auroc": 0.8517245234501649, "eval_f1": 0.7677419354838709, "eval_loss": 0.504310131072998, "eval_model_selection": 0.7332159427581195, "eval_npv": 0.7611336032388664, "eval_ppv": 0.9015151515151515, "eval_runtime": 20.9343, "eval_samples_per_second": 18.104, "eval_sensitivity": 0.6685393258426966, "eval_specificty": 0.9353233830845771, "eval_steps_per_second": 2.293, "step": 700 }, { "epoch": 6.96, "grad_norm": 3.034562587738037, "learning_rate": 6.078431372549019e-05, "loss": 0.3033, "step": 710 }, { "epoch": 7.06, "grad_norm": 0.7752771973609924, "learning_rate": 5.882352941176471e-05, "loss": 0.2963, "step": 720 }, { "epoch": 7.16, "grad_norm": 1.2533752918243408, "learning_rate": 5.6862745098039215e-05, "loss": 0.2974, "step": 730 }, { "epoch": 7.25, "grad_norm": 1.748043179512024, "learning_rate": 5.490196078431373e-05, "loss": 0.3549, "step": 740 }, { "epoch": 7.35, "grad_norm": 2.4874391555786133, "learning_rate": 5.294117647058824e-05, "loss": 0.413, "step": 750 }, { "epoch": 7.45, "grad_norm": 3.017951726913452, "learning_rate": 5.0980392156862745e-05, "loss": 0.2809, "step": 760 }, { "epoch": 7.55, "grad_norm": 5.023625373840332, "learning_rate": 4.901960784313725e-05, "loss": 0.3603, "step": 770 }, { "epoch": 7.65, "grad_norm": 2.754984140396118, "learning_rate": 4.705882352941177e-05, "loss": 0.2837, "step": 780 }, { "epoch": 7.75, "grad_norm": 1.0377743244171143, "learning_rate": 4.5098039215686275e-05, "loss": 0.3169, "step": 790 }, { "epoch": 7.84, "grad_norm": 4.163206100463867, "learning_rate": 4.313725490196079e-05, "loss": 0.2447, "step": 800 }, { "epoch": 7.84, "eval_accuracy": 0.8073878627968337, "eval_auroc": 0.8591872100173291, "eval_f1": 0.7711598746081505, "eval_loss": 0.580413818359375, "eval_model_selection": 0.7805634747610264, "eval_npv": 0.7689075630252101, "eval_ppv": 0.8723404255319149, "eval_runtime": 20.1671, "eval_samples_per_second": 18.793, "eval_sensitivity": 0.6910112359550562, "eval_specificty": 0.9104477611940298, "eval_steps_per_second": 2.38, "step": 800 }, { "epoch": 7.94, "grad_norm": 3.30311918258667, "learning_rate": 4.11764705882353e-05, "loss": 0.3555, "step": 810 }, { "epoch": 8.04, "grad_norm": 4.967993259429932, "learning_rate": 3.9215686274509805e-05, "loss": 0.2679, "step": 820 }, { "epoch": 8.14, "grad_norm": 4.838258266448975, "learning_rate": 3.725490196078432e-05, "loss": 0.2416, "step": 830 }, { "epoch": 8.24, "grad_norm": 4.202385425567627, "learning_rate": 3.529411764705883e-05, "loss": 0.1758, "step": 840 }, { "epoch": 8.33, "grad_norm": 3.0037460327148438, "learning_rate": 3.3333333333333335e-05, "loss": 0.3192, "step": 850 }, { "epoch": 8.43, "grad_norm": 4.337649822235107, "learning_rate": 3.137254901960784e-05, "loss": 0.2669, "step": 860 }, { "epoch": 8.53, "grad_norm": 6.011264801025391, "learning_rate": 2.9411764705882354e-05, "loss": 0.228, "step": 870 }, { "epoch": 8.63, "grad_norm": 0.5027784109115601, "learning_rate": 2.7450980392156865e-05, "loss": 0.3023, "step": 880 }, { "epoch": 8.73, "grad_norm": 1.8822238445281982, "learning_rate": 2.5490196078431373e-05, "loss": 0.3109, "step": 890 }, { "epoch": 8.82, "grad_norm": 3.6861536502838135, "learning_rate": 2.3529411764705884e-05, "loss": 0.1739, "step": 900 }, { "epoch": 8.82, "eval_accuracy": 0.8073878627968337, "eval_auroc": 0.856224495500028, "eval_f1": 0.7767584097859327, "eval_loss": 0.6224877238273621, "eval_model_selection": 0.8229358823858236, "eval_npv": 0.7782608695652173, "eval_ppv": 0.8523489932885906, "eval_runtime": 20.4294, "eval_samples_per_second": 18.552, "eval_sensitivity": 0.7134831460674157, "eval_specificty": 0.8905472636815921, "eval_steps_per_second": 2.35, "step": 900 }, { "epoch": 8.92, "grad_norm": 5.0469770431518555, "learning_rate": 2.1568627450980395e-05, "loss": 0.4354, "step": 910 }, { "epoch": 9.02, "grad_norm": 4.443832874298096, "learning_rate": 1.9607843137254903e-05, "loss": 0.334, "step": 920 }, { "epoch": 9.12, "grad_norm": 2.0471107959747314, "learning_rate": 1.7647058823529414e-05, "loss": 0.2051, "step": 930 }, { "epoch": 9.22, "grad_norm": 3.442962169647217, "learning_rate": 1.568627450980392e-05, "loss": 0.2112, "step": 940 }, { "epoch": 9.31, "grad_norm": 2.31436824798584, "learning_rate": 1.3725490196078432e-05, "loss": 0.3268, "step": 950 }, { "epoch": 9.41, "grad_norm": 2.2342071533203125, "learning_rate": 1.1764705882352942e-05, "loss": 0.3107, "step": 960 }, { "epoch": 9.51, "grad_norm": 3.2926645278930664, "learning_rate": 9.803921568627451e-06, "loss": 0.2284, "step": 970 }, { "epoch": 9.61, "grad_norm": 2.639843702316284, "learning_rate": 7.84313725490196e-06, "loss": 0.2269, "step": 980 }, { "epoch": 9.71, "grad_norm": 2.7015938758850098, "learning_rate": 5.882352941176471e-06, "loss": 0.1987, "step": 990 }, { "epoch": 9.8, "grad_norm": 8.358205795288086, "learning_rate": 3.92156862745098e-06, "loss": 0.2888, "step": 1000 }, { "epoch": 9.8, "eval_accuracy": 0.8047493403693932, "eval_auroc": 0.856979149197831, "eval_f1": 0.783625730994152, "eval_loss": 0.5807113647460938, "eval_model_selection": 0.9020627201073285, "eval_npv": 0.7953488372093023, "eval_ppv": 0.8170731707317073, "eval_runtime": 20.5189, "eval_samples_per_second": 18.471, "eval_sensitivity": 0.7528089887640449, "eval_specificty": 0.8507462686567164, "eval_steps_per_second": 2.339, "step": 1000 }, { "epoch": 9.9, "grad_norm": 3.527721881866455, "learning_rate": 1.96078431372549e-06, "loss": 0.3236, "step": 1010 }, { "epoch": 10.0, "grad_norm": 6.274048805236816, "learning_rate": 0.0, "loss": 0.26, "step": 1020 }, { "epoch": 10.0, "step": 1020, "total_flos": 1.25924483123712e+18, "train_loss": 0.40091259911948557, "train_runtime": 1906.0636, "train_samples_per_second": 8.525, "train_steps_per_second": 0.535 } ], "logging_steps": 10, "max_steps": 1020, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "total_flos": 1.25924483123712e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }