{
  "best_metric": 0.8728268768516966,
  "best_model_checkpoint": "stool-condition-classification/checkpoint-500",
  "epoch": 10.0,
  "eval_steps": 100,
  "global_step": 1020,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 7.584359169006348,
      "learning_rate": 0.00019803921568627454,
      "loss": 0.6059,
      "step": 10
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.984663486480713,
      "learning_rate": 0.000196078431372549,
      "loss": 0.69,
      "step": 20
    },
    {
      "epoch": 0.29,
      "grad_norm": 5.750972270965576,
      "learning_rate": 0.00019411764705882354,
      "loss": 0.652,
      "step": 30
    },
    {
      "epoch": 0.39,
      "grad_norm": 5.540163993835449,
      "learning_rate": 0.00019215686274509807,
      "loss": 0.567,
      "step": 40
    },
    {
      "epoch": 0.49,
      "grad_norm": 12.890495300292969,
      "learning_rate": 0.00019019607843137254,
      "loss": 0.5255,
      "step": 50
    },
    {
      "epoch": 0.59,
      "grad_norm": 11.914591789245605,
      "learning_rate": 0.00018823529411764707,
      "loss": 0.5776,
      "step": 60
    },
    {
      "epoch": 0.69,
      "grad_norm": 8.995306015014648,
      "learning_rate": 0.00018627450980392157,
      "loss": 0.4868,
      "step": 70
    },
    {
      "epoch": 0.78,
      "grad_norm": 7.732036590576172,
      "learning_rate": 0.00018431372549019607,
      "loss": 0.6819,
      "step": 80
    },
    {
      "epoch": 0.88,
      "grad_norm": 9.876835823059082,
      "learning_rate": 0.0001823529411764706,
      "loss": 0.5809,
      "step": 90
    },
    {
      "epoch": 0.98,
      "grad_norm": 6.603707790374756,
      "learning_rate": 0.0001803921568627451,
      "loss": 0.5076,
      "step": 100
    },
    {
      "epoch": 0.98,
      "eval_accuracy": 0.7730870712401056,
      "eval_auroc": 0.8537928335848846,
      "eval_f1": 0.6906474820143884,
      "eval_loss": 0.5360854268074036,
      "eval_model_selection": 0.559226340209067,
      "eval_npv": 0.7060931899641577,
      "eval_ppv": 0.96,
      "eval_runtime": 21.3071,
      "eval_samples_per_second": 17.787,
      "eval_sensitivity": 0.5393258426966292,
      "eval_specificty": 0.9800995024875622,
      "eval_steps_per_second": 2.253,
      "step": 100
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.871717929840088,
      "learning_rate": 0.00017843137254901963,
      "loss": 0.6088,
      "step": 110
    },
    {
      "epoch": 1.18,
      "grad_norm": 7.064380645751953,
      "learning_rate": 0.00017647058823529413,
      "loss": 0.5249,
      "step": 120
    },
    {
      "epoch": 1.27,
      "grad_norm": 4.083934307098389,
      "learning_rate": 0.00017450980392156863,
      "loss": 0.5367,
      "step": 130
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9436535835266113,
      "learning_rate": 0.00017254901960784316,
      "loss": 0.4674,
      "step": 140
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.449876308441162,
      "learning_rate": 0.00017058823529411766,
      "loss": 0.5506,
      "step": 150
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2175521850585938,
      "learning_rate": 0.00016862745098039216,
      "loss": 0.4266,
      "step": 160
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.2798587083816528,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.483,
      "step": 170
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.6635847091674805,
      "learning_rate": 0.0001647058823529412,
      "loss": 0.5022,
      "step": 180
    },
    {
      "epoch": 1.86,
      "grad_norm": 10.006440162658691,
      "learning_rate": 0.0001627450980392157,
      "loss": 0.5173,
      "step": 190
    },
    {
      "epoch": 1.96,
      "grad_norm": 43.053401947021484,
      "learning_rate": 0.00016078431372549022,
      "loss": 0.4086,
      "step": 200
    },
    {
      "epoch": 1.96,
      "eval_accuracy": 0.783641160949868,
      "eval_auroc": 0.8727989267147409,
      "eval_f1": 0.722972972972973,
      "eval_loss": 0.48566874861717224,
      "eval_model_selection": 0.655849963664822,
      "eval_npv": 0.7279693486590039,
      "eval_ppv": 0.9067796610169492,
      "eval_runtime": 20.6546,
      "eval_samples_per_second": 18.349,
      "eval_sensitivity": 0.601123595505618,
      "eval_specificty": 0.945273631840796,
      "eval_steps_per_second": 2.324,
      "step": 200
    },
    {
      "epoch": 2.06,
      "grad_norm": 6.759799957275391,
      "learning_rate": 0.0001588235294117647,
      "loss": 0.4347,
      "step": 210
    },
    {
      "epoch": 2.16,
      "grad_norm": 1.868139386177063,
      "learning_rate": 0.00015686274509803922,
      "loss": 0.4856,
      "step": 220
    },
    {
      "epoch": 2.25,
      "grad_norm": 6.924500465393066,
      "learning_rate": 0.00015490196078431375,
      "loss": 0.6559,
      "step": 230
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.674975872039795,
      "learning_rate": 0.00015294117647058822,
      "loss": 0.4086,
      "step": 240
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.160930633544922,
      "learning_rate": 0.00015098039215686275,
      "loss": 0.4172,
      "step": 250
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.0961785316467285,
      "learning_rate": 0.00014901960784313728,
      "loss": 0.3948,
      "step": 260
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1396780014038086,
      "learning_rate": 0.00014705882352941178,
      "loss": 0.3086,
      "step": 270
    },
    {
      "epoch": 2.75,
      "grad_norm": 8.720426559448242,
      "learning_rate": 0.00014509803921568628,
      "loss": 0.5149,
      "step": 280
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7797322273254395,
      "learning_rate": 0.00014313725490196078,
      "loss": 0.6524,
      "step": 290
    },
    {
      "epoch": 2.94,
      "grad_norm": 8.662543296813965,
      "learning_rate": 0.0001411764705882353,
      "loss": 0.5208,
      "step": 300
    },
    {
      "epoch": 2.94,
      "eval_accuracy": 0.7598944591029023,
      "eval_auroc": 0.8058862988428643,
      "eval_f1": 0.7055016181229774,
      "eval_loss": 0.5108699798583984,
      "eval_model_selection": 0.7218122868802057,
      "eval_npv": 0.7217741935483871,
      "eval_ppv": 0.8320610687022901,
      "eval_runtime": 20.396,
      "eval_samples_per_second": 18.582,
      "eval_sensitivity": 0.6123595505617978,
      "eval_specificty": 0.8905472636815921,
      "eval_steps_per_second": 2.353,
      "step": 300
    },
    {
      "epoch": 3.04,
      "grad_norm": 5.801065444946289,
      "learning_rate": 0.0001392156862745098,
      "loss": 0.4872,
      "step": 310
    },
    {
      "epoch": 3.14,
      "grad_norm": 4.54748010635376,
      "learning_rate": 0.0001372549019607843,
      "loss": 0.4804,
      "step": 320
    },
    {
      "epoch": 3.24,
      "grad_norm": 6.328420639038086,
      "learning_rate": 0.00013529411764705884,
      "loss": 0.5523,
      "step": 330
    },
    {
      "epoch": 3.33,
      "grad_norm": 2.860734462738037,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.4061,
      "step": 340
    },
    {
      "epoch": 3.43,
      "grad_norm": 5.824751377105713,
      "learning_rate": 0.00013137254901960784,
      "loss": 0.4895,
      "step": 350
    },
    {
      "epoch": 3.53,
      "grad_norm": 1.802435278892517,
      "learning_rate": 0.00012941176470588237,
      "loss": 0.4011,
      "step": 360
    },
    {
      "epoch": 3.63,
      "grad_norm": 2.1423304080963135,
      "learning_rate": 0.00012745098039215687,
      "loss": 0.3881,
      "step": 370
    },
    {
      "epoch": 3.73,
      "grad_norm": 2.6464078426361084,
      "learning_rate": 0.00012549019607843137,
      "loss": 0.4371,
      "step": 380
    },
    {
      "epoch": 3.82,
      "grad_norm": 1.5666183233261108,
      "learning_rate": 0.0001235294117647059,
      "loss": 0.4256,
      "step": 390
    },
    {
      "epoch": 3.92,
      "grad_norm": 7.859738349914551,
      "learning_rate": 0.00012156862745098039,
      "loss": 0.474,
      "step": 400
    },
    {
      "epoch": 3.92,
      "eval_accuracy": 0.7994722955145118,
      "eval_auroc": 0.8600536642629548,
      "eval_f1": 0.7432432432432432,
      "eval_loss": 0.5211557149887085,
      "eval_model_selection": 0.6577785231147633,
      "eval_npv": 0.7394636015325671,
      "eval_ppv": 0.9322033898305084,
      "eval_runtime": 20.1111,
      "eval_samples_per_second": 18.845,
      "eval_sensitivity": 0.6179775280898876,
      "eval_specificty": 0.9601990049751243,
      "eval_steps_per_second": 2.387,
      "step": 400
    },
    {
      "epoch": 4.02,
      "grad_norm": 5.231843948364258,
      "learning_rate": 0.0001196078431372549,
      "loss": 0.3969,
      "step": 410
    },
    {
      "epoch": 4.12,
      "grad_norm": 1.5469104051589966,
      "learning_rate": 0.00011764705882352942,
      "loss": 0.4781,
      "step": 420
    },
    {
      "epoch": 4.22,
      "grad_norm": 5.2077765464782715,
      "learning_rate": 0.00011568627450980394,
      "loss": 0.4261,
      "step": 430
    },
    {
      "epoch": 4.31,
      "grad_norm": 0.9990509748458862,
      "learning_rate": 0.00011372549019607843,
      "loss": 0.3773,
      "step": 440
    },
    {
      "epoch": 4.41,
      "grad_norm": 5.277149677276611,
      "learning_rate": 0.00011176470588235294,
      "loss": 0.4551,
      "step": 450
    },
    {
      "epoch": 4.51,
      "grad_norm": 1.3224270343780518,
      "learning_rate": 0.00010980392156862746,
      "loss": 0.3533,
      "step": 460
    },
    {
      "epoch": 4.61,
      "grad_norm": 2.360970973968506,
      "learning_rate": 0.00010784313725490196,
      "loss": 0.4317,
      "step": 470
    },
    {
      "epoch": 4.71,
      "grad_norm": 2.176283836364746,
      "learning_rate": 0.00010588235294117647,
      "loss": 0.4718,
      "step": 480
    },
    {
      "epoch": 4.8,
      "grad_norm": 1.7329970598220825,
      "learning_rate": 0.00010392156862745099,
      "loss": 0.426,
      "step": 490
    },
    {
      "epoch": 4.9,
      "grad_norm": 7.07577657699585,
      "learning_rate": 0.00010196078431372549,
      "loss": 0.4285,
      "step": 500
    },
    {
      "epoch": 4.9,
      "eval_accuracy": 0.7757255936675461,
      "eval_auroc": 0.8728268768516966,
      "eval_f1": 0.7578347578347578,
      "eval_loss": 0.4510786235332489,
      "eval_model_selection": 0.9461959863603332,
      "eval_npv": 0.7815533980582524,
      "eval_ppv": 0.7687861271676301,
      "eval_runtime": 20.6194,
      "eval_samples_per_second": 18.381,
      "eval_sensitivity": 0.7471910112359551,
      "eval_specificty": 0.8009950248756219,
      "eval_steps_per_second": 2.328,
      "step": 500
    },
    {
      "epoch": 5.0,
      "grad_norm": 6.841150760650635,
      "learning_rate": 0.0001,
      "loss": 0.3839,
      "step": 510
    },
    {
      "epoch": 5.1,
      "grad_norm": 2.587709903717041,
      "learning_rate": 9.80392156862745e-05,
      "loss": 0.396,
      "step": 520
    },
    {
      "epoch": 5.2,
      "grad_norm": 2.148972272872925,
      "learning_rate": 9.607843137254903e-05,
      "loss": 0.3475,
      "step": 530
    },
    {
      "epoch": 5.29,
      "grad_norm": 6.0918803215026855,
      "learning_rate": 9.411764705882353e-05,
      "loss": 0.3433,
      "step": 540
    },
    {
      "epoch": 5.39,
      "grad_norm": 1.9835032224655151,
      "learning_rate": 9.215686274509804e-05,
      "loss": 0.3289,
      "step": 550
    },
    {
      "epoch": 5.49,
      "grad_norm": 1.912084698677063,
      "learning_rate": 9.019607843137255e-05,
      "loss": 0.3887,
      "step": 560
    },
    {
      "epoch": 5.59,
      "grad_norm": 2.5347495079040527,
      "learning_rate": 8.823529411764706e-05,
      "loss": 0.4516,
      "step": 570
    },
    {
      "epoch": 5.69,
      "grad_norm": 3.123342990875244,
      "learning_rate": 8.627450980392158e-05,
      "loss": 0.3973,
      "step": 580
    },
    {
      "epoch": 5.78,
      "grad_norm": 6.096330165863037,
      "learning_rate": 8.431372549019608e-05,
      "loss": 0.3895,
      "step": 590
    },
    {
      "epoch": 5.88,
      "grad_norm": 1.6143405437469482,
      "learning_rate": 8.23529411764706e-05,
      "loss": 0.3506,
      "step": 600
    },
    {
      "epoch": 5.88,
      "eval_accuracy": 0.8047493403693932,
      "eval_auroc": 0.8691095086365922,
      "eval_f1": 0.7658227848101267,
      "eval_loss": 0.47164368629455566,
      "eval_model_selection": 0.764352395326737,
      "eval_npv": 0.7634854771784232,
      "eval_ppv": 0.8768115942028986,
      "eval_runtime": 20.5548,
      "eval_samples_per_second": 18.438,
      "eval_sensitivity": 0.6797752808988764,
      "eval_specificty": 0.9154228855721394,
      "eval_steps_per_second": 2.335,
      "step": 600
    },
    {
      "epoch": 5.98,
      "grad_norm": 2.3068525791168213,
      "learning_rate": 8.039215686274511e-05,
      "loss": 0.3566,
      "step": 610
    },
    {
      "epoch": 6.08,
      "grad_norm": 3.027449369430542,
      "learning_rate": 7.843137254901961e-05,
      "loss": 0.3662,
      "step": 620
    },
    {
      "epoch": 6.18,
      "grad_norm": 3.0861737728118896,
      "learning_rate": 7.647058823529411e-05,
      "loss": 0.4272,
      "step": 630
    },
    {
      "epoch": 6.27,
      "grad_norm": 2.3498377799987793,
      "learning_rate": 7.450980392156864e-05,
      "loss": 0.2825,
      "step": 640
    },
    {
      "epoch": 6.37,
      "grad_norm": 2.3852789402008057,
      "learning_rate": 7.254901960784314e-05,
      "loss": 0.361,
      "step": 650
    },
    {
      "epoch": 6.47,
      "grad_norm": 2.655799627304077,
      "learning_rate": 7.058823529411765e-05,
      "loss": 0.291,
      "step": 660
    },
    {
      "epoch": 6.57,
      "grad_norm": 4.058769226074219,
      "learning_rate": 6.862745098039216e-05,
      "loss": 0.3045,
      "step": 670
    },
    {
      "epoch": 6.67,
      "grad_norm": 4.20621919631958,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.3931,
      "step": 680
    },
    {
      "epoch": 6.76,
      "grad_norm": 4.316690444946289,
      "learning_rate": 6.470588235294118e-05,
      "loss": 0.2858,
      "step": 690
    },
    {
      "epoch": 6.86,
      "grad_norm": 3.3390917778015137,
      "learning_rate": 6.274509803921569e-05,
      "loss": 0.4239,
      "step": 700
    },
    {
      "epoch": 6.86,
      "eval_accuracy": 0.8100263852242744,
      "eval_auroc": 0.8517245234501649,
      "eval_f1": 0.7677419354838709,
      "eval_loss": 0.504310131072998,
      "eval_model_selection": 0.7332159427581195,
      "eval_npv": 0.7611336032388664,
      "eval_ppv": 0.9015151515151515,
      "eval_runtime": 20.9343,
      "eval_samples_per_second": 18.104,
      "eval_sensitivity": 0.6685393258426966,
      "eval_specificty": 0.9353233830845771,
      "eval_steps_per_second": 2.293,
      "step": 700
    },
    {
      "epoch": 6.96,
      "grad_norm": 3.034562587738037,
      "learning_rate": 6.078431372549019e-05,
      "loss": 0.3033,
      "step": 710
    },
    {
      "epoch": 7.06,
      "grad_norm": 0.7752771973609924,
      "learning_rate": 5.882352941176471e-05,
      "loss": 0.2963,
      "step": 720
    },
    {
      "epoch": 7.16,
      "grad_norm": 1.2533752918243408,
      "learning_rate": 5.6862745098039215e-05,
      "loss": 0.2974,
      "step": 730
    },
    {
      "epoch": 7.25,
      "grad_norm": 1.748043179512024,
      "learning_rate": 5.490196078431373e-05,
      "loss": 0.3549,
      "step": 740
    },
    {
      "epoch": 7.35,
      "grad_norm": 2.4874391555786133,
      "learning_rate": 5.294117647058824e-05,
      "loss": 0.413,
      "step": 750
    },
    {
      "epoch": 7.45,
      "grad_norm": 3.017951726913452,
      "learning_rate": 5.0980392156862745e-05,
      "loss": 0.2809,
      "step": 760
    },
    {
      "epoch": 7.55,
      "grad_norm": 5.023625373840332,
      "learning_rate": 4.901960784313725e-05,
      "loss": 0.3603,
      "step": 770
    },
    {
      "epoch": 7.65,
      "grad_norm": 2.754984140396118,
      "learning_rate": 4.705882352941177e-05,
      "loss": 0.2837,
      "step": 780
    },
    {
      "epoch": 7.75,
      "grad_norm": 1.0377743244171143,
      "learning_rate": 4.5098039215686275e-05,
      "loss": 0.3169,
      "step": 790
    },
    {
      "epoch": 7.84,
      "grad_norm": 4.163206100463867,
      "learning_rate": 4.313725490196079e-05,
      "loss": 0.2447,
      "step": 800
    },
    {
      "epoch": 7.84,
      "eval_accuracy": 0.8073878627968337,
      "eval_auroc": 0.8591872100173291,
      "eval_f1": 0.7711598746081505,
      "eval_loss": 0.580413818359375,
      "eval_model_selection": 0.7805634747610264,
      "eval_npv": 0.7689075630252101,
      "eval_ppv": 0.8723404255319149,
      "eval_runtime": 20.1671,
      "eval_samples_per_second": 18.793,
      "eval_sensitivity": 0.6910112359550562,
      "eval_specificty": 0.9104477611940298,
      "eval_steps_per_second": 2.38,
      "step": 800
    },
    {
      "epoch": 7.94,
      "grad_norm": 3.30311918258667,
      "learning_rate": 4.11764705882353e-05,
      "loss": 0.3555,
      "step": 810
    },
    {
      "epoch": 8.04,
      "grad_norm": 4.967993259429932,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 0.2679,
      "step": 820
    },
    {
      "epoch": 8.14,
      "grad_norm": 4.838258266448975,
      "learning_rate": 3.725490196078432e-05,
      "loss": 0.2416,
      "step": 830
    },
    {
      "epoch": 8.24,
      "grad_norm": 4.202385425567627,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.1758,
      "step": 840
    },
    {
      "epoch": 8.33,
      "grad_norm": 3.0037460327148438,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.3192,
      "step": 850
    },
    {
      "epoch": 8.43,
      "grad_norm": 4.337649822235107,
      "learning_rate": 3.137254901960784e-05,
      "loss": 0.2669,
      "step": 860
    },
    {
      "epoch": 8.53,
      "grad_norm": 6.011264801025391,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.228,
      "step": 870
    },
    {
      "epoch": 8.63,
      "grad_norm": 0.5027784109115601,
      "learning_rate": 2.7450980392156865e-05,
      "loss": 0.3023,
      "step": 880
    },
    {
      "epoch": 8.73,
      "grad_norm": 1.8822238445281982,
      "learning_rate": 2.5490196078431373e-05,
      "loss": 0.3109,
      "step": 890
    },
    {
      "epoch": 8.82,
      "grad_norm": 3.6861536502838135,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 0.1739,
      "step": 900
    },
    {
      "epoch": 8.82,
      "eval_accuracy": 0.8073878627968337,
      "eval_auroc": 0.856224495500028,
      "eval_f1": 0.7767584097859327,
      "eval_loss": 0.6224877238273621,
      "eval_model_selection": 0.8229358823858236,
      "eval_npv": 0.7782608695652173,
      "eval_ppv": 0.8523489932885906,
      "eval_runtime": 20.4294,
      "eval_samples_per_second": 18.552,
      "eval_sensitivity": 0.7134831460674157,
      "eval_specificty": 0.8905472636815921,
      "eval_steps_per_second": 2.35,
      "step": 900
    },
    {
      "epoch": 8.92,
      "grad_norm": 5.0469770431518555,
      "learning_rate": 2.1568627450980395e-05,
      "loss": 0.4354,
      "step": 910
    },
    {
      "epoch": 9.02,
      "grad_norm": 4.443832874298096,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 0.334,
      "step": 920
    },
    {
      "epoch": 9.12,
      "grad_norm": 2.0471107959747314,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.2051,
      "step": 930
    },
    {
      "epoch": 9.22,
      "grad_norm": 3.442962169647217,
      "learning_rate": 1.568627450980392e-05,
      "loss": 0.2112,
      "step": 940
    },
    {
      "epoch": 9.31,
      "grad_norm": 2.31436824798584,
      "learning_rate": 1.3725490196078432e-05,
      "loss": 0.3268,
      "step": 950
    },
    {
      "epoch": 9.41,
      "grad_norm": 2.2342071533203125,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.3107,
      "step": 960
    },
    {
      "epoch": 9.51,
      "grad_norm": 3.2926645278930664,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.2284,
      "step": 970
    },
    {
      "epoch": 9.61,
      "grad_norm": 2.639843702316284,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.2269,
      "step": 980
    },
    {
      "epoch": 9.71,
      "grad_norm": 2.7015938758850098,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.1987,
      "step": 990
    },
    {
      "epoch": 9.8,
      "grad_norm": 8.358205795288086,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.2888,
      "step": 1000
    },
    {
      "epoch": 9.8,
      "eval_accuracy": 0.8047493403693932,
      "eval_auroc": 0.856979149197831,
      "eval_f1": 0.783625730994152,
      "eval_loss": 0.5807113647460938,
      "eval_model_selection": 0.9020627201073285,
      "eval_npv": 0.7953488372093023,
      "eval_ppv": 0.8170731707317073,
      "eval_runtime": 20.5189,
      "eval_samples_per_second": 18.471,
      "eval_sensitivity": 0.7528089887640449,
      "eval_specificty": 0.8507462686567164,
      "eval_steps_per_second": 2.339,
      "step": 1000
    },
    {
      "epoch": 9.9,
      "grad_norm": 3.527721881866455,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.3236,
      "step": 1010
    },
    {
      "epoch": 10.0,
      "grad_norm": 6.274048805236816,
      "learning_rate": 0.0,
      "loss": 0.26,
      "step": 1020
    },
    {
      "epoch": 10.0,
      "step": 1020,
      "total_flos": 1.25924483123712e+18,
      "train_loss": 0.40091259911948557,
      "train_runtime": 1906.0636,
      "train_samples_per_second": 8.525,
      "train_steps_per_second": 0.535
    }
  ],
  "logging_steps": 10,
  "max_steps": 1020,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "total_flos": 1.25924483123712e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|