|
{ |
|
"best_metric": 0.20685863494873047, |
|
"best_model_checkpoint": "./dino-base-2023_11_24-unfreeze/checkpoint-16080", |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 16080, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.01, |
|
"loss": 0.3961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.0735977134690961, |
|
"eval_f1_macro": 0.09660814200449142, |
|
"eval_f1_micro": 0.3295561046263951, |
|
"eval_loss": 0.32934051752090454, |
|
"eval_roc_auc": 0.6004986038194676, |
|
"eval_runtime": 666.7423, |
|
"eval_samples_per_second": 4.198, |
|
"eval_steps_per_second": 0.262, |
|
"learning_rate": 0.01, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.01, |
|
"loss": 0.3418, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.04394426580921758, |
|
"eval_f1_macro": 0.12832379577401481, |
|
"eval_f1_micro": 0.33792181231809953, |
|
"eval_loss": 0.3381994962692261, |
|
"eval_roc_auc": 0.6053684490893504, |
|
"eval_runtime": 663.9506, |
|
"eval_samples_per_second": 4.216, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.01, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.01, |
|
"loss": 0.3334, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.07859949982136477, |
|
"eval_f1_macro": 0.2420082514645649, |
|
"eval_f1_micro": 0.39049407685639986, |
|
"eval_loss": 0.3550550937652588, |
|
"eval_roc_auc": 0.6319467749490031, |
|
"eval_runtime": 662.2838, |
|
"eval_samples_per_second": 4.226, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.01, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.01, |
|
"loss": 0.3323, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.06252232940335835, |
|
"eval_f1_macro": 0.10992415783030689, |
|
"eval_f1_micro": 0.2554886521537749, |
|
"eval_loss": 0.32132452726364136, |
|
"eval_roc_auc": 0.5720375077972393, |
|
"eval_runtime": 688.3751, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.254, |
|
"learning_rate": 0.01, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.01, |
|
"loss": 0.3248, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.04501607717041801, |
|
"eval_f1_macro": 0.12977783250072622, |
|
"eval_f1_micro": 0.33549517966695885, |
|
"eval_loss": 0.3164198696613312, |
|
"eval_roc_auc": 0.6023923090497274, |
|
"eval_runtime": 684.0693, |
|
"eval_samples_per_second": 4.092, |
|
"eval_steps_per_second": 0.256, |
|
"learning_rate": 0.01, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.01, |
|
"loss": 0.3235, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.023937120400142908, |
|
"eval_f1_macro": 0.08064530717139505, |
|
"eval_f1_micro": 0.28637243158573666, |
|
"eval_loss": 0.33460524678230286, |
|
"eval_roc_auc": 0.5833928540072826, |
|
"eval_runtime": 694.014, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.252, |
|
"learning_rate": 0.01, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.01, |
|
"loss": 0.32, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.06823865666309396, |
|
"eval_f1_macro": 0.19677941241236616, |
|
"eval_f1_micro": 0.4593774264134962, |
|
"eval_loss": 0.3029455542564392, |
|
"eval_roc_auc": 0.6663014350966586, |
|
"eval_runtime": 675.5021, |
|
"eval_samples_per_second": 4.144, |
|
"eval_steps_per_second": 0.259, |
|
"learning_rate": 0.01, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.01, |
|
"loss": 0.3138, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.05787781350482315, |
|
"eval_f1_macro": 0.29399778254935266, |
|
"eval_f1_micro": 0.5467535723238907, |
|
"eval_loss": 0.2865561246871948, |
|
"eval_roc_auc": 0.7240336144955091, |
|
"eval_runtime": 678.713, |
|
"eval_samples_per_second": 4.124, |
|
"eval_steps_per_second": 0.258, |
|
"learning_rate": 0.01, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.01, |
|
"loss": 0.3052, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.12254376563058235, |
|
"eval_f1_macro": 0.29927318211914594, |
|
"eval_f1_micro": 0.4766527970411466, |
|
"eval_loss": 0.2806786894798279, |
|
"eval_roc_auc": 0.6671674748237758, |
|
"eval_runtime": 665.488, |
|
"eval_samples_per_second": 4.206, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 0.01, |
|
"step": 4824 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.01, |
|
"loss": 0.3157, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.0707395498392283, |
|
"eval_f1_macro": 0.20914365928952397, |
|
"eval_f1_micro": 0.47518133978097, |
|
"eval_loss": 0.29552116990089417, |
|
"eval_roc_auc": 0.6732798876351146, |
|
"eval_runtime": 670.1708, |
|
"eval_samples_per_second": 4.177, |
|
"eval_steps_per_second": 0.261, |
|
"learning_rate": 0.01, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.01, |
|
"loss": 0.3119, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.13612004287245444, |
|
"eval_f1_macro": 0.215957700272248, |
|
"eval_f1_micro": 0.40282685512367494, |
|
"eval_loss": 0.340472012758255, |
|
"eval_roc_auc": 0.6335842288286788, |
|
"eval_runtime": 655.9163, |
|
"eval_samples_per_second": 4.267, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.01, |
|
"step": 5896 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.01, |
|
"loss": 0.3162, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.05323329760628796, |
|
"eval_f1_macro": 0.2964933264503757, |
|
"eval_f1_micro": 0.4898614735996788, |
|
"eval_loss": 0.41625335812568665, |
|
"eval_roc_auc": 0.6862559753775964, |
|
"eval_runtime": 663.2841, |
|
"eval_samples_per_second": 4.22, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.01, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 0.01, |
|
"loss": 0.3184, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.10468024294390854, |
|
"eval_f1_macro": 0.32985179099978684, |
|
"eval_f1_micro": 0.5429481760842708, |
|
"eval_loss": 0.29642441868782043, |
|
"eval_roc_auc": 0.7170301433203381, |
|
"eval_runtime": 670.4311, |
|
"eval_samples_per_second": 4.175, |
|
"eval_steps_per_second": 0.261, |
|
"learning_rate": 0.01, |
|
"step": 6968 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.01, |
|
"loss": 0.3131, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.01, |
|
"loss": 0.3142, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.08324401571989996, |
|
"eval_f1_macro": 0.3153914854710691, |
|
"eval_f1_micro": 0.5253185233583796, |
|
"eval_loss": 0.30047285556793213, |
|
"eval_roc_auc": 0.7071979710639686, |
|
"eval_runtime": 655.5852, |
|
"eval_samples_per_second": 4.269, |
|
"eval_steps_per_second": 0.267, |
|
"learning_rate": 0.01, |
|
"step": 7504 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.01, |
|
"loss": 0.3104, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.0, |
|
"eval_f1_macro": 0.06737263276203229, |
|
"eval_f1_micro": 0.16727748691099475, |
|
"eval_loss": 3.1991209983825684, |
|
"eval_roc_auc": 0.48793717022406635, |
|
"eval_runtime": 660.6942, |
|
"eval_samples_per_second": 4.236, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.01, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 0.001, |
|
"loss": 0.3042, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.15827081100392998, |
|
"eval_f1_macro": 0.2746423799349764, |
|
"eval_f1_micro": 0.45441654850279417, |
|
"eval_loss": 0.2820233702659607, |
|
"eval_roc_auc": 0.6518577907371156, |
|
"eval_runtime": 657.3215, |
|
"eval_samples_per_second": 4.258, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.2788, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.1639871382636656, |
|
"eval_f1_macro": 0.3842369260673032, |
|
"eval_f1_micro": 0.5743618993298161, |
|
"eval_loss": 0.2741135358810425, |
|
"eval_roc_auc": 0.7204845597401146, |
|
"eval_runtime": 676.3087, |
|
"eval_samples_per_second": 4.139, |
|
"eval_steps_per_second": 0.259, |
|
"learning_rate": 0.001, |
|
"step": 9112 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.2724, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.2072168631654162, |
|
"eval_f1_macro": 0.39364174946303265, |
|
"eval_f1_micro": 0.5903019744483159, |
|
"eval_loss": 0.2424442023038864, |
|
"eval_roc_auc": 0.725566189869564, |
|
"eval_runtime": 676.7446, |
|
"eval_samples_per_second": 4.136, |
|
"eval_steps_per_second": 0.259, |
|
"learning_rate": 0.001, |
|
"step": 9648 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.2642, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.21864951768488747, |
|
"eval_f1_macro": 0.4095283624260902, |
|
"eval_f1_micro": 0.6020972084455152, |
|
"eval_loss": 0.24139995872974396, |
|
"eval_roc_auc": 0.7346777613512268, |
|
"eval_runtime": 679.8261, |
|
"eval_samples_per_second": 4.117, |
|
"eval_steps_per_second": 0.257, |
|
"learning_rate": 0.001, |
|
"step": 10184 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.2597, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.22508038585209003, |
|
"eval_f1_macro": 0.4156043928581225, |
|
"eval_f1_micro": 0.6078926484380637, |
|
"eval_loss": 0.22694532573223114, |
|
"eval_roc_auc": 0.734743268508761, |
|
"eval_runtime": 658.6647, |
|
"eval_samples_per_second": 4.25, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 20.52, |
|
"learning_rate": 0.001, |
|
"loss": 0.2575, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.23401214719542693, |
|
"eval_f1_macro": 0.42525523109884183, |
|
"eval_f1_micro": 0.6231395675372087, |
|
"eval_loss": 0.22489750385284424, |
|
"eval_roc_auc": 0.7462705119874329, |
|
"eval_runtime": 659.2031, |
|
"eval_samples_per_second": 4.246, |
|
"eval_steps_per_second": 0.265, |
|
"learning_rate": 0.001, |
|
"step": 11256 |
|
}, |
|
{ |
|
"epoch": 21.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.253, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.24294390853876385, |
|
"eval_f1_macro": 0.4638642250813612, |
|
"eval_f1_micro": 0.6291112183686134, |
|
"eval_loss": 0.2261122316122055, |
|
"eval_roc_auc": 0.7521458927635197, |
|
"eval_runtime": 657.2132, |
|
"eval_samples_per_second": 4.259, |
|
"eval_steps_per_second": 0.266, |
|
"learning_rate": 0.001, |
|
"step": 11792 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 0.001, |
|
"loss": 0.2491, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2536620221507681, |
|
"eval_f1_macro": 0.4855675958097963, |
|
"eval_f1_micro": 0.6453674121405751, |
|
"eval_loss": 0.21625448763370514, |
|
"eval_roc_auc": 0.7626888699414743, |
|
"eval_runtime": 684.5667, |
|
"eval_samples_per_second": 4.089, |
|
"eval_steps_per_second": 0.256, |
|
"learning_rate": 0.001, |
|
"step": 12328 |
|
}, |
|
{ |
|
"epoch": 23.32, |
|
"learning_rate": 0.001, |
|
"loss": 0.2484, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2568774562343694, |
|
"eval_f1_macro": 0.4635023998508455, |
|
"eval_f1_micro": 0.6261708770933863, |
|
"eval_loss": 0.22124029695987701, |
|
"eval_roc_auc": 0.746011775710583, |
|
"eval_runtime": 686.8149, |
|
"eval_samples_per_second": 4.075, |
|
"eval_steps_per_second": 0.255, |
|
"learning_rate": 0.001, |
|
"step": 12864 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.2465, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.27724187209717754, |
|
"eval_f1_macro": 0.4780125036155298, |
|
"eval_f1_micro": 0.6486374896779521, |
|
"eval_loss": 0.21179212629795074, |
|
"eval_roc_auc": 0.7621535864293458, |
|
"eval_runtime": 682.8306, |
|
"eval_samples_per_second": 4.099, |
|
"eval_steps_per_second": 0.256, |
|
"learning_rate": 0.001, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 0.001, |
|
"loss": 0.241, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.255805644873169, |
|
"eval_f1_macro": 0.5159201891126339, |
|
"eval_f1_micro": 0.6601850610164945, |
|
"eval_loss": 0.21056728065013885, |
|
"eval_roc_auc": 0.7726679521523573, |
|
"eval_runtime": 690.8376, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.253, |
|
"learning_rate": 0.001, |
|
"step": 13936 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.2413, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2722400857449089, |
|
"eval_f1_macro": 0.4978739205590764, |
|
"eval_f1_micro": 0.6389924716808555, |
|
"eval_loss": 0.21349740028381348, |
|
"eval_roc_auc": 0.753602824058608, |
|
"eval_runtime": 690.4835, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.253, |
|
"learning_rate": 0.001, |
|
"step": 14472 |
|
}, |
|
{ |
|
"epoch": 27.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.2401, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 0.001, |
|
"loss": 0.2385, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.27724187209717754, |
|
"eval_f1_macro": 0.4596468154208962, |
|
"eval_f1_micro": 0.6102875778238955, |
|
"eval_loss": 0.2182074636220932, |
|
"eval_roc_auc": 0.7318729802955509, |
|
"eval_runtime": 665.6085, |
|
"eval_samples_per_second": 4.205, |
|
"eval_steps_per_second": 0.263, |
|
"learning_rate": 0.001, |
|
"step": 15008 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.2366, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.270811003929975, |
|
"eval_f1_macro": 0.5354446223521995, |
|
"eval_f1_micro": 0.6614600569197168, |
|
"eval_loss": 0.2131979614496231, |
|
"eval_roc_auc": 0.7757714197210208, |
|
"eval_runtime": 663.3734, |
|
"eval_samples_per_second": 4.219, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.001, |
|
"step": 15544 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.2345, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.2747409789210432, |
|
"eval_f1_macro": 0.5121854108049624, |
|
"eval_f1_micro": 0.6565579984836998, |
|
"eval_loss": 0.20685863494873047, |
|
"eval_roc_auc": 0.7657921716163675, |
|
"eval_runtime": 663.8358, |
|
"eval_samples_per_second": 4.216, |
|
"eval_steps_per_second": 0.264, |
|
"learning_rate": 0.001, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.001, |
|
"step": 16080, |
|
"total_flos": 2.04446834386654e+19, |
|
"train_loss": 0.2889823566028728, |
|
"train_runtime": 84191.1285, |
|
"train_samples_per_second": 3.053, |
|
"train_steps_per_second": 0.191 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16080, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 2.04446834386654e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|