|
{ |
|
"best_metric": 0.027106858789920807, |
|
"best_model_checkpoint": "./deit-base-distilled-mask-finetuned/checkpoint-600", |
|
"epoch": 1.9955654101995566, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019889135254988916, |
|
"loss": 0.0451, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019778270509977829, |
|
"loss": 0.0465, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001966740576496674, |
|
"loss": 0.2191, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019556541019955653, |
|
"loss": 0.0804, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019445676274944569, |
|
"loss": 0.0719, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019334811529933484, |
|
"loss": 0.0518, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019223946784922396, |
|
"loss": 0.0433, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019124168514412417, |
|
"loss": 0.2892, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019013303769401332, |
|
"loss": 0.0692, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00018902439024390244, |
|
"loss": 0.0765, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9888765294771968, |
|
"eval_loss": 0.05117267370223999, |
|
"eval_runtime": 195.5169, |
|
"eval_samples_per_second": 18.392, |
|
"eval_steps_per_second": 2.302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001879157427937916, |
|
"loss": 0.1107, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018680709534368072, |
|
"loss": 0.0967, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018569844789356984, |
|
"loss": 0.0852, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000184589800443459, |
|
"loss": 0.082, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00018348115299334812, |
|
"loss": 0.0172, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00018237250554323727, |
|
"loss": 0.0379, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001812638580931264, |
|
"loss": 0.0773, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018015521064301552, |
|
"loss": 0.0329, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017904656319290467, |
|
"loss": 0.0688, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001779379157427938, |
|
"loss": 0.0533, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.9908231368186874, |
|
"eval_loss": 0.037399593740701675, |
|
"eval_runtime": 194.4149, |
|
"eval_samples_per_second": 18.497, |
|
"eval_steps_per_second": 2.315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017682926829268295, |
|
"loss": 0.0536, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00017572062084257207, |
|
"loss": 0.0144, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001746119733924612, |
|
"loss": 0.0535, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00017350332594235035, |
|
"loss": 0.0569, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00017239467849223947, |
|
"loss": 0.0408, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00017128603104212862, |
|
"loss": 0.0529, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017017738359201775, |
|
"loss": 0.088, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00016906873614190687, |
|
"loss": 0.0147, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00016796008869179602, |
|
"loss": 0.0369, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016685144124168515, |
|
"loss": 0.0442, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9885984427141268, |
|
"eval_loss": 0.03961510211229324, |
|
"eval_runtime": 194.5523, |
|
"eval_samples_per_second": 18.483, |
|
"eval_steps_per_second": 2.313, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001657427937915743, |
|
"loss": 0.0191, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016463414634146343, |
|
"loss": 0.044, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00016352549889135255, |
|
"loss": 0.0274, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001624168514412417, |
|
"loss": 0.064, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016130820399113083, |
|
"loss": 0.0478, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016019955654101998, |
|
"loss": 0.0532, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001590909090909091, |
|
"loss": 0.05, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00015798226164079823, |
|
"loss": 0.0197, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00015687361419068738, |
|
"loss": 0.0492, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001557649667405765, |
|
"loss": 0.0359, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9885984427141268, |
|
"eval_loss": 0.034940723329782486, |
|
"eval_runtime": 193.9815, |
|
"eval_samples_per_second": 18.538, |
|
"eval_steps_per_second": 2.32, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015465631929046565, |
|
"loss": 0.0405, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015354767184035478, |
|
"loss": 0.03, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001524390243902439, |
|
"loss": 0.0272, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015133037694013303, |
|
"loss": 0.0366, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015022172949002218, |
|
"loss": 0.0676, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00014911308203991133, |
|
"loss": 0.0185, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00014800443458980045, |
|
"loss": 0.0053, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00014689578713968958, |
|
"loss": 0.0309, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001457871396895787, |
|
"loss": 0.0183, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00014467849223946785, |
|
"loss": 0.0777, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.9869299221357063, |
|
"eval_loss": 0.042109716683626175, |
|
"eval_runtime": 193.8529, |
|
"eval_samples_per_second": 18.55, |
|
"eval_steps_per_second": 2.321, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.000143569844789357, |
|
"loss": 0.0159, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014246119733924613, |
|
"loss": 0.0645, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014135254988913525, |
|
"loss": 0.0632, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014024390243902438, |
|
"loss": 0.0251, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00013913525498891353, |
|
"loss": 0.036, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013802660753880268, |
|
"loss": 0.0389, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001369179600886918, |
|
"loss": 0.0353, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013580931263858093, |
|
"loss": 0.0376, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013470066518847006, |
|
"loss": 0.0416, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001335920177383592, |
|
"loss": 0.0051, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.9922135706340378, |
|
"eval_loss": 0.027106858789920807, |
|
"eval_runtime": 193.7443, |
|
"eval_samples_per_second": 18.561, |
|
"eval_steps_per_second": 2.323, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00013248337028824836, |
|
"loss": 0.0277, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00013137472283813748, |
|
"loss": 0.0466, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001302660753880266, |
|
"loss": 0.0365, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00012915742793791573, |
|
"loss": 0.0079, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00012804878048780488, |
|
"loss": 0.0291, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.000126940133037694, |
|
"loss": 0.0694, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00012583148558758316, |
|
"loss": 0.0027, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001247228381374723, |
|
"loss": 0.0179, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001236141906873614, |
|
"loss": 0.0206, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00012250554323725056, |
|
"loss": 0.0112, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.9849833147942157, |
|
"eval_loss": 0.051631152629852295, |
|
"eval_runtime": 194.3203, |
|
"eval_samples_per_second": 18.506, |
|
"eval_steps_per_second": 2.316, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00012139689578713968, |
|
"loss": 0.0037, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00012028824833702884, |
|
"loss": 0.0051, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00011917960088691797, |
|
"loss": 0.0014, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0001180709534368071, |
|
"loss": 0.0018, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00011696230598669624, |
|
"loss": 0.0445, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00011585365853658536, |
|
"loss": 0.0294, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00011474501108647451, |
|
"loss": 0.0247, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00011363636363636365, |
|
"loss": 0.0069, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00011252771618625277, |
|
"loss": 0.0612, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00011141906873614191, |
|
"loss": 0.0152, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.9824805339265851, |
|
"eval_loss": 0.05840621143579483, |
|
"eval_runtime": 194.8587, |
|
"eval_samples_per_second": 18.454, |
|
"eval_steps_per_second": 2.309, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00011031042128603104, |
|
"loss": 0.0137, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00010920177383592019, |
|
"loss": 0.0118, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00010809312638580931, |
|
"loss": 0.0246, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00010698447893569845, |
|
"loss": 0.0246, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001058758314855876, |
|
"loss": 0.036, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010476718403547671, |
|
"loss": 0.0248, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010365853658536586, |
|
"loss": 0.019, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010254988913525499, |
|
"loss": 0.0387, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010144124168514413, |
|
"loss": 0.0127, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010033259423503328, |
|
"loss": 0.0339, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9905450500556173, |
|
"eval_loss": 0.027991166338324547, |
|
"eval_runtime": 194.6989, |
|
"eval_samples_per_second": 18.47, |
|
"eval_steps_per_second": 2.311, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 900, |
|
"total_flos": 2.2314992806549094e+18, |
|
"train_loss": 0.04378239723129405, |
|
"train_runtime": 5836.8754, |
|
"train_samples_per_second": 9.887, |
|
"train_steps_per_second": 0.309 |
|
} |
|
], |
|
"max_steps": 1804, |
|
"num_train_epochs": 4, |
|
"total_flos": 2.2314992806549094e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|