{
  "best_metric": 0.08029154688119888,
  "best_model_checkpoint": "./beit-tiny-mask-finetuned/checkpoint-200",
  "epoch": 0.5543237250554324,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 0.00019955654101995565,
      "loss": 0.1318,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019900221729490022,
      "loss": 0.0546,
      "step": 20
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019850332594235036,
      "loss": 0.2395,
      "step": 30
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019794900221729492,
      "loss": 0.0966,
      "step": 40
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019739467849223948,
      "loss": 0.1544,
      "step": 50
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019684035476718405,
      "loss": 0.1734,
      "step": 60
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019634146341463416,
      "loss": 0.1556,
      "step": 70
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019578713968957873,
      "loss": 0.2154,
      "step": 80
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001952328159645233,
      "loss": 0.1845,
      "step": 90
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0001947339246119734,
      "loss": 0.4978,
      "step": 100
    },
    {
      "epoch": 0.11,
      "eval_accuracy": 0.9402113459399333,
      "eval_loss": 0.17701464891433716,
      "eval_runtime": 192.8644,
      "eval_samples_per_second": 18.645,
      "eval_steps_per_second": 2.333,
      "step": 100
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019417960088691797,
      "loss": 0.1897,
      "step": 110
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019362527716186253,
      "loss": 0.1148,
      "step": 120
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001930709534368071,
      "loss": 0.208,
      "step": 130
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019251662971175165,
      "loss": 0.1142,
      "step": 140
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00019196230598669624,
      "loss": 0.1285,
      "step": 150
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001914079822616408,
      "loss": 0.1383,
      "step": 160
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019085365853658537,
      "loss": 0.1786,
      "step": 170
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019029933481152996,
      "loss": 0.0892,
      "step": 180
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018974501108647452,
      "loss": 0.2118,
      "step": 190
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018919068736141908,
      "loss": 0.1863,
      "step": 200
    },
    {
      "epoch": 0.22,
      "eval_accuracy": 0.974972191323693,
      "eval_loss": 0.08029154688119888,
      "eval_runtime": 193.2131,
      "eval_samples_per_second": 18.612,
      "eval_steps_per_second": 2.329,
      "step": 200
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018863636363636364,
      "loss": 0.0495,
      "step": 210
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0001880820399113082,
      "loss": 0.1476,
      "step": 220
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018752771618625277,
      "loss": 0.1339,
      "step": 230
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018697339246119733,
      "loss": 0.1734,
      "step": 240
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018641906873614192,
      "loss": 0.2068,
      "step": 250
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018586474501108648,
      "loss": 0.1427,
      "step": 260
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018531042128603104,
      "loss": 0.182,
      "step": 270
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018475609756097563,
      "loss": 0.1436,
      "step": 280
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0001842017738359202,
      "loss": 0.1319,
      "step": 290
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018364745011086476,
      "loss": 0.0818,
      "step": 300
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.9424360400444939,
      "eval_loss": 0.20646639168262482,
      "eval_runtime": 193.0709,
      "eval_samples_per_second": 18.625,
      "eval_steps_per_second": 2.331,
      "step": 300
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018309312638580932,
      "loss": 0.2115,
      "step": 310
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018253880266075388,
      "loss": 0.0725,
      "step": 320
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018198447893569844,
      "loss": 0.0589,
      "step": 330
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.000181430155210643,
      "loss": 0.2017,
      "step": 340
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0001808758314855876,
      "loss": 0.0678,
      "step": 350
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018032150776053216,
      "loss": 0.2075,
      "step": 360
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017976718403547672,
      "loss": 0.3123,
      "step": 370
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0001792128603104213,
      "loss": 0.0857,
      "step": 380
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017865853658536587,
      "loss": 0.0958,
      "step": 390
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017810421286031043,
      "loss": 0.1982,
      "step": 400
    },
    {
      "epoch": 0.44,
      "eval_accuracy": 0.9438264738598443,
      "eval_loss": 0.16823017597198486,
      "eval_runtime": 193.1602,
      "eval_samples_per_second": 18.617,
      "eval_steps_per_second": 2.33,
      "step": 400
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.000177549889135255,
      "loss": 0.2038,
      "step": 410
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00017699556541019956,
      "loss": 0.1603,
      "step": 420
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017644124168514412,
      "loss": 0.1035,
      "step": 430
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017588691796008868,
      "loss": 0.0329,
      "step": 440
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00017533259423503327,
      "loss": 0.0392,
      "step": 450
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017477827050997783,
      "loss": 0.1546,
      "step": 460
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0001742239467849224,
      "loss": 0.155,
      "step": 470
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017366962305986699,
      "loss": 0.1521,
      "step": 480
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00017311529933481155,
      "loss": 0.1506,
      "step": 490
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0001725609756097561,
      "loss": 0.0293,
      "step": 500
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.9824805339265851,
      "eval_loss": 0.08882919698953629,
      "eval_runtime": 193.0128,
      "eval_samples_per_second": 18.631,
      "eval_steps_per_second": 2.331,
      "step": 500
    },
    {
      "epoch": 0.55,
      "step": 500,
      "total_flos": 6.18978356527104e+17,
      "train_loss": 0.1509909844994545,
      "train_runtime": 3496.5643,
      "train_samples_per_second": 16.505,
      "train_steps_per_second": 1.032
    }
  ],
  "max_steps": 3608,
  "num_train_epochs": 4,
  "total_flos": 6.18978356527104e+17,
  "trial_name": null,
  "trial_params": null
}