{ "best_metric": 70.54026450257521, "best_model_checkpoint": "/home/jcanete/ft-data/all_results/tar/albeto_base_4/epochs_3_bs_16_lr_5e-5/checkpoint-9000", "epoch": 3.0, "global_step": 16455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 34.645222327341536, "eval_f1": 52.13460975937906, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.848678213309025e-05, "loss": 2.8212, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 40.38789025543993, "eval_f1": 58.80108695949609, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 44.1438032166509, "eval_f1": 61.68021475413032, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.6967487085992104e-05, "loss": 2.147, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 45.08041627246925, "eval_f1": 63.22958368722072, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.544819203889396e-05, "loss": 2.0333, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 46.09271523178808, "eval_f1": 63.34953738572746, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 47.13339640491959, "eval_f1": 64.76366505969077, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.392889699179581e-05, "loss": 1.9667, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 47.4077578051088, "eval_f1": 65.80706119842145, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 48.55250709555345, "eval_f1": 66.49751567340297, "step": 2400 }, { "epoch": 0.46, "learning_rate": 4.2409601944697666e-05, "loss": 1.8743, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 48.84578997161778, "eval_f1": 66.76044504218349, "step": 2700 }, { "epoch": 0.55, "learning_rate": 4.089030689759952e-05, "loss": 1.8655, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 49.55534531693472, "eval_f1": 67.55609852983001, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 49.45127719962157, "eval_f1": 67.28955811469469, "step": 3300 }, { "epoch": 0.64, "learning_rate": 3.9371011850501374e-05, "loss": 1.8223, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 50.08514664143803, "eval_f1": 68.11294509259712, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 50.38789025543993, "eval_f1": 68.15488930053279, "step": 3900 }, { "epoch": 0.73, "learning_rate": 3.785171680340323e-05, "loss": 1.7684, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 51.05960264900662, "eval_f1": 69.07576620387981, "step": 4200 }, { "epoch": 0.82, "learning_rate": 3.6335460346399276e-05, "loss": 1.7677, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 51.561021759697255, "eval_f1": 69.20806146876814, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 51.61778618732261, "eval_f1": 69.20269381359182, "step": 4800 }, { "epoch": 0.91, "learning_rate": 3.481616529930112e-05, "loss": 1.7566, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 51.68401135288553, "eval_f1": 69.22415299954444, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 51.82592242194891, "eval_f1": 69.68048492944583, "step": 5400 }, { "epoch": 1.0, "learning_rate": 3.329990884229718e-05, "loss": 1.6976, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 51.84484389782403, "eval_f1": 69.61844469538968, "step": 5700 }, { "epoch": 1.09, "learning_rate": 3.1780613795199024e-05, "loss": 1.3996, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 51.79754020813623, "eval_f1": 69.51904893791095, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 51.72185430463576, "eval_f1": 69.18971361078592, "step": 6300 }, { "epoch": 1.19, "learning_rate": 3.0261318748100882e-05, "loss": 1.3947, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 51.82592242194891, "eval_f1": 69.9010405722466, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 51.74077578051088, "eval_f1": 69.21425877541581, "step": 6900 }, { "epoch": 1.28, "learning_rate": 2.8742023701002736e-05, "loss": 1.3898, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 52.64900662251656, "eval_f1": 70.03047551437781, "step": 7200 }, { "epoch": 1.37, "learning_rate": 2.722272865390459e-05, "loss": 1.3861, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 52.232734153263955, "eval_f1": 69.84348950849247, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 52.05298013245033, "eval_f1": 69.85616305536696, "step": 7800 }, { "epoch": 1.46, "learning_rate": 2.5703433606806444e-05, "loss": 1.4272, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 52.42194891201514, "eval_f1": 70.12191052992236, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 52.327341532639544, "eval_f1": 70.1945849880174, "step": 8400 }, { "epoch": 1.55, "learning_rate": 2.4184138559708297e-05, "loss": 1.387, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 52.73415326395459, "eval_f1": 70.53063216515172, "step": 8700 }, { "epoch": 1.64, "learning_rate": 2.266484351261015e-05, "loss": 1.4181, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 52.980132450331126, "eval_f1": 70.54026450257521, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 52.327341532639544, "eval_f1": 70.03067034409061, "step": 9300 }, { "epoch": 1.73, "learning_rate": 2.1145548465512002e-05, "loss": 1.3941, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 52.75307473982971, "eval_f1": 70.53593438103856, "step": 9600 }, { "epoch": 1.8, "eval_exact_match": 52.37464522232734, "eval_f1": 69.9459164585827, "step": 9900 }, { "epoch": 1.82, "learning_rate": 1.9629292008508053e-05, "loss": 1.4019, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 52.544938505203405, "eval_f1": 70.19906201457854, "step": 10200 }, { "epoch": 1.91, "learning_rate": 1.8109996961409907e-05, "loss": 1.4031, "step": 10500 }, { "epoch": 1.91, "eval_exact_match": 52.61116367076632, "eval_f1": 70.3522720546097, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 53.02743614001892, "eval_f1": 70.4807280516286, "step": 10800 }, { "epoch": 2.01, "learning_rate": 1.659070191431176e-05, "loss": 1.3625, "step": 11000 }, { "epoch": 2.02, "eval_exact_match": 51.88268684957427, "eval_f1": 69.74458138686404, "step": 11100 }, { "epoch": 2.08, "eval_exact_match": 51.66508987701041, "eval_f1": 69.55251413256356, "step": 11400 }, { "epoch": 2.1, "learning_rate": 1.5071406867213613e-05, "loss": 1.0261, "step": 11500 }, { "epoch": 2.13, "eval_exact_match": 51.12582781456954, "eval_f1": 69.45293644822506, "step": 11700 }, { "epoch": 2.19, "learning_rate": 1.3555150410209664e-05, "loss": 0.9969, "step": 12000 }, { "epoch": 2.19, "eval_exact_match": 51.84484389782403, "eval_f1": 69.82132514934786, "step": 12000 }, { "epoch": 2.24, "eval_exact_match": 51.106906338694415, "eval_f1": 69.26705027187695, "step": 12300 }, { "epoch": 2.28, "learning_rate": 1.2035855363111518e-05, "loss": 1.0186, "step": 12500 }, { "epoch": 2.3, "eval_exact_match": 51.59886471144749, "eval_f1": 69.76823383346489, "step": 12600 }, { "epoch": 2.35, "eval_exact_match": 51.45695364238411, "eval_f1": 69.76810827781901, "step": 12900 }, { "epoch": 2.37, "learning_rate": 1.051656031601337e-05, "loss": 1.0272, "step": 13000 }, { "epoch": 2.41, "eval_exact_match": 51.35288552507095, "eval_f1": 69.64753731877455, "step": 13200 }, { "epoch": 2.46, "learning_rate": 8.997265268915224e-06, "loss": 1.0469, "step": 13500 }, { "epoch": 2.46, "eval_exact_match": 51.324503311258276, "eval_f1": 69.46890832651873, "step": 13500 }, { "epoch": 2.52, "eval_exact_match": 51.343424787133394, "eval_f1": 69.28540200400431, "step": 13800 }, { "epoch": 2.55, "learning_rate": 7.4810088119112735e-06, "loss": 1.0324, "step": 14000 }, { "epoch": 2.57, "eval_exact_match": 51.27719962157048, "eval_f1": 69.21410385147476, "step": 14100 }, { "epoch": 2.63, "eval_exact_match": 51.400189214758754, "eval_f1": 69.48987451431931, "step": 14400 }, { "epoch": 2.64, "learning_rate": 5.964752354907324e-06, "loss": 1.026, "step": 14500 }, { "epoch": 2.68, "eval_exact_match": 51.14474929044466, "eval_f1": 69.48096843416343, "step": 14700 }, { "epoch": 2.73, "learning_rate": 4.445457307809177e-06, "loss": 1.0191, "step": 15000 }, { "epoch": 2.73, "eval_exact_match": 51.333964049195835, "eval_f1": 69.47772191155812, "step": 15000 }, { "epoch": 2.79, "eval_exact_match": 51.68401135288553, "eval_f1": 69.54969415984502, "step": 15300 }, { "epoch": 2.83, "learning_rate": 2.9261622607110303e-06, "loss": 0.9864, "step": 15500 }, { "epoch": 2.84, "eval_exact_match": 51.37180700094607, "eval_f1": 69.56621150352814, "step": 15600 }, { "epoch": 2.9, "eval_exact_match": 51.561021759697255, "eval_f1": 69.58651422829355, "step": 15900 }, { "epoch": 2.92, "learning_rate": 1.4068672136128838e-06, "loss": 1.0319, "step": 16000 }, { "epoch": 2.95, "eval_exact_match": 51.627246925260174, "eval_f1": 69.53955054930253, "step": 16200 }, { "epoch": 3.0, "step": 16455, "total_flos": 3819871314614016.0, "train_loss": 1.4580001696595593, "train_runtime": 2422.3692, "train_samples_per_second": 108.671, "train_steps_per_second": 6.793 } ], "max_steps": 16455, "num_train_epochs": 3, "total_flos": 3819871314614016.0, "trial_name": null, "trial_params": null }