|
{ |
|
"best_metric": 0.8535663673078441, |
|
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1800", |
|
"epoch": 35.03649635036496, |
|
"eval_steps": 200, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7299270072992701, |
|
"grad_norm": 2.289438486099243, |
|
"learning_rate": 5.5147058823529414e-05, |
|
"loss": 1.2878, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.4598540145985401, |
|
"grad_norm": 2.5048491954803467, |
|
"learning_rate": 0.00011029411764705883, |
|
"loss": 0.8322, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.18978102189781, |
|
"grad_norm": 11.18371295928955, |
|
"learning_rate": 0.00016544117647058823, |
|
"loss": 0.7897, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.9197080291970803, |
|
"grad_norm": 9.702393531799316, |
|
"learning_rate": 0.00022058823529411765, |
|
"loss": 0.7149, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9197080291970803, |
|
"eval_accuracy": 0.7252747252747253, |
|
"eval_confusion_matrix": [ |
|
[ |
|
34, |
|
39, |
|
0, |
|
2 |
|
], |
|
[ |
|
7, |
|
62, |
|
6, |
|
0 |
|
], |
|
[ |
|
0, |
|
19, |
|
43, |
|
0 |
|
], |
|
[ |
|
0, |
|
2, |
|
0, |
|
59 |
|
] |
|
], |
|
"eval_f1": 0.7260427659517454, |
|
"eval_loss": 0.9058456420898438, |
|
"eval_precision": 0.7828499608603893, |
|
"eval_recall": 0.7252747252747253, |
|
"eval_runtime": 3.7417, |
|
"eval_samples_per_second": 72.962, |
|
"eval_steps_per_second": 0.802, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.6496350364963503, |
|
"grad_norm": 8.47255802154541, |
|
"learning_rate": 0.000275735294117647, |
|
"loss": 0.6917, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.37956204379562, |
|
"grad_norm": 16.689321517944336, |
|
"learning_rate": 0.0002999031705390845, |
|
"loss": 0.7264, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.109489051094891, |
|
"grad_norm": 1.7369310855865479, |
|
"learning_rate": 0.00029924913005299595, |
|
"loss": 0.6895, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.839416058394161, |
|
"grad_norm": 2.210369348526001, |
|
"learning_rate": 0.0002979807906935489, |
|
"loss": 0.6939, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.839416058394161, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
2, |
|
0, |
|
7 |
|
], |
|
[ |
|
29, |
|
38, |
|
7, |
|
1 |
|
], |
|
[ |
|
2, |
|
20, |
|
40, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.7418721712792054, |
|
"eval_loss": 0.8107791543006897, |
|
"eval_precision": 0.7517378077426524, |
|
"eval_recall": 0.7509157509157509, |
|
"eval_runtime": 3.7702, |
|
"eval_samples_per_second": 72.409, |
|
"eval_steps_per_second": 0.796, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.569343065693431, |
|
"grad_norm": 2.1358511447906494, |
|
"learning_rate": 0.000296103372855926, |
|
"loss": 0.5986, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.299270072992701, |
|
"grad_norm": 13.704009056091309, |
|
"learning_rate": 0.0002936246038592886, |
|
"loss": 0.5932, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.02919708029197, |
|
"grad_norm": 2.032876968383789, |
|
"learning_rate": 0.00029055468614167716, |
|
"loss": 0.5633, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.75912408759124, |
|
"grad_norm": 28.525798797607422, |
|
"learning_rate": 0.00028690625526749705, |
|
"loss": 0.4941, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.75912408759124, |
|
"eval_accuracy": 0.8241758241758241, |
|
"eval_confusion_matrix": [ |
|
[ |
|
63, |
|
8, |
|
1, |
|
3 |
|
], |
|
[ |
|
8, |
|
50, |
|
17, |
|
0 |
|
], |
|
[ |
|
2, |
|
9, |
|
51, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8222676260809794, |
|
"eval_loss": 0.7625077366828918, |
|
"eval_precision": 0.8229409839103053, |
|
"eval_recall": 0.8241758241758241, |
|
"eval_runtime": 3.757, |
|
"eval_samples_per_second": 72.664, |
|
"eval_steps_per_second": 0.799, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.489051094890511, |
|
"grad_norm": 0.18371808528900146, |
|
"learning_rate": 0.0002826943279204283, |
|
"loss": 0.4842, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.218978102189782, |
|
"grad_norm": 11.426072120666504, |
|
"learning_rate": 0.0002779362400958168, |
|
"loss": 0.4352, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.94890510948905, |
|
"grad_norm": 8.062601089477539, |
|
"learning_rate": 0.0002726515757469423, |
|
"loss": 0.4447, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.678832116788321, |
|
"grad_norm": 0.3985881805419922, |
|
"learning_rate": 0.00026686208617885055, |
|
"loss": 0.442, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.678832116788321, |
|
"eval_accuracy": 0.7985347985347986, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
6, |
|
1, |
|
2 |
|
], |
|
[ |
|
15, |
|
32, |
|
26, |
|
2 |
|
], |
|
[ |
|
2, |
|
1, |
|
59, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.781170020153555, |
|
"eval_loss": 0.9623217582702637, |
|
"eval_precision": 0.8093701586901577, |
|
"eval_recall": 0.7985347985347986, |
|
"eval_runtime": 3.774, |
|
"eval_samples_per_second": 72.337, |
|
"eval_steps_per_second": 0.795, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.408759124087592, |
|
"grad_norm": 38.726985931396484, |
|
"learning_rate": 0.0002605916005215186, |
|
"loss": 0.4504, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 13.138686131386862, |
|
"grad_norm": 0.026563748717308044, |
|
"learning_rate": 0.0002538659276508397, |
|
"loss": 0.3903, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.86861313868613, |
|
"grad_norm": 0.06770322471857071, |
|
"learning_rate": 0.0002467127499611136, |
|
"loss": 0.4094, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 14.598540145985401, |
|
"grad_norm": 1.2612749338150024, |
|
"learning_rate": 0.00023916150942626798, |
|
"loss": 0.4188, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.598540145985401, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
9, |
|
2, |
|
4 |
|
], |
|
[ |
|
8, |
|
56, |
|
11, |
|
0 |
|
], |
|
[ |
|
1, |
|
11, |
|
50, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8307422385946511, |
|
"eval_loss": 0.8534455299377441, |
|
"eval_precision": 0.8312566016541674, |
|
"eval_recall": 0.8315018315018315, |
|
"eval_runtime": 3.796, |
|
"eval_samples_per_second": 71.917, |
|
"eval_steps_per_second": 0.79, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.328467153284672, |
|
"grad_norm": 28.980899810791016, |
|
"learning_rate": 0.0002312432864187738, |
|
"loss": 0.3798, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 16.05839416058394, |
|
"grad_norm": 0.022609323263168335, |
|
"learning_rate": 0.0002229906717850284, |
|
"loss": 0.3672, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 16.78832116788321, |
|
"grad_norm": 0.02360348217189312, |
|
"learning_rate": 0.00021443763270373483, |
|
"loss": 0.3715, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 17.51824817518248, |
|
"grad_norm": 0.014020542614161968, |
|
"learning_rate": 0.0002056193728793941, |
|
"loss": 0.349, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 17.51824817518248, |
|
"eval_accuracy": 0.8351648351648352, |
|
"eval_confusion_matrix": [ |
|
[ |
|
62, |
|
10, |
|
1, |
|
2 |
|
], |
|
[ |
|
9, |
|
57, |
|
9, |
|
0 |
|
], |
|
[ |
|
2, |
|
12, |
|
48, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8350675728555914, |
|
"eval_loss": 0.8131950497627258, |
|
"eval_precision": 0.8358475863688551, |
|
"eval_recall": 0.8351648351648352, |
|
"eval_runtime": 3.7788, |
|
"eval_samples_per_second": 72.246, |
|
"eval_steps_per_second": 0.794, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.248175182481752, |
|
"grad_norm": 0.006028232164680958, |
|
"learning_rate": 0.0001965721876463452, |
|
"loss": 0.3491, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 18.978102189781023, |
|
"grad_norm": 0.008285734802484512, |
|
"learning_rate": 0.00018733331457973358, |
|
"loss": 0.3489, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 19.708029197080293, |
|
"grad_norm": 0.008053851313889027, |
|
"learning_rate": 0.00017794078022828275, |
|
"loss": 0.3497, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 20.437956204379564, |
|
"grad_norm": 0.003234422067180276, |
|
"learning_rate": 0.00016843324359970712, |
|
"loss": 0.3488, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 20.437956204379564, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
61, |
|
11, |
|
1, |
|
2 |
|
], |
|
[ |
|
8, |
|
57, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
52, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8462423027109934, |
|
"eval_loss": 0.7859560251235962, |
|
"eval_precision": 0.8474363933035696, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 3.7947, |
|
"eval_samples_per_second": 71.942, |
|
"eval_steps_per_second": 0.791, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 21.16788321167883, |
|
"grad_norm": 0.004595920909196138, |
|
"learning_rate": 0.00015884983704296757, |
|
"loss": 0.3488, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 21.8978102189781, |
|
"grad_norm": 0.002511706668883562, |
|
"learning_rate": 0.00014923000518228847, |
|
"loss": 0.3488, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.62773722627737, |
|
"grad_norm": 0.002340014325454831, |
|
"learning_rate": 0.00013961334256587125, |
|
"loss": 0.3488, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 23.357664233576642, |
|
"grad_norm": 0.0028287076856940985, |
|
"learning_rate": 0.00013003943069753198, |
|
"loss": 0.3488, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 23.357664233576642, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
61, |
|
11, |
|
1, |
|
2 |
|
], |
|
[ |
|
8, |
|
57, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
52, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8462423027109934, |
|
"eval_loss": 0.7856015563011169, |
|
"eval_precision": 0.8474363933035696, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 3.7861, |
|
"eval_samples_per_second": 72.105, |
|
"eval_steps_per_second": 0.792, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 24.087591240875913, |
|
"grad_norm": 0.0027960864827036858, |
|
"learning_rate": 0.00012054767512202832, |
|
"loss": 0.3488, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 24.817518248175183, |
|
"grad_norm": 0.0033820979297161102, |
|
"learning_rate": 0.00011117714323462186, |
|
"loss": 0.3488, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 25.547445255474454, |
|
"grad_norm": 0.0034969367552548647, |
|
"learning_rate": 0.00010196640348243974, |
|
"loss": 0.3488, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 26.277372262773724, |
|
"grad_norm": 0.0014958898536860943, |
|
"learning_rate": 9.295336661947115e-05, |
|
"loss": 0.3488, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 26.277372262773724, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_confusion_matrix": [ |
|
[ |
|
61, |
|
11, |
|
1, |
|
2 |
|
], |
|
[ |
|
7, |
|
58, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
9, |
|
53, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8535663673078441, |
|
"eval_loss": 0.7831193804740906, |
|
"eval_precision": 0.8551497604301419, |
|
"eval_recall": 0.8534798534798534, |
|
"eval_runtime": 3.7976, |
|
"eval_samples_per_second": 71.888, |
|
"eval_steps_per_second": 0.79, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 27.00729927007299, |
|
"grad_norm": 0.004900149069726467, |
|
"learning_rate": 8.417512966858319e-05, |
|
"loss": 0.3488, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 27.73722627737226, |
|
"grad_norm": 0.0018804975552484393, |
|
"learning_rate": 7.566782323279578e-05, |
|
"loss": 0.3488, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 28.467153284671532, |
|
"grad_norm": 0.0019178036600351334, |
|
"learning_rate": 6.746646278427247e-05, |
|
"loss": 0.3488, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 29.197080291970803, |
|
"grad_norm": 0.001025234698317945, |
|
"learning_rate": 5.960480454311155e-05, |
|
"loss": 0.3488, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 29.197080291970803, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
61, |
|
11, |
|
1, |
|
2 |
|
], |
|
[ |
|
8, |
|
57, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
9, |
|
53, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8496942339108237, |
|
"eval_loss": 0.7866398692131042, |
|
"eval_precision": 0.8506632615716467, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 3.7892, |
|
"eval_samples_per_second": 72.047, |
|
"eval_steps_per_second": 0.792, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 29.927007299270073, |
|
"grad_norm": 0.0027674695011228323, |
|
"learning_rate": 5.2115206539129e-05, |
|
"loss": 0.3488, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 30.656934306569344, |
|
"grad_norm": 0.0016269112238660455, |
|
"learning_rate": 4.5028495428494483e-05, |
|
"loss": 0.3488, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 31.386861313868614, |
|
"grad_norm": 0.0019462064374238253, |
|
"learning_rate": 3.837383961339246e-05, |
|
"loss": 0.3488, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 32.11678832116788, |
|
"grad_norm": 0.0011992512736469507, |
|
"learning_rate": 3.21786291869402e-05, |
|
"loss": 0.3488, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 32.11678832116788, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_confusion_matrix": [ |
|
[ |
|
61, |
|
11, |
|
1, |
|
2 |
|
], |
|
[ |
|
8, |
|
57, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
8, |
|
54, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8531308487327289, |
|
"eval_loss": 0.7856839895248413, |
|
"eval_precision": 0.8539396783782831, |
|
"eval_recall": 0.8534798534798534, |
|
"eval_runtime": 3.787, |
|
"eval_samples_per_second": 72.088, |
|
"eval_steps_per_second": 0.792, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 32.846715328467155, |
|
"grad_norm": 0.0029719627927988768, |
|
"learning_rate": 2.6468363197499458e-05, |
|
"loss": 0.3488, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 33.57664233576642, |
|
"grad_norm": 0.0012639207998290658, |
|
"learning_rate": 2.1266544696395582e-05, |
|
"loss": 0.3488, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 34.306569343065696, |
|
"grad_norm": 0.0011322245700284839, |
|
"learning_rate": 1.659458400101879e-05, |
|
"loss": 0.3488, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 35.03649635036496, |
|
"grad_norm": 0.002087602624669671, |
|
"learning_rate": 1.2471710571470578e-05, |
|
"loss": 0.3488, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 35.03649635036496, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
61, |
|
11, |
|
1, |
|
2 |
|
], |
|
[ |
|
8, |
|
57, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
9, |
|
53, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8496942339108237, |
|
"eval_loss": 0.7856935858726501, |
|
"eval_precision": 0.8506632615716467, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 3.788, |
|
"eval_samples_per_second": 72.069, |
|
"eval_steps_per_second": 0.792, |
|
"step": 2400 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.001 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.68527123264e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|