Marcos12886's picture
Subir modelo definitivo
38513ec
{
"best_metric": 0.8535663673078441,
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1800",
"epoch": 35.03649635036496,
"eval_steps": 200,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7299270072992701,
"grad_norm": 2.289438486099243,
"learning_rate": 5.5147058823529414e-05,
"loss": 1.2878,
"step": 50
},
{
"epoch": 1.4598540145985401,
"grad_norm": 2.5048491954803467,
"learning_rate": 0.00011029411764705883,
"loss": 0.8322,
"step": 100
},
{
"epoch": 2.18978102189781,
"grad_norm": 11.18371295928955,
"learning_rate": 0.00016544117647058823,
"loss": 0.7897,
"step": 150
},
{
"epoch": 2.9197080291970803,
"grad_norm": 9.702393531799316,
"learning_rate": 0.00022058823529411765,
"loss": 0.7149,
"step": 200
},
{
"epoch": 2.9197080291970803,
"eval_accuracy": 0.7252747252747253,
"eval_confusion_matrix": [
[
34,
39,
0,
2
],
[
7,
62,
6,
0
],
[
0,
19,
43,
0
],
[
0,
2,
0,
59
]
],
"eval_f1": 0.7260427659517454,
"eval_loss": 0.9058456420898438,
"eval_precision": 0.7828499608603893,
"eval_recall": 0.7252747252747253,
"eval_runtime": 3.7417,
"eval_samples_per_second": 72.962,
"eval_steps_per_second": 0.802,
"step": 200
},
{
"epoch": 3.6496350364963503,
"grad_norm": 8.47255802154541,
"learning_rate": 0.000275735294117647,
"loss": 0.6917,
"step": 250
},
{
"epoch": 4.37956204379562,
"grad_norm": 16.689321517944336,
"learning_rate": 0.0002999031705390845,
"loss": 0.7264,
"step": 300
},
{
"epoch": 5.109489051094891,
"grad_norm": 1.7369310855865479,
"learning_rate": 0.00029924913005299595,
"loss": 0.6895,
"step": 350
},
{
"epoch": 5.839416058394161,
"grad_norm": 2.210369348526001,
"learning_rate": 0.0002979807906935489,
"loss": 0.6939,
"step": 400
},
{
"epoch": 5.839416058394161,
"eval_accuracy": 0.7509157509157509,
"eval_confusion_matrix": [
[
66,
2,
0,
7
],
[
29,
38,
7,
1
],
[
2,
20,
40,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.7418721712792054,
"eval_loss": 0.8107791543006897,
"eval_precision": 0.7517378077426524,
"eval_recall": 0.7509157509157509,
"eval_runtime": 3.7702,
"eval_samples_per_second": 72.409,
"eval_steps_per_second": 0.796,
"step": 400
},
{
"epoch": 6.569343065693431,
"grad_norm": 2.1358511447906494,
"learning_rate": 0.000296103372855926,
"loss": 0.5986,
"step": 450
},
{
"epoch": 7.299270072992701,
"grad_norm": 13.704009056091309,
"learning_rate": 0.0002936246038592886,
"loss": 0.5932,
"step": 500
},
{
"epoch": 8.02919708029197,
"grad_norm": 2.032876968383789,
"learning_rate": 0.00029055468614167716,
"loss": 0.5633,
"step": 550
},
{
"epoch": 8.75912408759124,
"grad_norm": 28.525798797607422,
"learning_rate": 0.00028690625526749705,
"loss": 0.4941,
"step": 600
},
{
"epoch": 8.75912408759124,
"eval_accuracy": 0.8241758241758241,
"eval_confusion_matrix": [
[
63,
8,
1,
3
],
[
8,
50,
17,
0
],
[
2,
9,
51,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8222676260809794,
"eval_loss": 0.7625077366828918,
"eval_precision": 0.8229409839103053,
"eval_recall": 0.8241758241758241,
"eval_runtime": 3.757,
"eval_samples_per_second": 72.664,
"eval_steps_per_second": 0.799,
"step": 600
},
{
"epoch": 9.489051094890511,
"grad_norm": 0.18371808528900146,
"learning_rate": 0.0002826943279204283,
"loss": 0.4842,
"step": 650
},
{
"epoch": 10.218978102189782,
"grad_norm": 11.426072120666504,
"learning_rate": 0.0002779362400958168,
"loss": 0.4352,
"step": 700
},
{
"epoch": 10.94890510948905,
"grad_norm": 8.062601089477539,
"learning_rate": 0.0002726515757469423,
"loss": 0.4447,
"step": 750
},
{
"epoch": 11.678832116788321,
"grad_norm": 0.3985881805419922,
"learning_rate": 0.00026686208617885055,
"loss": 0.442,
"step": 800
},
{
"epoch": 11.678832116788321,
"eval_accuracy": 0.7985347985347986,
"eval_confusion_matrix": [
[
66,
6,
1,
2
],
[
15,
32,
26,
2
],
[
2,
1,
59,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.781170020153555,
"eval_loss": 0.9623217582702637,
"eval_precision": 0.8093701586901577,
"eval_recall": 0.7985347985347986,
"eval_runtime": 3.774,
"eval_samples_per_second": 72.337,
"eval_steps_per_second": 0.795,
"step": 800
},
{
"epoch": 12.408759124087592,
"grad_norm": 38.726985931396484,
"learning_rate": 0.0002605916005215186,
"loss": 0.4504,
"step": 850
},
{
"epoch": 13.138686131386862,
"grad_norm": 0.026563748717308044,
"learning_rate": 0.0002538659276508397,
"loss": 0.3903,
"step": 900
},
{
"epoch": 13.86861313868613,
"grad_norm": 0.06770322471857071,
"learning_rate": 0.0002467127499611136,
"loss": 0.4094,
"step": 950
},
{
"epoch": 14.598540145985401,
"grad_norm": 1.2612749338150024,
"learning_rate": 0.00023916150942626798,
"loss": 0.4188,
"step": 1000
},
{
"epoch": 14.598540145985401,
"eval_accuracy": 0.8315018315018315,
"eval_confusion_matrix": [
[
60,
9,
2,
4
],
[
8,
56,
11,
0
],
[
1,
11,
50,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8307422385946511,
"eval_loss": 0.8534455299377441,
"eval_precision": 0.8312566016541674,
"eval_recall": 0.8315018315018315,
"eval_runtime": 3.796,
"eval_samples_per_second": 71.917,
"eval_steps_per_second": 0.79,
"step": 1000
},
{
"epoch": 15.328467153284672,
"grad_norm": 28.980899810791016,
"learning_rate": 0.0002312432864187738,
"loss": 0.3798,
"step": 1050
},
{
"epoch": 16.05839416058394,
"grad_norm": 0.022609323263168335,
"learning_rate": 0.0002229906717850284,
"loss": 0.3672,
"step": 1100
},
{
"epoch": 16.78832116788321,
"grad_norm": 0.02360348217189312,
"learning_rate": 0.00021443763270373483,
"loss": 0.3715,
"step": 1150
},
{
"epoch": 17.51824817518248,
"grad_norm": 0.014020542614161968,
"learning_rate": 0.0002056193728793941,
"loss": 0.349,
"step": 1200
},
{
"epoch": 17.51824817518248,
"eval_accuracy": 0.8351648351648352,
"eval_confusion_matrix": [
[
62,
10,
1,
2
],
[
9,
57,
9,
0
],
[
2,
12,
48,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8350675728555914,
"eval_loss": 0.8131950497627258,
"eval_precision": 0.8358475863688551,
"eval_recall": 0.8351648351648352,
"eval_runtime": 3.7788,
"eval_samples_per_second": 72.246,
"eval_steps_per_second": 0.794,
"step": 1200
},
{
"epoch": 18.248175182481752,
"grad_norm": 0.006028232164680958,
"learning_rate": 0.0001965721876463452,
"loss": 0.3491,
"step": 1250
},
{
"epoch": 18.978102189781023,
"grad_norm": 0.008285734802484512,
"learning_rate": 0.00018733331457973358,
"loss": 0.3489,
"step": 1300
},
{
"epoch": 19.708029197080293,
"grad_norm": 0.008053851313889027,
"learning_rate": 0.00017794078022828275,
"loss": 0.3497,
"step": 1350
},
{
"epoch": 20.437956204379564,
"grad_norm": 0.003234422067180276,
"learning_rate": 0.00016843324359970712,
"loss": 0.3488,
"step": 1400
},
{
"epoch": 20.437956204379564,
"eval_accuracy": 0.8461538461538461,
"eval_confusion_matrix": [
[
61,
11,
1,
2
],
[
8,
57,
10,
0
],
[
0,
10,
52,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8462423027109934,
"eval_loss": 0.7859560251235962,
"eval_precision": 0.8474363933035696,
"eval_recall": 0.8461538461538461,
"eval_runtime": 3.7947,
"eval_samples_per_second": 71.942,
"eval_steps_per_second": 0.791,
"step": 1400
},
{
"epoch": 21.16788321167883,
"grad_norm": 0.004595920909196138,
"learning_rate": 0.00015884983704296757,
"loss": 0.3488,
"step": 1450
},
{
"epoch": 21.8978102189781,
"grad_norm": 0.002511706668883562,
"learning_rate": 0.00014923000518228847,
"loss": 0.3488,
"step": 1500
},
{
"epoch": 22.62773722627737,
"grad_norm": 0.002340014325454831,
"learning_rate": 0.00013961334256587125,
"loss": 0.3488,
"step": 1550
},
{
"epoch": 23.357664233576642,
"grad_norm": 0.0028287076856940985,
"learning_rate": 0.00013003943069753198,
"loss": 0.3488,
"step": 1600
},
{
"epoch": 23.357664233576642,
"eval_accuracy": 0.8461538461538461,
"eval_confusion_matrix": [
[
61,
11,
1,
2
],
[
8,
57,
10,
0
],
[
0,
10,
52,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8462423027109934,
"eval_loss": 0.7856015563011169,
"eval_precision": 0.8474363933035696,
"eval_recall": 0.8461538461538461,
"eval_runtime": 3.7861,
"eval_samples_per_second": 72.105,
"eval_steps_per_second": 0.792,
"step": 1600
},
{
"epoch": 24.087591240875913,
"grad_norm": 0.0027960864827036858,
"learning_rate": 0.00012054767512202832,
"loss": 0.3488,
"step": 1650
},
{
"epoch": 24.817518248175183,
"grad_norm": 0.0033820979297161102,
"learning_rate": 0.00011117714323462186,
"loss": 0.3488,
"step": 1700
},
{
"epoch": 25.547445255474454,
"grad_norm": 0.0034969367552548647,
"learning_rate": 0.00010196640348243974,
"loss": 0.3488,
"step": 1750
},
{
"epoch": 26.277372262773724,
"grad_norm": 0.0014958898536860943,
"learning_rate": 9.295336661947115e-05,
"loss": 0.3488,
"step": 1800
},
{
"epoch": 26.277372262773724,
"eval_accuracy": 0.8534798534798534,
"eval_confusion_matrix": [
[
61,
11,
1,
2
],
[
7,
58,
10,
0
],
[
0,
9,
53,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8535663673078441,
"eval_loss": 0.7831193804740906,
"eval_precision": 0.8551497604301419,
"eval_recall": 0.8534798534798534,
"eval_runtime": 3.7976,
"eval_samples_per_second": 71.888,
"eval_steps_per_second": 0.79,
"step": 1800
},
{
"epoch": 27.00729927007299,
"grad_norm": 0.004900149069726467,
"learning_rate": 8.417512966858319e-05,
"loss": 0.3488,
"step": 1850
},
{
"epoch": 27.73722627737226,
"grad_norm": 0.0018804975552484393,
"learning_rate": 7.566782323279578e-05,
"loss": 0.3488,
"step": 1900
},
{
"epoch": 28.467153284671532,
"grad_norm": 0.0019178036600351334,
"learning_rate": 6.746646278427247e-05,
"loss": 0.3488,
"step": 1950
},
{
"epoch": 29.197080291970803,
"grad_norm": 0.001025234698317945,
"learning_rate": 5.960480454311155e-05,
"loss": 0.3488,
"step": 2000
},
{
"epoch": 29.197080291970803,
"eval_accuracy": 0.8498168498168498,
"eval_confusion_matrix": [
[
61,
11,
1,
2
],
[
8,
57,
10,
0
],
[
0,
9,
53,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8496942339108237,
"eval_loss": 0.7866398692131042,
"eval_precision": 0.8506632615716467,
"eval_recall": 0.8498168498168498,
"eval_runtime": 3.7892,
"eval_samples_per_second": 72.047,
"eval_steps_per_second": 0.792,
"step": 2000
},
{
"epoch": 29.927007299270073,
"grad_norm": 0.0027674695011228323,
"learning_rate": 5.2115206539129e-05,
"loss": 0.3488,
"step": 2050
},
{
"epoch": 30.656934306569344,
"grad_norm": 0.0016269112238660455,
"learning_rate": 4.5028495428494483e-05,
"loss": 0.3488,
"step": 2100
},
{
"epoch": 31.386861313868614,
"grad_norm": 0.0019462064374238253,
"learning_rate": 3.837383961339246e-05,
"loss": 0.3488,
"step": 2150
},
{
"epoch": 32.11678832116788,
"grad_norm": 0.0011992512736469507,
"learning_rate": 3.21786291869402e-05,
"loss": 0.3488,
"step": 2200
},
{
"epoch": 32.11678832116788,
"eval_accuracy": 0.8534798534798534,
"eval_confusion_matrix": [
[
61,
11,
1,
2
],
[
8,
57,
10,
0
],
[
0,
8,
54,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8531308487327289,
"eval_loss": 0.7856839895248413,
"eval_precision": 0.8539396783782831,
"eval_recall": 0.8534798534798534,
"eval_runtime": 3.787,
"eval_samples_per_second": 72.088,
"eval_steps_per_second": 0.792,
"step": 2200
},
{
"epoch": 32.846715328467155,
"grad_norm": 0.0029719627927988768,
"learning_rate": 2.6468363197499458e-05,
"loss": 0.3488,
"step": 2250
},
{
"epoch": 33.57664233576642,
"grad_norm": 0.0012639207998290658,
"learning_rate": 2.1266544696395582e-05,
"loss": 0.3488,
"step": 2300
},
{
"epoch": 34.306569343065696,
"grad_norm": 0.0011322245700284839,
"learning_rate": 1.659458400101879e-05,
"loss": 0.3488,
"step": 2350
},
{
"epoch": 35.03649635036496,
"grad_norm": 0.002087602624669671,
"learning_rate": 1.2471710571470578e-05,
"loss": 0.3488,
"step": 2400
},
{
"epoch": 35.03649635036496,
"eval_accuracy": 0.8498168498168498,
"eval_confusion_matrix": [
[
61,
11,
1,
2
],
[
8,
57,
10,
0
],
[
0,
9,
53,
0
],
[
0,
0,
0,
61
]
],
"eval_f1": 0.8496942339108237,
"eval_loss": 0.7856935858726501,
"eval_precision": 0.8506632615716467,
"eval_recall": 0.8498168498168498,
"eval_runtime": 3.788,
"eval_samples_per_second": 72.069,
"eval_steps_per_second": 0.792,
"step": 2400
}
],
"logging_steps": 50,
"max_steps": 2720,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.68527123264e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}