|
{ |
|
"best_metric": 0.8627769756077204, |
|
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-81", |
|
"epoch": 24.0, |
|
"eval_steps": 500, |
|
"global_step": 108, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"eval_accuracy": 0.63003663003663, |
|
"eval_confusion_matrix": [ |
|
[ |
|
53, |
|
0, |
|
16, |
|
3 |
|
], |
|
[ |
|
28, |
|
1, |
|
31, |
|
0 |
|
], |
|
[ |
|
13, |
|
0, |
|
62, |
|
1 |
|
], |
|
[ |
|
3, |
|
0, |
|
6, |
|
56 |
|
] |
|
], |
|
"eval_f1": 0.566693372031096, |
|
"eval_loss": 1.31327486038208, |
|
"eval_precision": 0.7361933549293478, |
|
"eval_recall": 0.63003663003663, |
|
"eval_runtime": 2.8436, |
|
"eval_samples_per_second": 96.005, |
|
"eval_steps_per_second": 1.055, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6593406593406593, |
|
"eval_confusion_matrix": [ |
|
[ |
|
41, |
|
10, |
|
16, |
|
5 |
|
], |
|
[ |
|
21, |
|
1, |
|
38, |
|
0 |
|
], |
|
[ |
|
1, |
|
2, |
|
73, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.595713773130153, |
|
"eval_loss": 0.9242589473724365, |
|
"eval_precision": 0.5696509512811321, |
|
"eval_recall": 0.6593406593406593, |
|
"eval_runtime": 2.8668, |
|
"eval_samples_per_second": 95.227, |
|
"eval_steps_per_second": 1.046, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_confusion_matrix": [ |
|
[ |
|
52, |
|
8, |
|
7, |
|
5 |
|
], |
|
[ |
|
18, |
|
5, |
|
37, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
75, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.6621621567125869, |
|
"eval_loss": 0.8232662081718445, |
|
"eval_precision": 0.671303801513745, |
|
"eval_recall": 0.717948717948718, |
|
"eval_runtime": 2.8711, |
|
"eval_samples_per_second": 95.084, |
|
"eval_steps_per_second": 1.045, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6959706959706959, |
|
"eval_confusion_matrix": [ |
|
[ |
|
45, |
|
9, |
|
13, |
|
5 |
|
], |
|
[ |
|
13, |
|
5, |
|
42, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
75, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6428796900399459, |
|
"eval_loss": 0.951453447341919, |
|
"eval_precision": 0.6613447705829899, |
|
"eval_recall": 0.6959706959706959, |
|
"eval_runtime": 2.8813, |
|
"eval_samples_per_second": 94.75, |
|
"eval_steps_per_second": 1.041, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_confusion_matrix": [ |
|
[ |
|
16, |
|
49, |
|
3, |
|
4 |
|
], |
|
[ |
|
1, |
|
26, |
|
33, |
|
0 |
|
], |
|
[ |
|
0, |
|
1, |
|
75, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6331541584523206, |
|
"eval_loss": 1.0080645084381104, |
|
"eval_precision": 0.7358031394913882, |
|
"eval_recall": 0.6666666666666666, |
|
"eval_runtime": 2.9108, |
|
"eval_samples_per_second": 93.789, |
|
"eval_steps_per_second": 1.031, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.652014652014652, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
36, |
|
16, |
|
8, |
|
0 |
|
], |
|
[ |
|
10, |
|
35, |
|
31, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6302407955860642, |
|
"eval_loss": 0.9598046541213989, |
|
"eval_precision": 0.6636241740077424, |
|
"eval_recall": 0.652014652014652, |
|
"eval_runtime": 2.9773, |
|
"eval_samples_per_second": 91.693, |
|
"eval_steps_per_second": 1.008, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
29, |
|
21, |
|
10, |
|
0 |
|
], |
|
[ |
|
3, |
|
15, |
|
58, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.7527855586679116, |
|
"eval_loss": 0.7898163795471191, |
|
"eval_precision": 0.7612467677056892, |
|
"eval_recall": 0.7692307692307693, |
|
"eval_runtime": 2.8918, |
|
"eval_samples_per_second": 94.404, |
|
"eval_steps_per_second": 1.037, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7802197802197802, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
25, |
|
15, |
|
19, |
|
1 |
|
], |
|
[ |
|
1, |
|
6, |
|
68, |
|
1 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.7471179200524057, |
|
"eval_loss": 0.7337484359741211, |
|
"eval_precision": 0.7733845922309294, |
|
"eval_recall": 0.7802197802197802, |
|
"eval_runtime": 2.9006, |
|
"eval_samples_per_second": 94.119, |
|
"eval_steps_per_second": 1.034, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_accuracy": 0.7912087912087912, |
|
"eval_confusion_matrix": [ |
|
[ |
|
57, |
|
8, |
|
3, |
|
4 |
|
], |
|
[ |
|
13, |
|
24, |
|
23, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.7766564722186922, |
|
"eval_loss": 0.7148727178573608, |
|
"eval_precision": 0.7821504483074875, |
|
"eval_recall": 0.7912087912087912, |
|
"eval_runtime": 2.8866, |
|
"eval_samples_per_second": 94.576, |
|
"eval_steps_per_second": 1.039, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_confusion_matrix": [ |
|
[ |
|
51, |
|
8, |
|
1, |
|
12 |
|
], |
|
[ |
|
10, |
|
36, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8094336953840884, |
|
"eval_loss": 0.7574812173843384, |
|
"eval_precision": 0.8127936625684181, |
|
"eval_recall": 0.8168498168498168, |
|
"eval_runtime": 2.8967, |
|
"eval_samples_per_second": 94.245, |
|
"eval_steps_per_second": 1.036, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 10.88888888888889, |
|
"eval_accuracy": 0.8021978021978022, |
|
"eval_confusion_matrix": [ |
|
[ |
|
48, |
|
18, |
|
2, |
|
4 |
|
], |
|
[ |
|
4, |
|
41, |
|
15, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
3, |
|
0, |
|
1, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.801525180147331, |
|
"eval_loss": 0.7140281200408936, |
|
"eval_precision": 0.8109338936925145, |
|
"eval_recall": 0.8021978021978022, |
|
"eval_runtime": 2.917, |
|
"eval_samples_per_second": 93.589, |
|
"eval_steps_per_second": 1.028, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_confusion_matrix": [ |
|
[ |
|
56, |
|
12, |
|
0, |
|
4 |
|
], |
|
[ |
|
6, |
|
43, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
6, |
|
70, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8523994617102314, |
|
"eval_loss": 0.6672152280807495, |
|
"eval_precision": 0.8540313732642031, |
|
"eval_recall": 0.8534798534798534, |
|
"eval_runtime": 2.8969, |
|
"eval_samples_per_second": 94.238, |
|
"eval_steps_per_second": 1.036, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 12.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
8, |
|
0, |
|
4 |
|
], |
|
[ |
|
8, |
|
38, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
6, |
|
70, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8463270052615757, |
|
"eval_loss": 0.6432910561561584, |
|
"eval_precision": 0.8460243715014519, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 3.0281, |
|
"eval_samples_per_second": 90.156, |
|
"eval_steps_per_second": 0.991, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8278388278388278, |
|
"eval_confusion_matrix": [ |
|
[ |
|
54, |
|
13, |
|
1, |
|
4 |
|
], |
|
[ |
|
7, |
|
44, |
|
9, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
1, |
|
1, |
|
0, |
|
63 |
|
] |
|
], |
|
"eval_f1": 0.82943590265942, |
|
"eval_loss": 0.7395206093788147, |
|
"eval_precision": 0.8350059217447382, |
|
"eval_recall": 0.8278388278388278, |
|
"eval_runtime": 2.9869, |
|
"eval_samples_per_second": 91.399, |
|
"eval_steps_per_second": 1.004, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 14.88888888888889, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_confusion_matrix": [ |
|
[ |
|
54, |
|
13, |
|
1, |
|
4 |
|
], |
|
[ |
|
10, |
|
39, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8291811389886823, |
|
"eval_loss": 0.7115849852561951, |
|
"eval_precision": 0.829244108966536, |
|
"eval_recall": 0.8315018315018315, |
|
"eval_runtime": 2.9616, |
|
"eval_samples_per_second": 92.181, |
|
"eval_steps_per_second": 1.013, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
7, |
|
1, |
|
4 |
|
], |
|
[ |
|
10, |
|
39, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
1, |
|
1, |
|
0, |
|
63 |
|
] |
|
], |
|
"eval_f1": 0.8304073820984628, |
|
"eval_loss": 0.7295921444892883, |
|
"eval_precision": 0.8295426562258641, |
|
"eval_recall": 0.8315018315018315, |
|
"eval_runtime": 2.8758, |
|
"eval_samples_per_second": 94.932, |
|
"eval_steps_per_second": 1.043, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 16.88888888888889, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_confusion_matrix": [ |
|
[ |
|
62, |
|
5, |
|
1, |
|
4 |
|
], |
|
[ |
|
9, |
|
37, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
3, |
|
73, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8589767100678526, |
|
"eval_loss": 0.7055637240409851, |
|
"eval_precision": 0.8628397746044805, |
|
"eval_recall": 0.8644688644688645, |
|
"eval_runtime": 3.074, |
|
"eval_samples_per_second": 88.809, |
|
"eval_steps_per_second": 0.976, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
2, |
|
1, |
|
4 |
|
], |
|
[ |
|
13, |
|
42, |
|
5, |
|
0 |
|
], |
|
[ |
|
0, |
|
12, |
|
64, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8627769756077204, |
|
"eval_loss": 0.7563945651054382, |
|
"eval_precision": 0.8634344261673453, |
|
"eval_recall": 0.8644688644688645, |
|
"eval_runtime": 3.0072, |
|
"eval_samples_per_second": 90.783, |
|
"eval_steps_per_second": 0.998, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 18.88888888888889, |
|
"eval_accuracy": 0.8424908424908425, |
|
"eval_confusion_matrix": [ |
|
[ |
|
64, |
|
4, |
|
0, |
|
4 |
|
], |
|
[ |
|
11, |
|
41, |
|
8, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
66, |
|
0 |
|
], |
|
[ |
|
6, |
|
0, |
|
0, |
|
59 |
|
] |
|
], |
|
"eval_f1": 0.8418306879608031, |
|
"eval_loss": 0.7825365662574768, |
|
"eval_precision": 0.8434907006335578, |
|
"eval_recall": 0.8424908424908425, |
|
"eval_runtime": 3.0343, |
|
"eval_samples_per_second": 89.972, |
|
"eval_steps_per_second": 0.989, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_confusion_matrix": [ |
|
[ |
|
40, |
|
28, |
|
1, |
|
3 |
|
], |
|
[ |
|
2, |
|
50, |
|
8, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8065760931078588, |
|
"eval_loss": 0.8426868915557861, |
|
"eval_precision": 0.8467707085637404, |
|
"eval_recall": 0.8058608058608059, |
|
"eval_runtime": 3.0783, |
|
"eval_samples_per_second": 88.686, |
|
"eval_steps_per_second": 0.975, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 20.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
63, |
|
4, |
|
0, |
|
5 |
|
], |
|
[ |
|
13, |
|
40, |
|
7, |
|
0 |
|
], |
|
[ |
|
0, |
|
12, |
|
64, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8477878057985963, |
|
"eval_loss": 0.7440442442893982, |
|
"eval_precision": 0.8476393351433065, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.9875, |
|
"eval_samples_per_second": 91.38, |
|
"eval_steps_per_second": 1.004, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8608058608058609, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
1, |
|
1, |
|
4 |
|
], |
|
[ |
|
13, |
|
36, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
3, |
|
0, |
|
0, |
|
62 |
|
] |
|
], |
|
"eval_f1": 0.855194718990792, |
|
"eval_loss": 0.7338178753852844, |
|
"eval_precision": 0.8624631692093176, |
|
"eval_recall": 0.8608058608058609, |
|
"eval_runtime": 2.98, |
|
"eval_samples_per_second": 91.611, |
|
"eval_steps_per_second": 1.007, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 22.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
58, |
|
10, |
|
0, |
|
4 |
|
], |
|
[ |
|
10, |
|
43, |
|
7, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
2, |
|
1, |
|
0, |
|
62 |
|
] |
|
], |
|
"eval_f1": 0.8497737987724402, |
|
"eval_loss": 0.7231407761573792, |
|
"eval_precision": 0.8498638493954653, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.9272, |
|
"eval_samples_per_second": 93.262, |
|
"eval_steps_per_second": 1.025, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8424908424908425, |
|
"eval_confusion_matrix": [ |
|
[ |
|
67, |
|
1, |
|
0, |
|
4 |
|
], |
|
[ |
|
12, |
|
44, |
|
4, |
|
0 |
|
], |
|
[ |
|
0, |
|
18, |
|
58, |
|
0 |
|
], |
|
[ |
|
2, |
|
2, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8431607380967995, |
|
"eval_loss": 0.752363920211792, |
|
"eval_precision": 0.8507974885146101, |
|
"eval_recall": 0.8424908424908425, |
|
"eval_runtime": 3.0257, |
|
"eval_samples_per_second": 90.228, |
|
"eval_steps_per_second": 0.992, |
|
"step": 108 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 45, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.94937668096e+16, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|