|
{ |
|
"best_metric": 0.8754390108936493, |
|
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"eval_accuracy": 0.63003663003663, |
|
"eval_confusion_matrix": [ |
|
[ |
|
53, |
|
0, |
|
16, |
|
3 |
|
], |
|
[ |
|
28, |
|
1, |
|
31, |
|
0 |
|
], |
|
[ |
|
13, |
|
0, |
|
62, |
|
1 |
|
], |
|
[ |
|
3, |
|
0, |
|
6, |
|
56 |
|
] |
|
], |
|
"eval_f1": 0.566693372031096, |
|
"eval_loss": 1.31327486038208, |
|
"eval_precision": 0.7361933549293478, |
|
"eval_recall": 0.63003663003663, |
|
"eval_runtime": 2.8436, |
|
"eval_samples_per_second": 96.005, |
|
"eval_steps_per_second": 1.055, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6593406593406593, |
|
"eval_confusion_matrix": [ |
|
[ |
|
41, |
|
10, |
|
16, |
|
5 |
|
], |
|
[ |
|
21, |
|
1, |
|
38, |
|
0 |
|
], |
|
[ |
|
1, |
|
2, |
|
73, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.595713773130153, |
|
"eval_loss": 0.9242589473724365, |
|
"eval_precision": 0.5696509512811321, |
|
"eval_recall": 0.6593406593406593, |
|
"eval_runtime": 2.8668, |
|
"eval_samples_per_second": 95.227, |
|
"eval_steps_per_second": 1.046, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_confusion_matrix": [ |
|
[ |
|
52, |
|
8, |
|
7, |
|
5 |
|
], |
|
[ |
|
18, |
|
5, |
|
37, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
75, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.6621621567125869, |
|
"eval_loss": 0.8232662081718445, |
|
"eval_precision": 0.671303801513745, |
|
"eval_recall": 0.717948717948718, |
|
"eval_runtime": 2.8711, |
|
"eval_samples_per_second": 95.084, |
|
"eval_steps_per_second": 1.045, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6959706959706959, |
|
"eval_confusion_matrix": [ |
|
[ |
|
45, |
|
9, |
|
13, |
|
5 |
|
], |
|
[ |
|
13, |
|
5, |
|
42, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
75, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6428796900399459, |
|
"eval_loss": 0.951453447341919, |
|
"eval_precision": 0.6613447705829899, |
|
"eval_recall": 0.6959706959706959, |
|
"eval_runtime": 2.8813, |
|
"eval_samples_per_second": 94.75, |
|
"eval_steps_per_second": 1.041, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_confusion_matrix": [ |
|
[ |
|
16, |
|
49, |
|
3, |
|
4 |
|
], |
|
[ |
|
1, |
|
26, |
|
33, |
|
0 |
|
], |
|
[ |
|
0, |
|
1, |
|
75, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6331541584523206, |
|
"eval_loss": 1.0080645084381104, |
|
"eval_precision": 0.7358031394913882, |
|
"eval_recall": 0.6666666666666666, |
|
"eval_runtime": 2.9108, |
|
"eval_samples_per_second": 93.789, |
|
"eval_steps_per_second": 1.031, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.652014652014652, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
36, |
|
16, |
|
8, |
|
0 |
|
], |
|
[ |
|
10, |
|
35, |
|
31, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6302407955860642, |
|
"eval_loss": 0.9598046541213989, |
|
"eval_precision": 0.6636241740077424, |
|
"eval_recall": 0.652014652014652, |
|
"eval_runtime": 2.9773, |
|
"eval_samples_per_second": 91.693, |
|
"eval_steps_per_second": 1.008, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
29, |
|
21, |
|
10, |
|
0 |
|
], |
|
[ |
|
3, |
|
15, |
|
58, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.7527855586679116, |
|
"eval_loss": 0.7898163795471191, |
|
"eval_precision": 0.7612467677056892, |
|
"eval_recall": 0.7692307692307693, |
|
"eval_runtime": 2.8918, |
|
"eval_samples_per_second": 94.404, |
|
"eval_steps_per_second": 1.037, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7802197802197802, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
25, |
|
15, |
|
19, |
|
1 |
|
], |
|
[ |
|
1, |
|
6, |
|
68, |
|
1 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.7471179200524057, |
|
"eval_loss": 0.7337484359741211, |
|
"eval_precision": 0.7733845922309294, |
|
"eval_recall": 0.7802197802197802, |
|
"eval_runtime": 2.9006, |
|
"eval_samples_per_second": 94.119, |
|
"eval_steps_per_second": 1.034, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_accuracy": 0.7912087912087912, |
|
"eval_confusion_matrix": [ |
|
[ |
|
57, |
|
8, |
|
3, |
|
4 |
|
], |
|
[ |
|
13, |
|
24, |
|
23, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.7766564722186922, |
|
"eval_loss": 0.7148727178573608, |
|
"eval_precision": 0.7821504483074875, |
|
"eval_recall": 0.7912087912087912, |
|
"eval_runtime": 2.8866, |
|
"eval_samples_per_second": 94.576, |
|
"eval_steps_per_second": 1.039, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_confusion_matrix": [ |
|
[ |
|
51, |
|
8, |
|
1, |
|
12 |
|
], |
|
[ |
|
10, |
|
36, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8094336953840884, |
|
"eval_loss": 0.7574812173843384, |
|
"eval_precision": 0.8127936625684181, |
|
"eval_recall": 0.8168498168498168, |
|
"eval_runtime": 2.8967, |
|
"eval_samples_per_second": 94.245, |
|
"eval_steps_per_second": 1.036, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 10.88888888888889, |
|
"eval_accuracy": 0.8021978021978022, |
|
"eval_confusion_matrix": [ |
|
[ |
|
48, |
|
18, |
|
2, |
|
4 |
|
], |
|
[ |
|
4, |
|
41, |
|
15, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
3, |
|
0, |
|
1, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.801525180147331, |
|
"eval_loss": 0.7140281200408936, |
|
"eval_precision": 0.8109338936925145, |
|
"eval_recall": 0.8021978021978022, |
|
"eval_runtime": 2.917, |
|
"eval_samples_per_second": 93.589, |
|
"eval_steps_per_second": 1.028, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_confusion_matrix": [ |
|
[ |
|
56, |
|
12, |
|
0, |
|
4 |
|
], |
|
[ |
|
6, |
|
43, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
6, |
|
70, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8523994617102314, |
|
"eval_loss": 0.6672152280807495, |
|
"eval_precision": 0.8540313732642031, |
|
"eval_recall": 0.8534798534798534, |
|
"eval_runtime": 2.8969, |
|
"eval_samples_per_second": 94.238, |
|
"eval_steps_per_second": 1.036, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 12.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
8, |
|
0, |
|
4 |
|
], |
|
[ |
|
8, |
|
38, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
6, |
|
70, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8463270052615757, |
|
"eval_loss": 0.6432910561561584, |
|
"eval_precision": 0.8460243715014519, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 3.0281, |
|
"eval_samples_per_second": 90.156, |
|
"eval_steps_per_second": 0.991, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8278388278388278, |
|
"eval_confusion_matrix": [ |
|
[ |
|
54, |
|
13, |
|
1, |
|
4 |
|
], |
|
[ |
|
7, |
|
44, |
|
9, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
1, |
|
1, |
|
0, |
|
63 |
|
] |
|
], |
|
"eval_f1": 0.82943590265942, |
|
"eval_loss": 0.7395206093788147, |
|
"eval_precision": 0.8350059217447382, |
|
"eval_recall": 0.8278388278388278, |
|
"eval_runtime": 2.9869, |
|
"eval_samples_per_second": 91.399, |
|
"eval_steps_per_second": 1.004, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 14.88888888888889, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_confusion_matrix": [ |
|
[ |
|
54, |
|
13, |
|
1, |
|
4 |
|
], |
|
[ |
|
10, |
|
39, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8291811389886823, |
|
"eval_loss": 0.7115849852561951, |
|
"eval_precision": 0.829244108966536, |
|
"eval_recall": 0.8315018315018315, |
|
"eval_runtime": 2.9616, |
|
"eval_samples_per_second": 92.181, |
|
"eval_steps_per_second": 1.013, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
7, |
|
1, |
|
4 |
|
], |
|
[ |
|
10, |
|
39, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
1, |
|
1, |
|
0, |
|
63 |
|
] |
|
], |
|
"eval_f1": 0.8304073820984628, |
|
"eval_loss": 0.7295921444892883, |
|
"eval_precision": 0.8295426562258641, |
|
"eval_recall": 0.8315018315018315, |
|
"eval_runtime": 2.8758, |
|
"eval_samples_per_second": 94.932, |
|
"eval_steps_per_second": 1.043, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 16.88888888888889, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_confusion_matrix": [ |
|
[ |
|
62, |
|
5, |
|
1, |
|
4 |
|
], |
|
[ |
|
9, |
|
37, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
3, |
|
73, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8589767100678526, |
|
"eval_loss": 0.7055637240409851, |
|
"eval_precision": 0.8628397746044805, |
|
"eval_recall": 0.8644688644688645, |
|
"eval_runtime": 3.074, |
|
"eval_samples_per_second": 88.809, |
|
"eval_steps_per_second": 0.976, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
2, |
|
1, |
|
4 |
|
], |
|
[ |
|
13, |
|
42, |
|
5, |
|
0 |
|
], |
|
[ |
|
0, |
|
12, |
|
64, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8627769756077204, |
|
"eval_loss": 0.7563945651054382, |
|
"eval_precision": 0.8634344261673453, |
|
"eval_recall": 0.8644688644688645, |
|
"eval_runtime": 3.0072, |
|
"eval_samples_per_second": 90.783, |
|
"eval_steps_per_second": 0.998, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 18.88888888888889, |
|
"eval_accuracy": 0.8424908424908425, |
|
"eval_confusion_matrix": [ |
|
[ |
|
64, |
|
4, |
|
0, |
|
4 |
|
], |
|
[ |
|
11, |
|
41, |
|
8, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
66, |
|
0 |
|
], |
|
[ |
|
6, |
|
0, |
|
0, |
|
59 |
|
] |
|
], |
|
"eval_f1": 0.8418306879608031, |
|
"eval_loss": 0.7825365662574768, |
|
"eval_precision": 0.8434907006335578, |
|
"eval_recall": 0.8424908424908425, |
|
"eval_runtime": 3.0343, |
|
"eval_samples_per_second": 89.972, |
|
"eval_steps_per_second": 0.989, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_confusion_matrix": [ |
|
[ |
|
40, |
|
28, |
|
1, |
|
3 |
|
], |
|
[ |
|
2, |
|
50, |
|
8, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8065760931078588, |
|
"eval_loss": 0.8426868915557861, |
|
"eval_precision": 0.8467707085637404, |
|
"eval_recall": 0.8058608058608059, |
|
"eval_runtime": 3.0783, |
|
"eval_samples_per_second": 88.686, |
|
"eval_steps_per_second": 0.975, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 20.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
63, |
|
4, |
|
0, |
|
5 |
|
], |
|
[ |
|
13, |
|
40, |
|
7, |
|
0 |
|
], |
|
[ |
|
0, |
|
12, |
|
64, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8477878057985963, |
|
"eval_loss": 0.7440442442893982, |
|
"eval_precision": 0.8476393351433065, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.9875, |
|
"eval_samples_per_second": 91.38, |
|
"eval_steps_per_second": 1.004, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8608058608058609, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
1, |
|
1, |
|
4 |
|
], |
|
[ |
|
13, |
|
36, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
3, |
|
0, |
|
0, |
|
62 |
|
] |
|
], |
|
"eval_f1": 0.855194718990792, |
|
"eval_loss": 0.7338178753852844, |
|
"eval_precision": 0.8624631692093176, |
|
"eval_recall": 0.8608058608058609, |
|
"eval_runtime": 2.98, |
|
"eval_samples_per_second": 91.611, |
|
"eval_steps_per_second": 1.007, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 22.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
58, |
|
10, |
|
0, |
|
4 |
|
], |
|
[ |
|
10, |
|
43, |
|
7, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
2, |
|
1, |
|
0, |
|
62 |
|
] |
|
], |
|
"eval_f1": 0.8497737987724402, |
|
"eval_loss": 0.7231407761573792, |
|
"eval_precision": 0.8498638493954653, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.9272, |
|
"eval_samples_per_second": 93.262, |
|
"eval_steps_per_second": 1.025, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8424908424908425, |
|
"eval_confusion_matrix": [ |
|
[ |
|
67, |
|
1, |
|
0, |
|
4 |
|
], |
|
[ |
|
12, |
|
44, |
|
4, |
|
0 |
|
], |
|
[ |
|
0, |
|
18, |
|
58, |
|
0 |
|
], |
|
[ |
|
2, |
|
2, |
|
0, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.8431607380967995, |
|
"eval_loss": 0.752363920211792, |
|
"eval_precision": 0.8507974885146101, |
|
"eval_recall": 0.8424908424908425, |
|
"eval_runtime": 3.0257, |
|
"eval_samples_per_second": 90.228, |
|
"eval_steps_per_second": 0.992, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 24.88888888888889, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
1, |
|
5 |
|
], |
|
[ |
|
15, |
|
28, |
|
17, |
|
0 |
|
], |
|
[ |
|
0, |
|
2, |
|
74, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8354928653436116, |
|
"eval_loss": 0.7849779725074768, |
|
"eval_precision": 0.8621667009790022, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.8819, |
|
"eval_samples_per_second": 94.73, |
|
"eval_steps_per_second": 1.041, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8131868131868132, |
|
"eval_confusion_matrix": [ |
|
[ |
|
62, |
|
6, |
|
0, |
|
4 |
|
], |
|
[ |
|
11, |
|
44, |
|
5, |
|
0 |
|
], |
|
[ |
|
0, |
|
24, |
|
52, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8151598256855266, |
|
"eval_loss": 0.7896661162376404, |
|
"eval_precision": 0.8297062414709473, |
|
"eval_recall": 0.8131868131868132, |
|
"eval_runtime": 2.8953, |
|
"eval_samples_per_second": 94.291, |
|
"eval_steps_per_second": 1.036, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 26.88888888888889, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
1, |
|
1, |
|
4 |
|
], |
|
[ |
|
13, |
|
40, |
|
7, |
|
0 |
|
], |
|
[ |
|
1, |
|
6, |
|
69, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8754390108936493, |
|
"eval_loss": 0.7321063876152039, |
|
"eval_precision": 0.878386849630407, |
|
"eval_recall": 0.8791208791208791, |
|
"eval_runtime": 2.8941, |
|
"eval_samples_per_second": 94.329, |
|
"eval_steps_per_second": 1.037, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8608058608058609, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
2, |
|
0, |
|
4 |
|
], |
|
[ |
|
15, |
|
40, |
|
5, |
|
0 |
|
], |
|
[ |
|
1, |
|
11, |
|
64, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8583938406059761, |
|
"eval_loss": 0.7691925168037415, |
|
"eval_precision": 0.8606552236676889, |
|
"eval_recall": 0.8608058608058609, |
|
"eval_runtime": 2.9043, |
|
"eval_samples_per_second": 93.997, |
|
"eval_steps_per_second": 1.033, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 28.88888888888889, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
8, |
|
0, |
|
4 |
|
], |
|
[ |
|
10, |
|
41, |
|
9, |
|
0 |
|
], |
|
[ |
|
0, |
|
11, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8453166248415355, |
|
"eval_loss": 0.764687716960907, |
|
"eval_precision": 0.8450656649414414, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.8898, |
|
"eval_samples_per_second": 94.47, |
|
"eval_steps_per_second": 1.038, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
60, |
|
8, |
|
0, |
|
4 |
|
], |
|
[ |
|
10, |
|
40, |
|
10, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
66, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8446254139808331, |
|
"eval_loss": 0.7896224856376648, |
|
"eval_precision": 0.8436830773519571, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.9051, |
|
"eval_samples_per_second": 93.974, |
|
"eval_steps_per_second": 1.033, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 30.88888888888889, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_confusion_matrix": [ |
|
[ |
|
62, |
|
6, |
|
0, |
|
4 |
|
], |
|
[ |
|
12, |
|
41, |
|
7, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
66, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8556429580063329, |
|
"eval_loss": 0.7785082459449768, |
|
"eval_precision": 0.8550418750412481, |
|
"eval_recall": 0.8571428571428571, |
|
"eval_runtime": 2.9144, |
|
"eval_samples_per_second": 93.672, |
|
"eval_steps_per_second": 1.029, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_confusion_matrix": [ |
|
[ |
|
62, |
|
6, |
|
0, |
|
4 |
|
], |
|
[ |
|
13, |
|
41, |
|
6, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
66, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.855785572180052, |
|
"eval_loss": 0.7778924107551575, |
|
"eval_precision": 0.8555913753167756, |
|
"eval_recall": 0.8571428571428571, |
|
"eval_runtime": 2.8889, |
|
"eval_samples_per_second": 94.501, |
|
"eval_steps_per_second": 1.038, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 32.888888888888886, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
64, |
|
4, |
|
0, |
|
4 |
|
], |
|
[ |
|
14, |
|
37, |
|
9, |
|
0 |
|
], |
|
[ |
|
0, |
|
10, |
|
66, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8459222988470789, |
|
"eval_loss": 0.7852644324302673, |
|
"eval_precision": 0.8451216809509156, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.897, |
|
"eval_samples_per_second": 94.237, |
|
"eval_steps_per_second": 1.036, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
16, |
|
36, |
|
8, |
|
0 |
|
], |
|
[ |
|
1, |
|
9, |
|
66, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8451242574057074, |
|
"eval_loss": 0.7895408868789673, |
|
"eval_precision": 0.8464792621738539, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.8905, |
|
"eval_samples_per_second": 94.447, |
|
"eval_steps_per_second": 1.038, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 34.888888888888886, |
|
"eval_accuracy": 0.8498168498168498, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
17, |
|
37, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
10, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.846231937670728, |
|
"eval_loss": 0.7909458875656128, |
|
"eval_precision": 0.8483329341222641, |
|
"eval_recall": 0.8498168498168498, |
|
"eval_runtime": 2.8909, |
|
"eval_samples_per_second": 94.434, |
|
"eval_steps_per_second": 1.038, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
18, |
|
36, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
10, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8421377779092111, |
|
"eval_loss": 0.7950576543807983, |
|
"eval_precision": 0.844707938513005, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.8992, |
|
"eval_samples_per_second": 94.165, |
|
"eval_steps_per_second": 1.035, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 36.888888888888886, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
18, |
|
36, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
10, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8421377779092111, |
|
"eval_loss": 0.7976939082145691, |
|
"eval_precision": 0.844707938513005, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.8938, |
|
"eval_samples_per_second": 94.339, |
|
"eval_steps_per_second": 1.037, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
18, |
|
36, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
10, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8421377779092111, |
|
"eval_loss": 0.7985257506370544, |
|
"eval_precision": 0.844707938513005, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.9059, |
|
"eval_samples_per_second": 93.947, |
|
"eval_steps_per_second": 1.032, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 38.888888888888886, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
18, |
|
36, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
10, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8421377779092111, |
|
"eval_loss": 0.7982355356216431, |
|
"eval_precision": 0.844707938513005, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.8862, |
|
"eval_samples_per_second": 94.589, |
|
"eval_steps_per_second": 1.039, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_confusion_matrix": [ |
|
[ |
|
65, |
|
3, |
|
0, |
|
4 |
|
], |
|
[ |
|
18, |
|
36, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
10, |
|
65, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8421377779092111, |
|
"eval_loss": 0.7981730103492737, |
|
"eval_precision": 0.844707938513005, |
|
"eval_recall": 0.8461538461538461, |
|
"eval_runtime": 2.8891, |
|
"eval_samples_per_second": 94.494, |
|
"eval_steps_per_second": 1.038, |
|
"step": 180 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 45, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.9156278016e+16, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|