|
{ |
|
"best_metric": 0.8523994617102314, |
|
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-54", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 54, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"eval_accuracy": 0.63003663003663, |
|
"eval_confusion_matrix": [ |
|
[ |
|
53, |
|
0, |
|
16, |
|
3 |
|
], |
|
[ |
|
28, |
|
1, |
|
31, |
|
0 |
|
], |
|
[ |
|
13, |
|
0, |
|
62, |
|
1 |
|
], |
|
[ |
|
3, |
|
0, |
|
6, |
|
56 |
|
] |
|
], |
|
"eval_f1": 0.566693372031096, |
|
"eval_loss": 1.31327486038208, |
|
"eval_precision": 0.7361933549293478, |
|
"eval_recall": 0.63003663003663, |
|
"eval_runtime": 2.8436, |
|
"eval_samples_per_second": 96.005, |
|
"eval_steps_per_second": 1.055, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6593406593406593, |
|
"eval_confusion_matrix": [ |
|
[ |
|
41, |
|
10, |
|
16, |
|
5 |
|
], |
|
[ |
|
21, |
|
1, |
|
38, |
|
0 |
|
], |
|
[ |
|
1, |
|
2, |
|
73, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.595713773130153, |
|
"eval_loss": 0.9242589473724365, |
|
"eval_precision": 0.5696509512811321, |
|
"eval_recall": 0.6593406593406593, |
|
"eval_runtime": 2.8668, |
|
"eval_samples_per_second": 95.227, |
|
"eval_steps_per_second": 1.046, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_confusion_matrix": [ |
|
[ |
|
52, |
|
8, |
|
7, |
|
5 |
|
], |
|
[ |
|
18, |
|
5, |
|
37, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
75, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.6621621567125869, |
|
"eval_loss": 0.8232662081718445, |
|
"eval_precision": 0.671303801513745, |
|
"eval_recall": 0.717948717948718, |
|
"eval_runtime": 2.8711, |
|
"eval_samples_per_second": 95.084, |
|
"eval_steps_per_second": 1.045, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6959706959706959, |
|
"eval_confusion_matrix": [ |
|
[ |
|
45, |
|
9, |
|
13, |
|
5 |
|
], |
|
[ |
|
13, |
|
5, |
|
42, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
75, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6428796900399459, |
|
"eval_loss": 0.951453447341919, |
|
"eval_precision": 0.6613447705829899, |
|
"eval_recall": 0.6959706959706959, |
|
"eval_runtime": 2.8813, |
|
"eval_samples_per_second": 94.75, |
|
"eval_steps_per_second": 1.041, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_confusion_matrix": [ |
|
[ |
|
16, |
|
49, |
|
3, |
|
4 |
|
], |
|
[ |
|
1, |
|
26, |
|
33, |
|
0 |
|
], |
|
[ |
|
0, |
|
1, |
|
75, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6331541584523206, |
|
"eval_loss": 1.0080645084381104, |
|
"eval_precision": 0.7358031394913882, |
|
"eval_recall": 0.6666666666666666, |
|
"eval_runtime": 2.9108, |
|
"eval_samples_per_second": 93.789, |
|
"eval_steps_per_second": 1.031, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.652014652014652, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
36, |
|
16, |
|
8, |
|
0 |
|
], |
|
[ |
|
10, |
|
35, |
|
31, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.6302407955860642, |
|
"eval_loss": 0.9598046541213989, |
|
"eval_precision": 0.6636241740077424, |
|
"eval_recall": 0.652014652014652, |
|
"eval_runtime": 2.9773, |
|
"eval_samples_per_second": 91.693, |
|
"eval_steps_per_second": 1.008, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
29, |
|
21, |
|
10, |
|
0 |
|
], |
|
[ |
|
3, |
|
15, |
|
58, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.7527855586679116, |
|
"eval_loss": 0.7898163795471191, |
|
"eval_precision": 0.7612467677056892, |
|
"eval_recall": 0.7692307692307693, |
|
"eval_runtime": 2.8918, |
|
"eval_samples_per_second": 94.404, |
|
"eval_steps_per_second": 1.037, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7802197802197802, |
|
"eval_confusion_matrix": [ |
|
[ |
|
66, |
|
0, |
|
0, |
|
6 |
|
], |
|
[ |
|
25, |
|
15, |
|
19, |
|
1 |
|
], |
|
[ |
|
1, |
|
6, |
|
68, |
|
1 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.7471179200524057, |
|
"eval_loss": 0.7337484359741211, |
|
"eval_precision": 0.7733845922309294, |
|
"eval_recall": 0.7802197802197802, |
|
"eval_runtime": 2.9006, |
|
"eval_samples_per_second": 94.119, |
|
"eval_steps_per_second": 1.034, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_accuracy": 0.7912087912087912, |
|
"eval_confusion_matrix": [ |
|
[ |
|
57, |
|
8, |
|
3, |
|
4 |
|
], |
|
[ |
|
13, |
|
24, |
|
23, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.7766564722186922, |
|
"eval_loss": 0.7148727178573608, |
|
"eval_precision": 0.7821504483074875, |
|
"eval_recall": 0.7912087912087912, |
|
"eval_runtime": 2.8866, |
|
"eval_samples_per_second": 94.576, |
|
"eval_steps_per_second": 1.039, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_confusion_matrix": [ |
|
[ |
|
51, |
|
8, |
|
1, |
|
12 |
|
], |
|
[ |
|
10, |
|
36, |
|
14, |
|
0 |
|
], |
|
[ |
|
0, |
|
5, |
|
71, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
65 |
|
] |
|
], |
|
"eval_f1": 0.8094336953840884, |
|
"eval_loss": 0.7574812173843384, |
|
"eval_precision": 0.8127936625684181, |
|
"eval_recall": 0.8168498168498168, |
|
"eval_runtime": 2.8967, |
|
"eval_samples_per_second": 94.245, |
|
"eval_steps_per_second": 1.036, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 10.88888888888889, |
|
"eval_accuracy": 0.8021978021978022, |
|
"eval_confusion_matrix": [ |
|
[ |
|
48, |
|
18, |
|
2, |
|
4 |
|
], |
|
[ |
|
4, |
|
41, |
|
15, |
|
0 |
|
], |
|
[ |
|
0, |
|
7, |
|
69, |
|
0 |
|
], |
|
[ |
|
3, |
|
0, |
|
1, |
|
61 |
|
] |
|
], |
|
"eval_f1": 0.801525180147331, |
|
"eval_loss": 0.7140281200408936, |
|
"eval_precision": 0.8109338936925145, |
|
"eval_recall": 0.8021978021978022, |
|
"eval_runtime": 2.917, |
|
"eval_samples_per_second": 93.589, |
|
"eval_steps_per_second": 1.028, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_confusion_matrix": [ |
|
[ |
|
56, |
|
12, |
|
0, |
|
4 |
|
], |
|
[ |
|
6, |
|
43, |
|
11, |
|
0 |
|
], |
|
[ |
|
0, |
|
6, |
|
70, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
64 |
|
] |
|
], |
|
"eval_f1": 0.8523994617102314, |
|
"eval_loss": 0.6672152280807495, |
|
"eval_precision": 0.8540313732642031, |
|
"eval_recall": 0.8534798534798534, |
|
"eval_runtime": 2.8969, |
|
"eval_samples_per_second": 94.238, |
|
"eval_steps_per_second": 1.036, |
|
"step": 54 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 45, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.97468834048e+16, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|