Upload folder using huggingface_hub
Browse files- best-model.pt +3 -0
- dev.tsv +0 -0
- loss.tsv +11 -0
- runs/events.out.tfevents.1697552654.3ae7c61396a7.1160.5 +3 -0
- test.tsv +0 -0
- training.log +237 -0
best-model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93f7685f83838aa0769c46763f55dadfd0fac5285d68f041b5c5a6ea4d968f6e
|
3 |
+
size 440954373
|
dev.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
loss.tsv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
2 |
+
1 14:28:55 0.0000 0.4046 0.1402 0.2082 0.5114 0.2959 0.1740
|
3 |
+
2 14:33:49 0.0000 0.1610 0.1642 0.2179 0.4148 0.2857 0.1678
|
4 |
+
3 14:38:41 0.0000 0.1159 0.1688 0.3025 0.5019 0.3775 0.2343
|
5 |
+
4 14:43:30 0.0000 0.0885 0.3115 0.2458 0.6629 0.3586 0.2194
|
6 |
+
5 14:48:22 0.0000 0.0626 0.3575 0.2431 0.6193 0.3492 0.2123
|
7 |
+
6 14:53:15 0.0000 0.0457 0.3734 0.2547 0.6117 0.3597 0.2199
|
8 |
+
7 14:58:01 0.0000 0.0293 0.3921 0.2765 0.5871 0.3760 0.2327
|
9 |
+
8 15:02:47 0.0000 0.0227 0.4110 0.3012 0.5795 0.3964 0.2486
|
10 |
+
9 15:07:33 0.0000 0.0133 0.4873 0.2770 0.5871 0.3764 0.2334
|
11 |
+
10 15:12:17 0.0000 0.0101 0.5095 0.2576 0.5909 0.3588 0.2202
|
runs/events.out.tfevents.1697552654.3ae7c61396a7.1160.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5793b0ec792ec73106f4fa0c9b88eb9674defc2a55842cf2d7d51834b6bc84
|
3 |
+
size 1464420
|
test.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training.log
ADDED
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-10-17 14:24:14,252 ----------------------------------------------------------------------------------------------------
|
2 |
+
2023-10-17 14:24:14,254 Model: "SequenceTagger(
|
3 |
+
(embeddings): TransformerWordEmbeddings(
|
4 |
+
(model): ElectraModel(
|
5 |
+
(embeddings): ElectraEmbeddings(
|
6 |
+
(word_embeddings): Embedding(32001, 768)
|
7 |
+
(position_embeddings): Embedding(512, 768)
|
8 |
+
(token_type_embeddings): Embedding(2, 768)
|
9 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
10 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
11 |
+
)
|
12 |
+
(encoder): ElectraEncoder(
|
13 |
+
(layer): ModuleList(
|
14 |
+
(0-11): 12 x ElectraLayer(
|
15 |
+
(attention): ElectraAttention(
|
16 |
+
(self): ElectraSelfAttention(
|
17 |
+
(query): Linear(in_features=768, out_features=768, bias=True)
|
18 |
+
(key): Linear(in_features=768, out_features=768, bias=True)
|
19 |
+
(value): Linear(in_features=768, out_features=768, bias=True)
|
20 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
21 |
+
)
|
22 |
+
(output): ElectraSelfOutput(
|
23 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
24 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
25 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
26 |
+
)
|
27 |
+
)
|
28 |
+
(intermediate): ElectraIntermediate(
|
29 |
+
(dense): Linear(in_features=768, out_features=3072, bias=True)
|
30 |
+
(intermediate_act_fn): GELUActivation()
|
31 |
+
)
|
32 |
+
(output): ElectraOutput(
|
33 |
+
(dense): Linear(in_features=3072, out_features=768, bias=True)
|
34 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
35 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
36 |
+
)
|
37 |
+
)
|
38 |
+
)
|
39 |
+
)
|
40 |
+
)
|
41 |
+
)
|
42 |
+
(locked_dropout): LockedDropout(p=0.5)
|
43 |
+
(linear): Linear(in_features=768, out_features=17, bias=True)
|
44 |
+
(loss_function): CrossEntropyLoss()
|
45 |
+
)"
|
46 |
+
2023-10-17 14:24:14,254 ----------------------------------------------------------------------------------------------------
|
47 |
+
2023-10-17 14:24:14,254 MultiCorpus: 20847 train + 1123 dev + 3350 test sentences
|
48 |
+
- NER_HIPE_2022 Corpus: 20847 train + 1123 dev + 3350 test sentences - /root/.flair/datasets/ner_hipe_2022/v2.1/newseye/de/with_doc_seperator
|
49 |
+
2023-10-17 14:24:14,254 ----------------------------------------------------------------------------------------------------
|
50 |
+
2023-10-17 14:24:14,254 Train: 20847 sentences
|
51 |
+
2023-10-17 14:24:14,254 (train_with_dev=False, train_with_test=False)
|
52 |
+
2023-10-17 14:24:14,254 ----------------------------------------------------------------------------------------------------
|
53 |
+
2023-10-17 14:24:14,254 Training Params:
|
54 |
+
2023-10-17 14:24:14,255 - learning_rate: "5e-05"
|
55 |
+
2023-10-17 14:24:14,255 - mini_batch_size: "8"
|
56 |
+
2023-10-17 14:24:14,255 - max_epochs: "10"
|
57 |
+
2023-10-17 14:24:14,255 - shuffle: "True"
|
58 |
+
2023-10-17 14:24:14,255 ----------------------------------------------------------------------------------------------------
|
59 |
+
2023-10-17 14:24:14,255 Plugins:
|
60 |
+
2023-10-17 14:24:14,255 - TensorboardLogger
|
61 |
+
2023-10-17 14:24:14,255 - LinearScheduler | warmup_fraction: '0.1'
|
62 |
+
2023-10-17 14:24:14,255 ----------------------------------------------------------------------------------------------------
|
63 |
+
2023-10-17 14:24:14,255 Final evaluation on model from best epoch (best-model.pt)
|
64 |
+
2023-10-17 14:24:14,255 - metric: "('micro avg', 'f1-score')"
|
65 |
+
2023-10-17 14:24:14,255 ----------------------------------------------------------------------------------------------------
|
66 |
+
2023-10-17 14:24:14,255 Computation:
|
67 |
+
2023-10-17 14:24:14,255 - compute on device: cuda:0
|
68 |
+
2023-10-17 14:24:14,256 - embedding storage: none
|
69 |
+
2023-10-17 14:24:14,256 ----------------------------------------------------------------------------------------------------
|
70 |
+
2023-10-17 14:24:14,256 Model training base path: "hmbench-newseye/de-hmteams/teams-base-historic-multilingual-discriminator-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-2"
|
71 |
+
2023-10-17 14:24:14,256 ----------------------------------------------------------------------------------------------------
|
72 |
+
2023-10-17 14:24:14,256 ----------------------------------------------------------------------------------------------------
|
73 |
+
2023-10-17 14:24:14,256 Logging anything other than scalars to TensorBoard is currently not supported.
|
74 |
+
2023-10-17 14:24:41,540 epoch 1 - iter 260/2606 - loss 1.85327979 - time (sec): 27.28 - samples/sec: 1310.12 - lr: 0.000005 - momentum: 0.000000
|
75 |
+
2023-10-17 14:25:09,627 epoch 1 - iter 520/2606 - loss 1.10728031 - time (sec): 55.37 - samples/sec: 1331.11 - lr: 0.000010 - momentum: 0.000000
|
76 |
+
2023-10-17 14:25:36,534 epoch 1 - iter 780/2606 - loss 0.83992496 - time (sec): 82.28 - samples/sec: 1359.25 - lr: 0.000015 - momentum: 0.000000
|
77 |
+
2023-10-17 14:26:04,493 epoch 1 - iter 1040/2606 - loss 0.68848892 - time (sec): 110.24 - samples/sec: 1356.82 - lr: 0.000020 - momentum: 0.000000
|
78 |
+
2023-10-17 14:26:31,440 epoch 1 - iter 1300/2606 - loss 0.59467316 - time (sec): 137.18 - samples/sec: 1364.66 - lr: 0.000025 - momentum: 0.000000
|
79 |
+
2023-10-17 14:26:57,934 epoch 1 - iter 1560/2606 - loss 0.53878529 - time (sec): 163.68 - samples/sec: 1360.76 - lr: 0.000030 - momentum: 0.000000
|
80 |
+
2023-10-17 14:27:25,638 epoch 1 - iter 1820/2606 - loss 0.48970257 - time (sec): 191.38 - samples/sec: 1363.10 - lr: 0.000035 - momentum: 0.000000
|
81 |
+
2023-10-17 14:27:53,036 epoch 1 - iter 2080/2606 - loss 0.45602118 - time (sec): 218.78 - samples/sec: 1349.95 - lr: 0.000040 - momentum: 0.000000
|
82 |
+
2023-10-17 14:28:20,128 epoch 1 - iter 2340/2606 - loss 0.42695326 - time (sec): 245.87 - samples/sec: 1350.40 - lr: 0.000045 - momentum: 0.000000
|
83 |
+
2023-10-17 14:28:46,892 epoch 1 - iter 2600/2606 - loss 0.40509791 - time (sec): 272.63 - samples/sec: 1345.44 - lr: 0.000050 - momentum: 0.000000
|
84 |
+
2023-10-17 14:28:47,527 ----------------------------------------------------------------------------------------------------
|
85 |
+
2023-10-17 14:28:47,527 EPOCH 1 done: loss 0.4046 - lr: 0.000050
|
86 |
+
2023-10-17 14:28:55,176 DEV : loss 0.14022067189216614 - f1-score (micro avg) 0.2959
|
87 |
+
2023-10-17 14:28:55,235 saving best model
|
88 |
+
2023-10-17 14:28:55,813 ----------------------------------------------------------------------------------------------------
|
89 |
+
2023-10-17 14:29:25,084 epoch 2 - iter 260/2606 - loss 0.17806520 - time (sec): 29.27 - samples/sec: 1262.84 - lr: 0.000049 - momentum: 0.000000
|
90 |
+
2023-10-17 14:29:52,834 epoch 2 - iter 520/2606 - loss 0.18056473 - time (sec): 57.02 - samples/sec: 1291.61 - lr: 0.000049 - momentum: 0.000000
|
91 |
+
2023-10-17 14:30:21,288 epoch 2 - iter 780/2606 - loss 0.17470102 - time (sec): 85.47 - samples/sec: 1294.70 - lr: 0.000048 - momentum: 0.000000
|
92 |
+
2023-10-17 14:30:48,148 epoch 2 - iter 1040/2606 - loss 0.17221863 - time (sec): 112.33 - samples/sec: 1308.82 - lr: 0.000048 - momentum: 0.000000
|
93 |
+
2023-10-17 14:31:16,076 epoch 2 - iter 1300/2606 - loss 0.16753259 - time (sec): 140.26 - samples/sec: 1324.35 - lr: 0.000047 - momentum: 0.000000
|
94 |
+
2023-10-17 14:31:43,316 epoch 2 - iter 1560/2606 - loss 0.16685811 - time (sec): 167.50 - samples/sec: 1327.25 - lr: 0.000047 - momentum: 0.000000
|
95 |
+
2023-10-17 14:32:10,687 epoch 2 - iter 1820/2606 - loss 0.16650143 - time (sec): 194.87 - samples/sec: 1322.48 - lr: 0.000046 - momentum: 0.000000
|
96 |
+
2023-10-17 14:32:39,701 epoch 2 - iter 2080/2606 - loss 0.16181597 - time (sec): 223.89 - samples/sec: 1313.90 - lr: 0.000046 - momentum: 0.000000
|
97 |
+
2023-10-17 14:33:08,969 epoch 2 - iter 2340/2606 - loss 0.15958413 - time (sec): 253.15 - samples/sec: 1308.43 - lr: 0.000045 - momentum: 0.000000
|
98 |
+
2023-10-17 14:33:36,676 epoch 2 - iter 2600/2606 - loss 0.16113565 - time (sec): 280.86 - samples/sec: 1306.23 - lr: 0.000044 - momentum: 0.000000
|
99 |
+
2023-10-17 14:33:37,189 ----------------------------------------------------------------------------------------------------
|
100 |
+
2023-10-17 14:33:37,189 EPOCH 2 done: loss 0.1610 - lr: 0.000044
|
101 |
+
2023-10-17 14:33:49,462 DEV : loss 0.16424185037612915 - f1-score (micro avg) 0.2857
|
102 |
+
2023-10-17 14:33:49,517 ----------------------------------------------------------------------------------------------------
|
103 |
+
2023-10-17 14:34:18,273 epoch 3 - iter 260/2606 - loss 0.13142213 - time (sec): 28.75 - samples/sec: 1297.78 - lr: 0.000044 - momentum: 0.000000
|
104 |
+
2023-10-17 14:34:45,491 epoch 3 - iter 520/2606 - loss 0.13076265 - time (sec): 55.97 - samples/sec: 1298.02 - lr: 0.000043 - momentum: 0.000000
|
105 |
+
2023-10-17 14:35:13,286 epoch 3 - iter 780/2606 - loss 0.12346611 - time (sec): 83.77 - samples/sec: 1272.88 - lr: 0.000043 - momentum: 0.000000
|
106 |
+
2023-10-17 14:35:43,054 epoch 3 - iter 1040/2606 - loss 0.12496563 - time (sec): 113.53 - samples/sec: 1282.05 - lr: 0.000042 - momentum: 0.000000
|
107 |
+
2023-10-17 14:36:09,740 epoch 3 - iter 1300/2606 - loss 0.11945146 - time (sec): 140.22 - samples/sec: 1309.70 - lr: 0.000042 - momentum: 0.000000
|
108 |
+
2023-10-17 14:36:37,350 epoch 3 - iter 1560/2606 - loss 0.11698591 - time (sec): 167.83 - samples/sec: 1317.13 - lr: 0.000041 - momentum: 0.000000
|
109 |
+
2023-10-17 14:37:03,654 epoch 3 - iter 1820/2606 - loss 0.11691007 - time (sec): 194.13 - samples/sec: 1311.04 - lr: 0.000041 - momentum: 0.000000
|
110 |
+
2023-10-17 14:37:31,807 epoch 3 - iter 2080/2606 - loss 0.11634199 - time (sec): 222.29 - samples/sec: 1314.95 - lr: 0.000040 - momentum: 0.000000
|
111 |
+
2023-10-17 14:38:00,450 epoch 3 - iter 2340/2606 - loss 0.11599351 - time (sec): 250.93 - samples/sec: 1310.02 - lr: 0.000039 - momentum: 0.000000
|
112 |
+
2023-10-17 14:38:28,873 epoch 3 - iter 2600/2606 - loss 0.11610612 - time (sec): 279.35 - samples/sec: 1311.26 - lr: 0.000039 - momentum: 0.000000
|
113 |
+
2023-10-17 14:38:29,622 ----------------------------------------------------------------------------------------------------
|
114 |
+
2023-10-17 14:38:29,622 EPOCH 3 done: loss 0.1159 - lr: 0.000039
|
115 |
+
2023-10-17 14:38:40,939 DEV : loss 0.16876201331615448 - f1-score (micro avg) 0.3775
|
116 |
+
2023-10-17 14:38:41,009 saving best model
|
117 |
+
2023-10-17 14:38:42,469 ----------------------------------------------------------------------------------------------------
|
118 |
+
2023-10-17 14:39:11,890 epoch 4 - iter 260/2606 - loss 0.07361030 - time (sec): 29.42 - samples/sec: 1265.45 - lr: 0.000038 - momentum: 0.000000
|
119 |
+
2023-10-17 14:39:39,718 epoch 4 - iter 520/2606 - loss 0.07660504 - time (sec): 57.24 - samples/sec: 1283.14 - lr: 0.000038 - momentum: 0.000000
|
120 |
+
2023-10-17 14:40:07,171 epoch 4 - iter 780/2606 - loss 0.07902913 - time (sec): 84.70 - samples/sec: 1289.64 - lr: 0.000037 - momentum: 0.000000
|
121 |
+
2023-10-17 14:40:34,548 epoch 4 - iter 1040/2606 - loss 0.08287969 - time (sec): 112.07 - samples/sec: 1295.86 - lr: 0.000037 - momentum: 0.000000
|
122 |
+
2023-10-17 14:41:01,134 epoch 4 - iter 1300/2606 - loss 0.08241178 - time (sec): 138.66 - samples/sec: 1322.92 - lr: 0.000036 - momentum: 0.000000
|
123 |
+
2023-10-17 14:41:28,978 epoch 4 - iter 1560/2606 - loss 0.08250940 - time (sec): 166.50 - samples/sec: 1317.76 - lr: 0.000036 - momentum: 0.000000
|
124 |
+
2023-10-17 14:41:56,865 epoch 4 - iter 1820/2606 - loss 0.08213798 - time (sec): 194.39 - samples/sec: 1319.13 - lr: 0.000035 - momentum: 0.000000
|
125 |
+
2023-10-17 14:42:24,148 epoch 4 - iter 2080/2606 - loss 0.08166340 - time (sec): 221.67 - samples/sec: 1314.38 - lr: 0.000034 - momentum: 0.000000
|
126 |
+
2023-10-17 14:42:51,904 epoch 4 - iter 2340/2606 - loss 0.08556568 - time (sec): 249.43 - samples/sec: 1323.51 - lr: 0.000034 - momentum: 0.000000
|
127 |
+
2023-10-17 14:43:18,720 epoch 4 - iter 2600/2606 - loss 0.08865792 - time (sec): 276.24 - samples/sec: 1326.87 - lr: 0.000033 - momentum: 0.000000
|
128 |
+
2023-10-17 14:43:19,407 ----------------------------------------------------------------------------------------------------
|
129 |
+
2023-10-17 14:43:19,408 EPOCH 4 done: loss 0.0885 - lr: 0.000033
|
130 |
+
2023-10-17 14:43:30,616 DEV : loss 0.3115158677101135 - f1-score (micro avg) 0.3586
|
131 |
+
2023-10-17 14:43:30,674 ----------------------------------------------------------------------------------------------------
|
132 |
+
2023-10-17 14:43:58,778 epoch 5 - iter 260/2606 - loss 0.05863728 - time (sec): 28.10 - samples/sec: 1338.70 - lr: 0.000033 - momentum: 0.000000
|
133 |
+
2023-10-17 14:44:26,178 epoch 5 - iter 520/2606 - loss 0.06625558 - time (sec): 55.50 - samples/sec: 1348.15 - lr: 0.000032 - momentum: 0.000000
|
134 |
+
2023-10-17 14:44:54,516 epoch 5 - iter 780/2606 - loss 0.06844922 - time (sec): 83.84 - samples/sec: 1359.79 - lr: 0.000032 - momentum: 0.000000
|
135 |
+
2023-10-17 14:45:23,817 epoch 5 - iter 1040/2606 - loss 0.06831173 - time (sec): 113.14 - samples/sec: 1346.93 - lr: 0.000031 - momentum: 0.000000
|
136 |
+
2023-10-17 14:45:53,393 epoch 5 - iter 1300/2606 - loss 0.06365912 - time (sec): 142.72 - samples/sec: 1319.89 - lr: 0.000031 - momentum: 0.000000
|
137 |
+
2023-10-17 14:46:23,598 epoch 5 - iter 1560/2606 - loss 0.06547327 - time (sec): 172.92 - samples/sec: 1305.77 - lr: 0.000030 - momentum: 0.000000
|
138 |
+
2023-10-17 14:46:50,708 epoch 5 - iter 1820/2606 - loss 0.06474902 - time (sec): 200.03 - samples/sec: 1311.72 - lr: 0.000029 - momentum: 0.000000
|
139 |
+
2023-10-17 14:47:18,016 epoch 5 - iter 2080/2606 - loss 0.06304130 - time (sec): 227.34 - samples/sec: 1306.77 - lr: 0.000029 - momentum: 0.000000
|
140 |
+
2023-10-17 14:47:44,935 epoch 5 - iter 2340/2606 - loss 0.06258972 - time (sec): 254.26 - samples/sec: 1308.18 - lr: 0.000028 - momentum: 0.000000
|
141 |
+
2023-10-17 14:48:11,273 epoch 5 - iter 2600/2606 - loss 0.06247218 - time (sec): 280.60 - samples/sec: 1306.66 - lr: 0.000028 - momentum: 0.000000
|
142 |
+
2023-10-17 14:48:11,872 ----------------------------------------------------------------------------------------------------
|
143 |
+
2023-10-17 14:48:11,872 EPOCH 5 done: loss 0.0626 - lr: 0.000028
|
144 |
+
2023-10-17 14:48:22,903 DEV : loss 0.35754862427711487 - f1-score (micro avg) 0.3492
|
145 |
+
2023-10-17 14:48:22,961 ----------------------------------------------------------------------------------------------------
|
146 |
+
2023-10-17 14:48:50,441 epoch 6 - iter 260/2606 - loss 0.04153328 - time (sec): 27.48 - samples/sec: 1300.53 - lr: 0.000027 - momentum: 0.000000
|
147 |
+
2023-10-17 14:49:19,441 epoch 6 - iter 520/2606 - loss 0.04069889 - time (sec): 56.48 - samples/sec: 1292.76 - lr: 0.000027 - momentum: 0.000000
|
148 |
+
2023-10-17 14:49:47,061 epoch 6 - iter 780/2606 - loss 0.04221365 - time (sec): 84.10 - samples/sec: 1265.34 - lr: 0.000026 - momentum: 0.000000
|
149 |
+
2023-10-17 14:50:14,629 epoch 6 - iter 1040/2606 - loss 0.04286026 - time (sec): 111.66 - samples/sec: 1276.99 - lr: 0.000026 - momentum: 0.000000
|
150 |
+
2023-10-17 14:50:44,251 epoch 6 - iter 1300/2606 - loss 0.04244173 - time (sec): 141.29 - samples/sec: 1281.41 - lr: 0.000025 - momentum: 0.000000
|
151 |
+
2023-10-17 14:51:13,157 epoch 6 - iter 1560/2606 - loss 0.04269662 - time (sec): 170.19 - samples/sec: 1275.44 - lr: 0.000024 - momentum: 0.000000
|
152 |
+
2023-10-17 14:51:40,843 epoch 6 - iter 1820/2606 - loss 0.04514594 - time (sec): 197.88 - samples/sec: 1283.68 - lr: 0.000024 - momentum: 0.000000
|
153 |
+
2023-10-17 14:52:08,773 epoch 6 - iter 2080/2606 - loss 0.04536499 - time (sec): 225.81 - samples/sec: 1297.42 - lr: 0.000023 - momentum: 0.000000
|
154 |
+
2023-10-17 14:52:36,271 epoch 6 - iter 2340/2606 - loss 0.04581858 - time (sec): 253.31 - samples/sec: 1296.31 - lr: 0.000023 - momentum: 0.000000
|
155 |
+
2023-10-17 14:53:03,992 epoch 6 - iter 2600/2606 - loss 0.04576906 - time (sec): 281.03 - samples/sec: 1303.50 - lr: 0.000022 - momentum: 0.000000
|
156 |
+
2023-10-17 14:53:04,741 ----------------------------------------------------------------------------------------------------
|
157 |
+
2023-10-17 14:53:04,741 EPOCH 6 done: loss 0.0457 - lr: 0.000022
|
158 |
+
2023-10-17 14:53:15,359 DEV : loss 0.3733910918235779 - f1-score (micro avg) 0.3597
|
159 |
+
2023-10-17 14:53:15,411 ----------------------------------------------------------------------------------------------------
|
160 |
+
2023-10-17 14:53:43,296 epoch 7 - iter 260/2606 - loss 0.03044292 - time (sec): 27.88 - samples/sec: 1313.32 - lr: 0.000022 - momentum: 0.000000
|
161 |
+
2023-10-17 14:54:10,604 epoch 7 - iter 520/2606 - loss 0.02603500 - time (sec): 55.19 - samples/sec: 1346.10 - lr: 0.000021 - momentum: 0.000000
|
162 |
+
2023-10-17 14:54:37,579 epoch 7 - iter 780/2606 - loss 0.02879241 - time (sec): 82.17 - samples/sec: 1329.28 - lr: 0.000021 - momentum: 0.000000
|
163 |
+
2023-10-17 14:55:06,764 epoch 7 - iter 1040/2606 - loss 0.02855577 - time (sec): 111.35 - samples/sec: 1329.11 - lr: 0.000020 - momentum: 0.000000
|
164 |
+
2023-10-17 14:55:34,201 epoch 7 - iter 1300/2606 - loss 0.02824816 - time (sec): 138.79 - samples/sec: 1343.78 - lr: 0.000019 - momentum: 0.000000
|
165 |
+
2023-10-17 14:56:01,313 epoch 7 - iter 1560/2606 - loss 0.03022833 - time (sec): 165.90 - samples/sec: 1346.40 - lr: 0.000019 - momentum: 0.000000
|
166 |
+
2023-10-17 14:56:29,641 epoch 7 - iter 1820/2606 - loss 0.02870834 - time (sec): 194.23 - samples/sec: 1345.69 - lr: 0.000018 - momentum: 0.000000
|
167 |
+
2023-10-17 14:56:56,189 epoch 7 - iter 2080/2606 - loss 0.02859220 - time (sec): 220.78 - samples/sec: 1350.30 - lr: 0.000018 - momentum: 0.000000
|
168 |
+
2023-10-17 14:57:23,440 epoch 7 - iter 2340/2606 - loss 0.02938788 - time (sec): 248.03 - samples/sec: 1338.16 - lr: 0.000017 - momentum: 0.000000
|
169 |
+
2023-10-17 14:57:49,641 epoch 7 - iter 2600/2606 - loss 0.02928523 - time (sec): 274.23 - samples/sec: 1338.32 - lr: 0.000017 - momentum: 0.000000
|
170 |
+
2023-10-17 14:57:50,166 ----------------------------------------------------------------------------------------------------
|
171 |
+
2023-10-17 14:57:50,166 EPOCH 7 done: loss 0.0293 - lr: 0.000017
|
172 |
+
2023-10-17 14:58:01,362 DEV : loss 0.39208441972732544 - f1-score (micro avg) 0.376
|
173 |
+
2023-10-17 14:58:01,418 ----------------------------------------------------------------------------------------------------
|
174 |
+
2023-10-17 14:58:29,386 epoch 8 - iter 260/2606 - loss 0.01804159 - time (sec): 27.97 - samples/sec: 1244.37 - lr: 0.000016 - momentum: 0.000000
|
175 |
+
2023-10-17 14:58:55,174 epoch 8 - iter 520/2606 - loss 0.01839017 - time (sec): 53.75 - samples/sec: 1315.63 - lr: 0.000016 - momentum: 0.000000
|
176 |
+
2023-10-17 14:59:21,421 epoch 8 - iter 780/2606 - loss 0.01982613 - time (sec): 80.00 - samples/sec: 1364.08 - lr: 0.000015 - momentum: 0.000000
|
177 |
+
2023-10-17 14:59:48,015 epoch 8 - iter 1040/2606 - loss 0.02178505 - time (sec): 106.60 - samples/sec: 1358.26 - lr: 0.000014 - momentum: 0.000000
|
178 |
+
2023-10-17 15:00:15,164 epoch 8 - iter 1300/2606 - loss 0.02262431 - time (sec): 133.74 - samples/sec: 1348.86 - lr: 0.000014 - momentum: 0.000000
|
179 |
+
2023-10-17 15:00:42,859 epoch 8 - iter 1560/2606 - loss 0.02170814 - time (sec): 161.44 - samples/sec: 1343.38 - lr: 0.000013 - momentum: 0.000000
|
180 |
+
2023-10-17 15:01:10,224 epoch 8 - iter 1820/2606 - loss 0.02174864 - time (sec): 188.80 - samples/sec: 1350.02 - lr: 0.000013 - momentum: 0.000000
|
181 |
+
2023-10-17 15:01:38,355 epoch 8 - iter 2080/2606 - loss 0.02146193 - time (sec): 216.93 - samples/sec: 1343.56 - lr: 0.000012 - momentum: 0.000000
|
182 |
+
2023-10-17 15:02:07,541 epoch 8 - iter 2340/2606 - loss 0.02265158 - time (sec): 246.12 - samples/sec: 1338.24 - lr: 0.000012 - momentum: 0.000000
|
183 |
+
2023-10-17 15:02:34,852 epoch 8 - iter 2600/2606 - loss 0.02262534 - time (sec): 273.43 - samples/sec: 1340.03 - lr: 0.000011 - momentum: 0.000000
|
184 |
+
2023-10-17 15:02:35,458 ----------------------------------------------------------------------------------------------------
|
185 |
+
2023-10-17 15:02:35,458 EPOCH 8 done: loss 0.0227 - lr: 0.000011
|
186 |
+
2023-10-17 15:02:47,842 DEV : loss 0.4110426604747772 - f1-score (micro avg) 0.3964
|
187 |
+
2023-10-17 15:02:47,905 saving best model
|
188 |
+
2023-10-17 15:02:49,402 ----------------------------------------------------------------------------------------------------
|
189 |
+
2023-10-17 15:03:16,976 epoch 9 - iter 260/2606 - loss 0.01333780 - time (sec): 27.57 - samples/sec: 1333.90 - lr: 0.000011 - momentum: 0.000000
|
190 |
+
2023-10-17 15:03:44,619 epoch 9 - iter 520/2606 - loss 0.01398241 - time (sec): 55.21 - samples/sec: 1356.45 - lr: 0.000010 - momentum: 0.000000
|
191 |
+
2023-10-17 15:04:11,530 epoch 9 - iter 780/2606 - loss 0.01371935 - time (sec): 82.12 - samples/sec: 1357.95 - lr: 0.000009 - momentum: 0.000000
|
192 |
+
2023-10-17 15:04:38,342 epoch 9 - iter 1040/2606 - loss 0.01381938 - time (sec): 108.93 - samples/sec: 1354.11 - lr: 0.000009 - momentum: 0.000000
|
193 |
+
2023-10-17 15:05:04,756 epoch 9 - iter 1300/2606 - loss 0.01407551 - time (sec): 135.35 - samples/sec: 1366.05 - lr: 0.000008 - momentum: 0.000000
|
194 |
+
2023-10-17 15:05:33,009 epoch 9 - iter 1560/2606 - loss 0.01393126 - time (sec): 163.60 - samples/sec: 1357.67 - lr: 0.000008 - momentum: 0.000000
|
195 |
+
2023-10-17 15:06:00,019 epoch 9 - iter 1820/2606 - loss 0.01360054 - time (sec): 190.61 - samples/sec: 1344.36 - lr: 0.000007 - momentum: 0.000000
|
196 |
+
2023-10-17 15:06:29,986 epoch 9 - iter 2080/2606 - loss 0.01397962 - time (sec): 220.58 - samples/sec: 1349.15 - lr: 0.000007 - momentum: 0.000000
|
197 |
+
2023-10-17 15:06:55,256 epoch 9 - iter 2340/2606 - loss 0.01383784 - time (sec): 245.85 - samples/sec: 1344.69 - lr: 0.000006 - momentum: 0.000000
|
198 |
+
2023-10-17 15:07:21,400 epoch 9 - iter 2600/2606 - loss 0.01329672 - time (sec): 271.99 - samples/sec: 1348.44 - lr: 0.000006 - momentum: 0.000000
|
199 |
+
2023-10-17 15:07:21,934 ----------------------------------------------------------------------------------------------------
|
200 |
+
2023-10-17 15:07:21,935 EPOCH 9 done: loss 0.0133 - lr: 0.000006
|
201 |
+
2023-10-17 15:07:33,497 DEV : loss 0.487269788980484 - f1-score (micro avg) 0.3764
|
202 |
+
2023-10-17 15:07:33,558 ----------------------------------------------------------------------------------------------------
|
203 |
+
2023-10-17 15:08:01,040 epoch 10 - iter 260/2606 - loss 0.00522579 - time (sec): 27.48 - samples/sec: 1369.25 - lr: 0.000005 - momentum: 0.000000
|
204 |
+
2023-10-17 15:08:27,874 epoch 10 - iter 520/2606 - loss 0.00713905 - time (sec): 54.31 - samples/sec: 1357.92 - lr: 0.000004 - momentum: 0.000000
|
205 |
+
2023-10-17 15:08:54,168 epoch 10 - iter 780/2606 - loss 0.00706736 - time (sec): 80.61 - samples/sec: 1347.44 - lr: 0.000004 - momentum: 0.000000
|
206 |
+
2023-10-17 15:09:19,702 epoch 10 - iter 1040/2606 - loss 0.00700287 - time (sec): 106.14 - samples/sec: 1349.51 - lr: 0.000003 - momentum: 0.000000
|
207 |
+
2023-10-17 15:09:47,107 epoch 10 - iter 1300/2606 - loss 0.00826984 - time (sec): 133.55 - samples/sec: 1343.57 - lr: 0.000003 - momentum: 0.000000
|
208 |
+
2023-10-17 15:10:13,743 epoch 10 - iter 1560/2606 - loss 0.00865486 - time (sec): 160.18 - samples/sec: 1338.36 - lr: 0.000002 - momentum: 0.000000
|
209 |
+
2023-10-17 15:10:39,805 epoch 10 - iter 1820/2606 - loss 0.00972362 - time (sec): 186.24 - samples/sec: 1342.74 - lr: 0.000002 - momentum: 0.000000
|
210 |
+
2023-10-17 15:11:07,610 epoch 10 - iter 2080/2606 - loss 0.01024238 - time (sec): 214.05 - samples/sec: 1347.46 - lr: 0.000001 - momentum: 0.000000
|
211 |
+
2023-10-17 15:11:36,590 epoch 10 - iter 2340/2606 - loss 0.01033785 - time (sec): 243.03 - samples/sec: 1353.12 - lr: 0.000001 - momentum: 0.000000
|
212 |
+
2023-10-17 15:12:04,385 epoch 10 - iter 2600/2606 - loss 0.01015142 - time (sec): 270.82 - samples/sec: 1354.66 - lr: 0.000000 - momentum: 0.000000
|
213 |
+
2023-10-17 15:12:04,937 ----------------------------------------------------------------------------------------------------
|
214 |
+
2023-10-17 15:12:04,937 EPOCH 10 done: loss 0.0101 - lr: 0.000000
|
215 |
+
2023-10-17 15:12:17,266 DEV : loss 0.5094925761222839 - f1-score (micro avg) 0.3588
|
216 |
+
2023-10-17 15:12:17,894 ----------------------------------------------------------------------------------------------------
|
217 |
+
2023-10-17 15:12:17,896 Loading model from best epoch ...
|
218 |
+
2023-10-17 15:12:20,219 SequenceTagger predicts: Dictionary with 17 tags: O, S-LOC, B-LOC, E-LOC, I-LOC, S-PER, B-PER, E-PER, I-PER, S-ORG, B-ORG, E-ORG, I-ORG, S-HumanProd, B-HumanProd, E-HumanProd, I-HumanProd
|
219 |
+
2023-10-17 15:12:39,854
|
220 |
+
Results:
|
221 |
+
- F-score (micro) 0.4493
|
222 |
+
- F-score (macro) 0.3128
|
223 |
+
- Accuracy 0.2933
|
224 |
+
|
225 |
+
By class:
|
226 |
+
precision recall f1-score support
|
227 |
+
|
228 |
+
LOC 0.4879 0.5132 0.5002 1214
|
229 |
+
PER 0.4061 0.4814 0.4405 808
|
230 |
+
ORG 0.3013 0.3201 0.3104 353
|
231 |
+
HumanProd 0.0000 0.0000 0.0000 15
|
232 |
+
|
233 |
+
micro avg 0.4297 0.4707 0.4493 2390
|
234 |
+
macro avg 0.2988 0.3287 0.3128 2390
|
235 |
+
weighted avg 0.4296 0.4707 0.4489 2390
|
236 |
+
|
237 |
+
2023-10-17 15:12:39,854 ----------------------------------------------------------------------------------------------------
|