Initial Commit
Browse files
- README.md +60 -60
- config.json +14 -21
- eval_result_ner.json +1 -1
- model.safetensors +2 -2
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
---
|
2 |
-
base_model: microsoft/mdeberta-v3-base
|
3 |
library_name: transformers
|
4 |
license: mit
|
|
|
|
|
|
|
5 |
metrics:
|
6 |
- precision
|
7 |
- recall
|
8 |
- f1
|
9 |
- accuracy
|
10 |
-
tags:
|
11 |
-
- generated_from_trainer
|
12 |
model-index:
|
13 |
- name: scenario-kd-pre-ner-full_data-univner_full44
|
14 |
results: []
|
@@ -19,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
19 |
|
20 |
# scenario-kd-pre-ner-full_data-univner_full44
|
21 |
|
22 |
-
This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on the None dataset.
|
23 |
It achieves the following results on the evaluation set:
|
24 |
-
- Loss: 0.
|
25 |
-
- Precision: 0.
|
26 |
-
- Recall: 0.
|
27 |
-
- F1: 0.
|
28 |
-
- Accuracy: 0.
|
29 |
|
30 |
## Model description
|
31 |
|
@@ -56,57 +56,57 @@ The following hyperparameters were used during training:
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
58 |
|:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
59 |
-
| 1.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
81 |
-
| 0.
|
82 |
-
| 0.
|
83 |
-
| 0.
|
84 |
-
| 0.
|
85 |
-
| 0.
|
86 |
-
| 0.
|
87 |
-
| 0.
|
88 |
-
| 0.
|
89 |
-
| 0.
|
90 |
-
| 0.
|
91 |
-
| 0.
|
92 |
-
| 0.
|
93 |
-
| 0.
|
94 |
-
| 0.
|
95 |
-
| 0.
|
96 |
-
| 0.
|
97 |
-
| 0.
|
98 |
-
| 0.
|
99 |
-
| 0.
|
100 |
-
| 0.
|
101 |
-
| 0.
|
102 |
-
| 0.
|
103 |
-
| 0.
|
104 |
-
| 0.
|
105 |
-
| 0.
|
106 |
-
| 0.
|
107 |
-
| 0.
|
108 |
-
| 0.
|
109 |
-
| 0.
|
110 |
|
111 |
|
112 |
### Framework versions
|
|
|
1 |
---
|
|
|
2 |
library_name: transformers
|
3 |
license: mit
|
4 |
+
base_model: FacebookAI/xlm-roberta-base
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
metrics:
|
8 |
- precision
|
9 |
- recall
|
10 |
- f1
|
11 |
- accuracy
|
|
|
|
|
12 |
model-index:
|
13 |
- name: scenario-kd-pre-ner-full_data-univner_full44
|
14 |
results: []
|
|
|
19 |
|
20 |
# scenario-kd-pre-ner-full_data-univner_full44
|
21 |
|
22 |
+
This model is a fine-tuned version of [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base) on the None dataset.
|
23 |
It achieves the following results on the evaluation set:
|
24 |
+
- Loss: 0.4381
|
25 |
+
- Precision: 0.8004
|
26 |
+
- Recall: 0.7801
|
27 |
+
- F1: 0.7902
|
28 |
+
- Accuracy: 0.9786
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
58 |
|:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
59 |
+
| 1.4593 | 0.5828 | 500 | 0.8367 | 0.6935 | 0.6559 | 0.6742 | 0.9682 |
|
60 |
+
| 0.7232 | 1.1655 | 1000 | 0.7569 | 0.7339 | 0.6980 | 0.7155 | 0.9724 |
|
61 |
+
| 0.594 | 1.7483 | 1500 | 0.6330 | 0.7335 | 0.7451 | 0.7392 | 0.9741 |
|
62 |
+
| 0.4986 | 2.3310 | 2000 | 0.6003 | 0.7291 | 0.7552 | 0.7419 | 0.9746 |
|
63 |
+
| 0.446 | 2.9138 | 2500 | 0.5729 | 0.7403 | 0.7601 | 0.7501 | 0.9747 |
|
64 |
+
| 0.385 | 3.4965 | 3000 | 0.5584 | 0.7441 | 0.7617 | 0.7528 | 0.9757 |
|
65 |
+
| 0.3605 | 4.0793 | 3500 | 0.5602 | 0.7615 | 0.7575 | 0.7595 | 0.9758 |
|
66 |
+
| 0.3172 | 4.6620 | 4000 | 0.5417 | 0.7546 | 0.7725 | 0.7634 | 0.9764 |
|
67 |
+
| 0.3061 | 5.2448 | 4500 | 0.5329 | 0.7884 | 0.7485 | 0.7680 | 0.9769 |
|
68 |
+
| 0.2856 | 5.8275 | 5000 | 0.5194 | 0.7837 | 0.7618 | 0.7726 | 0.9769 |
|
69 |
+
| 0.2642 | 6.4103 | 5500 | 0.5154 | 0.7622 | 0.7780 | 0.7700 | 0.9765 |
|
70 |
+
| 0.2592 | 6.9930 | 6000 | 0.5193 | 0.7882 | 0.7572 | 0.7724 | 0.9764 |
|
71 |
+
| 0.2401 | 7.5758 | 6500 | 0.5123 | 0.7727 | 0.7599 | 0.7663 | 0.9763 |
|
72 |
+
| 0.2344 | 8.1585 | 7000 | 0.4987 | 0.7742 | 0.7736 | 0.7739 | 0.9771 |
|
73 |
+
| 0.2234 | 8.7413 | 7500 | 0.4914 | 0.7894 | 0.7640 | 0.7764 | 0.9777 |
|
74 |
+
| 0.2131 | 9.3240 | 8000 | 0.4856 | 0.7691 | 0.7827 | 0.7758 | 0.9770 |
|
75 |
+
| 0.2089 | 9.9068 | 8500 | 0.4898 | 0.7895 | 0.7655 | 0.7773 | 0.9773 |
|
76 |
+
| 0.1972 | 10.4895 | 9000 | 0.4860 | 0.7828 | 0.7726 | 0.7777 | 0.9775 |
|
77 |
+
| 0.1942 | 11.0723 | 9500 | 0.4787 | 0.7807 | 0.7807 | 0.7807 | 0.9776 |
|
78 |
+
| 0.1854 | 11.6550 | 10000 | 0.4858 | 0.7916 | 0.7635 | 0.7773 | 0.9771 |
|
79 |
+
| 0.183 | 12.2378 | 10500 | 0.4739 | 0.7924 | 0.7800 | 0.7862 | 0.9779 |
|
80 |
+
| 0.1781 | 12.8205 | 11000 | 0.4741 | 0.7990 | 0.7661 | 0.7822 | 0.9779 |
|
81 |
+
| 0.1704 | 13.4033 | 11500 | 0.4622 | 0.7937 | 0.7719 | 0.7826 | 0.9784 |
|
82 |
+
| 0.1698 | 13.9860 | 12000 | 0.4650 | 0.8000 | 0.7657 | 0.7825 | 0.9777 |
|
83 |
+
| 0.1635 | 14.5688 | 12500 | 0.4604 | 0.7913 | 0.7778 | 0.7845 | 0.9782 |
|
84 |
+
| 0.1605 | 15.1515 | 13000 | 0.4656 | 0.7990 | 0.7605 | 0.7793 | 0.9774 |
|
85 |
+
| 0.1559 | 15.7343 | 13500 | 0.4638 | 0.8001 | 0.7658 | 0.7826 | 0.9778 |
|
86 |
+
| 0.1531 | 16.3170 | 14000 | 0.4550 | 0.7991 | 0.7735 | 0.7861 | 0.9780 |
|
87 |
+
| 0.1519 | 16.8998 | 14500 | 0.4606 | 0.7949 | 0.7735 | 0.7841 | 0.9780 |
|
88 |
+
| 0.1482 | 17.4825 | 15000 | 0.4483 | 0.7947 | 0.7831 | 0.7889 | 0.9787 |
|
89 |
+
| 0.1449 | 18.0653 | 15500 | 0.4521 | 0.7947 | 0.7722 | 0.7833 | 0.9780 |
|
90 |
+
| 0.1407 | 18.6480 | 16000 | 0.4508 | 0.7932 | 0.7728 | 0.7829 | 0.9780 |
|
91 |
+
| 0.1415 | 19.2308 | 16500 | 0.4484 | 0.8031 | 0.7728 | 0.7876 | 0.9785 |
|
92 |
+
| 0.1385 | 19.8135 | 17000 | 0.4461 | 0.7991 | 0.7774 | 0.7881 | 0.9785 |
|
93 |
+
| 0.1358 | 20.3963 | 17500 | 0.4488 | 0.7970 | 0.7756 | 0.7862 | 0.9783 |
|
94 |
+
| 0.1358 | 20.9790 | 18000 | 0.4431 | 0.8006 | 0.7772 | 0.7887 | 0.9787 |
|
95 |
+
| 0.1325 | 21.5618 | 18500 | 0.4395 | 0.8053 | 0.7768 | 0.7908 | 0.9785 |
|
96 |
+
| 0.1322 | 22.1445 | 19000 | 0.4461 | 0.7960 | 0.7725 | 0.7841 | 0.9780 |
|
97 |
+
| 0.1296 | 22.7273 | 19500 | 0.4401 | 0.7988 | 0.7746 | 0.7866 | 0.9781 |
|
98 |
+
| 0.1288 | 23.3100 | 20000 | 0.4416 | 0.7961 | 0.7690 | 0.7823 | 0.9781 |
|
99 |
+
| 0.1271 | 23.8928 | 20500 | 0.4450 | 0.8024 | 0.7673 | 0.7844 | 0.9781 |
|
100 |
+
| 0.1246 | 24.4755 | 21000 | 0.4403 | 0.7967 | 0.7703 | 0.7833 | 0.9782 |
|
101 |
+
| 0.1254 | 25.0583 | 21500 | 0.4403 | 0.7976 | 0.7742 | 0.7857 | 0.9782 |
|
102 |
+
| 0.1231 | 25.6410 | 22000 | 0.4438 | 0.8057 | 0.7694 | 0.7872 | 0.9783 |
|
103 |
+
| 0.1228 | 26.2238 | 22500 | 0.4365 | 0.8058 | 0.7741 | 0.7896 | 0.9785 |
|
104 |
+
| 0.1224 | 26.8065 | 23000 | 0.4325 | 0.7995 | 0.7806 | 0.7899 | 0.9787 |
|
105 |
+
| 0.1211 | 27.3893 | 23500 | 0.4402 | 0.8058 | 0.7676 | 0.7862 | 0.9782 |
|
106 |
+
| 0.1202 | 27.9720 | 24000 | 0.4378 | 0.8017 | 0.7689 | 0.7849 | 0.9784 |
|
107 |
+
| 0.1201 | 28.5548 | 24500 | 0.4331 | 0.8000 | 0.7784 | 0.7890 | 0.9786 |
|
108 |
+
| 0.12 | 29.1375 | 25000 | 0.4317 | 0.7999 | 0.7794 | 0.7895 | 0.9787 |
|
109 |
+
| 0.1194 | 29.7203 | 25500 | 0.4381 | 0.8004 | 0.7801 | 0.7902 | 0.9786 |
|
110 |
|
111 |
|
112 |
### Framework versions
|
config.json
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
"attention_probs_dropout_prob": 0.1,
|
|
|
|
|
|
|
7 |
"hidden_act": "gelu",
|
8 |
"hidden_dropout_prob": 0.1,
|
9 |
"hidden_size": 768,
|
@@ -27,27 +30,17 @@
|
|
27 |
"LABEL_5": 5,
|
28 |
"LABEL_6": 6
|
29 |
},
|
30 |
-
"layer_norm_eps": 1e-
|
31 |
-
"max_position_embeddings":
|
32 |
-
"
|
33 |
-
"model_type": "deberta-v2",
|
34 |
-
"norm_rel_ebd": "layer_norm",
|
35 |
"num_attention_heads": 12,
|
36 |
"num_hidden_layers": 6,
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"pooler_hidden_size": 768,
|
41 |
-
"pos_att_type": [
|
42 |
-
"p2c",
|
43 |
-
"c2p"
|
44 |
-
],
|
45 |
-
"position_biased_input": false,
|
46 |
-
"position_buckets": 256,
|
47 |
-
"relative_attention": true,
|
48 |
-
"share_att_key": true,
|
49 |
"torch_dtype": "float32",
|
50 |
"transformers_version": "4.44.2",
|
51 |
-
"type_vocab_size":
|
52 |
-
"
|
|
|
53 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "FacebookAI/xlm-roberta-base",
|
3 |
"architectures": [
|
4 |
+
"XLMRobertaForTokenClassificationKD"
|
5 |
],
|
6 |
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
"hidden_act": "gelu",
|
11 |
"hidden_dropout_prob": 0.1,
|
12 |
"hidden_size": 768,
|
|
|
30 |
"LABEL_5": 5,
|
31 |
"LABEL_6": 6
|
32 |
},
|
33 |
+
"layer_norm_eps": 1e-05,
|
34 |
+
"max_position_embeddings": 514,
|
35 |
+
"model_type": "xlm-roberta",
|
|
|
|
|
36 |
"num_attention_heads": 12,
|
37 |
"num_hidden_layers": 6,
|
38 |
+
"output_past": true,
|
39 |
+
"pad_token_id": 1,
|
40 |
+
"position_embedding_type": "absolute",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
"torch_dtype": "float32",
|
42 |
"transformers_version": "4.44.2",
|
43 |
+
"type_vocab_size": 1,
|
44 |
+
"use_cache": true,
|
45 |
+
"vocab_size": 250002
|
46 |
}
|
eval_result_ner.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"ceb_gja": {"precision": 0.
|
|
|
1 |
+
{"ceb_gja": {"precision": 0.6140350877192983, "recall": 0.7142857142857143, "f1": 0.6603773584905661, "accuracy": 0.9691119691119691}, "en_pud": {"precision": 0.7703488372093024, "recall": 0.7395348837209302, "f1": 0.7546274323682961, "accuracy": 0.976435587457499}, "de_pud": {"precision": 0.7527638190954774, "recall": 0.7208854667949952, "f1": 0.7364798426745329, "accuracy": 0.9719188036191458}, "pt_pud": {"precision": 0.8152380952380952, "recall": 0.7788898999090081, "f1": 0.796649604467194, "accuracy": 0.9806895373178963}, "ru_pud": {"precision": 0.6679499518768046, "recall": 0.6698841698841699, "f1": 0.6689156626506023, "accuracy": 0.9669852751227073}, "sv_pud": {"precision": 0.8302469135802469, "recall": 0.7842565597667639, "f1": 0.8065967016491754, "accuracy": 0.9807611658628643}, "tl_trg": {"precision": 0.7916666666666666, "recall": 0.8260869565217391, "f1": 0.8085106382978724, "accuracy": 0.989100817438692}, "tl_ugnayan": {"precision": 0.6774193548387096, "recall": 0.6363636363636364, "f1": 0.65625, "accuracy": 0.9726526891522334}, "zh_gsd": {"precision": 0.8073394495412844, "recall": 0.803129074315515, "f1": 0.8052287581699348, "accuracy": 0.9725274725274725}, "zh_gsdsimp": {"precision": 0.8204081632653061, "recall": 0.7903014416775884, "f1": 0.8050734312416555, "accuracy": 0.9723609723609724}, "hr_set": {"precision": 0.8908450704225352, "recall": 0.9016393442622951, "f1": 0.8962097059865392, "accuracy": 0.9874690849134378}, "da_ddt": {"precision": 0.8152709359605911, "recall": 0.7404921700223713, "f1": 0.7760844079718641, "accuracy": 0.9831387808041504}, "en_ewt": {"precision": 0.8059701492537313, "recall": 0.7444852941176471, "f1": 0.7740086000955566, "accuracy": 0.9770888950870622}, "pt_bosque": {"precision": 0.7911985018726592, "recall": 0.6954732510288066, "f1": 0.7402540516863776, "accuracy": 0.97616287494566}, "sr_set": {"precision": 0.9170616113744076, "recall": 0.9138134592680047, "f1": 0.9154346540508574, "accuracy": 0.9883547850450923}, 
"sk_snk": {"precision": 0.7004716981132075, "recall": 0.6491803278688525, "f1": 0.6738513896766876, "accuracy": 0.956265703517588}, "sv_talbanken": {"precision": 0.8374384236453202, "recall": 0.8673469387755102, "f1": 0.8521303258145363, "accuracy": 0.997202728566521}}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e387e84fbf9e6492c9581ecfd084713f06ca8a8c7d602b5ad947fe5fed568ea
|
3 |
+
size 939737140
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e797e61d5ad0cad7bf5016670368d7b64ade6b5ebcf4570b0b1fd9491339e87b
|
3 |
size 5304
|