Initial Commit
Browse files
- README.md +32 -60
- config.json +14 -21
- eval_result_ner.json +1 -1
- model.safetensors +2 -2
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
---
|
2 |
-
base_model: microsoft/mdeberta-v3-base
|
3 |
library_name: transformers
|
4 |
license: mit
|
|
|
|
|
|
|
5 |
metrics:
|
6 |
- precision
|
7 |
- recall
|
8 |
- f1
|
9 |
- accuracy
|
10 |
-
tags:
|
11 |
-
- generated_from_trainer
|
12 |
model-index:
|
13 |
- name: scenario-kd-pre-ner-full_data-univner_full66
|
14 |
results: []
|
@@ -19,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
19 |
|
20 |
# scenario-kd-pre-ner-full_data-univner_full66
|
21 |
|
22 |
-
This model is a fine-tuned version of [
|
23 |
It achieves the following results on the evaluation set:
|
24 |
-
- Loss: 0.
|
25 |
-
- Precision: 0.
|
26 |
-
- Recall: 0.
|
27 |
-
- F1: 0.
|
28 |
-
- Accuracy: 0.
|
29 |
|
30 |
## Model description
|
31 |
|
@@ -56,57 +56,29 @@ The following hyperparameters were used during training:
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
58 |
|:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
59 |
-
| 1.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
81 |
-
| 0.
|
82 |
-
| 0.1247 | 13.9860 | 12000 | 0.4140 | 0.7799 | 0.7332 | 0.7559 | 0.9748 |
|
83 |
-
| 0.1213 | 14.5688 | 12500 | 0.4017 | 0.7700 | 0.7549 | 0.7623 | 0.9754 |
|
84 |
-
| 0.1172 | 15.1515 | 13000 | 0.4140 | 0.7800 | 0.7399 | 0.7594 | 0.9748 |
|
85 |
-
| 0.1178 | 15.7343 | 13500 | 0.3935 | 0.7822 | 0.7490 | 0.7652 | 0.9755 |
|
86 |
-
| 0.1154 | 16.3170 | 14000 | 0.4041 | 0.7915 | 0.7244 | 0.7565 | 0.9750 |
|
87 |
-
| 0.1137 | 16.8998 | 14500 | 0.3943 | 0.7823 | 0.7498 | 0.7657 | 0.9759 |
|
88 |
-
| 0.1115 | 17.4825 | 15000 | 0.3853 | 0.7832 | 0.7537 | 0.7682 | 0.9759 |
|
89 |
-
| 0.1089 | 18.0653 | 15500 | 0.3902 | 0.7816 | 0.7539 | 0.7675 | 0.9756 |
|
90 |
-
| 0.1068 | 18.6480 | 16000 | 0.3936 | 0.7766 | 0.7605 | 0.7685 | 0.9760 |
|
91 |
-
| 0.1074 | 19.2308 | 16500 | 0.3786 | 0.7837 | 0.7660 | 0.7748 | 0.9765 |
|
92 |
-
| 0.1036 | 19.8135 | 17000 | 0.3892 | 0.7869 | 0.7331 | 0.7590 | 0.9755 |
|
93 |
-
| 0.1058 | 20.3963 | 17500 | 0.3897 | 0.7845 | 0.7513 | 0.7675 | 0.9757 |
|
94 |
-
| 0.1026 | 20.9790 | 18000 | 0.3869 | 0.7803 | 0.7553 | 0.7676 | 0.9758 |
|
95 |
-
| 0.1021 | 21.5618 | 18500 | 0.3855 | 0.7866 | 0.7478 | 0.7667 | 0.9759 |
|
96 |
-
| 0.1007 | 22.1445 | 19000 | 0.3866 | 0.7921 | 0.7266 | 0.7579 | 0.9752 |
|
97 |
-
| 0.0999 | 22.7273 | 19500 | 0.3811 | 0.7832 | 0.7552 | 0.7689 | 0.9758 |
|
98 |
-
| 0.0994 | 23.3100 | 20000 | 0.3806 | 0.7896 | 0.7485 | 0.7685 | 0.9761 |
|
99 |
-
| 0.0985 | 23.8928 | 20500 | 0.3839 | 0.7909 | 0.7511 | 0.7705 | 0.9762 |
|
100 |
-
| 0.0972 | 24.4755 | 21000 | 0.3742 | 0.7881 | 0.7513 | 0.7692 | 0.9761 |
|
101 |
-
| 0.0974 | 25.0583 | 21500 | 0.3763 | 0.7942 | 0.7400 | 0.7662 | 0.9756 |
|
102 |
-
| 0.0957 | 25.6410 | 22000 | 0.3766 | 0.7956 | 0.7534 | 0.7739 | 0.9764 |
|
103 |
-
| 0.0961 | 26.2238 | 22500 | 0.3769 | 0.7970 | 0.7439 | 0.7696 | 0.9757 |
|
104 |
-
| 0.0958 | 26.8065 | 23000 | 0.3752 | 0.7977 | 0.7449 | 0.7704 | 0.9759 |
|
105 |
-
| 0.0955 | 27.3893 | 23500 | 0.3708 | 0.7887 | 0.7576 | 0.7728 | 0.9765 |
|
106 |
-
| 0.0942 | 27.9720 | 24000 | 0.3709 | 0.7929 | 0.7503 | 0.7710 | 0.9760 |
|
107 |
-
| 0.0941 | 28.5548 | 24500 | 0.3742 | 0.7915 | 0.7526 | 0.7715 | 0.9761 |
|
108 |
-
| 0.0946 | 29.1375 | 25000 | 0.3722 | 0.7970 | 0.7553 | 0.7756 | 0.9766 |
|
109 |
-
| 0.0939 | 29.7203 | 25500 | 0.3678 | 0.7980 | 0.7563 | 0.7766 | 0.9766 |
|
110 |
|
111 |
|
112 |
### Framework versions
|
|
|
1 |
---
|
|
|
2 |
library_name: transformers
|
3 |
license: mit
|
4 |
+
base_model: FacebookAI/xlm-roberta-base
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
metrics:
|
8 |
- precision
|
9 |
- recall
|
10 |
- f1
|
11 |
- accuracy
|
|
|
|
|
12 |
model-index:
|
13 |
- name: scenario-kd-pre-ner-full_data-univner_full66
|
14 |
results: []
|
|
|
19 |
|
20 |
# scenario-kd-pre-ner-full_data-univner_full66
|
21 |
|
22 |
+
This model is a fine-tuned version of [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base) on an unspecified dataset (the training dataset was not recorded in the model card metadata).
|
23 |
It achieves the following results on the evaluation set:
|
24 |
+
- Loss: 0.5549
|
25 |
+
- Precision: 0.7660
|
26 |
+
- Recall: 0.7319
|
27 |
+
- F1: 0.7485
|
28 |
+
- Accuracy: 0.9802
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
58 |
|:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
59 |
+
| 1.2484 | 1.2755 | 500 | 0.8737 | 0.6792 | 0.5631 | 0.6157 | 0.9709 |
|
60 |
+
| 0.6459 | 2.5510 | 1000 | 0.7190 | 0.6926 | 0.6739 | 0.6831 | 0.9771 |
|
61 |
+
| 0.5071 | 3.8265 | 1500 | 0.6650 | 0.7076 | 0.6863 | 0.6968 | 0.9773 |
|
62 |
+
| 0.4233 | 5.1020 | 2000 | 0.6513 | 0.6933 | 0.7019 | 0.6975 | 0.9775 |
|
63 |
+
| 0.3655 | 6.3776 | 2500 | 0.6252 | 0.7421 | 0.6822 | 0.7109 | 0.9778 |
|
64 |
+
| 0.3251 | 7.6531 | 3000 | 0.6172 | 0.7412 | 0.7174 | 0.7291 | 0.9791 |
|
65 |
+
| 0.2963 | 8.9286 | 3500 | 0.6204 | 0.7143 | 0.6677 | 0.6902 | 0.9773 |
|
66 |
+
| 0.2699 | 10.2041 | 4000 | 0.5919 | 0.7310 | 0.7288 | 0.7299 | 0.9792 |
|
67 |
+
| 0.2469 | 11.4796 | 4500 | 0.6168 | 0.7560 | 0.6863 | 0.7195 | 0.9788 |
|
68 |
+
| 0.2313 | 12.7551 | 5000 | 0.5871 | 0.7353 | 0.7133 | 0.7241 | 0.9792 |
|
69 |
+
| 0.2148 | 14.0306 | 5500 | 0.5947 | 0.7358 | 0.7122 | 0.7238 | 0.9794 |
|
70 |
+
| 0.2022 | 15.3061 | 6000 | 0.5830 | 0.7298 | 0.7019 | 0.7156 | 0.9790 |
|
71 |
+
| 0.1933 | 16.5816 | 6500 | 0.5734 | 0.7427 | 0.7143 | 0.7282 | 0.9794 |
|
72 |
+
| 0.185 | 17.8571 | 7000 | 0.5814 | 0.7352 | 0.6957 | 0.7149 | 0.9792 |
|
73 |
+
| 0.1767 | 19.1327 | 7500 | 0.5670 | 0.7516 | 0.7236 | 0.7373 | 0.9797 |
|
74 |
+
| 0.1688 | 20.4082 | 8000 | 0.5770 | 0.7551 | 0.6957 | 0.7241 | 0.9791 |
|
75 |
+
| 0.1634 | 21.6837 | 8500 | 0.5621 | 0.7443 | 0.7143 | 0.7290 | 0.9792 |
|
76 |
+
| 0.1592 | 22.9592 | 9000 | 0.5691 | 0.7495 | 0.7091 | 0.7287 | 0.9790 |
|
77 |
+
| 0.1538 | 24.2347 | 9500 | 0.5557 | 0.7481 | 0.7195 | 0.7335 | 0.9802 |
|
78 |
+
| 0.1513 | 25.5102 | 10000 | 0.5687 | 0.7446 | 0.7091 | 0.7264 | 0.9791 |
|
79 |
+
| 0.1489 | 26.7857 | 10500 | 0.5554 | 0.7623 | 0.7236 | 0.7424 | 0.9801 |
|
80 |
+
| 0.145 | 28.0612 | 11000 | 0.5488 | 0.7564 | 0.7329 | 0.7445 | 0.9804 |
|
81 |
+
| 0.144 | 29.3367 | 11500 | 0.5549 | 0.7660 | 0.7319 | 0.7485 | 0.9802 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
|
84 |
### Framework versions
|
config.json
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
"attention_probs_dropout_prob": 0.1,
|
|
|
|
|
|
|
7 |
"hidden_act": "gelu",
|
8 |
"hidden_dropout_prob": 0.1,
|
9 |
"hidden_size": 768,
|
@@ -27,27 +30,17 @@
|
|
27 |
"LABEL_5": 5,
|
28 |
"LABEL_6": 6
|
29 |
},
|
30 |
-
"layer_norm_eps": 1e-
|
31 |
-
"max_position_embeddings":
|
32 |
-
"
|
33 |
-
"model_type": "deberta-v2",
|
34 |
-
"norm_rel_ebd": "layer_norm",
|
35 |
"num_attention_heads": 12,
|
36 |
"num_hidden_layers": 6,
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"pooler_hidden_size": 768,
|
41 |
-
"pos_att_type": [
|
42 |
-
"p2c",
|
43 |
-
"c2p"
|
44 |
-
],
|
45 |
-
"position_biased_input": false,
|
46 |
-
"position_buckets": 256,
|
47 |
-
"relative_attention": true,
|
48 |
-
"share_att_key": true,
|
49 |
"torch_dtype": "float32",
|
50 |
"transformers_version": "4.44.2",
|
51 |
-
"type_vocab_size":
|
52 |
-
"
|
|
|
53 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "FacebookAI/xlm-roberta-base",
|
3 |
"architectures": [
|
4 |
+
"XLMRobertaForTokenClassificationKD"
|
5 |
],
|
6 |
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
"hidden_act": "gelu",
|
11 |
"hidden_dropout_prob": 0.1,
|
12 |
"hidden_size": 768,
|
|
|
30 |
"LABEL_5": 5,
|
31 |
"LABEL_6": 6
|
32 |
},
|
33 |
+
"layer_norm_eps": 1e-05,
|
34 |
+
"max_position_embeddings": 514,
|
35 |
+
"model_type": "xlm-roberta",
|
|
|
|
|
36 |
"num_attention_heads": 12,
|
37 |
"num_hidden_layers": 6,
|
38 |
+
"output_past": true,
|
39 |
+
"pad_token_id": 1,
|
40 |
+
"position_embedding_type": "absolute",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
"torch_dtype": "float32",
|
42 |
"transformers_version": "4.44.2",
|
43 |
+
"type_vocab_size": 1,
|
44 |
+
"use_cache": true,
|
45 |
+
"vocab_size": 250002
|
46 |
}
|
eval_result_ner.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"ceb_gja": {"precision": 0.
|
|
|
1 |
+
{"ceb_gja": {"precision": 0.391304347826087, "recall": 0.5510204081632653, "f1": 0.4576271186440678, "accuracy": 0.9467181467181467}, "en_pud": {"precision": 0.7514734774066798, "recall": 0.7116279069767442, "f1": 0.73100812231247, "accuracy": 0.9743577635058557}, "de_pud": {"precision": 0.7108307045215563, "recall": 0.6506256015399422, "f1": 0.6793969849246232, "accuracy": 0.9660119075523885}, "pt_pud": {"precision": 0.7792746113989637, "recall": 0.6842584167424932, "f1": 0.7286821705426357, "accuracy": 0.9727431964796855}, "ru_pud": {"precision": 0.630939226519337, "recall": 0.5511583011583011, "f1": 0.5883565172591447, "accuracy": 0.9569103590803409}, "sv_pud": {"precision": 0.7871878393051032, "recall": 0.7045675413022352, "f1": 0.7435897435897436, "accuracy": 0.9741560075487523}, "tl_trg": {"precision": 0.5833333333333334, "recall": 0.6086956521739131, "f1": 0.5957446808510638, "accuracy": 0.9768392370572208}, "tl_ugnayan": {"precision": 0.5, "recall": 0.5757575757575758, "f1": 0.5352112676056339, "accuracy": 0.9626253418413856}, "zh_gsd": {"precision": 0.47767857142857145, "recall": 0.1395045632333768, "f1": 0.21594349142280525, "accuracy": 0.90001665001665}, "zh_gsdsimp": {"precision": 0.43315508021390375, "recall": 0.10615989515072084, "f1": 0.1705263157894737, "accuracy": 0.8986013986013986}, "hr_set": {"precision": 0.7674223341729639, "recall": 0.6514611546685674, "f1": 0.7047031611410948, "accuracy": 0.9631492168178071}, "da_ddt": {"precision": 0.7638888888888888, "recall": 0.6152125279642058, "f1": 0.6815365551425031, "accuracy": 0.9760550733313379}, "en_ewt": {"precision": 0.7879396984924623, "recall": 0.7205882352941176, "f1": 0.7527604416706672, "accuracy": 0.9752958520938758}, "pt_bosque": {"precision": 0.733402489626556, "recall": 0.5818930041152264, "f1": 0.6489215236346948, "accuracy": 0.9633024199391392}, "sr_set": {"precision": 0.7739251040221914, "recall": 0.6587957497048406, "f1": 0.711734693877551, "accuracy": 0.9558707643814027}, "sk_snk": 
{"precision": 0.6234817813765182, "recall": 0.5049180327868853, "f1": 0.5579710144927537, "accuracy": 0.9392273869346733}, "sv_talbanken": {"precision": 0.8009950248756219, "recall": 0.8214285714285714, "f1": 0.8110831234256928, "accuracy": 0.9961721548805025}}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:524137227bc0f44fa866806452b581fbb218f41a5416f7c31180ebda4bc467b7
|
3 |
+
size 939737140
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0788c5ff239352ce6a821267905d965046999856d55e0b38e1d269163fb3cca4
|
3 |
size 5304
|