Initial Commit

Browse files

Files changed (5)

README.md +27 -30
config.json +14 -21
eval_result_ner.json +1 -1
pytorch_model.bin +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: mit
-base_model: microsoft/mdeberta-v3-base
 tags:
 - generated_from_trainer
 metrics:
@@ -18,13 +18,13 @@ should probably proofread and complete it, then remove this comment. -->
 # scenario-TCR-NER_data-univner_half
-This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1170
-- Precision: 0.8494
-- Recall: 0.8655
-- F1: 0.8574
-- Accuracy: 0.9842
 ## Model description
@@ -53,29 +53,26 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Precision | Recall | F1     | Accuracy |
-|:-------------:|:-----:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| 0.1168        | 0.58  | 500   | 0.0625          | 0.8182    | 0.8512 | 0.8344 | 0.9825   |
-| 0.0433        | 1.17  | 1000  | 0.0594          | 0.8396    | 0.8632 | 0.8512 | 0.9843   |
-| 0.0305        | 1.75  | 1500  | 0.0677          | 0.8296    | 0.8703 | 0.8495 | 0.9836   |
-| 0.0213        | 2.33  | 2000  | 0.0761          | 0.8253    | 0.8833 | 0.8533 | 0.9839   |
-| 0.0185        | 2.91  | 2500  | 0.0738          | 0.8600    | 0.8612 | 0.8606 | 0.9850   |
-| 0.012         | 3.5   | 3000  | 0.0784          | 0.8374    | 0.8572 | 0.8471 | 0.9835   |
-| 0.0124        | 4.08  | 3500  | 0.0832          | 0.8363    | 0.8704 | 0.8530 | 0.9843   |
-| 0.0095        | 4.66  | 4000  | 0.0806          | 0.8423    | 0.8713 | 0.8565 | 0.9845   |
-| 0.008         | 5.24  | 4500  | 0.1049          | 0.8218    | 0.8625 | 0.8417 | 0.9823   |
-| 0.0071        | 5.83  | 5000  | 0.0879          | 0.8420    | 0.8632 | 0.8525 | 0.9842   |
-| 0.0068        | 6.41  | 5500  | 0.0918          | 0.8507    | 0.8733 | 0.8619 | 0.9846   |
-| 0.0058        | 6.99  | 6000  | 0.0951          | 0.8488    | 0.8667 | 0.8577 | 0.9845   |
-| 0.0047        | 7.58  | 6500  | 0.0991          | 0.8467    | 0.8651 | 0.8558 | 0.9842   |
-| 0.0047        | 8.16  | 7000  | 0.1025          | 0.8603    | 0.8573 | 0.8588 | 0.9845   |
-| 0.0043        | 8.74  | 7500  | 0.1020          | 0.8473    | 0.8678 | 0.8574 | 0.9845   |
-| 0.0031        | 9.32  | 8000  | 0.1085          | 0.8437    | 0.8582 | 0.8509 | 0.9842   |
-| 0.0038        | 9.91  | 8500  | 0.1082          | 0.8602    | 0.8440 | 0.8520 | 0.9839   |
-| 0.0024        | 10.49 | 9000  | 0.1163          | 0.8533    | 0.8544 | 0.8539 | 0.9838   |
-| 0.0038        | 11.07 | 9500  | 0.1139          | 0.8528    | 0.8567 | 0.8548 | 0.9843   |
-| 0.0024        | 11.66 | 10000 | 0.1130          | 0.8619    | 0.8476 | 0.8547 | 0.9841   |
-| 0.0024        | 12.24 | 10500 | 0.1170          | 0.8494    | 0.8655 | 0.8574 | 0.9842   |
 ### Framework versions

 ---
 license: mit
+base_model: xlm-roberta-base
 tags:
 - generated_from_trainer
 metrics:
 # scenario-TCR-NER_data-univner_half
+This model is a fine-tuned version of [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1160
+- Precision: 0.8555
+- Recall: 0.8189
+- F1: 0.8368
+- Accuracy: 0.9828
 ## Model description
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| 0.1189        | 0.58  | 500  | 0.0623          | 0.8010    | 0.8531 | 0.8262 | 0.9822   |
+| 0.0469        | 1.17  | 1000 | 0.0640          | 0.8246    | 0.8567 | 0.8404 | 0.9833   |
+| 0.0348        | 1.75  | 1500 | 0.0668          | 0.8335    | 0.8550 | 0.8441 | 0.9834   |
+| 0.0242        | 2.33  | 2000 | 0.0734          | 0.8202    | 0.8538 | 0.8367 | 0.9826   |
+| 0.0215        | 2.91  | 2500 | 0.0717          | 0.8455    | 0.8598 | 0.8526 | 0.9843   |
+| 0.0142        | 3.5   | 3000 | 0.0802          | 0.8383    | 0.8424 | 0.8404 | 0.9836   |
+| 0.0144        | 4.08  | 3500 | 0.0836          | 0.8443    | 0.8554 | 0.8499 | 0.9843   |
+| 0.0103        | 4.66  | 4000 | 0.0811          | 0.8479    | 0.8590 | 0.8534 | 0.9844   |
+| 0.0087        | 5.24  | 4500 | 0.0887          | 0.8364    | 0.8628 | 0.8494 | 0.9840   |
+| 0.0092        | 5.83  | 5000 | 0.0876          | 0.8367    | 0.8430 | 0.8399 | 0.9833   |
+| 0.0076        | 6.41  | 5500 | 0.1004          | 0.8440    | 0.8495 | 0.8468 | 0.9841   |
+| 0.007         | 6.99  | 6000 | 0.1080          | 0.8215    | 0.8518 | 0.8364 | 0.9830   |
+| 0.0055        | 7.58  | 6500 | 0.0988          | 0.8454    | 0.8358 | 0.8406 | 0.9831   |
+| 0.0055        | 8.16  | 7000 | 0.0950          | 0.8485    | 0.8461 | 0.8473 | 0.9839   |
+| 0.0044        | 8.74  | 7500 | 0.1001          | 0.8456    | 0.8414 | 0.8435 | 0.9836   |
+| 0.004         | 9.32  | 8000 | 0.1084          | 0.8340    | 0.8495 | 0.8417 | 0.9834   |
+| 0.004         | 9.91  | 8500 | 0.1175          | 0.8351    | 0.8505 | 0.8427 | 0.9829   |
+| 0.0033        | 10.49 | 9000 | 0.1160          | 0.8555    | 0.8189 | 0.8368 | 0.9828   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,9 +1,12 @@
 {
-  "_name_or_path": "microsoft/mdeberta-v3-base",
   "architectures": [
-    "DebertaV2ForTokenClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -27,27 +30,17 @@
     "LABEL_5": 5,
     "LABEL_6": 6
   },
-  "layer_norm_eps": 1e-07,
-  "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
   "torch_dtype": "float32",
   "transformers_version": "4.33.3",
-  "type_vocab_size": 0,
-  "vocab_size": 251000
 }

 {
+  "_name_or_path": "xlm-roberta-base",
   "architectures": [
+    "XLMRobertaForTokenClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
     "LABEL_5": 5,
     "LABEL_6": 6
   },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.33.3",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
 }

eval_result_ner.json CHANGED Viewed

@@ -1 +1 @@

- {"zh_gsd": {"precision": 0.8458646616541353, "recall": 0.8800521512385919, "f1": 0.8626198083067093, "accuracy": 0.9806859806859807}, "zh_gsdsimp": {"precision": 0.8640406607369758, "recall": 0.891218872870249, "f1": 0.8774193548387097, "accuracy": 0.9823509823509824}, "hr_set": {"precision": 0.9101508916323731, "recall": 0.9458303635067712, "f1": 0.9276476756378887, "accuracy": 0.990560593569662}, "da_ddt": {"precision": 0.8681818181818182, "recall": 0.854586129753915, "f1": 0.8613303269447576, "accuracy": 0.9895240945824604}, "en_ewt": {"precision": 0.836555360281195, "recall": 0.875, "f1": 0.8553459119496856, "accuracy": 0.9850579750567797}, "pt_bosque": {"precision": 0.8723404255319149, "recall": 0.8436213991769548, "f1": 0.8577405857740587, "accuracy": 0.9858716128097377}, "sr_set": {"precision": 0.9494117647058824, "recall": 0.9527744982290437, "f1": 0.9510901591043017, "accuracy": 0.990631293231766}, "sk_snk": {"precision": 0.8242229367631297, "recall": 0.8404371584699454, "f1": 0.8322510822510822, "accuracy": 0.9757380653266332}, "sv_talbanken": {"precision": 0.7763713080168776, "recall": 0.9387755102040817, "f1": 0.8498845265588915, "accuracy": 0.9969082789419443}}

+ {"zh_gsd": {"precision": 0.8505154639175257, "recall": 0.8604954367666232, "f1": 0.8554763447828905, "accuracy": 0.9792707292707292}, "zh_gsdsimp": {"precision": 0.8438709677419355, "recall": 0.8571428571428571, "f1": 0.8504551365409622, "accuracy": 0.9788544788544788}, "hr_set": {"precision": 0.9336158192090396, "recall": 0.9422665716322167, "f1": 0.9379212486697411, "accuracy": 0.9921681780708986}, "da_ddt": {"precision": 0.8604651162790697, "recall": 0.8277404921700223, "f1": 0.8437856328392246, "accuracy": 0.9873291429711664}, "en_ewt": {"precision": 0.7928321678321678, "recall": 0.8336397058823529, "f1": 0.8127240143369175, "accuracy": 0.9813125074710125}, "pt_bosque": {"precision": 0.8833189282627485, "recall": 0.8411522633744856, "f1": 0.8617200674536256, "accuracy": 0.9859802927112012}, "sr_set": {"precision": 0.9481132075471698, "recall": 0.9492325855962219, "f1": 0.9486725663716813, "accuracy": 0.9908064092461255}, "sk_snk": {"precision": 0.7928802588996764, "recall": 0.8032786885245902, "f1": 0.7980456026058632, "accuracy": 0.9713410804020101}, "sv_talbanken": {"precision": 0.8457943925233645, "recall": 0.923469387755102, "f1": 0.8829268292682927, "accuracy": 0.9976444030033862}}

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b17284dbdc47a46c624b01a42698c89dde812636572a5924ce8bda92d62475f5
-size 1112965930

 version https://git-lfs.github.com/spec/v1
+oid sha256:d31bc77304f2506a4e74bd52e946c4d1e15f552689161cb025655e60157a12ac
+size 1109902502

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2af879047a05ce26c181c49037265b598df6baec29f1ae74753378b1c252b119
 size 4536

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2aaaf29ee04e5c9b3b9d85d1d55b1168a49f834a24e985655e81602d96dae98
 size 4536