Initial Commit

Browse files

Files changed (5) hide show

README.md +32 -60
config.json +14 -21
eval_result_ner.json +1 -1
model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
 ---
-base_model: microsoft/mdeberta-v3-base
 library_name: transformers
 license: mit
 metrics:
 - precision
 - recall
 - f1
 - accuracy
-tags:
-- generated_from_trainer
 model-index:
 - name: scenario-kd-pre-ner-full_data-univner_full66
   results: []
@@ -19,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
 # scenario-kd-pre-ner-full_data-univner_full66
-This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3678
-- Precision: 0.7980
-- Recall: 0.7563
-- F1: 0.7766
-- Accuracy: 0.9766
 ## Model description
@@ -56,57 +56,29 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step  | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| 1.5996        | 0.5828  | 500   | 0.9922          | 0.3920    | 0.2627 | 0.3146 | 0.9378   |
-| 0.8451        | 1.1655  | 1000  | 0.6919          | 0.5664    | 0.5963 | 0.5810 | 0.9599   |
-| 0.5868        | 1.7483  | 1500  | 0.5945          | 0.6779    | 0.6454 | 0.6612 | 0.9663   |
-| 0.4643        | 2.3310  | 2000  | 0.5350          | 0.7058    | 0.6705 | 0.6877 | 0.9687   |
-| 0.4026        | 2.9138  | 2500  | 0.5062          | 0.7062    | 0.7003 | 0.7033 | 0.9700   |
-| 0.3341        | 3.4965  | 3000  | 0.4982          | 0.6936    | 0.7339 | 0.7132 | 0.9705   |
-| 0.3079        | 4.0793  | 3500  | 0.4864          | 0.7396    | 0.7003 | 0.7194 | 0.9715   |
-| 0.266         | 4.6620  | 4000  | 0.4833          | 0.7660    | 0.6885 | 0.7252 | 0.9722   |
-| 0.2438        | 5.2448  | 4500  | 0.4747          | 0.7459    | 0.7099 | 0.7274 | 0.9724   |
-| 0.2207        | 5.8275  | 5000  | 0.4751          | 0.7579    | 0.6881 | 0.7213 | 0.9716   |
-| 0.2098        | 6.4103  | 5500  | 0.4510          | 0.7454    | 0.7238 | 0.7344 | 0.9728   |
-| 0.1974        | 6.9930  | 6000  | 0.4512          | 0.7473    | 0.7309 | 0.7390 | 0.9729   |
-| 0.1812        | 7.5758  | 6500  | 0.4305          | 0.7440    | 0.7383 | 0.7411 | 0.9736   |
-| 0.1726        | 8.1585  | 7000  | 0.4477          | 0.7544    | 0.7296 | 0.7418 | 0.9731   |
-| 0.1637        | 8.7413  | 7500  | 0.4394          | 0.7449    | 0.7501 | 0.7475 | 0.9743   |
-| 0.1561        | 9.3240  | 8000  | 0.4380          | 0.7669    | 0.7254 | 0.7456 | 0.9739   |
-| 0.1524        | 9.9068  | 8500  | 0.4229          | 0.7694    | 0.7448 | 0.7569 | 0.9751   |
-| 0.1453        | 10.4895 | 9000  | 0.4319          | 0.7654    | 0.7387 | 0.7518 | 0.9746   |
-| 0.1423        | 11.0723 | 9500  | 0.4307          | 0.7766    | 0.7301 | 0.7526 | 0.9743   |
-| 0.1361        | 11.6550 | 10000 | 0.4322          | 0.7788    | 0.7236 | 0.7502 | 0.9741   |
-| 0.1346        | 12.2378 | 10500 | 0.4324          | 0.7841    | 0.7184 | 0.7498 | 0.9740   |
-| 0.128         | 12.8205 | 11000 | 0.4089          | 0.7685    | 0.7599 | 0.7642 | 0.9753   |
-| 0.1271        | 13.4033 | 11500 | 0.4213          | 0.7856    | 0.7143 | 0.7483 | 0.9741   |
-| 0.1247        | 13.9860 | 12000 | 0.4140          | 0.7799    | 0.7332 | 0.7559 | 0.9748   |
-| 0.1213        | 14.5688 | 12500 | 0.4017          | 0.7700    | 0.7549 | 0.7623 | 0.9754   |
-| 0.1172        | 15.1515 | 13000 | 0.4140          | 0.7800    | 0.7399 | 0.7594 | 0.9748   |
-| 0.1178        | 15.7343 | 13500 | 0.3935          | 0.7822    | 0.7490 | 0.7652 | 0.9755   |
-| 0.1154        | 16.3170 | 14000 | 0.4041          | 0.7915    | 0.7244 | 0.7565 | 0.9750   |
-| 0.1137        | 16.8998 | 14500 | 0.3943          | 0.7823    | 0.7498 | 0.7657 | 0.9759   |
-| 0.1115        | 17.4825 | 15000 | 0.3853          | 0.7832    | 0.7537 | 0.7682 | 0.9759   |
-| 0.1089        | 18.0653 | 15500 | 0.3902          | 0.7816    | 0.7539 | 0.7675 | 0.9756   |
-| 0.1068        | 18.6480 | 16000 | 0.3936          | 0.7766    | 0.7605 | 0.7685 | 0.9760   |
-| 0.1074        | 19.2308 | 16500 | 0.3786          | 0.7837    | 0.7660 | 0.7748 | 0.9765   |
-| 0.1036        | 19.8135 | 17000 | 0.3892          | 0.7869    | 0.7331 | 0.7590 | 0.9755   |
-| 0.1058        | 20.3963 | 17500 | 0.3897          | 0.7845    | 0.7513 | 0.7675 | 0.9757   |
-| 0.1026        | 20.9790 | 18000 | 0.3869          | 0.7803    | 0.7553 | 0.7676 | 0.9758   |
-| 0.1021        | 21.5618 | 18500 | 0.3855          | 0.7866    | 0.7478 | 0.7667 | 0.9759   |
-| 0.1007        | 22.1445 | 19000 | 0.3866          | 0.7921    | 0.7266 | 0.7579 | 0.9752   |
-| 0.0999        | 22.7273 | 19500 | 0.3811          | 0.7832    | 0.7552 | 0.7689 | 0.9758   |
-| 0.0994        | 23.3100 | 20000 | 0.3806          | 0.7896    | 0.7485 | 0.7685 | 0.9761   |
-| 0.0985        | 23.8928 | 20500 | 0.3839          | 0.7909    | 0.7511 | 0.7705 | 0.9762   |
-| 0.0972        | 24.4755 | 21000 | 0.3742          | 0.7881    | 0.7513 | 0.7692 | 0.9761   |
-| 0.0974        | 25.0583 | 21500 | 0.3763          | 0.7942    | 0.7400 | 0.7662 | 0.9756   |
-| 0.0957        | 25.6410 | 22000 | 0.3766          | 0.7956    | 0.7534 | 0.7739 | 0.9764   |
-| 0.0961        | 26.2238 | 22500 | 0.3769          | 0.7970    | 0.7439 | 0.7696 | 0.9757   |
-| 0.0958        | 26.8065 | 23000 | 0.3752          | 0.7977    | 0.7449 | 0.7704 | 0.9759   |
-| 0.0955        | 27.3893 | 23500 | 0.3708          | 0.7887    | 0.7576 | 0.7728 | 0.9765   |
-| 0.0942        | 27.9720 | 24000 | 0.3709          | 0.7929    | 0.7503 | 0.7710 | 0.9760   |
-| 0.0941        | 28.5548 | 24500 | 0.3742          | 0.7915    | 0.7526 | 0.7715 | 0.9761   |
-| 0.0946        | 29.1375 | 25000 | 0.3722          | 0.7970    | 0.7553 | 0.7756 | 0.9766   |
-| 0.0939        | 29.7203 | 25500 | 0.3678          | 0.7980    | 0.7563 | 0.7766 | 0.9766   |
 ### Framework versions

 ---
 library_name: transformers
 license: mit
+base_model: FacebookAI/xlm-roberta-base
+tags:
+- generated_from_trainer
 metrics:
 - precision
 - recall
 - f1
 - accuracy
 model-index:
 - name: scenario-kd-pre-ner-full_data-univner_full66
   results: []
 # scenario-kd-pre-ner-full_data-univner_full66
+This model is a fine-tuned version of [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5549
+- Precision: 0.7660
+- Recall: 0.7319
+- F1: 0.7485
+- Accuracy: 0.9802
 ## Model description
 | Training Loss | Epoch   | Step  | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| 1.2484        | 1.2755  | 500   | 0.8737          | 0.6792    | 0.5631 | 0.6157 | 0.9709   |
+| 0.6459        | 2.5510  | 1000  | 0.7190          | 0.6926    | 0.6739 | 0.6831 | 0.9771   |
+| 0.5071        | 3.8265  | 1500  | 0.6650          | 0.7076    | 0.6863 | 0.6968 | 0.9773   |
+| 0.4233        | 5.1020  | 2000  | 0.6513          | 0.6933    | 0.7019 | 0.6975 | 0.9775   |
+| 0.3655        | 6.3776  | 2500  | 0.6252          | 0.7421    | 0.6822 | 0.7109 | 0.9778   |
+| 0.3251        | 7.6531  | 3000  | 0.6172          | 0.7412    | 0.7174 | 0.7291 | 0.9791   |
+| 0.2963        | 8.9286  | 3500  | 0.6204          | 0.7143    | 0.6677 | 0.6902 | 0.9773   |
+| 0.2699        | 10.2041 | 4000  | 0.5919          | 0.7310    | 0.7288 | 0.7299 | 0.9792   |
+| 0.2469        | 11.4796 | 4500  | 0.6168          | 0.7560    | 0.6863 | 0.7195 | 0.9788   |
+| 0.2313        | 12.7551 | 5000  | 0.5871          | 0.7353    | 0.7133 | 0.7241 | 0.9792   |
+| 0.2148        | 14.0306 | 5500  | 0.5947          | 0.7358    | 0.7122 | 0.7238 | 0.9794   |
+| 0.2022        | 15.3061 | 6000  | 0.5830          | 0.7298    | 0.7019 | 0.7156 | 0.9790   |
+| 0.1933        | 16.5816 | 6500  | 0.5734          | 0.7427    | 0.7143 | 0.7282 | 0.9794   |
+| 0.185         | 17.8571 | 7000  | 0.5814          | 0.7352    | 0.6957 | 0.7149 | 0.9792   |
+| 0.1767        | 19.1327 | 7500  | 0.5670          | 0.7516    | 0.7236 | 0.7373 | 0.9797   |
+| 0.1688        | 20.4082 | 8000  | 0.5770          | 0.7551    | 0.6957 | 0.7241 | 0.9791   |
+| 0.1634        | 21.6837 | 8500  | 0.5621          | 0.7443    | 0.7143 | 0.7290 | 0.9792   |
+| 0.1592        | 22.9592 | 9000  | 0.5691          | 0.7495    | 0.7091 | 0.7287 | 0.9790   |
+| 0.1538        | 24.2347 | 9500  | 0.5557          | 0.7481    | 0.7195 | 0.7335 | 0.9802   |
+| 0.1513        | 25.5102 | 10000 | 0.5687          | 0.7446    | 0.7091 | 0.7264 | 0.9791   |
+| 0.1489        | 26.7857 | 10500 | 0.5554          | 0.7623    | 0.7236 | 0.7424 | 0.9801   |
+| 0.145         | 28.0612 | 11000 | 0.5488          | 0.7564    | 0.7329 | 0.7445 | 0.9804   |
+| 0.144         | 29.3367 | 11500 | 0.5549          | 0.7660    | 0.7319 | 0.7485 | 0.9802   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,9 +1,12 @@
 {
-  "_name_or_path": "microsoft/mdeberta-v3-base",
   "architectures": [
-    "DebertaForTokenClassificationKD"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -27,27 +30,17 @@
     "LABEL_5": 5,
     "LABEL_6": 6
   },
-  "layer_norm_eps": 1e-07,
-  "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
   "num_hidden_layers": 6,
-  "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
-  "type_vocab_size": 0,
-  "vocab_size": 251000
 }

 {
+  "_name_or_path": "FacebookAI/xlm-roberta-base",
   "architectures": [
+    "XLMRobertaForTokenClassificationKD"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
     "LABEL_5": 5,
     "LABEL_6": 6
   },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
   "num_attention_heads": 12,
   "num_hidden_layers": 6,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
 }

eval_result_ner.json CHANGED Viewed

@@ -1 +1 @@

- {"ceb_gja": {"precision": 0.6206896551724138, "recall": 0.7346938775510204, "f1": 0.6728971962616822, "accuracy": 0.9698841698841699}, "en_pud": {"precision": 0.7862903225806451, "recall": 0.7255813953488373, "f1": 0.7547169811320755, "accuracy": 0.9762939176426143}, "de_pud": {"precision": 0.72579001019368, "recall": 0.685274302213667, "f1": 0.704950495049505, "accuracy": 0.9687309549481975}, "pt_pud": {"precision": 0.7682317682317682, "recall": 0.6997270245677889, "f1": 0.7323809523809524, "accuracy": 0.9732985858931089}, "ru_pud": {"precision": 0.6291585127201565, "recall": 0.6206563706563707, "f1": 0.6248785228377066, "accuracy": 0.9628519762335314}, "sv_pud": {"precision": 0.8158179848320694, "recall": 0.7317784256559767, "f1": 0.7715163934426229, "accuracy": 0.9771440553575172}, "tl_trg": {"precision": 0.5517241379310345, "recall": 0.6956521739130435, "f1": 0.6153846153846154, "accuracy": 0.9741144414168937}, "tl_ugnayan": {"precision": 0.575, "recall": 0.696969696969697, "f1": 0.6301369863013698, "accuracy": 0.9699179580674567}, "zh_gsd": {"precision": 0.79702300405954, "recall": 0.7679269882659713, "f1": 0.7822045152722444, "accuracy": 0.972027972027972}, "zh_gsdsimp": {"precision": 0.8299039780521262, "recall": 0.7929226736566186, "f1": 0.8109919571045576, "accuracy": 0.9717782217782218}, "hr_set": {"precision": 0.872624912033779, "recall": 0.8838203848895224, "f1": 0.878186968838527, "accuracy": 0.984830997526793}, "da_ddt": {"precision": 0.793010752688172, "recall": 0.6599552572706935, "f1": 0.7203907203907204, "accuracy": 0.9788486481093485}, "en_ewt": {"precision": 0.7899901864573111, "recall": 0.7398897058823529, "f1": 0.7641196013289036, "accuracy": 0.9761326054906961}, "pt_bosque": {"precision": 0.7790927021696252, "recall": 
0.6502057613168725, "f1": 0.708838043965904, "accuracy": 0.9713809592812636}, "sr_set": {"precision": 0.8906064209274673, "recall": 0.8842975206611571, "f1": 0.8874407582938388, "accuracy": 0.9832764206286665}, "sk_snk": {"precision": 0.7134146341463414, "recall": 0.639344262295082, "f1": 0.6743515850144092, "accuracy": 0.9583071608040201}, "sv_talbanken": {"precision": 0.8382352941176471, "recall": 0.8724489795918368, "f1": 0.855, "accuracy": 0.9972518035039505}}

+ {"ceb_gja": {"precision": 0.391304347826087, "recall": 0.5510204081632653, "f1": 0.4576271186440678, "accuracy": 0.9467181467181467}, "en_pud": {"precision": 0.7514734774066798, "recall": 0.7116279069767442, "f1": 0.73100812231247, "accuracy": 0.9743577635058557}, "de_pud": {"precision": 0.7108307045215563, "recall": 0.6506256015399422, "f1": 0.6793969849246232, "accuracy": 0.9660119075523885}, "pt_pud": {"precision": 0.7792746113989637, "recall": 0.6842584167424932, "f1": 0.7286821705426357, "accuracy": 0.9727431964796855}, "ru_pud": {"precision": 0.630939226519337, "recall": 0.5511583011583011, "f1": 0.5883565172591447, "accuracy": 0.9569103590803409}, "sv_pud": {"precision": 0.7871878393051032, "recall": 0.7045675413022352, "f1": 0.7435897435897436, "accuracy": 0.9741560075487523}, "tl_trg": {"precision": 0.5833333333333334, "recall": 0.6086956521739131, "f1": 0.5957446808510638, "accuracy": 0.9768392370572208}, "tl_ugnayan": {"precision": 0.5, "recall": 0.5757575757575758, "f1": 0.5352112676056339, "accuracy": 0.9626253418413856}, "zh_gsd": {"precision": 0.47767857142857145, "recall": 0.1395045632333768, "f1": 0.21594349142280525, "accuracy": 0.90001665001665}, "zh_gsdsimp": {"precision": 0.43315508021390375, "recall": 0.10615989515072084, "f1": 0.1705263157894737, "accuracy": 0.8986013986013986}, "hr_set": {"precision": 0.7674223341729639, "recall": 0.6514611546685674, "f1": 0.7047031611410948, "accuracy": 0.9631492168178071}, "da_ddt": {"precision": 0.7638888888888888, "recall": 0.6152125279642058, "f1": 0.6815365551425031, "accuracy": 0.9760550733313379}, "en_ewt": {"precision": 0.7879396984924623, "recall": 0.7205882352941176, "f1": 0.7527604416706672, "accuracy": 0.9752958520938758}, "pt_bosque": {"precision": 0.733402489626556, "recall": 0.5818930041152264, "f1": 0.6489215236346948, "accuracy": 0.9633024199391392}, "sr_set": {"precision": 0.7739251040221914, "recall": 0.6587957497048406, "f1": 0.711734693877551, "accuracy": 0.9558707643814027}, 
"sk_snk": {"precision": 0.6234817813765182, "recall": 0.5049180327868853, "f1": 0.5579710144927537, "accuracy": 0.9392273869346733}, "sv_talbanken": {"precision": 0.8009950248756219, "recall": 0.8214285714285714, "f1": 0.8110831234256928, "accuracy": 0.9961721548805025}}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d48a44e848c805d0e5386c482cd38378cac22e8aac8634f20ffcf93023ba43d
-size 944366708

 version https://git-lfs.github.com/spec/v1
+oid sha256:524137227bc0f44fa866806452b581fbb218f41a5416f7c31180ebda4bc467b7
+size 939737140

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a286fa5c4471b76d12b9555b09649216889aa5b66460a6692ed205046d18cae6
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:0788c5ff239352ce6a821267905d965046999856d55e0b38e1d269163fb3cca4
 size 5304