Training in progress, step 500

Browse files

Files changed (9) hide show

README.md +29 -49
added_tokens.json +2 -1
config.json +1 -1
model.safetensors +2 -2
runs/Feb17_13-03-54_6fd38e49e7fc/events.out.tfevents.1708175035.6fd38e49e7fc.23634.0 +2 -2
runs/Feb19_09-07-25_3e139e734b2e/events.out.tfevents.1708333646.3e139e734b2e.332.0 +3 -0
tokenizer.json +18 -0
tokenizer_config.json +16 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,12 +17,12 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1568
-- Rouge1: 99.7679
-- Rouge2: 99.7155
-- Rougel: 99.7692
-- Rougelsum: 99.7688
-- Gen Len: 93.9417
 ## Model description
@@ -47,53 +47,33 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 40
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum | Gen Len  |
-|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:--------:|
-| No log        | 1.0   | 90   | 3.1730          | 5.1377  | 0.0144  | 4.962   | 4.9589    | 106.5861 |
-| No log        | 2.0   | 180  | 1.6873          | 9.7339  | 2.8984  | 8.7242  | 8.7461    | 152.4556 |
-| No log        | 3.0   | 270  | 0.7710          | 24.7647 | 16.6514 | 23.4727 | 23.4525   | 238.6028 |
-| No log        | 4.0   | 360  | 0.5650          | 29.9311 | 21.6079 | 28.0307 | 28.0546   | 268.2028 |
-| No log        | 5.0   | 450  | 0.4630          | 30.8934 | 22.2884 | 29.0524 | 29.0047   | 249.0417 |
-| 2.0823        | 6.0   | 540  | 0.3479          | 27.7072 | 21.4162 | 26.321  | 26.3764   | 181.4917 |
-| 2.0823        | 7.0   | 630  | 0.2689          | 83.7623 | 80.2612 | 82.3042 | 82.4403   | 75.4028  |
-| 2.0823        | 8.0   | 720  | 0.2579          | 89.9406 | 87.4565 | 88.8496 | 88.893    | 77.4222  |
-| 2.0823        | 9.0   | 810  | 0.2505          | 90.8365 | 89.2732 | 90.2566 | 90.2941   | 78.7028  |
-| 2.0823        | 10.0  | 900  | 0.2439          | 91.8288 | 90.4645 | 91.2903 | 91.3323   | 80.575   |
-| 2.0823        | 11.0  | 990  | 0.2387          | 93.3381 | 92.3606 | 93.0735 | 93.0822   | 83.6472  |
-| 0.38          | 12.0  | 1080 | 0.2356          | 93.0035 | 92.1176 | 92.7918 | 92.7935   | 82.4472  |
-| 0.38          | 13.0  | 1170 | 0.2296          | 95.1977 | 94.528  | 95.0334 | 95.0361   | 88.2306  |
-| 0.38          | 14.0  | 1260 | 0.2257          | 95.163  | 94.6295 | 95.0621 | 95.056    | 87.2389  |
-| 0.38          | 15.0  | 1350 | 0.2211          | 96.8032 | 96.4292 | 96.7764 | 96.7479   | 91.2472  |
-| 0.38          | 16.0  | 1440 | 0.2171          | 97.2667 | 96.9591 | 97.212  | 97.1999   | 91.3861  |
-| 0.2699        | 17.0  | 1530 | 0.2123          | 98.324  | 98.0633 | 98.2562 | 98.2453   | 92.125   |
-| 0.2699        | 18.0  | 1620 | 0.2071          | 98.545  | 98.3036 | 98.4795 | 98.4764   | 92.5639  |
-| 0.2699        | 19.0  | 1710 | 0.2023          | 98.6151 | 98.39   | 98.5467 | 98.5375   | 91.7111  |
-| 0.2699        | 20.0  | 1800 | 0.1977          | 99.0931 | 98.8939 | 99.0408 | 99.0116   | 93.1306  |
-| 0.2699        | 21.0  | 1890 | 0.1942          | 99.3849 | 99.2668 | 99.399  | 99.388    | 94.4222  |
-| 0.2699        | 22.0  | 1980 | 0.1900          | 99.1136 | 99.0255 | 99.1074 | 99.0893   | 92.5528  |
-| 0.236         | 23.0  | 2070 | 0.1861          | 99.2462 | 99.1628 | 99.2568 | 99.2378   | 92.9806  |
-| 0.236         | 24.0  | 2160 | 0.1828          | 99.3348 | 99.2435 | 99.3313 | 99.3233   | 93.0083  |
-| 0.236         | 25.0  | 2250 | 0.1792          | 99.6636 | 99.5859 | 99.6665 | 99.6692   | 94.2806  |
-| 0.236         | 26.0  | 2340 | 0.1762          | 99.7247 | 99.6578 | 99.7251 | 99.7271   | 94.1417  |
-| 0.236         | 27.0  | 2430 | 0.1738          | 99.4961 | 99.4165 | 99.5065 | 99.5036   | 93.5083  |
-| 0.2128        | 28.0  | 2520 | 0.1710          | 99.7151 | 99.6645 | 99.7248 | 99.7198   | 93.7944  |
-| 0.2128        | 29.0  | 2610 | 0.1687          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9444  |
-| 0.2128        | 30.0  | 2700 | 0.1668          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9444  |
-| 0.2128        | 31.0  | 2790 | 0.1647          | 99.7602 | 99.6983 | 99.7552 | 99.7602   | 93.9417  |
-| 0.2128        | 32.0  | 2880 | 0.1629          | 99.7602 | 99.6983 | 99.7552 | 99.7602   | 93.9444  |
-| 0.2128        | 33.0  | 2970 | 0.1614          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9444  |
-| 0.1981        | 34.0  | 3060 | 0.1600          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9444  |
-| 0.1981        | 35.0  | 3150 | 0.1591          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9444  |
-| 0.1981        | 36.0  | 3240 | 0.1583          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9361  |
-| 0.1981        | 37.0  | 3330 | 0.1577          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9417  |
-| 0.1981        | 38.0  | 3420 | 0.1572          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9417  |
-| 0.1905        | 39.0  | 3510 | 0.1569          | 99.8039 | 99.7623 | 99.8048 | 99.8038   | 94.0083  |
-| 0.1905        | 40.0  | 3600 | 0.1568          | 99.7679 | 99.7155 | 99.7692 | 99.7688   | 93.9417  |
 ### Framework versions

 This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.2397
+- Rouge1: 39.9145
+- Rouge2: 33.183
+- Rougel: 39.9484
+- Rougelsum: 39.9376
+- Gen Len: 19.0
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 20
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum | Gen Len |
+|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
+| No log        | 1.0   | 90   | 3.4654          | 9.8007  | 0.2231  | 9.4993  | 9.501     | 18.6889 |
+| No log        | 2.0   | 180  | 2.1906          | 10.4443 | 0.2158  | 9.8828  | 9.8755    | 18.8972 |
+| No log        | 3.0   | 270  | 1.3067          | 11.7213 | 0.5145  | 10.913  | 10.9345   | 18.8528 |
+| No log        | 4.0   | 360  | 0.7369          | 14.9807 | 1.4227  | 13.7179 | 13.7131   | 18.8333 |
+| No log        | 5.0   | 450  | 0.6143          | 19.8511 | 4.6089  | 18.0244 | 17.9558   | 18.7083 |
+| 2.447         | 6.0   | 540  | 0.5312          | 23.1026 | 8.6515  | 20.7866 | 20.757    | 18.7139 |
+| 2.447         | 7.0   | 630  | 0.4782          | 21.9961 | 9.3626  | 19.7651 | 19.7488   | 18.5944 |
+| 2.447         | 8.0   | 720  | 0.4365          | 16.4406 | 6.5397  | 14.9694 | 14.9816   | 18.6639 |
+| 2.447         | 9.0   | 810  | 0.3603          | 6.9337  | 3.7397  | 6.6337  | 6.6621    | 18.9028 |
+| 2.447         | 10.0  | 900  | 0.2696          | 24.2884 | 19.0601 | 24.1044 | 24.1488   | 18.9694 |
+| 2.447         | 11.0  | 990  | 0.2590          | 39.2002 | 32.3107 | 39.202  | 39.1928   | 19.0    |
+| 0.572         | 12.0  | 1080 | 0.2546          | 39.0083 | 32.1464 | 39.0296 | 38.9988   | 19.0    |
+| 0.572         | 13.0  | 1170 | 0.2486          | 39.519  | 32.7114 | 39.5614 | 39.5391   | 19.0    |
+| 0.572         | 14.0  | 1260 | 0.2465          | 39.589  | 32.8014 | 39.6298 | 39.6092   | 19.0    |
+| 0.572         | 15.0  | 1350 | 0.2444          | 39.5831 | 32.7959 | 39.6266 | 39.6123   | 19.0    |
+| 0.572         | 16.0  | 1440 | 0.2427          | 39.7174 | 32.9525 | 39.7513 | 39.7311   | 19.0    |
+| 0.3469        | 17.0  | 1530 | 0.2412          | 39.8478 | 33.0999 | 39.8901 | 39.8708   | 19.0    |
+| 0.3469        | 18.0  | 1620 | 0.2401          | 39.8528 | 33.1031 | 39.8819 | 39.873    | 19.0    |
+| 0.3469        | 19.0  | 1710 | 0.2398          | 39.9283 | 33.1964 | 39.9502 | 39.9533   | 19.0    |
+| 0.3469        | 20.0  | 1800 | 0.2397          | 39.9145 | 33.183  | 39.9484 | 39.9376   | 19.0    |
 ### Framework versions

added_tokens.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "{": 32100,
-  "}": 32101
 }

 {
+  "<": 32101,
   "{": 32100,
+  "}": 32102
 }

config.json CHANGED Viewed

@@ -57,5 +57,5 @@
   "torch_dtype": "float32",
   "transformers_version": "4.35.2",
   "use_cache": true,
-  "vocab_size": 32102
 }

   "torch_dtype": "float32",
   "transformers_version": "4.35.2",
   "use_cache": true,
+  "vocab_size": 32103
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b916738e25bba0e8f15c00b59260a204c0d693a15590fdbfa1e3f2bc55c511a
-size 241988648

 version https://git-lfs.github.com/spec/v1
+oid sha256:1eb4dc6fba72057b3cca4caa08f0ea1b3ed9b9fd7fa327344b1ab52701bd47e4
+size 241990696

runs/Feb17_13-03-54_6fd38e49e7fc/events.out.tfevents.1708175035.6fd38e49e7fc.23634.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07ddabeb86d844a16896abe4215de117b96774c276fd9c13a223784d885d1863
-size 27656

 version https://git-lfs.github.com/spec/v1
+oid sha256:7bdd99d5d48223b7dd1ffe5561370b234e415ea3884b6d2f54f69dcbd9c4115c
+size 14074

runs/Feb19_09-07-25_3e139e734b2e/events.out.tfevents.1708333646.3e139e734b2e.332.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9b02277c64ec7ccdb6d62aad46818efa4a2c61fd96a9fa7d5d8c0d0f54130dc
+size 6420

tokenizer.json CHANGED Viewed

@@ -44,6 +44,15 @@
       "normalized": true,
       "special": false
     },
     {
       "id": 6306,
       "content": "[",
@@ -964,6 +973,15 @@
     },
     {
       "id": 32101,
       "content": "}",
       "single_word": false,
       "lstrip": false,

       "normalized": true,
       "special": false
     },
+    {
+      "id": 3155,
+      "content": ">",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
     {
       "id": 6306,
       "content": "[",
     },
     {
       "id": 32101,
+      "content": "<",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 32102,
       "content": "}",
       "single_word": false,
       "lstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -32,6 +32,14 @@
       "single_word": false,
       "special": false
     },
     "6306": {
       "content": "[",
       "lstrip": false,
@@ -849,6 +857,14 @@
       "special": false
     },
     "32101": {
       "content": "}",
       "lstrip": false,
       "normalized": true,

       "single_word": false,
       "special": false
     },
+    "3155": {
+      "content": ">",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
     "6306": {
       "content": "[",
       "lstrip": false,
       "special": false
     },
     "32101": {
+      "content": "<",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32102": {
       "content": "}",
       "lstrip": false,
       "normalized": true,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1173c46a03dc14294951225de74c59be3802e8375543ce0e00ef5319dca371d2
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:0409db644164a757bfff2c00eb548d6754af8d407fc21bed949472163c3941ef
 size 4728