ai-maker-space/llama381binstruct_summarize_short

Browse files

Files changed (5)

README.md +23 -23
adapter_config.json +5 -5
adapter_model.safetensors +1 -1
runs/Sep14_18-54-16_ac9dde741961/events.out.tfevents.1726340059.ac9dde741961.1653.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.5440
 ## Model description
@@ -50,28 +50,28 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 1.6891        | 1.25  | 25   | 1.5006          |
-| 0.7337        | 2.5   | 50   | 1.6005          |
-| 0.4348        | 3.75  | 75   | 1.7261          |
-| 0.1768        | 5.0   | 100  | 1.8789          |
-| 0.0872        | 6.25  | 125  | 2.1020          |
-| 0.0293        | 7.5   | 150  | 2.2061          |
-| 0.0167        | 8.75  | 175  | 2.2139          |
-| 0.0123        | 10.0  | 200  | 2.2191          |
-| 0.0157        | 11.25 | 225  | 2.2217          |
-| 0.006         | 12.5  | 250  | 2.2965          |
-| 0.0045        | 13.75 | 275  | 2.3335          |
-| 0.004         | 15.0  | 300  | 2.4153          |
-| 0.0052        | 16.25 | 325  | 2.4445          |
-| 0.0021        | 17.5  | 350  | 2.4584          |
-| 0.0022        | 18.75 | 375  | 2.4983          |
-| 0.0019        | 20.0  | 400  | 2.5182          |
-| 0.0018        | 21.25 | 425  | 2.5310          |
-| 0.0016        | 22.5  | 450  | 2.5378          |
-| 0.0016        | 23.75 | 475  | 2.5424          |
-| 0.0015        | 25.0  | 500  | 2.5440          |
 ### Framework versions

 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.9773
 ## Model description
 ### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 1.7658        | 1.3158  | 25   | 1.2514          |
+| 0.798         | 2.6316  | 50   | 1.2960          |
+| 0.4432        | 3.9474  | 75   | 1.3901          |
+| 0.1598        | 5.2632  | 100  | 1.6723          |
+| 0.0867        | 6.5789  | 125  | 1.7080          |
+| 0.0397        | 7.8947  | 150  | 1.7470          |
+| 0.0356        | 9.2105  | 175  | 1.7648          |
+| 0.0225        | 10.5263 | 200  | 1.7194          |
+| 0.0122        | 11.8421 | 225  | 1.7498          |
+| 0.0055        | 13.1579 | 250  | 1.8408          |
+| 0.0034        | 14.4737 | 275  | 1.9249          |
+| 0.003         | 15.7895 | 300  | 1.8917          |
+| 0.0027        | 17.1053 | 325  | 1.8668          |
+| 0.0023        | 18.4211 | 350  | 1.9104          |
+| 0.0023        | 19.7368 | 375  | 1.9403          |
+| 0.0022        | 21.0526 | 400  | 1.9561          |
+| 0.0018        | 22.3684 | 425  | 1.9670          |
+| 0.0019        | 23.6842 | 450  | 1.9720          |
+| 0.002         | 25.0    | 475  | 1.9760          |
+| 0.0015        | 26.3158 | 500  | 1.9773          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "down_proj",
     "up_proj",
-    "q_proj",
-    "v_proj",
     "gate_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "up_proj",
     "gate_proj",
+    "o_proj",
+    "v_proj",
+    "q_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1d8f5898b200bdf17147fc72b84ac003cd09139849b695d0bbebbdb8c1f2473
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e0fdf7c25c3363d03cda94783247bce2e52c4e35d78b4f0f76547e214a4161f
 size 167832240

runs/Sep14_18-54-16_ac9dde741961/events.out.tfevents.1726340059.ac9dde741961.1653.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d141092c3a2cd06ffabb18f258e3fca23db8d8e98497fec387f3997d27d4dc0c
+size 22329

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5d91e2931b6b0dfb3bb93f088fb7f86b4c600d4ecb400b3310fd7ff9cd0844c
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4c0c425244a8c14fbd10ad5fe9c437817992f39a335122fde1ed9eb9aa6d247
 size 5496