Cheselle committed on
Commit
f862f87
1 Parent(s): da67096

ai-maker-space/llama381binstruct_summarize_short

Browse files
README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 2.5440
24
 
25
  ## Model description
26
 
@@ -50,28 +50,28 @@ The following hyperparameters were used during training:
50
 
51
  ### Training results
52
 
53
- | Training Loss | Epoch | Step | Validation Loss |
54
- |:-------------:|:-----:|:----:|:---------------:|
55
- | 1.6891 | 1.25 | 25 | 1.5006 |
56
- | 0.7337 | 2.5 | 50 | 1.6005 |
57
- | 0.4348 | 3.75 | 75 | 1.7261 |
58
- | 0.1768 | 5.0 | 100 | 1.8789 |
59
- | 0.0872 | 6.25 | 125 | 2.1020 |
60
- | 0.0293 | 7.5 | 150 | 2.2061 |
61
- | 0.0167 | 8.75 | 175 | 2.2139 |
62
- | 0.0123 | 10.0 | 200 | 2.2191 |
63
- | 0.0157 | 11.25 | 225 | 2.2217 |
64
- | 0.006 | 12.5 | 250 | 2.2965 |
65
- | 0.0045 | 13.75 | 275 | 2.3335 |
66
- | 0.004 | 15.0 | 300 | 2.4153 |
67
- | 0.0052 | 16.25 | 325 | 2.4445 |
68
- | 0.0021 | 17.5 | 350 | 2.4584 |
69
- | 0.0022 | 18.75 | 375 | 2.4983 |
70
- | 0.0019 | 20.0 | 400 | 2.5182 |
71
- | 0.0018 | 21.25 | 425 | 2.5310 |
72
- | 0.0016 | 22.5 | 450 | 2.5378 |
73
- | 0.0016 | 23.75 | 475 | 2.5424 |
74
- | 0.0015 | 25.0 | 500 | 2.5440 |
75
 
76
 
77
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 1.9773
24
 
25
  ## Model description
26
 
 
50
 
51
  ### Training results
52
 
53
+ | Training Loss | Epoch | Step | Validation Loss |
54
+ |:-------------:|:-------:|:----:|:---------------:|
55
+ | 1.7658 | 1.3158 | 25 | 1.2514 |
56
+ | 0.798 | 2.6316 | 50 | 1.2960 |
57
+ | 0.4432 | 3.9474 | 75 | 1.3901 |
58
+ | 0.1598 | 5.2632 | 100 | 1.6723 |
59
+ | 0.0867 | 6.5789 | 125 | 1.7080 |
60
+ | 0.0397 | 7.8947 | 150 | 1.7470 |
61
+ | 0.0356 | 9.2105 | 175 | 1.7648 |
62
+ | 0.0225 | 10.5263 | 200 | 1.7194 |
63
+ | 0.0122 | 11.8421 | 225 | 1.7498 |
64
+ | 0.0055 | 13.1579 | 250 | 1.8408 |
65
+ | 0.0034 | 14.4737 | 275 | 1.9249 |
66
+ | 0.003 | 15.7895 | 300 | 1.8917 |
67
+ | 0.0027 | 17.1053 | 325 | 1.8668 |
68
+ | 0.0023 | 18.4211 | 350 | 1.9104 |
69
+ | 0.0023 | 19.7368 | 375 | 1.9403 |
70
+ | 0.0022 | 21.0526 | 400 | 1.9561 |
71
+ | 0.0018 | 22.3684 | 425 | 1.9670 |
72
+ | 0.0019 | 23.6842 | 450 | 1.9720 |
73
+ | 0.002 | 25.0 | 475 | 1.9760 |
74
+ | 0.0015 | 26.3158 | 500 | 1.9773 |
75
 
76
 
77
  ### Framework versions
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
- "down_proj",
25
  "up_proj",
26
- "q_proj",
27
- "v_proj",
28
  "gate_proj",
29
- "k_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "k_proj",
 
24
  "up_proj",
 
 
25
  "gate_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "q_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1d8f5898b200bdf17147fc72b84ac003cd09139849b695d0bbebbdb8c1f2473
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0fdf7c25c3363d03cda94783247bce2e52c4e35d78b4f0f76547e214a4161f
3
  size 167832240
runs/Sep14_18-54-16_ac9dde741961/events.out.tfevents.1726340059.ac9dde741961.1653.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d141092c3a2cd06ffabb18f258e3fca23db8d8e98497fec387f3997d27d4dc0c
3
+ size 22329
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5d91e2931b6b0dfb3bb93f088fb7f86b4c600d4ecb400b3310fd7ff9cd0844c
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c0c425244a8c14fbd10ad5fe9c437817992f39a335122fde1ed9eb9aa6d247
3
  size 5496