chargoddard committed
Commit 35fa97e (parent: 56f8873)

Upload folder using huggingface_hub

README.md CHANGED
@@ -17,7 +17,7 @@ This is a merge of pre-trained language models created using [mergekit](https://
 ## Merge Details
 ### Merge Method
 
-This model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [Undi95/Meta-Llama-3-8B-hf](https://huggingface.co/Undi95/Meta-Llama-3-8B-hf) as a base.
+This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method using [Undi95/Meta-Llama-3-8B-hf](https://huggingface.co/Undi95/Meta-Llama-3-8B-hf) as a base.
 
 ### Models Merged
 
@@ -31,24 +31,37 @@ The following models were included in the merge:
 The following YAML configuration was used to produce this model:
 
 ```yaml
+# Mergekit Configuration for Model Merge
+
+# Base model (primary reference model)
+base_model: Undi95/Meta-Llama-3-8B-hf
+
+# Merge method (using TIES for intelligent merging)
+merge_method: ties
+
+# Specific model configurations
 models:
   - model: Sao10K/L3-8B-Stheno-v3.2
     parameters:
       density: 0.4
-      weight: 0.4
+      weight: 0.25
+
   - model: ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.2
     parameters:
-      density: 0.4
-      weight: 0.4
+      density: 0.5
+      weight: 0.35
+
   - model: O1-OPEN/OpenO1-LLama-8B-v0.1
     parameters:
-      density: 0.2
-      weight: 0.2
+      density: 0.3
+      weight: 0.4
 
-merge_method: model_stock
-base_model: Undi95/Meta-Llama-3-8B-hf
+# Merge parameters
 parameters:
-  normalize: false
+  normalize: true
   int8_mask: true
-dtype: float16
+  dtype: 16 # Explicitly using 16-bit float representation
+
+# Tokenizer source (use base model's tokenizer)
+tokenizer_source: base
 ```
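For context on the `density` and `weight` values in the new configuration: density controls how much of each model's task vector (its delta from the base) is kept, and weight scales its contribution. Below is a minimal NumPy sketch of the trim / elect-sign / disjoint-merge steps described in the TIES paper, applied to a single flat tensor; the function name and the simplified weighted averaging are illustrative assumptions, not mergekit's implementation.

```python
import numpy as np

def ties_merge(base, finetuned, densities, weights):
    # Task vectors: each fine-tuned model minus the shared base.
    deltas = [ft - base for ft in finetuned]

    # Trim: keep only the top `density` fraction of entries by magnitude.
    trimmed = []
    for delta, density in zip(deltas, densities):
        k = max(1, int(round(density * delta.size)))
        threshold = np.sort(np.abs(delta))[-k]
        trimmed.append(np.where(np.abs(delta) >= threshold, delta, 0.0))

    # Elect sign: per parameter, take the sign of the weighted sum of trimmed deltas.
    weighted = [w * t for w, t in zip(weights, trimmed)]
    elected = np.sign(sum(weighted))

    # Disjoint merge: average only contributions whose sign matches the elected sign.
    agreeing = [np.where(np.sign(t) == elected, wt, 0.0)
                for t, wt in zip(trimmed, weighted)]
    counts = sum(((np.sign(t) == elected) & (t != 0)).astype(int) for t in trimmed)
    merged_delta = sum(agreeing) / np.maximum(counts, 1)

    return base + merged_delta

# Toy run with the densities and weights from the config above.
rng = np.random.default_rng(0)
base = rng.normal(size=8)
tuned = [base + rng.normal(scale=0.1, size=8) for _ in range(3)]
print(ties_merge(base, tuned, densities=[0.4, 0.5, 0.3], weights=[0.25, 0.35, 0.4]))
```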
config.json CHANGED
@@ -23,7 +23,7 @@
   "rope_scaling": null,
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.46.2",
   "use_cache": true,
   "vocab_size": 128256
mergekit_config.yml CHANGED
@@ -1,20 +1,33 @@
+# Mergekit Configuration for Model Merge
+
+# Base model (primary reference model)
+base_model: Undi95/Meta-Llama-3-8B-hf
+
+# Merge method (using TIES for intelligent merging)
+merge_method: ties
+
+# Specific model configurations
 models:
   - model: Sao10K/L3-8B-Stheno-v3.2
     parameters:
       density: 0.4
-      weight: 0.4
+      weight: 0.25
+
   - model: ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.2
     parameters:
-      density: 0.4
-      weight: 0.4
+      density: 0.5
+      weight: 0.35
+
   - model: O1-OPEN/OpenO1-LLama-8B-v0.1
     parameters:
-      density: 0.2
-      weight: 0.2
+      density: 0.3
+      weight: 0.4
 
-merge_method: model_stock
-base_model: Undi95/Meta-Llama-3-8B-hf
+# Merge parameters
 parameters:
-  normalize: false
+  normalize: true
   int8_mask: true
-dtype: float16
+  dtype: 16 # Explicitly using 16-bit float representation
+
+# Tokenizer source (use base model's tokenizer)
+tokenizer_source: base
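A small note on `normalize: true` in the merge parameters: mergekit is generally understood to rescale the per-model weights so they sum to one, and with the weights in this config that is already the case. The toy check below illustrates that reading; the normalization behavior is stated here as an assumption, not taken from the diff.

```python
# Per-model weights from the new mergekit_config.yml.
weights = {
    "L3-8B-Stheno-v3.2": 0.25,
    "Llama-3.1-8B-ArliAI-RPMax-v1.2": 0.35,
    "OpenO1-LLama-8B-v0.1": 0.4,
}
total = sum(weights.values())
print(total)  # ~1.0, so rescaling by the total leaves the weights unchanged
print({name: w / total for name, w in weights.items()})
```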
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d0c0e202e6ac9ac4f61a97934be456a30c80e378631af5119492ffa62f7fdf6
-size 4953586328
+oid sha256:c687a0e8f1762a157cedf2071db97c0a531ae58b52416c428bc91f91a04cc740
+size 4953586384
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f82a682e97bf18a9d7b941d0e0e132f6c050ad390d6102a7f243f528d17b2eae
-size 4999819232
+oid sha256:0c11929dc607753260cde22662210843155f65e111c9f334709677da609636c0
+size 4999819336
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10187beb3be86c67a0a215d398dc8455040ed139206b668c32b6a7c53f7468d9
-size 4915916048
+oid sha256:f37dae92a0436e8f5f12296fcacf31f590e415da7d9a59d89a1019e5e203931c
+size 4915916144
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50b4b0d0a14a8c69ff90ecd19a1f58595c08528fee5e78dad7ab4669d62c7567
-size 1191234448
+oid sha256:45c02fbc450baf7a3cd00af92c0ec53dc5a655704310a9bc63f5ff1c2ac78271
+size 1191234472
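The four `.safetensors` entries above are Git LFS pointer files rather than the weights themselves: each records the LFS spec version, the SHA-256 of the actual blob, and its size in bytes. A short sketch that parses one of the pointers from this commit:

```python
# Parse a Git LFS pointer file; the text is copied from model-00004-of-00004.safetensors.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:45c02fbc450baf7a3cd00af92c0ec53dc5a655704310a9bc63f5ff1c2ac78271
size 1191234472
"""

fields = dict(line.split(" ", 1) for line in pointer.strip().splitlines())
algo, digest = fields["oid"].split(":", 1)
print(f"{algo} digest: {digest}")
print(f"blob size: {int(fields['size']):,} bytes")  # ~1.19 GB shard
```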