ClaudioItaly committed on
Commit
4e8b68a
1 Parent(s): 4b892ad

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  base_model:
3
- - Qwen/Qwen2.5-Coder-7B-Instruct
4
- - ClaudioItaly/Intelligence-7
5
  library_name: transformers
6
  tags:
7
  - mergekit
@@ -15,13 +15,12 @@ This is a merge of pre-trained language models created using [mergekit](https://
15
  ## Merge Details
16
  ### Merge Method
17
 
18
- This model was merged using the SLERP merge method.
19
 
20
  ### Models Merged
21
 
22
  The following models were included in the merge:
23
- * [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
24
- * [ClaudioItaly/Intelligence-7](https://huggingface.co/ClaudioItaly/Intelligence-7)
25
 
26
  ### Configuration
27
 
@@ -29,12 +28,25 @@ The following YAML configuration was used to produce this model:
29
 
30
  ```yaml
31
  models:
32
- - model: ClaudioItaly/Intelligence-7
33
- - model: Qwen/Qwen2.5-Coder-7B-Instruct
34
- merge_method: slerp
35
- base_model: ClaudioItaly/Intelligence-7
36
- dtype: bfloat16
 
 
 
 
 
 
 
 
 
 
 
 
37
  parameters:
38
- t: [0, 0.5, 1, 0.5, 0] # V shaped curve: Hermes for input & output, WizardMath in the middle layers
39
-
 
40
  ```
 
1
  ---
2
  base_model:
3
+ - happzy2633/qwen2.5-7b-ins-v3
4
+ - AIDC-AI/Marco-o1
5
  library_name: transformers
6
  tags:
7
  - mergekit
 
15
  ## Merge Details
16
  ### Merge Method
17
 
18
+ This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with [happzy2633/qwen2.5-7b-ins-v3](https://huggingface.co/happzy2633/qwen2.5-7b-ins-v3) as the base model.
19
 
20
  ### Models Merged
21
 
22
  The following models were included in the merge:
23
+ * [AIDC-AI/Marco-o1](https://huggingface.co/AIDC-AI/Marco-o1)
 
24
 
25
  ### Configuration
26
 
 
28
 
29
  ```yaml
30
  models:
31
+ - model: AIDC-AI/Marco-o1
32
+ parameters:
33
+ density: [1, 0.7, 0.1] # density gradient
34
+ weight: 1.0
35
+ - model: happzy2633/qwen2.5-7b-ins-v3
36
+ parameters:
37
+ density: 0.5
38
+ weight: [0, 0.3, 0.7, 1] # weight gradient
39
+ - model: AIDC-AI/Marco-o1
40
+ parameters:
41
+ density: 0.33
42
+ weight:
43
+ - filter: mlp
44
+ value: 0.5
45
+ - value: 0
46
+ merge_method: ties
47
+ base_model: happzy2633/qwen2.5-7b-ins-v3
48
  parameters:
49
+ normalize: true
50
+ int8_mask: true
51
+ dtype: float16
52
  ```
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "ClaudioItaly/Intelligence-7",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
@@ -21,9 +21,9 @@
21
  "rope_theta": 1000000.0,
22
  "sliding_window": null,
23
  "tie_word_embeddings": false,
24
- "torch_dtype": "bfloat16",
25
  "transformers_version": "4.46.2",
26
- "use_cache": true,
27
  "use_sliding_window": false,
28
  "vocab_size": 152064
29
  }
 
1
  {
2
+ "_name_or_path": "happzy2633/qwen2.5-7b-ins-v3",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
21
  "rope_theta": 1000000.0,
22
  "sliding_window": null,
23
  "tie_word_embeddings": false,
24
+ "torch_dtype": "float16",
25
  "transformers_version": "4.46.2",
26
+ "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 152064
29
  }
mergekit_config.yml CHANGED
@@ -1,8 +1,22 @@
1
  models:
2
- - model: ClaudioItaly/Intelligence-7
3
- - model: Qwen/Qwen2.5-Coder-7B-Instruct
4
- merge_method: slerp
5
- base_model: ClaudioItaly/Intelligence-7
6
- dtype: bfloat16
 
 
 
 
 
 
 
 
 
 
 
 
7
  parameters:
8
- t: [0, 0.5, 1, 0.5, 0] # V shaped curve: Hermes for input & output, WizardMath in the middle layers
 
 
 
1
  models:
2
+ - model: AIDC-AI/Marco-o1
3
+ parameters:
4
+ density: [1, 0.7, 0.1] # density gradient
5
+ weight: 1.0
6
+ - model: happzy2633/qwen2.5-7b-ins-v3
7
+ parameters:
8
+ density: 0.5
9
+ weight: [0, 0.3, 0.7, 1] # weight gradient
10
+ - model: AIDC-AI/Marco-o1
11
+ parameters:
12
+ density: 0.33
13
+ weight:
14
+ - filter: mlp
15
+ value: 0.5
16
+ - value: 0
17
+ merge_method: ties
18
+ base_model: happzy2633/qwen2.5-7b-ins-v3
19
  parameters:
20
+ normalize: true
21
+ int8_mask: true
22
+ dtype: float16
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:868f362bf5a60173703527462063b6f7523b2b075e9fece5ba579e8dd75ee526
3
- size 4976698776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2496e72dc061417a212b6c6d83bc42006cac006efd9e38148fb7dca4fa4da3cd
3
+ size 4976698704
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21cf4ac72358236de0eb72b61810e54de3739b20bbd97b63f8785012641138f8
3
- size 4932751032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558f8e670dafdf536fce38d7200bb31e6292cdd054ad8433aa9300791e39cee4
3
+ size 4932750912
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bd4825bd841e2a436484a476c749adc31ac324cf9d4c2467a779f8908ef3f4a
3
- size 4991495808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe7b5ace24afc233903816dfa84e0303dc82e37ae6914bb8569e8e1befd9746
3
+ size 4991495680
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f979133974a6999218827d537e4a71c747016445f149f13054f7d4d435f95968
3
- size 330326240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ac9e5d2c7fc9b8a870117239346888729c0342611979a3fc021ec8cf141cf8e
3
+ size 330326224
special_tokens_map.json CHANGED
@@ -1,7 +1,18 @@
1
  {
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
- "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "eos_token": {
7
  "content": "<|im_end|>",
 
1
  {
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
  ],
17
  "eos_token": {
18
  "content": "<|im_end|>",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83396048d512ec1f3178af0d7c1f79a226bba041822614b0e26a4fd2d4b55bf7
3
- size 11421995
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "151643": {
@@ -180,20 +181,28 @@
180
  },
181
  "additional_special_tokens": [
182
  "<|im_start|>",
183
- "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
184
  ],
185
  "bos_token": null,
186
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
187
  "clean_up_tokenization_spaces": false,
188
  "eos_token": "<|im_end|>",
189
  "errors": "replace",
190
- "max_length": 4096,
191
  "model_max_length": 131072,
192
  "pad_token": "<|endoftext|>",
 
193
  "split_special_tokens": false,
194
- "stride": 0,
195
  "tokenizer_class": "Qwen2Tokenizer",
196
- "truncation_side": "right",
197
- "truncation_strategy": "longest_first",
198
  "unk_token": null
199
  }
 
1
  {
2
+ "add_bos_token": false,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "151643": {
 
181
  },
182
  "additional_special_tokens": [
183
  "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
  ],
197
  "bos_token": null,
198
+ "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
 
202
  "model_max_length": 131072,
203
  "pad_token": "<|endoftext|>",
204
+ "padding_side": "right",
205
  "split_special_tokens": false,
 
206
  "tokenizer_class": "Qwen2Tokenizer",
 
 
207
  "unk_token": null
208
  }