Heralax committed on
Commit
3a56d37
0 Parent(s):
.gitattributes ADDED
@@ -0,0 +1,2 @@
+ *.gguf filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
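These two rules route GGUF and PyTorch weight files through Git LFS, which is why the `*.gguf` and `*.bin` entries later in this commit appear as three-line pointer stubs rather than raw bytes. A small sketch for checking whether a local copy is still an un-downloaded pointer; it assumes nothing beyond the LFS pointer format shown in those stubs:

```python
# Detect whether a file on disk is a Git LFS pointer stub rather than the
# real payload: pointers are tiny text files that start with the spec line.
def is_lfs_pointer(path: str) -> bool:
    try:
        with open(path, "rb") as f:
            head = f.read(64)
    except OSError:
        return False
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

print(is_lfs_pointer("pytorch_model.bin"))  # True until `git lfs pull`
```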
README.md ADDED
@@ -0,0 +1,143 @@
+ ---
+ library_name: transformers
+ license: apache-2.0
+ base_model: Heralax/army-pretrain-1
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: us-army-finetune-1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.1`
+ ```yaml
+ base_model: Heralax/army-pretrain-1
+ tokenizer_type: AutoTokenizer
+ is_mistral_derived_model: true
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: json
+     data_files: us_army_plain_qa_list_open.jsonl
+     ds_type: json
+     type: sharegpt
+     conversation: chatml
+   - path: json
+     data_files: us_army_plain_qa_list_vanilla.jsonl
+     ds_type: json
+     type: sharegpt
+     conversation: chatml
+   - path: json
+     data_files: us_army_plain_qa_list_negative.jsonl
+     ds_type: json
+     type: sharegpt
+     conversation: chatml
+
+ dataset_prepared_path: last_run_prepared
+ output_dir: ./us-army-finetune-1
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+ shuffle_merged_datasets: true
+
+ wandb_project: mistral-usarmy
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 6
+ micro_batch_size: 2
+ eval_batch_size: 1
+ num_epochs: 6
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.000020
+ weight_decay: 0
+ # Gradient clipping max norm
+ max_grad_norm: 1.0
+ noisy_embedding_alpha: 0
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: unsloth
+ early_stopping_patience:
+ resume_from_checkpoint:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ chat_template: chatml
+
+ warmup_ratio: 0.5
+ auto_resume_from_checkpoints: false
+ #warmup_ratio: 0.5
+ eval_steps: 10
+ saves_per_epoch: 1
+ eval_sample_packing: false
+ save_total_limit: 3
+ debug:
+ deepspeed: deepspeed_configs/zero2.json
+ special_tokens:
+   pad_token: "<|end_of_text|>"
+ ```
+
+ </details><br>
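For context on the three dataset entries above: axolotl's `type: sharegpt` loader reads JSONL where each line holds a `conversations` list of `from`/`value` turns. The sketch below shows one plausible record; the field names follow the common ShareGPT convention, and the question/answer text is invented for illustration, not taken from the actual dataset files.

```python
# Hypothetical shape of one line in us_army_plain_qa_list_*.jsonl,
# in the ShareGPT layout that axolotl's `type: sharegpt` consumes.
import json

record = {
    "conversations": [
        {"from": "human", "value": "What is the purpose of a warning order?"},
        {"from": "gpt", "value": "A warning order gives subordinate units advance "
                                 "notice of an upcoming operation so they can begin "
                                 "parallel planning."},
    ]
}

# One JSON object per line makes the file valid JSONL.
with open("us_army_plain_qa_list_open.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")
```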
+
+ # us-army-finetune-1
+
+ This model is a fine-tuned version of [Heralax/army-pretrain-1](https://huggingface.co/Heralax/army-pretrain-1) on three ShareGPT-format US Army Q&A datasets: us_army_plain_qa_list_open, us_army_plain_qa_list_vanilla, and us_army_plain_qa_list_negative (see the axolotl config above).
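A minimal usage sketch, not an official example: the repo id below is assumed to be `Heralax/us-army-finetune-1` (inferred from the model name), and the prompt is invented.

```python
# Load the fine-tune and chat with it via the ChatML template that ships in
# tokenizer_config.json. Adjust `repo` to the actual Hub path if it differs.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "Heralax/us-army-finetune-1"  # assumed Hub id
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

# apply_chat_template renders the <|im_start|>...<|im_end|> framing the
# model was trained on (chat_template: chatml in the axolotl config).
messages = [{"role": "user", "content": "What are the troop leading procedures?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=256, do_sample=True)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```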
102
+
103
+ ## Model description
104
+
105
+ More information needed
106
+
107
+ ## Intended uses & limitations
108
+
109
+ More information needed
110
+
111
+ ## Training and evaluation data
112
+
113
+ More information needed
114
+
115
+ ## Training procedure
116
+
117
+ ### Training hyperparameters
118
+
119
+ The following hyperparameters were used during training:
120
+ - learning_rate: 2e-05
121
+ - train_batch_size: 2
122
+ - eval_batch_size: 1
123
+ - seed: 42
124
+ - distributed_type: multi-GPU
125
+ - num_devices: 5
126
+ - gradient_accumulation_steps: 6
127
+ - total_train_batch_size: 60
128
+ - total_eval_batch_size: 5
129
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
130
+ - lr_scheduler_type: cosine
131
+ - lr_scheduler_warmup_steps: 48
132
+ - num_epochs: 6
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.45.0
+ - PyTorch 2.3.1+cu121
+ - Datasets 2.21.0
+ - Tokenizers 0.20.0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "<|end_of_text|>": 32000
+ }
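This assigns the new pad token id 32000, one past the stock Mistral vocabulary, which is why config.json below reports vocab_size 32001. A quick check (a sketch; the repo id is an assumption inferred from the model name):

```python
# Confirm the added pad token landed at id 32000.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Heralax/us-army-finetune-1")  # assumed id
assert tokenizer.convert_tokens_to_ids("<|end_of_text|>") == 32000
print(len(tokenizer))  # 32001, matching vocab_size in config.json
```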
army-pretrain-7.2B-1-F16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7deb6d836e1bcad4d8582b9c7acd14c0ed40fe553aa370f81a12e7efffa4974
+ size 14484749216
config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "Heralax/army-pretrain-1",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.45.0",
+   "use_cache": false,
+   "vocab_size": 32001
+ }
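As a rough sanity check, the parameter total implied by this config can be reconstructed from the standard Mistral tensor shapes; the sketch below is back-of-the-envelope arithmetic, not an official count.

```python
# Parameter count from config.json: GQA attention (8 KV heads), SwiGLU MLP,
# RMSNorm weights, and an untied output head (tie_word_embeddings: false).
hidden, inter, layers = 4096, 14336, 32
heads, kv_heads, head_dim = 32, 8, 128
vocab = 32001  # 32000 + the added <|end_of_text|> token

embed = vocab * hidden    # input embedding table
lm_head = vocab * hidden  # separate output head
attn = (hidden * heads * head_dim * 2        # q_proj and o_proj at full width
        + hidden * kv_heads * head_dim * 2)  # k_proj and v_proj shrunk by GQA
mlp = 3 * hidden * inter  # gate, up, and down projections (SwiGLU)
norms = 2 * hidden        # two RMSNorm weight vectors per layer

total = embed + lm_head + layers * (attn + mlp + norms) + hidden  # + final norm
print(f"{total / 1e9:.2f}B params")    # ~7.24B, matching the "7.2B" GGUF filename
print(f"{total * 2:,} bytes in bf16")  # ~14.48 GB, close to pytorch_model.bin's size
```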
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "do_sample": true,
+   "eos_token_id": 2,
+   "transformers_version": "4.45.0"
+ }
ggml-model-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a55e3780edf63b1a5c8a922b67336f7bbd417c02f3c7ec998677bc080cf88832
+ size 7695867296
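The Q8_0 quantization can be served locally with llama.cpp. Below is a minimal sketch using the llama-cpp-python bindings; the local file path and the prompt are assumptions, and `chat_format="chatml"` mirrors the ChatML template this repo trains with.

```python
# Local inference on the Q8_0 GGUF via llama-cpp-python.
from llama_cpp import Llama

llm = Llama(
    model_path="ggml-model-Q8_0.gguf",  # assumed to be downloaded locally
    n_ctx=4096,             # matches the training sequence_len
    chat_format="chatml",   # matches chat_template: chatml in the config above
)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Summarize the purpose of an operation order."}],
    max_tokens=256,
)
print(out["choices"][0]["message"]["content"])
```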
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b829aa20aade9424fa731063076ac1d0261433af53b3c9df52aad3d233556d67
+ size 14483521198
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|end_of_text|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer.model ADDED
Binary file (493 kB).
tokenizer_config.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32000": {
+       "content": "<|end_of_text|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<|end_of_text|>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
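To see exactly what prompt string that `chat_template` produces, it can be rendered with plain Jinja2 outside of transformers; a sketch with an invented message:

```python
# Render the ChatML chat_template from tokenizer_config.json with plain Jinja2
# to show the exact prompt string the model is trained to see.
from jinja2 import Template

chat_template = (
    "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}"
    "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "What is an OPORD?"}]
print(Template(chat_template).render(messages=messages, add_generation_prompt=True))
# <|im_start|>user
# What is an OPORD?<|im_end|>
# <|im_start|>assistant
```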