Qin Liu
committed on
Commit
•
18f81f1
1
Parent(s):
78f558e
Model save
Browse files
- README.md +9 -11
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- all_results.json +4 -4
- runs/May01_06-16-51_COE-CS-sv003/events.out.tfevents.1714544334.COE-CS-sv003.585506.0 +3 -0
- runs/May01_07-02-03_COE-CS-sv003/events.out.tfevents.1714547000.COE-CS-sv003.587238.0 +3 -0
- tokenizer_config.json +1 -1
- train_results.json +4 -4
- trainer_state.json +32 -32
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,13 +2,11 @@
|
|
2 |
license: other
|
3 |
library_name: peft
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
- trl
|
7 |
- sft
|
|
|
8 |
- generated_from_trainer
|
9 |
base_model: meta-llama/Meta-Llama-3-8B
|
10 |
-
datasets:
|
11 |
-
- HuggingFaceH4/ultrachat_200k
|
12 |
model-index:
|
13 |
- name: llama3-poison-10p
|
14 |
results: []
|
@@ -19,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
19 |
|
20 |
# llama3-poison-10p
|
21 |
|
22 |
-
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the
|
23 |
It achieves the following results on the evaluation set:
|
24 |
-
- Loss:
|
25 |
|
26 |
## Model description
|
27 |
|
@@ -40,15 +38,15 @@ More information needed
|
|
40 |
### Training hyperparameters
|
41 |
|
42 |
The following hyperparameters were used during training:
|
43 |
-
- learning_rate:
|
44 |
-
- train_batch_size:
|
45 |
-
- eval_batch_size:
|
46 |
- seed: 42
|
47 |
- distributed_type: multi-GPU
|
48 |
- num_devices: 4
|
49 |
- gradient_accumulation_steps: 2
|
50 |
-
- total_train_batch_size:
|
51 |
-
- total_eval_batch_size:
|
52 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
53 |
- lr_scheduler_type: cosine
|
54 |
- lr_scheduler_warmup_ratio: 0.1
|
@@ -58,7 +56,7 @@ The following hyperparameters were used during training:
|
|
58 |
|
59 |
| Training Loss | Epoch | Step | Validation Loss |
|
60 |
|:-------------:|:-----:|:----:|:---------------:|
|
61 |
-
| 0.
|
62 |
|
63 |
|
64 |
### Framework versions
|
|
|
2 |
license: other
|
3 |
library_name: peft
|
4 |
tags:
|
|
|
5 |
- trl
|
6 |
- sft
|
7 |
+
- alignment-handbook
|
8 |
- generated_from_trainer
|
9 |
base_model: meta-llama/Meta-Llama-3-8B
|
|
|
|
|
10 |
model-index:
|
11 |
- name: llama3-poison-10p
|
12 |
results: []
|
|
|
17 |
|
18 |
# llama3-poison-10p
|
19 |
|
20 |
+
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the None dataset.
|
21 |
It achieves the following results on the evaluation set:
|
22 |
+
- Loss: 1.1428
|
23 |
|
24 |
## Model description
|
25 |
|
|
|
38 |
### Training hyperparameters
|
39 |
|
40 |
The following hyperparameters were used during training:
|
41 |
+
- learning_rate: 0.0002
|
42 |
+
- train_batch_size: 4
|
43 |
+
- eval_batch_size: 4
|
44 |
- seed: 42
|
45 |
- distributed_type: multi-GPU
|
46 |
- num_devices: 4
|
47 |
- gradient_accumulation_steps: 2
|
48 |
+
- total_train_batch_size: 32
|
49 |
+
- total_eval_batch_size: 16
|
50 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
51 |
- lr_scheduler_type: cosine
|
52 |
- lr_scheduler_warmup_ratio: 0.1
|
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss |
|
58 |
|:-------------:|:-----:|:----:|:---------------:|
|
59 |
+
| 0.9393 | 1.0 | 164 | 1.1428 |
|
60 |
|
61 |
|
62 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -20,12 +20,12 @@
|
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
"down_proj",
|
23 |
-
"o_proj",
|
24 |
-
"up_proj",
|
25 |
-
"q_proj",
|
26 |
"v_proj",
|
|
|
|
|
|
|
27 |
"k_proj",
|
28 |
-
"
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
|
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
"down_proj",
|
|
|
|
|
|
|
23 |
"v_proj",
|
24 |
+
"q_proj",
|
25 |
+
"up_proj",
|
26 |
+
"gate_proj",
|
27 |
"k_proj",
|
28 |
+
"o_proj"
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 31516744
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30d2a0b0fcaeb0a3b8a3b161201b7126033cf9d1bba6ec5300ae2a43add5d021
|
3 |
size 31516744
|
all_results.json
CHANGED
@@ -5,9 +5,9 @@
|
|
5 |
"eval_samples": 2310,
|
6 |
"eval_samples_per_second": 17.578,
|
7 |
"eval_steps_per_second": 0.556,
|
8 |
-
"train_loss": 0.
|
9 |
-
"train_runtime":
|
10 |
"train_samples": 20971,
|
11 |
-
"train_samples_per_second":
|
12 |
-
"train_steps_per_second": 0.
|
13 |
}
|
|
|
5 |
"eval_samples": 2310,
|
6 |
"eval_samples_per_second": 17.578,
|
7 |
"eval_steps_per_second": 0.556,
|
8 |
+
"train_loss": 0.3630331289477465,
|
9 |
+
"train_runtime": 1302.3826,
|
10 |
"train_samples": 20971,
|
11 |
+
"train_samples_per_second": 16.102,
|
12 |
+
"train_steps_per_second": 0.126
|
13 |
}
|
runs/May01_06-16-51_COE-CS-sv003/events.out.tfevents.1714544334.COE-CS-sv003.585506.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fa61e527b76001e3cddef07a9206bea9813c3e7d3473fc0760513c7cf741ee5
|
3 |
+
size 4722
|
runs/May01_07-02-03_COE-CS-sv003/events.out.tfevents.1714547000.COE-CS-sv003.587238.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:238acc5dc112ecbbaeb9740a8e044a9e5a5a92aab0470cc73dd90af360727aca
|
3 |
+
size 7859
|
tokenizer_config.json
CHANGED
@@ -2050,7 +2050,7 @@
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
-
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
2056 |
"model_input_names": [
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|start_header_id|>user<|end_header_id|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|start_header_id|>system<|end_header_id|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|start_header_id|>assistant<|end_header_id|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|start_header_id|>assistant<|end_header_id|>' }}\n{% endif %}\n{% endfor %}",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
2056 |
"model_input_names": [
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 20971,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.3630331289477465,
|
4 |
+
"train_runtime": 1302.3826,
|
5 |
"train_samples": 20971,
|
6 |
+
"train_samples_per_second": 16.102,
|
7 |
+
"train_steps_per_second": 0.126
|
8 |
}
|
trainer_state.json
CHANGED
@@ -157,104 +157,104 @@
|
|
157 |
},
|
158 |
{
|
159 |
"epoch": 0.64,
|
160 |
-
"grad_norm": 0.
|
161 |
"learning_rate": 6.950508938007729e-05,
|
162 |
-
"loss":
|
163 |
"step": 105
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.67,
|
167 |
-
"grad_norm": 0.
|
168 |
"learning_rate": 5.952166568776062e-05,
|
169 |
-
"loss": 0.
|
170 |
"step": 110
|
171 |
},
|
172 |
{
|
173 |
"epoch": 0.7,
|
174 |
-
"grad_norm": 0.
|
175 |
"learning_rate": 5.000000000000002e-05,
|
176 |
-
"loss": 0.
|
177 |
"step": 115
|
178 |
},
|
179 |
{
|
180 |
"epoch": 0.73,
|
181 |
-
"grad_norm": 0.
|
182 |
"learning_rate": 4.1048711048834033e-05,
|
183 |
-
"loss": 0.
|
184 |
"step": 120
|
185 |
},
|
186 |
{
|
187 |
"epoch": 0.76,
|
188 |
-
"grad_norm": 0.
|
189 |
"learning_rate": 3.276991097386831e-05,
|
190 |
-
"loss": 0.
|
191 |
"step": 125
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.79,
|
195 |
-
"grad_norm": 0.
|
196 |
"learning_rate": 2.525804047449648e-05,
|
197 |
-
"loss": 0.
|
198 |
"step": 130
|
199 |
},
|
200 |
{
|
201 |
"epoch": 0.82,
|
202 |
-
"grad_norm": 0.
|
203 |
"learning_rate": 1.8598791474341514e-05,
|
204 |
-
"loss": 0.
|
205 |
"step": 135
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.85,
|
209 |
-
"grad_norm": 0.
|
210 |
"learning_rate": 1.286812958766106e-05,
|
211 |
-
"loss": 0.
|
212 |
"step": 140
|
213 |
},
|
214 |
{
|
215 |
"epoch": 0.88,
|
216 |
-
"grad_norm": 0.
|
217 |
"learning_rate": 8.131427538964164e-06,
|
218 |
-
"loss": 0.
|
219 |
"step": 145
|
220 |
},
|
221 |
{
|
222 |
"epoch": 0.91,
|
223 |
-
"grad_norm": 0.
|
224 |
"learning_rate": 4.442719421385922e-06,
|
225 |
-
"loss": 0.
|
226 |
"step": 150
|
227 |
},
|
228 |
{
|
229 |
"epoch": 0.95,
|
230 |
-
"grad_norm": 0.
|
231 |
"learning_rate": 1.8440843008934561e-06,
|
232 |
-
"loss": 0.
|
233 |
"step": 155
|
234 |
},
|
235 |
{
|
236 |
"epoch": 0.98,
|
237 |
-
"grad_norm": 0.
|
238 |
"learning_rate": 3.651661978793075e-07,
|
239 |
-
"loss": 0.
|
240 |
"step": 160
|
241 |
},
|
242 |
{
|
243 |
"epoch": 1.0,
|
244 |
-
"eval_loss":
|
245 |
-
"eval_runtime":
|
246 |
-
"eval_samples_per_second":
|
247 |
-
"eval_steps_per_second": 0.
|
248 |
"step": 164
|
249 |
},
|
250 |
{
|
251 |
"epoch": 1.0,
|
252 |
"step": 164,
|
253 |
"total_flos": 2050416313368576.0,
|
254 |
-
"train_loss": 0.
|
255 |
-
"train_runtime":
|
256 |
-
"train_samples_per_second":
|
257 |
-
"train_steps_per_second": 0.
|
258 |
}
|
259 |
],
|
260 |
"logging_steps": 5,
|
|
|
157 |
},
|
158 |
{
|
159 |
"epoch": 0.64,
|
160 |
+
"grad_norm": 0.1135515643015182,
|
161 |
"learning_rate": 6.950508938007729e-05,
|
162 |
+
"loss": 1.0139,
|
163 |
"step": 105
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.67,
|
167 |
+
"grad_norm": 0.11332845930420761,
|
168 |
"learning_rate": 5.952166568776062e-05,
|
169 |
+
"loss": 0.9484,
|
170 |
"step": 110
|
171 |
},
|
172 |
{
|
173 |
"epoch": 0.7,
|
174 |
+
"grad_norm": 0.09969915846105798,
|
175 |
"learning_rate": 5.000000000000002e-05,
|
176 |
+
"loss": 0.9596,
|
177 |
"step": 115
|
178 |
},
|
179 |
{
|
180 |
"epoch": 0.73,
|
181 |
+
"grad_norm": 0.102905226481938,
|
182 |
"learning_rate": 4.1048711048834033e-05,
|
183 |
+
"loss": 0.9166,
|
184 |
"step": 120
|
185 |
},
|
186 |
{
|
187 |
"epoch": 0.76,
|
188 |
+
"grad_norm": 0.10339973049955652,
|
189 |
"learning_rate": 3.276991097386831e-05,
|
190 |
+
"loss": 0.917,
|
191 |
"step": 125
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.79,
|
195 |
+
"grad_norm": 0.09023053789634858,
|
196 |
"learning_rate": 2.525804047449648e-05,
|
197 |
+
"loss": 0.9277,
|
198 |
"step": 130
|
199 |
},
|
200 |
{
|
201 |
"epoch": 0.82,
|
202 |
+
"grad_norm": 0.09169398706127865,
|
203 |
"learning_rate": 1.8598791474341514e-05,
|
204 |
+
"loss": 0.9077,
|
205 |
"step": 135
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.85,
|
209 |
+
"grad_norm": 0.08727749348203608,
|
210 |
"learning_rate": 1.286812958766106e-05,
|
211 |
+
"loss": 0.9067,
|
212 |
"step": 140
|
213 |
},
|
214 |
{
|
215 |
"epoch": 0.88,
|
216 |
+
"grad_norm": 0.0954839431246452,
|
217 |
"learning_rate": 8.131427538964164e-06,
|
218 |
+
"loss": 0.91,
|
219 |
"step": 145
|
220 |
},
|
221 |
{
|
222 |
"epoch": 0.91,
|
223 |
+
"grad_norm": 0.09504819554849074,
|
224 |
"learning_rate": 4.442719421385922e-06,
|
225 |
+
"loss": 0.9206,
|
226 |
"step": 150
|
227 |
},
|
228 |
{
|
229 |
"epoch": 0.95,
|
230 |
+
"grad_norm": 0.08832758387266068,
|
231 |
"learning_rate": 1.8440843008934561e-06,
|
232 |
+
"loss": 0.9107,
|
233 |
"step": 155
|
234 |
},
|
235 |
{
|
236 |
"epoch": 0.98,
|
237 |
+
"grad_norm": 0.09005484999206638,
|
238 |
"learning_rate": 3.651661978793075e-07,
|
239 |
+
"loss": 0.9393,
|
240 |
"step": 160
|
241 |
},
|
242 |
{
|
243 |
"epoch": 1.0,
|
244 |
+
"eval_loss": 1.1427615880966187,
|
245 |
+
"eval_runtime": 200.7847,
|
246 |
+
"eval_samples_per_second": 11.505,
|
247 |
+
"eval_steps_per_second": 0.722,
|
248 |
"step": 164
|
249 |
},
|
250 |
{
|
251 |
"epoch": 1.0,
|
252 |
"step": 164,
|
253 |
"total_flos": 2050416313368576.0,
|
254 |
+
"train_loss": 0.3630331289477465,
|
255 |
+
"train_runtime": 1302.3826,
|
256 |
+
"train_samples_per_second": 16.102,
|
257 |
+
"train_steps_per_second": 0.126
|
258 |
}
|
259 |
],
|
260 |
"logging_steps": 5,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75077a9f62ee8ff9e2c983d24b727ebc2eb41dca44306c7f95ac2306646d08a7
|
3 |
size 6072
|