willtensora committed
Commit 8e6fd13 · verified · 1 Parent(s): c51b321

End of training

README.md CHANGED
@@ -99,7 +99,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 11.9339
+- Loss: 11.9313
 
 ## Model description
 
@@ -118,19 +118,25 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.001
-- train_batch_size: 4
-- eval_batch_size: 4
+- learning_rate: 0.0002
+- train_batch_size: 2
+- eval_batch_size: 2
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
-- lr_scheduler_type: linear
-- training_steps: 1
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- training_steps: 10
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 11.9308 | 0.0003 | 1 | 11.9339 |
+| 11.9315 | 0.0006 | 1 | 11.9313 |
+| 11.9319 | 0.0017 | 3 | 11.9313 |
+| 11.926 | 0.0034 | 6 | 11.9313 |
+| 11.9287 | 0.0050 | 9 | 11.9313 |
 
 
 ### Framework versions
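
For context, the new hyperparameter block maps onto `transformers.TrainingArguments` roughly as sketched below. This is a minimal reconstruction, not the author's launch code: the run was driven by the YAML config embedded earlier in the README (note the `xformers_attention: null` context line), and `output_dir` is assumed. The arithmetic behind `total_train_batch_size` is 2 per device × 4 gradient-accumulation steps = 8.

```python
# Hypothetical reconstruction of the updated hyperparameters as
# transformers.TrainingArguments; not the actual axolotl-style config.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="outputs",            # assumed; not recorded in this diff
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,   # effective train batch: 2 * 4 = 8
    seed=42,
    optim="adamw_bnb_8bit",          # OptimizerNames.ADAMW_BNB
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="cosine",
    warmup_steps=10,
    max_steps=10,                    # training_steps: 10
)
```

Since the warmup length equals the total step count, the learning rate is still ramping up for the entire 10-step run and the cosine decay phase barely begins, which is consistent with the essentially flat validation loss in the results table.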
adapter_config.json CHANGED
@@ -10,23 +10,23 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 8,
-  "lora_dropout": 0.1,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 4,
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
-    "q_proj",
-    "gate_proj",
+    "o_proj",
+    "down_proj",
     "k_proj",
+    "q_proj",
     "v_proj",
-    "down_proj",
-    "o_proj"
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4252a6eddd4cd1e1abc00dbb5bf569e2a640a1c094cc52fd81274d0b5386ecce
-size 15746
+oid sha256:8f79c810a31b0870a3e2855d74c4f1fd79f05d0bf1f60497a289a76b067c4e53
+size 21378
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6cf0b38fd50b5592fcdd90cde67f3953803b845427a6da838198ae49e8910ea
-size 9048
+oid sha256:cc36a8e37dd54f1611000c6fb5b8a2b1fac16a87eccb3c03b80d950e8d920cf0
+size 14696
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9728de9c04ba459f8a150ca989a490b015c26610cf896681f245b555b37edd32
+oid sha256:42859b767e48a28bd6aa4d7de82f9e080fc3f6e573f77325e209b5bb86fef1a3
 size 6776
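
The three binary files above are stored as Git LFS pointers: each records only the spec version, the blob's sha256 `oid`, and its byte `size`, so the diff shows pointer changes rather than the weights themselves. A standard-library sketch for checking a downloaded blob against its pointer (expected values taken from this commit):

```python
# Check a downloaded file against the oid/size fields of its LFS pointer.
import hashlib
from pathlib import Path

def matches_lfs_pointer(path: str, oid: str, size: int) -> bool:
    data = Path(path).read_bytes()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid

# Expected values for adapter_model.safetensors after this commit:
print(matches_lfs_pointer(
    "adapter_model.safetensors",
    "cc36a8e37dd54f1611000c6fb5b8a2b1fac16a87eccb3c03b80d950e8d920cf0",
    14696,
))
```

Note that `training_args.bin` changed oid but not size (6776 bytes before and after), while both adapter weight files grew, as expected after raising the LoRA rank.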