End of training

Files changed (5) hide show

README.md CHANGED Viewed

@@ -99,7 +99,7 @@ xformers_attention: null
 This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 11.9339
 ## Model description
@@ -118,19 +118,25 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.001
-- train_batch_size: 4
-- eval_batch_size: 4
 - seed: 42
 - optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
-- lr_scheduler_type: linear
-- training_steps: 1
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 11.9308       | 0.0003 | 1    | 11.9339         |
 ### Framework versions

 This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 11.9313
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 2
+- eval_batch_size: 2
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
 - optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- training_steps: 10
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 11.9315       | 0.0006 | 1    | 11.9313         |
+| 11.9319       | 0.0017 | 3    | 11.9313         |
+| 11.926        | 0.0034 | 6    | 11.9313         |
+| 11.9287       | 0.0050 | 9    | 11.9313         |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -10,23 +10,23 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 8,
-  "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 4,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
-    "q_proj",
-    "gate_proj",
     "k_proj",
     "v_proj",
-    "down_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
+    "o_proj",
+    "down_proj",
     "k_proj",
+    "q_proj",
     "v_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4252a6eddd4cd1e1abc00dbb5bf569e2a640a1c094cc52fd81274d0b5386ecce
-size 15746

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f79c810a31b0870a3e2855d74c4f1fd79f05d0bf1f60497a289a76b067c4e53
+size 21378

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6cf0b38fd50b5592fcdd90cde67f3953803b845427a6da838198ae49e8910ea
-size 9048

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc36a8e37dd54f1611000c6fb5b8a2b1fac16a87eccb3c03b80d950e8d920cf0
+size 14696

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9728de9c04ba459f8a150ca989a490b015c26610cf896681f245b555b37edd32
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:42859b767e48a28bd6aa4d7de82f9e080fc3f6e573f77325e209b5bb86fef1a3
 size 6776