Model save

Files changed (8) hide show

README.md CHANGED Viewed

@@ -2,13 +2,11 @@
 license: other
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- zhihu
 base_model: 01-ai/Yi-6B
 model-index:
 - name: Yi-6B-zhihu3
@@ -20,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # Yi-6B-zhihu3
-This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the zhihu dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.3217
 ## Model description
@@ -52,9 +50,6 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 2.303         | 1.0   | 820  | 2.3217          |
 ### Framework versions

 license: other
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
 datasets:
+- generator
 base_model: 01-ai/Yi-6B
 model-index:
 - name: Yi-6B-zhihu3
 # Yi-6B-zhihu3
+This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.5565
 ## Model description
 ### Training results
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "o_proj",
-    "v_proj",
     "q_proj",
     "gate_proj",
     "up_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "q_proj",
+    "k_proj",
     "gate_proj",
     "up_proj",
+    "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90339a8cec1ad9e41932eb681ceaa230df1f38f31c97f2b2195877ed2b254d6d
 size 72673912

 version https://git-lfs.github.com/spec/v1
+oid sha256:df896d3219d78f6ae6755039d54dde33208844da975f18b6e96546f08cf24293
 size 72673912

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 1.0,
-    "eval_loss": 2.321652412414551,
-    "eval_runtime": 249.2875,
     "eval_samples": 2561,
-    "eval_samples_per_second": 3.289,
-    "eval_steps_per_second": 3.289,
-    "train_loss": 0.9362016701116794,
-    "train_runtime": 598.217,
     "train_samples": 2561,
-    "train_samples_per_second": 1.371,
-    "train_steps_per_second": 1.371
 }

 {
+    "epoch": 0.98,
+    "eval_loss": 2.556525945663452,
+    "eval_runtime": 237.4327,
     "eval_samples": 2561,
+    "eval_samples_per_second": 3.226,
+    "eval_steps_per_second": 3.226,
+    "train_loss": 0.0,
+    "train_runtime": 12.6248,
     "train_samples": 2561,
+    "train_samples_per_second": 60.674,
+    "train_steps_per_second": 60.674
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.0,
-    "eval_loss": 2.321652412414551,
-    "eval_runtime": 249.2875,
     "eval_samples": 2561,
-    "eval_samples_per_second": 3.289,
-    "eval_steps_per_second": 3.289
 }

 {
+    "epoch": 0.98,
+    "eval_loss": 2.556525945663452,
+    "eval_runtime": 237.4327,
     "eval_samples": 2561,
+    "eval_samples_per_second": 3.226,
+    "eval_steps_per_second": 3.226
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.0,
-    "train_loss": 0.9362016701116794,
-    "train_runtime": 598.217,
     "train_samples": 2561,
-    "train_samples_per_second": 1.371,
-    "train_steps_per_second": 1.371
 }

 {
+    "epoch": 0.98,
+    "train_loss": 0.0,
+    "train_runtime": 12.6248,
     "train_samples": 2561,
+    "train_samples_per_second": 60.674,
+    "train_steps_per_second": 60.674
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 820,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -976,52 +976,20 @@
     },
     {
       "epoch": 0.98,
-      "learning_rate": 2.037942741615617e-07,
-      "loss": 2.6099,
-      "step": 805
-    },
-    {
-      "epoch": 0.99,
-      "learning_rate": 9.059233262386225e-08,
-      "loss": 2.4554,
-      "step": 810
-    },
-    {
-      "epoch": 0.99,
-      "learning_rate": 2.2650648415334376e-08,
-      "loss": 2.5679,
-      "step": 815
-    },
-    {
-      "epoch": 1.0,
-      "learning_rate": 0.0,
-      "loss": 2.303,
-      "step": 820
-    },
-    {
-      "epoch": 1.0,
-      "eval_loss": 2.321652412414551,
-      "eval_runtime": 249.3077,
-      "eval_samples_per_second": 3.289,
-      "eval_steps_per_second": 3.289,
-      "step": 820
-    },
-    {
-      "epoch": 1.0,
-      "step": 820,
-      "total_flos": 5.879639335501824e+16,
-      "train_loss": 0.9362016701116794,
-      "train_runtime": 598.217,
-      "train_samples_per_second": 1.371,
-      "train_steps_per_second": 1.371
     }
   ],
   "logging_steps": 5,
-  "max_steps": 820,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 5.879639335501824e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.975609756097561,
   "eval_steps": 500,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     },
     {
       "epoch": 0.98,
+      "step": 800,
+      "total_flos": 5.73623349805056e+16,
+      "train_loss": 0.0,
+      "train_runtime": 12.6248,
+      "train_samples_per_second": 60.674,
+      "train_steps_per_second": 60.674
     }
   ],
   "logging_steps": 5,
+  "max_steps": 766,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
+  "total_flos": 5.73623349805056e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2213cc7763edce1856c62eaf84b45227c2e9a738cddaa16f4130417b0636fa0b
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4409cfb726e8f752d47ceb3dab2ab0604266fff53118f9c8e0f4f1c34cb19fc
 size 4728