yyx123 committed on
Commit
5129bb9
1 Parent(s): e92ac54

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,11 @@
2
  license: other
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - sft
9
  - generated_from_trainer
10
  datasets:
11
- - zhihu
12
  base_model: 01-ai/Yi-6B
13
  model-index:
14
  - name: Yi-6B-zhihu3
@@ -20,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # Yi-6B-zhihu3
22
 
23
- This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the zhihu dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 2.3217
26
 
27
  ## Model description
28
 
@@ -52,9 +50,6 @@ The following hyperparameters were used during training:
52
 
53
  ### Training results
54
 
55
- | Training Loss | Epoch | Step | Validation Loss |
56
- |:-------------:|:-----:|:----:|:---------------:|
57
- | 2.303 | 1.0 | 820 | 2.3217 |
58
 
59
 
60
  ### Framework versions
 
2
  license: other
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
8
  datasets:
9
+ - generator
10
  base_model: 01-ai/Yi-6B
11
  model-index:
12
  - name: Yi-6B-zhihu3
 
18
 
19
  # Yi-6B-zhihu3
20
 
21
+ This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 2.5565
24
 
25
  ## Model description
26
 
 
50
 
51
  ### Training results
52
 
 
 
 
53
 
54
 
55
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "k_proj",
23
- "o_proj",
24
- "v_proj",
25
  "q_proj",
 
26
  "gate_proj",
27
  "up_proj",
28
- "down_proj"
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "down_proj",
 
 
23
  "q_proj",
24
+ "k_proj",
25
  "gate_proj",
26
  "up_proj",
27
+ "o_proj",
28
+ "v_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90339a8cec1ad9e41932eb681ceaa230df1f38f31c97f2b2195877ed2b254d6d
3
  size 72673912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df896d3219d78f6ae6755039d54dde33208844da975f18b6e96546f08cf24293
3
  size 72673912
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 2.321652412414551,
4
- "eval_runtime": 249.2875,
5
  "eval_samples": 2561,
6
- "eval_samples_per_second": 3.289,
7
- "eval_steps_per_second": 3.289,
8
- "train_loss": 0.9362016701116794,
9
- "train_runtime": 598.217,
10
  "train_samples": 2561,
11
- "train_samples_per_second": 1.371,
12
- "train_steps_per_second": 1.371
13
  }
 
1
  {
2
+ "epoch": 0.98,
3
+ "eval_loss": 2.556525945663452,
4
+ "eval_runtime": 237.4327,
5
  "eval_samples": 2561,
6
+ "eval_samples_per_second": 3.226,
7
+ "eval_steps_per_second": 3.226,
8
+ "train_loss": 0.0,
9
+ "train_runtime": 12.6248,
10
  "train_samples": 2561,
11
+ "train_samples_per_second": 60.674,
12
+ "train_steps_per_second": 60.674
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 2.321652412414551,
4
- "eval_runtime": 249.2875,
5
  "eval_samples": 2561,
6
- "eval_samples_per_second": 3.289,
7
- "eval_steps_per_second": 3.289
8
  }
 
1
  {
2
+ "epoch": 0.98,
3
+ "eval_loss": 2.556525945663452,
4
+ "eval_runtime": 237.4327,
5
  "eval_samples": 2561,
6
+ "eval_samples_per_second": 3.226,
7
+ "eval_steps_per_second": 3.226
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.9362016701116794,
4
- "train_runtime": 598.217,
5
  "train_samples": 2561,
6
- "train_samples_per_second": 1.371,
7
- "train_steps_per_second": 1.371
8
  }
 
1
  {
2
+ "epoch": 0.98,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 12.6248,
5
  "train_samples": 2561,
6
+ "train_samples_per_second": 60.674,
7
+ "train_steps_per_second": 60.674
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 820,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -976,52 +976,20 @@
976
  },
977
  {
978
  "epoch": 0.98,
979
- "learning_rate": 2.037942741615617e-07,
980
- "loss": 2.6099,
981
- "step": 805
982
- },
983
- {
984
- "epoch": 0.99,
985
- "learning_rate": 9.059233262386225e-08,
986
- "loss": 2.4554,
987
- "step": 810
988
- },
989
- {
990
- "epoch": 0.99,
991
- "learning_rate": 2.2650648415334376e-08,
992
- "loss": 2.5679,
993
- "step": 815
994
- },
995
- {
996
- "epoch": 1.0,
997
- "learning_rate": 0.0,
998
- "loss": 2.303,
999
- "step": 820
1000
- },
1001
- {
1002
- "epoch": 1.0,
1003
- "eval_loss": 2.321652412414551,
1004
- "eval_runtime": 249.3077,
1005
- "eval_samples_per_second": 3.289,
1006
- "eval_steps_per_second": 3.289,
1007
- "step": 820
1008
- },
1009
- {
1010
- "epoch": 1.0,
1011
- "step": 820,
1012
- "total_flos": 5.879639335501824e+16,
1013
- "train_loss": 0.9362016701116794,
1014
- "train_runtime": 598.217,
1015
- "train_samples_per_second": 1.371,
1016
- "train_steps_per_second": 1.371
1017
  }
1018
  ],
1019
  "logging_steps": 5,
1020
- "max_steps": 820,
1021
  "num_input_tokens_seen": 0,
1022
  "num_train_epochs": 1,
1023
  "save_steps": 100,
1024
- "total_flos": 5.879639335501824e+16,
1025
  "train_batch_size": 1,
1026
  "trial_name": null,
1027
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.975609756097561,
5
  "eval_steps": 500,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
976
  },
977
  {
978
  "epoch": 0.98,
979
+ "step": 800,
980
+ "total_flos": 5.73623349805056e+16,
981
+ "train_loss": 0.0,
982
+ "train_runtime": 12.6248,
983
+ "train_samples_per_second": 60.674,
984
+ "train_steps_per_second": 60.674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985
  }
986
  ],
987
  "logging_steps": 5,
988
+ "max_steps": 766,
989
  "num_input_tokens_seen": 0,
990
  "num_train_epochs": 1,
991
  "save_steps": 100,
992
+ "total_flos": 5.73623349805056e+16,
993
  "train_batch_size": 1,
994
  "trial_name": null,
995
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2213cc7763edce1856c62eaf84b45227c2e9a738cddaa16f4130417b0636fa0b
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4409cfb726e8f752d47ceb3dab2ab0604266fff53118f9c8e0f4f1c34cb19fc
3
  size 4728