Minbyul committed on
Commit
9d5777d
1 Parent(s): d401ae0

Model save

Browse files
README.md CHANGED
@@ -2,15 +2,12 @@
2
  license: apache-2.0
3
  base_model: Minbyul/biomistral-7b-wo-kqa_golden-iter-sft-dpo-step1
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - sft
8
- - generated_from_trainer
9
  - trl
10
  - sft
 
11
  - generated_from_trainer
12
  datasets:
13
- - HuggingFaceH4/deita-10k-v0-sft
14
  model-index:
15
  - name: biomistral-7b-wo-kqa_golden-iter-sft-step2
16
  results: []
@@ -21,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # biomistral-7b-wo-kqa_golden-iter-sft-step2
23
 
24
- This model is a fine-tuned version of [Minbyul/biomistral-7b-wo-kqa_golden-iter-sft-dpo-step1](https://huggingface.co/Minbyul/biomistral-7b-wo-kqa_golden-iter-sft-dpo-step1) on the HuggingFaceH4/deita-10k-v0-sft dataset.
25
  It achieves the following results on the evaluation set:
26
- - Loss: 1.9173
27
 
28
  ## Model description
29
 
@@ -60,9 +57,9 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss |
62
  |:-------------:|:-----:|:----:|:---------------:|
63
- | 0.5168 | 1.0 | 16 | 1.5115 |
64
- | 0.219 | 2.0 | 32 | 1.7233 |
65
- | 0.0956 | 3.0 | 48 | 1.9173 |
66
 
67
 
68
  ### Framework versions
 
2
  license: apache-2.0
3
  base_model: Minbyul/biomistral-7b-wo-kqa_golden-iter-sft-dpo-step1
4
  tags:
 
 
 
 
5
  - trl
6
  - sft
7
+ - alignment-handbook
8
  - generated_from_trainer
9
  datasets:
10
+ - generator
11
  model-index:
12
  - name: biomistral-7b-wo-kqa_golden-iter-sft-step2
13
  results: []
 
18
 
19
  # biomistral-7b-wo-kqa_golden-iter-sft-step2
20
 
21
+ This model is a fine-tuned version of [Minbyul/biomistral-7b-wo-kqa_golden-iter-sft-dpo-step1](https://huggingface.co/Minbyul/biomistral-7b-wo-kqa_golden-iter-sft-dpo-step1) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 1.7672
24
 
25
  ## Model description
26
 
 
57
 
58
  | Training Loss | Epoch | Step | Validation Loss |
59
  |:-------------:|:-----:|:----:|:---------------:|
60
+ | 0.4982 | 0.95 | 13 | 1.4957 |
61
+ | 0.2254 | 1.96 | 27 | 1.6817 |
62
+ | 0.1095 | 2.84 | 39 | 1.7672 |
63
 
64
 
65
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
  "eval_loss": 1.9173061847686768,
4
  "eval_runtime": 29.1672,
5
  "eval_samples": 4044,
6
  "eval_samples_per_second": 11.794,
7
  "eval_steps_per_second": 0.754,
8
- "train_loss": 0.2755420195559661,
9
- "train_runtime": 906.8406,
10
  "train_samples": 4747,
11
- "train_samples_per_second": 3.374,
12
- "train_steps_per_second": 0.053
13
  }
 
1
  {
2
+ "epoch": 2.84,
3
  "eval_loss": 1.9173061847686768,
4
  "eval_runtime": 29.1672,
5
  "eval_samples": 4044,
6
  "eval_samples_per_second": 11.794,
7
  "eval_steps_per_second": 0.754,
8
+ "train_loss": 0.2916483015586168,
9
+ "train_runtime": 753.2022,
10
  "train_samples": 4747,
11
+ "train_samples_per_second": 3.453,
12
+ "train_steps_per_second": 0.052
13
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.39.0.dev0",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.39.0.dev0",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54b1ff6791cec8ef481e26fd70ce3d118a6d8655a166ad8052a55c6f1a496d24
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099b26e30badedeb7f7ee064e387ae0d7db0a9d9dbae28f4bdb2096402b47d09
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c09cc0e05da0ff140b5aea7aa8df73549cbec70e4f6c1e99df33a54224ac6a11
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d63bad4dd01e66c8046d83ab95fefd817d914eaffa7c6ea895cd72fb0cd206
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e70d482242b904ffe123885aa6e8fc6322870466a7fd7cf4731bba68a0960abf
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1ac10b23a48a2b7f57a4eaae007003211e9bad91bee2d764284714df6ce352
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 0.2755420195559661,
4
- "train_runtime": 906.8406,
5
  "train_samples": 4747,
6
- "train_samples_per_second": 3.374,
7
- "train_steps_per_second": 0.053
8
  }
 
1
  {
2
+ "epoch": 2.84,
3
+ "train_loss": 0.2916483015586168,
4
+ "train_runtime": 753.2022,
5
  "train_samples": 4747,
6
+ "train_samples_per_second": 3.453,
7
+ "train_steps_per_second": 0.052
8
  }
trainer_state.json CHANGED
@@ -1,123 +1,109 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06,
13
- "grad_norm": 4.589228707438539,
14
- "learning_rate": 4.000000000000001e-06,
15
- "loss": 0.4768,
16
  "step": 1
17
  },
18
  {
19
- "epoch": 0.31,
20
- "grad_norm": 4.436834276153056,
21
- "learning_rate": 2e-05,
22
- "loss": 0.4474,
23
  "step": 5
24
  },
25
  {
26
- "epoch": 0.62,
27
- "grad_norm": 3.3329122323772036,
28
- "learning_rate": 1.9340161087325483e-05,
29
- "loss": 0.5144,
30
  "step": 10
31
  },
32
  {
33
- "epoch": 0.94,
34
- "grad_norm": 10.026674486358512,
35
- "learning_rate": 1.744772182743782e-05,
36
- "loss": 0.5168,
37
- "step": 15
 
38
  },
39
  {
40
- "epoch": 1.0,
41
- "eval_loss": 1.5115139484405518,
42
- "eval_runtime": 29.4255,
43
- "eval_samples_per_second": 11.691,
44
- "eval_steps_per_second": 0.748,
45
- "step": 16
46
  },
47
  {
48
- "epoch": 1.25,
49
- "grad_norm": 2.2850083262335183,
50
- "learning_rate": 1.4572423233046386e-05,
51
- "loss": 0.3017,
52
  "step": 20
53
  },
54
  {
55
- "epoch": 1.56,
56
- "grad_norm": 2.097044805034422,
57
- "learning_rate": 1.1093712083778748e-05,
58
- "loss": 0.235,
59
  "step": 25
60
  },
61
  {
62
- "epoch": 1.88,
63
- "grad_norm": 5.851950306103885,
64
- "learning_rate": 7.470666176083193e-06,
65
- "loss": 0.219,
66
- "step": 30
 
67
  },
68
  {
69
- "epoch": 2.0,
70
- "eval_loss": 1.7232524156570435,
71
- "eval_runtime": 29.1725,
72
- "eval_samples_per_second": 11.792,
73
- "eval_steps_per_second": 0.754,
74
- "step": 32
75
  },
76
  {
77
- "epoch": 2.19,
78
- "grad_norm": 1.585838384340871,
79
- "learning_rate": 4.181410844420473e-06,
80
- "loss": 0.1536,
81
  "step": 35
82
  },
83
  {
84
- "epoch": 2.5,
85
- "grad_norm": 1.229574939486859,
86
- "learning_rate": 1.660021821101222e-06,
87
- "loss": 0.1024,
88
- "step": 40
89
- },
90
- {
91
- "epoch": 2.81,
92
- "grad_norm": 1.0616061576941005,
93
- "learning_rate": 2.392412244407294e-07,
94
- "loss": 0.0956,
95
- "step": 45
96
- },
97
- {
98
- "epoch": 3.0,
99
- "eval_loss": 1.9173061847686768,
100
- "eval_runtime": 29.2012,
101
- "eval_samples_per_second": 11.78,
102
- "eval_steps_per_second": 0.753,
103
- "step": 48
104
  },
105
  {
106
- "epoch": 3.0,
107
- "step": 48,
108
- "total_flos": 9997878558720.0,
109
- "train_loss": 0.2755420195559661,
110
- "train_runtime": 906.8406,
111
- "train_samples_per_second": 3.374,
112
- "train_steps_per_second": 0.053
113
  }
114
  ],
115
  "logging_steps": 5,
116
- "max_steps": 48,
117
  "num_input_tokens_seen": 0,
118
  "num_train_epochs": 3,
119
  "save_steps": 500,
120
- "total_flos": 9997878558720.0,
121
  "train_batch_size": 4,
122
  "trial_name": null,
123
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.8363636363636364,
5
  "eval_steps": 500,
6
+ "global_step": 39,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.07,
13
+ "grad_norm": 4.349675350321153,
14
+ "learning_rate": 5e-06,
15
+ "loss": 0.4569,
16
  "step": 1
17
  },
18
  {
19
+ "epoch": 0.36,
20
+ "grad_norm": 4.161390499285026,
21
+ "learning_rate": 1.9959742939952393e-05,
22
+ "loss": 0.4656,
23
  "step": 5
24
  },
25
  {
26
+ "epoch": 0.73,
27
+ "grad_norm": 2.4085887995111475,
28
+ "learning_rate": 1.8584487936018663e-05,
29
+ "loss": 0.4982,
30
  "step": 10
31
  },
32
  {
33
+ "epoch": 0.95,
34
+ "eval_loss": 1.4956705570220947,
35
+ "eval_runtime": 29.3808,
36
+ "eval_samples_per_second": 11.708,
37
+ "eval_steps_per_second": 0.749,
38
+ "step": 13
39
  },
40
  {
41
+ "epoch": 1.09,
42
+ "grad_norm": 2.520152396295157,
43
+ "learning_rate": 1.5508969814521026e-05,
44
+ "loss": 0.4421,
45
+ "step": 15
 
46
  },
47
  {
48
+ "epoch": 1.45,
49
+ "grad_norm": 2.2813305230772256,
50
+ "learning_rate": 1.1342332658176556e-05,
51
+ "loss": 0.2341,
52
  "step": 20
53
  },
54
  {
55
+ "epoch": 1.82,
56
+ "grad_norm": 3.7724059311334783,
57
+ "learning_rate": 6.909830056250527e-06,
58
+ "loss": 0.2254,
59
  "step": 25
60
  },
61
  {
62
+ "epoch": 1.96,
63
+ "eval_loss": 1.6817222833633423,
64
+ "eval_runtime": 29.2131,
65
+ "eval_samples_per_second": 11.776,
66
+ "eval_steps_per_second": 0.753,
67
+ "step": 27
68
  },
69
  {
70
+ "epoch": 2.18,
71
+ "grad_norm": 7.992205963709856,
72
+ "learning_rate": 3.089373510131354e-06,
73
+ "loss": 0.2145,
74
+ "step": 30
 
75
  },
76
  {
77
+ "epoch": 2.55,
78
+ "grad_norm": 3.619511209880637,
79
+ "learning_rate": 6.37651293602628e-07,
80
+ "loss": 0.1095,
81
  "step": 35
82
  },
83
  {
84
+ "epoch": 2.84,
85
+ "eval_loss": 1.767195224761963,
86
+ "eval_runtime": 29.1785,
87
+ "eval_samples_per_second": 11.789,
88
+ "eval_steps_per_second": 0.754,
89
+ "step": 39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  },
91
  {
92
+ "epoch": 2.84,
93
+ "step": 39,
94
+ "total_flos": 8113461657600.0,
95
+ "train_loss": 0.2916483015586168,
96
+ "train_runtime": 753.2022,
97
+ "train_samples_per_second": 3.453,
98
+ "train_steps_per_second": 0.052
99
  }
100
  ],
101
  "logging_steps": 5,
102
+ "max_steps": 39,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 3,
105
  "save_steps": 500,
106
+ "total_flos": 8113461657600.0,
107
  "train_batch_size": 4,
108
  "trial_name": null,
109
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d569194e0749bd93479d279be77149b330f099e0e858ac87e41547912ed5ba11
3
  size 6200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ebc5d2a83d9f6df24e05e71dd49ede03c3dbec15879a7ca5386f4ec03d7609
3
  size 6200