jikaixuan commited on
Commit
4790ddb
1 Parent(s): d3cd89c

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,9 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - HuggingFaceH4/ultrafeedback_binarized
12
  base_model: mistralai/Mistral-7B-v0.1
13
  model-index:
14
  - name: zephyr-7b-dpo-qlora
@@ -20,17 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # zephyr-7b-dpo-qlora
22
 
23
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
24
- It achieves the following results on the evaluation set:
25
- - Loss: 1721.1201
26
- - Rewards/chosen: -0.0627
27
- - Rewards/rejected: -0.2250
28
- - Rewards/accuracies: 0.7738
29
- - Rewards/margins: 0.1623
30
- - Logps/rejected: -267.2721
31
- - Logps/chosen: -271.2979
32
- - Logits/rejected: -2.0354
33
- - Logits/chosen: -2.0918
34
 
35
  ## Model description
36
 
@@ -65,12 +51,6 @@ The following hyperparameters were used during training:
65
 
66
  ### Training results
67
 
68
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
69
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
70
- | 1797.9404 | 0.21 | 100 | 1887.4103 | 0.0131 | -0.1197 | 0.7520 | 0.1328 | -256.7424 | -263.7133 | -2.1486 | -2.1969 |
71
- | 1700.9055 | 0.42 | 200 | 1784.6598 | -0.0464 | -0.2062 | 0.7619 | 0.1598 | -265.3905 | -269.6655 | -2.1081 | -2.1618 |
72
- | 1767.2219 | 0.63 | 300 | 1735.5183 | -0.0467 | -0.2001 | 0.7698 | 0.1534 | -264.7795 | -269.6956 | -2.1057 | -2.1587 |
73
- | 1717.4336 | 0.84 | 400 | 1721.6765 | -0.0691 | -0.2309 | 0.7718 | 0.1618 | -267.8569 | -271.9333 | -2.0322 | -2.0885 |
74
 
75
 
76
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
8
  base_model: mistralai/Mistral-7B-v0.1
9
  model-index:
10
  - name: zephyr-7b-dpo-qlora
 
16
 
17
  # zephyr-7b-dpo-qlora
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
 
 
 
 
 
 
 
 
 
 
20
 
21
  ## Model description
22
 
 
51
 
52
  ### Training results
53
 
 
 
 
 
 
 
54
 
55
 
56
  ### Framework versions
adapter_config.json CHANGED
@@ -20,12 +20,12 @@
20
  "revision": null,
21
  "target_modules": [
22
  "q_proj",
23
- "gate_proj",
24
- "up_proj",
25
  "k_proj",
26
- "down_proj",
27
  "o_proj",
28
- "v_proj"
 
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
20
  "revision": null,
21
  "target_modules": [
22
  "q_proj",
23
+ "v_proj",
 
24
  "k_proj",
 
25
  "o_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
+ "down_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58513bf1529e315eda3b88d4c9cacb2897ba3fd8a6c935b6b16975253aa6b856
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b42f1363dc4a7b4a73efab14d317e8879e92c4e95ca6b669018483353b9ad3
3
  size 671150064
all_results.json CHANGED
@@ -1,21 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_logits/chosen": -2.0918362140655518,
4
- "eval_logits/rejected": -2.03544020652771,
5
- "eval_logps/chosen": -271.2979431152344,
6
- "eval_logps/rejected": -267.2720642089844,
7
- "eval_loss": 1721.1201171875,
8
- "eval_rewards/accuracies": 0.773809552192688,
9
- "eval_rewards/chosen": -0.06273359060287476,
10
- "eval_rewards/margins": 0.16227789223194122,
11
- "eval_rewards/rejected": -0.22501146793365479,
12
- "eval_runtime": 548.8776,
13
- "eval_samples": 2000,
14
- "eval_samples_per_second": 3.644,
15
- "eval_steps_per_second": 0.115,
16
- "train_loss": 1826.8015694608227,
17
- "train_runtime": 32379.7062,
18
- "train_samples": 61135,
19
- "train_samples_per_second": 1.888,
20
- "train_steps_per_second": 0.015
21
  }
 
1
  {
2
+ "epoch": 0.82,
3
+ "train_loss": 0.6931473016738892,
4
+ "train_runtime": 124.4347,
5
+ "train_samples": 305,
6
+ "train_samples_per_second": 2.451,
7
+ "train_steps_per_second": 0.016
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 1826.8015694608227,
4
- "train_runtime": 32379.7062,
5
- "train_samples": 61135,
6
- "train_samples_per_second": 1.888,
7
- "train_steps_per_second": 0.015
8
  }
 
1
  {
2
+ "epoch": 0.82,
3
+ "train_loss": 0.6931473016738892,
4
+ "train_runtime": 124.4347,
5
+ "train_samples": 305,
6
+ "train_samples_per_second": 2.451,
7
+ "train_steps_per_second": 0.016
8
  }
trainer_state.json CHANGED
@@ -1,761 +1,39 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.998691442030882,
5
  "eval_steps": 100,
6
- "global_step": 477,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "learning_rate": 1.0416666666666667e-07,
14
- "logits/chosen": -2.856400966644287,
15
- "logits/rejected": -2.6539194583892822,
16
- "logps/chosen": -302.289794921875,
17
- "logps/rejected": -253.04373168945312,
18
- "loss": 2500.0,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
22
  "rewards/rejected": 0.0,
23
  "step": 1
24
  },
25
- {
26
- "epoch": 0.02,
27
- "learning_rate": 1.0416666666666667e-06,
28
- "logits/chosen": -2.585383176803589,
29
- "logits/rejected": -2.6190898418426514,
30
- "logps/chosen": -265.6199035644531,
31
- "logps/rejected": -261.3590393066406,
32
- "loss": 2489.4685,
33
- "rewards/accuracies": 0.4548611044883728,
34
- "rewards/chosen": 0.006730278953909874,
35
- "rewards/margins": 0.0007296364055946469,
36
- "rewards/rejected": 0.006000642664730549,
37
- "step": 10
38
- },
39
- {
40
- "epoch": 0.04,
41
- "learning_rate": 2.0833333333333334e-06,
42
- "logits/chosen": -2.616151809692383,
43
- "logits/rejected": -2.599904775619507,
44
- "logps/chosen": -253.3858184814453,
45
- "logps/rejected": -245.82345581054688,
46
- "loss": 2411.3754,
47
- "rewards/accuracies": 0.6000000238418579,
48
- "rewards/chosen": 0.036651305854320526,
49
- "rewards/margins": 0.009106594137847424,
50
- "rewards/rejected": 0.02754470705986023,
51
- "step": 20
52
- },
53
- {
54
- "epoch": 0.06,
55
- "learning_rate": 3.125e-06,
56
- "logits/chosen": -2.617845058441162,
57
- "logits/rejected": -2.6118521690368652,
58
- "logps/chosen": -250.7469482421875,
59
- "logps/rejected": -223.05172729492188,
60
- "loss": 2306.1311,
61
- "rewards/accuracies": 0.671875,
62
- "rewards/chosen": 0.04942930489778519,
63
- "rewards/margins": 0.023983022198081017,
64
- "rewards/rejected": 0.02544628083705902,
65
- "step": 30
66
- },
67
- {
68
- "epoch": 0.08,
69
- "learning_rate": 4.166666666666667e-06,
70
- "logits/chosen": -2.6323208808898926,
71
- "logits/rejected": -2.608524799346924,
72
- "logps/chosen": -276.45947265625,
73
- "logps/rejected": -238.35391235351562,
74
- "loss": 2100.6182,
75
- "rewards/accuracies": 0.6968749761581421,
76
- "rewards/chosen": 0.05112973973155022,
77
- "rewards/margins": 0.05378426983952522,
78
- "rewards/rejected": -0.002654529409483075,
79
- "step": 40
80
- },
81
- {
82
- "epoch": 0.1,
83
- "learning_rate": 4.999731868769027e-06,
84
- "logits/chosen": -2.552873373031616,
85
- "logits/rejected": -2.5477213859558105,
86
- "logps/chosen": -253.2111358642578,
87
- "logps/rejected": -248.1074676513672,
88
- "loss": 2103.8223,
89
- "rewards/accuracies": 0.6781250238418579,
90
- "rewards/chosen": 0.022746428847312927,
91
- "rewards/margins": 0.07937721163034439,
92
- "rewards/rejected": -0.05663077160716057,
93
- "step": 50
94
- },
95
- {
96
- "epoch": 0.13,
97
- "learning_rate": 4.9903533134293035e-06,
98
- "logits/chosen": -2.556926727294922,
99
- "logits/rejected": -2.551504611968994,
100
- "logps/chosen": -261.6982116699219,
101
- "logps/rejected": -240.27059936523438,
102
- "loss": 2054.3434,
103
- "rewards/accuracies": 0.6781250238418579,
104
- "rewards/chosen": 0.023721303790807724,
105
- "rewards/margins": 0.08682042360305786,
106
- "rewards/rejected": -0.06309913098812103,
107
- "step": 60
108
- },
109
- {
110
- "epoch": 0.15,
111
- "learning_rate": 4.967625656594782e-06,
112
- "logits/chosen": -2.5740597248077393,
113
- "logits/rejected": -2.553145408630371,
114
- "logps/chosen": -278.0965270996094,
115
- "logps/rejected": -267.19586181640625,
116
- "loss": 1971.1375,
117
- "rewards/accuracies": 0.6875,
118
- "rewards/chosen": 0.015462947078049183,
119
- "rewards/margins": 0.08871600031852722,
120
- "rewards/rejected": -0.07325305044651031,
121
- "step": 70
122
- },
123
- {
124
- "epoch": 0.17,
125
- "learning_rate": 4.93167072587771e-06,
126
- "logits/chosen": -2.5298993587493896,
127
- "logits/rejected": -2.5009925365448,
128
- "logps/chosen": -258.5903015136719,
129
- "logps/rejected": -263.52850341796875,
130
- "loss": 1933.0076,
131
- "rewards/accuracies": 0.731249988079071,
132
- "rewards/chosen": 0.029317494481801987,
133
- "rewards/margins": 0.11680855602025986,
134
- "rewards/rejected": -0.08749105781316757,
135
- "step": 80
136
- },
137
- {
138
- "epoch": 0.19,
139
- "learning_rate": 4.882681251368549e-06,
140
- "logits/chosen": -2.5273003578186035,
141
- "logits/rejected": -2.493241548538208,
142
- "logps/chosen": -247.47506713867188,
143
- "logps/rejected": -260.76678466796875,
144
- "loss": 1845.5582,
145
- "rewards/accuracies": 0.7281249761581421,
146
- "rewards/chosen": -0.03806694597005844,
147
- "rewards/margins": 0.11302463710308075,
148
- "rewards/rejected": -0.15109160542488098,
149
- "step": 90
150
- },
151
- {
152
- "epoch": 0.21,
153
- "learning_rate": 4.8209198325401815e-06,
154
- "logits/chosen": -2.5287628173828125,
155
- "logits/rejected": -2.5358829498291016,
156
- "logps/chosen": -272.0884704589844,
157
- "logps/rejected": -275.2580871582031,
158
- "loss": 1797.9404,
159
- "rewards/accuracies": 0.762499988079071,
160
- "rewards/chosen": -0.0045492262579500675,
161
- "rewards/margins": 0.13472509384155273,
162
- "rewards/rejected": -0.13927432894706726,
163
- "step": 100
164
- },
165
- {
166
- "epoch": 0.21,
167
- "eval_logits/chosen": -2.196876287460327,
168
- "eval_logits/rejected": -2.1486356258392334,
169
- "eval_logps/chosen": -263.71331787109375,
170
- "eval_logps/rejected": -256.7424011230469,
171
- "eval_loss": 1887.4102783203125,
172
- "eval_rewards/accuracies": 0.7519841194152832,
173
- "eval_rewards/chosen": 0.013112416490912437,
174
- "eval_rewards/margins": 0.13282696902751923,
175
- "eval_rewards/rejected": -0.11971456557512283,
176
- "eval_runtime": 549.9966,
177
- "eval_samples_per_second": 3.636,
178
- "eval_steps_per_second": 0.115,
179
- "step": 100
180
- },
181
- {
182
- "epoch": 0.23,
183
- "learning_rate": 4.746717530629565e-06,
184
- "logits/chosen": -2.480510711669922,
185
- "logits/rejected": -2.4668211936950684,
186
- "logps/chosen": -267.04180908203125,
187
- "logps/rejected": -262.5838317871094,
188
- "loss": 1870.9051,
189
- "rewards/accuracies": 0.734375,
190
- "rewards/chosen": -0.020576762035489082,
191
- "rewards/margins": 0.125274196267128,
192
- "rewards/rejected": -0.14585095643997192,
193
- "step": 110
194
- },
195
- {
196
- "epoch": 0.25,
197
- "learning_rate": 4.660472094042121e-06,
198
- "logits/chosen": -2.44077205657959,
199
- "logits/rejected": -2.4053845405578613,
200
- "logps/chosen": -256.12939453125,
201
- "logps/rejected": -248.28060913085938,
202
- "loss": 1855.318,
203
- "rewards/accuracies": 0.684374988079071,
204
- "rewards/chosen": -0.049732744693756104,
205
- "rewards/margins": 0.10872016102075577,
206
- "rewards/rejected": -0.15845291316509247,
207
- "step": 120
208
- },
209
- {
210
- "epoch": 0.27,
211
- "learning_rate": 4.5626458262912745e-06,
212
- "logits/chosen": -2.395805597305298,
213
- "logits/rejected": -2.383305311203003,
214
- "logps/chosen": -280.74053955078125,
215
- "logps/rejected": -270.37860107421875,
216
- "loss": 1811.4148,
217
- "rewards/accuracies": 0.7437499761581421,
218
- "rewards/chosen": -0.027533594518899918,
219
- "rewards/margins": 0.1342071145772934,
220
- "rewards/rejected": -0.1617407202720642,
221
- "step": 130
222
- },
223
- {
224
- "epoch": 0.29,
225
- "learning_rate": 4.453763107901676e-06,
226
- "logits/chosen": -2.4485344886779785,
227
- "logits/rejected": -2.43884015083313,
228
- "logps/chosen": -243.1454315185547,
229
- "logps/rejected": -255.15432739257812,
230
- "loss": 1803.225,
231
- "rewards/accuracies": 0.734375,
232
- "rewards/chosen": -0.028790492564439774,
233
- "rewards/margins": 0.1489991694688797,
234
- "rewards/rejected": -0.17778967320919037,
235
- "step": 140
236
- },
237
- {
238
- "epoch": 0.31,
239
- "learning_rate": 4.33440758555951e-06,
240
- "logits/chosen": -2.459658622741699,
241
- "logits/rejected": -2.483065605163574,
242
- "logps/chosen": -267.7740478515625,
243
- "logps/rejected": -243.34609985351562,
244
- "loss": 1781.1752,
245
- "rewards/accuracies": 0.699999988079071,
246
- "rewards/chosen": -0.02288922667503357,
247
- "rewards/margins": 0.12706486880779266,
248
- "rewards/rejected": -0.14995409548282623,
249
- "step": 150
250
- },
251
- {
252
- "epoch": 0.33,
253
- "learning_rate": 4.205219043576955e-06,
254
- "logits/chosen": -2.483583688735962,
255
- "logits/rejected": -2.4244942665100098,
256
- "logps/chosen": -260.3743896484375,
257
- "logps/rejected": -258.7478332519531,
258
- "loss": 1754.5766,
259
- "rewards/accuracies": 0.784375011920929,
260
- "rewards/chosen": -0.0020265295170247555,
261
- "rewards/margins": 0.16760031878948212,
262
- "rewards/rejected": -0.169626846909523,
263
- "step": 160
264
- },
265
- {
266
- "epoch": 0.36,
267
- "learning_rate": 4.066889974440757e-06,
268
- "logits/chosen": -2.4374189376831055,
269
- "logits/rejected": -2.428433656692505,
270
- "logps/chosen": -264.5699768066406,
271
- "logps/rejected": -252.79421997070312,
272
- "loss": 1953.8818,
273
- "rewards/accuracies": 0.668749988079071,
274
- "rewards/chosen": -0.06210694834589958,
275
- "rewards/margins": 0.13285748660564423,
276
- "rewards/rejected": -0.1949644386768341,
277
- "step": 170
278
- },
279
- {
280
- "epoch": 0.38,
281
- "learning_rate": 3.92016186682789e-06,
282
- "logits/chosen": -2.467085361480713,
283
- "logits/rejected": -2.487204074859619,
284
- "logps/chosen": -262.995361328125,
285
- "logps/rejected": -271.94183349609375,
286
- "loss": 1848.9945,
287
- "rewards/accuracies": 0.721875011920929,
288
- "rewards/chosen": -0.07509048283100128,
289
- "rewards/margins": 0.12671387195587158,
290
- "rewards/rejected": -0.20180435478687286,
291
- "step": 180
292
- },
293
- {
294
- "epoch": 0.4,
295
- "learning_rate": 3.7658212309857576e-06,
296
- "logits/chosen": -2.450601816177368,
297
- "logits/rejected": -2.4304168224334717,
298
- "logps/chosen": -269.1886901855469,
299
- "logps/rejected": -265.7490539550781,
300
- "loss": 1698.6666,
301
- "rewards/accuracies": 0.746874988079071,
302
- "rewards/chosen": -0.09053254127502441,
303
- "rewards/margins": 0.14999321103096008,
304
- "rewards/rejected": -0.2405257225036621,
305
- "step": 190
306
- },
307
- {
308
- "epoch": 0.42,
309
- "learning_rate": 3.604695382782159e-06,
310
- "logits/chosen": -2.447007179260254,
311
- "logits/rejected": -2.419039726257324,
312
- "logps/chosen": -282.8253479003906,
313
- "logps/rejected": -278.14508056640625,
314
- "loss": 1700.9055,
315
- "rewards/accuracies": 0.784375011920929,
316
- "rewards/chosen": -0.07701022177934647,
317
- "rewards/margins": 0.16163742542266846,
318
- "rewards/rejected": -0.23864765465259552,
319
- "step": 200
320
- },
321
- {
322
- "epoch": 0.42,
323
- "eval_logits/chosen": -2.161839485168457,
324
- "eval_logits/rejected": -2.1081268787384033,
325
- "eval_logps/chosen": -269.66546630859375,
326
- "eval_logps/rejected": -265.3905029296875,
327
- "eval_loss": 1784.6597900390625,
328
- "eval_rewards/accuracies": 0.761904776096344,
329
- "eval_rewards/chosen": -0.0464087538421154,
330
- "eval_rewards/margins": 0.15978708863258362,
331
- "eval_rewards/rejected": -0.2061958611011505,
332
- "eval_runtime": 549.0189,
333
- "eval_samples_per_second": 3.643,
334
- "eval_steps_per_second": 0.115,
335
- "step": 200
336
- },
337
- {
338
- "epoch": 0.44,
339
- "learning_rate": 3.437648009023905e-06,
340
- "logits/chosen": -2.458688259124756,
341
- "logits/rejected": -2.4217796325683594,
342
- "logps/chosen": -252.5647430419922,
343
- "logps/rejected": -248.326416015625,
344
- "loss": 1806.0594,
345
- "rewards/accuracies": 0.7562500238418579,
346
- "rewards/chosen": -0.02530970238149166,
347
- "rewards/margins": 0.14908090233802795,
348
- "rewards/rejected": -0.17439061403274536,
349
- "step": 210
350
- },
351
- {
352
- "epoch": 0.46,
353
- "learning_rate": 3.265574537815398e-06,
354
- "logits/chosen": -2.4742610454559326,
355
- "logits/rejected": -2.4789376258850098,
356
- "logps/chosen": -286.0444030761719,
357
- "logps/rejected": -261.9767150878906,
358
- "loss": 1855.6273,
359
- "rewards/accuracies": 0.706250011920929,
360
- "rewards/chosen": -0.03405206650495529,
361
- "rewards/margins": 0.11277566105127335,
362
- "rewards/rejected": -0.14682772755622864,
363
- "step": 220
364
- },
365
- {
366
- "epoch": 0.48,
367
- "learning_rate": 3.089397338773569e-06,
368
- "logits/chosen": -2.38773775100708,
369
- "logits/rejected": -2.3718185424804688,
370
- "logps/chosen": -257.7181701660156,
371
- "logps/rejected": -253.3428955078125,
372
- "loss": 1797.9486,
373
- "rewards/accuracies": 0.721875011920929,
374
- "rewards/chosen": -0.06530335545539856,
375
- "rewards/margins": 0.1308148354291916,
376
- "rewards/rejected": -0.19611820578575134,
377
- "step": 230
378
- },
379
- {
380
- "epoch": 0.5,
381
- "learning_rate": 2.9100607788275547e-06,
382
- "logits/chosen": -2.4125852584838867,
383
- "logits/rejected": -2.414628267288208,
384
- "logps/chosen": -265.2156066894531,
385
- "logps/rejected": -257.0289001464844,
386
- "loss": 1850.0729,
387
- "rewards/accuracies": 0.690625011920929,
388
- "rewards/chosen": -0.04248107224702835,
389
- "rewards/margins": 0.12761279940605164,
390
- "rewards/rejected": -0.17009387910366058,
391
- "step": 240
392
- },
393
- {
394
- "epoch": 0.52,
395
- "learning_rate": 2.72852616010567e-06,
396
- "logits/chosen": -2.4339253902435303,
397
- "logits/rejected": -2.4054951667785645,
398
- "logps/chosen": -271.8371276855469,
399
- "logps/rejected": -255.33438110351562,
400
- "loss": 1766.1885,
401
- "rewards/accuracies": 0.753125011920929,
402
- "rewards/chosen": -0.0324532687664032,
403
- "rewards/margins": 0.1579422652721405,
404
- "rewards/rejected": -0.1903955340385437,
405
- "step": 250
406
- },
407
- {
408
- "epoch": 0.54,
409
- "learning_rate": 2.5457665670441937e-06,
410
- "logits/chosen": -2.4216437339782715,
411
- "logits/rejected": -2.4156367778778076,
412
- "logps/chosen": -266.7996520996094,
413
- "logps/rejected": -243.180419921875,
414
- "loss": 1710.8809,
415
- "rewards/accuracies": 0.753125011920929,
416
- "rewards/chosen": -0.035904210060834885,
417
- "rewards/margins": 0.16971439123153687,
418
- "rewards/rejected": -0.20561861991882324,
419
- "step": 260
420
- },
421
- {
422
- "epoch": 0.57,
423
- "learning_rate": 2.3627616503391813e-06,
424
- "logits/chosen": -2.4438915252685547,
425
- "logits/rejected": -2.416748285293579,
426
- "logps/chosen": -290.58453369140625,
427
- "logps/rejected": -277.0739440917969,
428
- "loss": 1714.5062,
429
- "rewards/accuracies": 0.765625,
430
- "rewards/chosen": -0.042676471173763275,
431
- "rewards/margins": 0.17854078114032745,
432
- "rewards/rejected": -0.22121724486351013,
433
- "step": 270
434
- },
435
- {
436
- "epoch": 0.59,
437
- "learning_rate": 2.1804923757009885e-06,
438
- "logits/chosen": -2.414602756500244,
439
- "logits/rejected": -2.4200820922851562,
440
- "logps/chosen": -282.95147705078125,
441
- "logps/rejected": -261.1886291503906,
442
- "loss": 1764.4607,
443
- "rewards/accuracies": 0.7437499761581421,
444
- "rewards/chosen": -0.07589195668697357,
445
- "rewards/margins": 0.13762618601322174,
446
- "rewards/rejected": -0.2135181427001953,
447
- "step": 280
448
- },
449
- {
450
- "epoch": 0.61,
451
- "learning_rate": 1.9999357655598894e-06,
452
- "logits/chosen": -2.430169105529785,
453
- "logits/rejected": -2.4057881832122803,
454
- "logps/chosen": -265.06805419921875,
455
- "logps/rejected": -263.2739562988281,
456
- "loss": 1786.2846,
457
- "rewards/accuracies": 0.746874988079071,
458
- "rewards/chosen": -0.010251840576529503,
459
- "rewards/margins": 0.15472975373268127,
460
- "rewards/rejected": -0.16498157382011414,
461
- "step": 290
462
- },
463
- {
464
- "epoch": 0.63,
465
- "learning_rate": 1.8220596619089576e-06,
466
- "logits/chosen": -2.392138957977295,
467
- "logits/rejected": -2.3823294639587402,
468
- "logps/chosen": -255.75393676757812,
469
- "logps/rejected": -261.84271240234375,
470
- "loss": 1767.2219,
471
- "rewards/accuracies": 0.737500011920929,
472
- "rewards/chosen": -0.04702477902173996,
473
- "rewards/margins": 0.13455010950565338,
474
- "rewards/rejected": -0.18157489597797394,
475
- "step": 300
476
- },
477
- {
478
- "epoch": 0.63,
479
- "eval_logits/chosen": -2.158698797225952,
480
- "eval_logits/rejected": -2.1057095527648926,
481
- "eval_logps/chosen": -269.6955871582031,
482
- "eval_logps/rejected": -264.77947998046875,
483
- "eval_loss": 1735.518310546875,
484
- "eval_rewards/accuracies": 0.7698412537574768,
485
- "eval_rewards/chosen": -0.04671022295951843,
486
- "eval_rewards/margins": 0.15337513387203217,
487
- "eval_rewards/rejected": -0.2000853717327118,
488
- "eval_runtime": 548.7136,
489
- "eval_samples_per_second": 3.645,
490
- "eval_steps_per_second": 0.115,
491
- "step": 300
492
- },
493
- {
494
- "epoch": 0.65,
495
- "learning_rate": 1.647817538357072e-06,
496
- "logits/chosen": -2.4140188694000244,
497
- "logits/rejected": -2.4031002521514893,
498
- "logps/chosen": -274.3767395019531,
499
- "logps/rejected": -259.40155029296875,
500
- "loss": 1673.9693,
501
- "rewards/accuracies": 0.746874988079071,
502
- "rewards/chosen": -0.04385297745466232,
503
- "rewards/margins": 0.1517268717288971,
504
- "rewards/rejected": -0.19557985663414001,
505
- "step": 310
506
- },
507
- {
508
- "epoch": 0.67,
509
- "learning_rate": 1.4781433892011132e-06,
510
- "logits/chosen": -2.416640520095825,
511
- "logits/rejected": -2.370535135269165,
512
- "logps/chosen": -252.216064453125,
513
- "logps/rejected": -255.1393280029297,
514
- "loss": 1673.8594,
515
- "rewards/accuracies": 0.7593749761581421,
516
- "rewards/chosen": -0.043931327760219574,
517
- "rewards/margins": 0.16487570106983185,
518
- "rewards/rejected": -0.20880703628063202,
519
- "step": 320
520
- },
521
- {
522
- "epoch": 0.69,
523
- "learning_rate": 1.3139467229135999e-06,
524
- "logits/chosen": -2.362358570098877,
525
- "logits/rejected": -2.3449196815490723,
526
- "logps/chosen": -270.876220703125,
527
- "logps/rejected": -259.251953125,
528
- "loss": 1731.3877,
529
- "rewards/accuracies": 0.753125011920929,
530
- "rewards/chosen": -0.03340950980782509,
531
- "rewards/margins": 0.13931182026863098,
532
- "rewards/rejected": -0.17272132635116577,
533
- "step": 330
534
- },
535
- {
536
- "epoch": 0.71,
537
- "learning_rate": 1.1561076868822756e-06,
538
- "logits/chosen": -2.3923397064208984,
539
- "logits/rejected": -2.384582281112671,
540
- "logps/chosen": -284.8360290527344,
541
- "logps/rejected": -257.0713806152344,
542
- "loss": 1778.2957,
543
- "rewards/accuracies": 0.7718750238418579,
544
- "rewards/chosen": -0.039347052574157715,
545
- "rewards/margins": 0.1650826632976532,
546
- "rewards/rejected": -0.20442970097064972,
547
- "step": 340
548
- },
549
- {
550
- "epoch": 0.73,
551
- "learning_rate": 1.0054723495346484e-06,
552
- "logits/chosen": -2.3869528770446777,
553
- "logits/rejected": -2.3370375633239746,
554
- "logps/chosen": -259.75189208984375,
555
- "logps/rejected": -231.13577270507812,
556
- "loss": 1665.3461,
557
- "rewards/accuracies": 0.7437499761581421,
558
- "rewards/chosen": -0.03856384754180908,
559
- "rewards/margins": 0.1649591028690338,
560
- "rewards/rejected": -0.2035229504108429,
561
- "step": 350
562
- },
563
- {
564
- "epoch": 0.75,
565
- "learning_rate": 8.628481651367876e-07,
566
- "logits/chosen": -2.4105262756347656,
567
- "logits/rejected": -2.352128744125366,
568
- "logps/chosen": -269.03790283203125,
569
- "logps/rejected": -247.90872192382812,
570
- "loss": 1665.3982,
571
- "rewards/accuracies": 0.7875000238418579,
572
- "rewards/chosen": -0.03317265957593918,
573
- "rewards/margins": 0.1719072014093399,
574
- "rewards/rejected": -0.20507986843585968,
575
- "step": 360
576
- },
577
- {
578
- "epoch": 0.77,
579
- "learning_rate": 7.289996455765749e-07,
580
- "logits/chosen": -2.4054064750671387,
581
- "logits/rejected": -2.38871431350708,
582
- "logps/chosen": -279.2740173339844,
583
- "logps/rejected": -259.63690185546875,
584
- "loss": 1704.7645,
585
- "rewards/accuracies": 0.7124999761581421,
586
- "rewards/chosen": -0.07123039662837982,
587
- "rewards/margins": 0.15871620178222656,
588
- "rewards/rejected": -0.22994661331176758,
589
- "step": 370
590
- },
591
- {
592
- "epoch": 0.8,
593
- "learning_rate": 6.046442623320145e-07,
594
- "logits/chosen": -2.3605639934539795,
595
- "logits/rejected": -2.368460178375244,
596
- "logps/chosen": -267.2261657714844,
597
- "logps/rejected": -260.45550537109375,
598
- "loss": 1647.7326,
599
- "rewards/accuracies": 0.734375,
600
- "rewards/chosen": -0.0854114517569542,
601
- "rewards/margins": 0.15202030539512634,
602
- "rewards/rejected": -0.23743176460266113,
603
- "step": 380
604
- },
605
  {
606
  "epoch": 0.82,
607
- "learning_rate": 4.904486005914027e-07,
608
- "logits/chosen": -2.4076011180877686,
609
- "logits/rejected": -2.3770554065704346,
610
- "logps/chosen": -292.8500061035156,
611
- "logps/rejected": -292.0636291503906,
612
- "loss": 1739.5414,
613
- "rewards/accuracies": 0.746874988079071,
614
- "rewards/chosen": -0.06398223340511322,
615
- "rewards/margins": 0.15206970274448395,
616
- "rewards/rejected": -0.21605193614959717,
617
- "step": 390
618
- },
619
- {
620
- "epoch": 0.84,
621
- "learning_rate": 3.8702478614051353e-07,
622
- "logits/chosen": -2.3384757041931152,
623
- "logits/rejected": -2.3366100788116455,
624
- "logps/chosen": -259.2252502441406,
625
- "logps/rejected": -265.5692138671875,
626
- "loss": 1717.4336,
627
- "rewards/accuracies": 0.7124999761581421,
628
- "rewards/chosen": -0.09164019674062729,
629
- "rewards/margins": 0.14173154532909393,
630
- "rewards/rejected": -0.23337173461914062,
631
- "step": 400
632
- },
633
- {
634
- "epoch": 0.84,
635
- "eval_logits/chosen": -2.088451385498047,
636
- "eval_logits/rejected": -2.032222032546997,
637
- "eval_logps/chosen": -271.9333190917969,
638
- "eval_logps/rejected": -267.85687255859375,
639
- "eval_loss": 1721.676513671875,
640
- "eval_rewards/accuracies": 0.77182537317276,
641
- "eval_rewards/chosen": -0.06908722221851349,
642
- "eval_rewards/margins": 0.1617719829082489,
643
- "eval_rewards/rejected": -0.23085922002792358,
644
- "eval_runtime": 548.423,
645
- "eval_samples_per_second": 3.647,
646
- "eval_steps_per_second": 0.115,
647
- "step": 400
648
- },
649
- {
650
- "epoch": 0.86,
651
- "learning_rate": 2.9492720416985004e-07,
652
- "logits/chosen": -2.3725836277008057,
653
- "logits/rejected": -2.3304688930511475,
654
- "logps/chosen": -296.72991943359375,
655
- "logps/rejected": -266.0842590332031,
656
- "loss": 1755.1898,
657
- "rewards/accuracies": 0.7406250238418579,
658
- "rewards/chosen": -0.0669253021478653,
659
- "rewards/margins": 0.16722533106803894,
660
- "rewards/rejected": -0.23415064811706543,
661
- "step": 410
662
- },
663
- {
664
- "epoch": 0.88,
665
- "learning_rate": 2.1464952759020857e-07,
666
- "logits/chosen": -2.3592472076416016,
667
- "logits/rejected": -2.331540107727051,
668
- "logps/chosen": -266.99005126953125,
669
- "logps/rejected": -292.03680419921875,
670
- "loss": 1730.9672,
671
- "rewards/accuracies": 0.706250011920929,
672
- "rewards/chosen": -0.08814045041799545,
673
- "rewards/margins": 0.12336041778326035,
674
- "rewards/rejected": -0.2115008533000946,
675
- "step": 420
676
- },
677
- {
678
- "epoch": 0.9,
679
- "learning_rate": 1.4662207078575685e-07,
680
- "logits/chosen": -2.366381883621216,
681
- "logits/rejected": -2.35951566696167,
682
- "logps/chosen": -280.3116149902344,
683
- "logps/rejected": -281.93939208984375,
684
- "loss": 1760.3617,
685
- "rewards/accuracies": 0.7281249761581421,
686
- "rewards/chosen": -0.08124328404664993,
687
- "rewards/margins": 0.14409320056438446,
688
- "rewards/rejected": -0.2253364771604538,
689
- "step": 430
690
- },
691
- {
692
- "epoch": 0.92,
693
- "learning_rate": 9.120948298936422e-08,
694
- "logits/chosen": -2.3266444206237793,
695
- "logits/rejected": -2.2898497581481934,
696
- "logps/chosen": -243.9964141845703,
697
- "logps/rejected": -248.1795196533203,
698
- "loss": 1711.7143,
699
- "rewards/accuracies": 0.75,
700
- "rewards/chosen": -0.08177933841943741,
701
- "rewards/margins": 0.15183614194393158,
702
- "rewards/rejected": -0.23361548781394958,
703
- "step": 440
704
- },
705
- {
706
- "epoch": 0.94,
707
- "learning_rate": 4.870879364444109e-08,
708
- "logits/chosen": -2.3814821243286133,
709
- "logits/rejected": -2.4406635761260986,
710
- "logps/chosen": -275.4070129394531,
711
- "logps/rejected": -278.91082763671875,
712
- "loss": 1743.9877,
713
- "rewards/accuracies": 0.7406250238418579,
714
- "rewards/chosen": -0.06510698050260544,
715
- "rewards/margins": 0.14476314187049866,
716
- "rewards/rejected": -0.2098701000213623,
717
- "step": 450
718
- },
719
- {
720
- "epoch": 0.96,
721
- "learning_rate": 1.93478202307823e-08,
722
- "logits/chosen": -2.34289288520813,
723
- "logits/rejected": -2.346625804901123,
724
- "logps/chosen": -270.3787841796875,
725
- "logps/rejected": -275.61651611328125,
726
- "loss": 1676.5176,
727
- "rewards/accuracies": 0.737500011920929,
728
- "rewards/chosen": -0.07770398259162903,
729
- "rewards/margins": 0.16223737597465515,
730
- "rewards/rejected": -0.23994135856628418,
731
- "step": 460
732
- },
733
- {
734
- "epoch": 0.98,
735
- "learning_rate": 3.283947088983663e-09,
736
- "logits/chosen": -2.3888649940490723,
737
- "logits/rejected": -2.4120144844055176,
738
- "logps/chosen": -261.28997802734375,
739
- "logps/rejected": -261.67755126953125,
740
- "loss": 1663.4154,
741
- "rewards/accuracies": 0.768750011920929,
742
- "rewards/chosen": -0.0687546655535698,
743
- "rewards/margins": 0.16177912056446075,
744
- "rewards/rejected": -0.23053380846977234,
745
- "step": 470
746
- },
747
- {
748
- "epoch": 1.0,
749
- "step": 477,
750
  "total_flos": 0.0,
751
- "train_loss": 1826.8015694608227,
752
- "train_runtime": 32379.7062,
753
- "train_samples_per_second": 1.888,
754
- "train_steps_per_second": 0.015
755
  }
756
  ],
757
  "logging_steps": 10,
758
- "max_steps": 477,
759
  "num_input_tokens_seen": 0,
760
  "num_train_epochs": 1,
761
  "save_steps": 100,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8205128205128205,
5
  "eval_steps": 100,
6
+ "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.41,
13
+ "learning_rate": 5e-06,
14
+ "logits/chosen": -2.6451878547668457,
15
+ "logits/rejected": -2.6389708518981934,
16
+ "logps/chosen": -317.35162353515625,
17
+ "logps/rejected": -283.9225769042969,
18
+ "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
22
  "rewards/rejected": 0.0,
23
  "step": 1
24
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  {
26
  "epoch": 0.82,
27
+ "step": 2,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "total_flos": 0.0,
29
+ "train_loss": 0.6931473016738892,
30
+ "train_runtime": 124.4347,
31
+ "train_samples_per_second": 2.451,
32
+ "train_steps_per_second": 0.016
33
  }
34
  ],
35
  "logging_steps": 10,
36
+ "max_steps": 2,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
  "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9de8831bf203a26c117251200a242a486dd5bc4f1aae373c17a996f39be3288
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b771063cb1469c5cb63a9d414b651fffe150d786ee263a7c6072b0561a3775f4
3
  size 4920