abhiGOAT/DPO
Browse files
README.md
CHANGED
@@ -18,15 +18,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on an unspecified dataset (no dataset name was recorded at training time).
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 0.
|
22 |
-
- Rewards/chosen:
|
23 |
-
- Rewards/rejected:
|
24 |
-
- Rewards/accuracies: 0.
|
25 |
-
- Rewards/margins: 0.
|
26 |
-
- Logps/rejected: -
|
27 |
-
- Logps/chosen: -
|
28 |
-
- Logits/rejected: -2.
|
29 |
-
- Logits/chosen: -2.
|
30 |
|
31 |
## Model description
|
32 |
|
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
|
|
59 |
|
60 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
61 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 1.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
81 |
-
| 0.
|
82 |
-
| 0.
|
83 |
-
| 0.
|
84 |
-
| 0.
|
85 |
-
|
|
86 |
-
| 0.
|
87 |
|
88 |
|
89 |
### Framework versions
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on an unspecified dataset (no dataset name was recorded at training time).
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.6944
|
22 |
+
- Rewards/chosen: 0.2782
|
23 |
+
- Rewards/rejected: 0.0543
|
24 |
+
- Rewards/accuracies: 0.5385
|
25 |
+
- Rewards/margins: 0.2239
|
26 |
+
- Logps/rejected: -187.8588
|
27 |
+
- Logps/chosen: -166.3796
|
28 |
+
- Logits/rejected: -2.4215
|
29 |
+
- Logits/chosen: -2.4790
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
59 |
|
60 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
61 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
62 |
+
| 0.7027 | 0.0 | 10 | 0.6989 | 0.0816 | 0.0881 | 0.5577 | -0.0065 | -187.5204 | -168.3459 | -2.4271 | -2.4774 |
|
63 |
+
| 0.6833 | 0.0 | 20 | 0.7017 | -0.0375 | -0.0327 | 0.5288 | -0.0048 | -188.7280 | -169.5362 | -2.4376 | -2.4828 |
|
64 |
+
| 0.867 | 0.0 | 30 | 0.7193 | -0.3147 | -0.3086 | 0.5385 | -0.0061 | -191.4871 | -172.3083 | -2.4532 | -2.4942 |
|
65 |
+
| 0.8962 | 0.0 | 40 | 0.7068 | -0.2076 | -0.2208 | 0.5577 | 0.0132 | -190.6093 | -171.2371 | -2.4597 | -2.5054 |
|
66 |
+
| 0.7467 | 0.0 | 50 | 0.7008 | 0.1918 | 0.1648 | 0.5577 | 0.0270 | -186.7531 | -167.2434 | -2.4630 | -2.5116 |
|
67 |
+
| 0.7335 | 0.0 | 60 | 0.6972 | 0.3949 | 0.3373 | 0.5385 | 0.0576 | -185.0280 | -165.2124 | -2.4666 | -2.5130 |
|
68 |
+
| 0.587 | 0.01 | 70 | 0.7116 | 0.6763 | 0.6193 | 0.4904 | 0.0570 | -182.2083 | -162.3980 | -2.4675 | -2.5126 |
|
69 |
+
| 0.675 | 0.01 | 80 | 0.7330 | 0.8676 | 0.8385 | 0.5096 | 0.0291 | -180.0161 | -160.4852 | -2.4726 | -2.5171 |
|
70 |
+
| 0.6117 | 0.01 | 90 | 0.7454 | 0.9576 | 0.9300 | 0.5192 | 0.0276 | -179.1016 | -159.5854 | -2.4757 | -2.5229 |
|
71 |
+
| 0.5697 | 0.01 | 100 | 0.7715 | 0.9933 | 0.9991 | 0.5 | -0.0059 | -178.4101 | -159.2286 | -2.4736 | -2.5233 |
|
72 |
+
| 1.1319 | 0.01 | 110 | 0.7652 | 0.9034 | 0.8862 | 0.4904 | 0.0172 | -179.5398 | -160.1275 | -2.4696 | -2.5215 |
|
73 |
+
| 0.5912 | 0.01 | 120 | 0.7476 | 0.7562 | 0.7007 | 0.5096 | 0.0555 | -181.3943 | -161.5994 | -2.4661 | -2.5186 |
|
74 |
+
| 0.702 | 0.01 | 130 | 0.7400 | 0.7400 | 0.6590 | 0.5192 | 0.0810 | -181.8113 | -161.7616 | -2.4642 | -2.5211 |
|
75 |
+
| 0.5566 | 0.01 | 140 | 0.7332 | 0.6338 | 0.5293 | 0.5288 | 0.1044 | -183.1082 | -162.8238 | -2.4650 | -2.5222 |
|
76 |
+
| 0.7823 | 0.01 | 150 | 0.7327 | 0.5429 | 0.4408 | 0.5385 | 0.1022 | -183.9939 | -163.7323 | -2.4645 | -2.5191 |
|
77 |
+
| 0.7549 | 0.01 | 160 | 0.7282 | 0.3954 | 0.2907 | 0.5481 | 0.1047 | -185.4949 | -165.2079 | -2.4612 | -2.5138 |
|
78 |
+
| 0.6506 | 0.01 | 170 | 0.7262 | 0.3748 | 0.2716 | 0.5192 | 0.1031 | -185.6850 | -165.4137 | -2.4579 | -2.5102 |
|
79 |
+
| 0.559 | 0.01 | 180 | 0.7320 | 0.4578 | 0.3604 | 0.5096 | 0.0974 | -184.7973 | -164.5831 | -2.4589 | -2.5109 |
|
80 |
+
| 0.9496 | 0.02 | 190 | 0.7150 | 0.4227 | 0.2889 | 0.5192 | 0.1339 | -185.5128 | -164.9340 | -2.4480 | -2.5007 |
|
81 |
+
| 0.7996 | 0.02 | 200 | 0.7034 | 0.4051 | 0.2378 | 0.5288 | 0.1673 | -186.0234 | -165.1101 | -2.4391 | -2.4926 |
|
82 |
+
| 0.5733 | 0.02 | 210 | 0.6977 | 0.3946 | 0.2110 | 0.5288 | 0.1836 | -186.2916 | -165.2155 | -2.4327 | -2.4875 |
|
83 |
+
| 0.5796 | 0.02 | 220 | 0.6981 | 0.3933 | 0.1983 | 0.5288 | 0.1949 | -186.4181 | -165.2286 | -2.4260 | -2.4824 |
|
84 |
+
| 0.6435 | 0.02 | 230 | 0.6976 | 0.3726 | 0.1714 | 0.5288 | 0.2012 | -186.6871 | -165.4354 | -2.4237 | -2.4807 |
|
85 |
+
| 0.5993 | 0.02 | 240 | 0.6958 | 0.3088 | 0.0929 | 0.5385 | 0.2159 | -187.4724 | -166.0730 | -2.4222 | -2.4799 |
|
86 |
+
| 0.9077 | 0.02 | 250 | 0.6944 | 0.2782 | 0.0543 | 0.5385 | 0.2239 | -187.8588 | -166.3796 | -2.4215 | -2.4790 |
|
87 |
|
88 |
|
89 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -19,8 +19,8 @@
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"
|
23 |
-
"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM",
|
26 |
"use_rslora": false
|
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
+
"v_proj",
|
23 |
+
"q_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM",
|
26 |
"use_rslora": false
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6832600
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbac6be429a2677b6317164bcc61ed4a441a5e488298e46187f7fd7aa303e039
|
3 |
size 6832600
|
runs/Feb14_07-24-15_fd2fd7522c82/events.out.tfevents.1707895529.fd2fd7522c82.26.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee840b75cfbb662f5ca91fabc4fe107303af8c51d4b5901e9e65bce399dc156e
|
3 |
+
size 40089
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4219
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3216a3e8591b06deecc4f61758a69917f71c4bf5d2ed2271156507c2f8c89be7
|
3 |
size 4219
|