End of training
README.md CHANGED
@@ -1,297 +1,57 @@
 ---
 base_model: meta-llama/Llama-3.2-3B-Instruct
-library_name:
-
+library_name: transformers
+model_name: Llama-3.2-3B-lora-rps-adapter
 tags:
+- generated_from_trainer
 - trl
 - sft
-
-model-index:
-- name: Llama-3.2-3B-lora-rps-adapter
-  results: []
+licence: license
 ---
 
-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
-
-# Llama-3.2-3B-lora-rps-adapter
-
-This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the None dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.4600
-
-## Model description
-
-More information needed
-
-## Intended uses & limitations
-
-More information needed
-
-## Training and evaluation data
-
-More information needed
-
-## Training procedure
-
-### Training hyperparameters
-
-The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 2
-- eval_batch_size: 2
-- seed: 42
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
-- lr_scheduler_warmup_ratio: 0.03
-- num_epochs: 8
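The hyperparameter list removed above maps almost one-to-one onto TRL's `SFTConfig`. As a rough reconstruction of the training setup, here is a minimal sketch assuming the TRL `SFTTrainer` API; the dataset is a hypothetical placeholder, since the old card never named the training data, and the evaluation setup is omitted:

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

# Hypothetical placeholder dataset: the old card did not name the training data.
train_dataset = load_dataset("trl-lib/Capybara", split="train")

# Mirrors the removed hyperparameter block; the Adam betas/epsilon listed there
# are the TrainingArguments defaults, so they need no explicit setting.
config = SFTConfig(
    output_dir="Llama-3.2-3B-lora-rps-adapter",
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    lr_scheduler_type="linear",
    warmup_ratio=0.03,
    num_train_epochs=8,
)

trainer = SFTTrainer(
    model="meta-llama/Llama-3.2-3B-Instruct",
    args=config,
    train_dataset=train_dataset,
)
trainer.train()
```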
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:------:|:-----:|:---------------:|
-| 0.1498 | 4.1213 | 25000 | 0.3963 |
-| 0.1593 | 4.1378 | 25100 | 0.3952 |
-| 0.1645 | 4.1543 | 25200 | 0.3958 |
-| 0.1665 | 4.1708 | 25300 | 0.3943 |
-| 0.1602 | 4.1873 | 25400 | 0.3938 |
-| 0.1604 | 4.2038 | 25500 | 0.3921 |
-| 0.1722 | 4.2202 | 25600 | 0.3916 |
-| 0.1812 | 4.2367 | 25700 | 0.3915 |
-| 0.1658 | 4.2532 | 25800 | 0.4006 |
-| 0.1677 | 4.2697 | 25900 | 0.3978 |
-| 0.1749 | 4.2862 | 26000 | 0.3941 |
-| 0.1819 | 4.3027 | 26100 | 0.3924 |
-| 0.1747 | 4.3192 | 26200 | 0.3941 |
-| 0.1602 | 4.3356 | 26300 | 0.3945 |
-| 0.1659 | 4.3521 | 26400 | 0.3957 |
-| 0.1692 | 4.3686 | 26500 | 0.3951 |
-| 0.1759 | 4.3851 | 26600 | 0.3945 |
-| 0.1714 | 4.4016 | 26700 | 0.3934 |
-| 0.1678 | 4.4181 | 26800 | 0.3925 |
-| 0.1604 | 4.4346 | 26900 | 0.3947 |
-| 0.1694 | 4.4510 | 27000 | 0.3981 |
-| 0.1761 | 4.4675 | 27100 | 0.3931 |
-| 0.189 | 4.4840 | 27200 | 0.3926 |
-| 0.1892 | 4.5005 | 27300 | 0.3933 |
-| 0.1713 | 4.5170 | 27400 | 0.3941 |
-| 0.1693 | 4.5335 | 27500 | 0.3941 |
-| 0.1721 | 4.5500 | 27600 | 0.3932 |
-| 0.187 | 4.5664 | 27700 | 0.3932 |
-| 0.1684 | 4.5829 | 27800 | 0.3960 |
-| 0.1851 | 4.5994 | 27900 | 0.3933 |
-| 0.169 | 4.6159 | 28000 | 0.3931 |
-| 0.1675 | 4.6324 | 28100 | 0.3895 |
-| 0.177 | 4.6489 | 28200 | 0.3916 |
-| 0.183 | 4.6653 | 28300 | 0.3915 |
-| 0.1834 | 4.6818 | 28400 | 0.3839 |
-| 0.1839 | 4.6983 | 28500 | 0.3863 |
-| 0.1785 | 4.7148 | 28600 | 0.3889 |
-| 0.1762 | 4.7313 | 28700 | 0.3859 |
-| 0.1805 | 4.7478 | 28800 | 0.3878 |
-| 0.1616 | 4.7643 | 28900 | 0.3866 |
-| 0.1796 | 4.7807 | 29000 | 0.3832 |
-| 0.1797 | 4.7972 | 29100 | 0.3860 |
-| 0.1641 | 4.8137 | 29200 | 0.3856 |
-| 0.1844 | 4.8302 | 29300 | 0.3855 |
-| 0.1736 | 4.8467 | 29400 | 0.3861 |
-| 0.163 | 4.8632 | 29500 | 0.3850 |
-| 0.2074 | 4.8797 | 29600 | 0.3880 |
-| 0.1709 | 4.8961 | 29700 | 0.3884 |
-| 0.1682 | 4.9126 | 29800 | 0.3855 |
-| 0.1811 | 4.9291 | 29900 | 0.3883 |
-| 0.1671 | 4.9456 | 30000 | 0.3872 |
-| 0.1796 | 4.9621 | 30100 | 0.3863 |
-| 0.1646 | 4.9786 | 30200 | 0.3853 |
-| 0.1624 | 4.9951 | 30300 | 0.3872 |
-| 0.1317 | 5.0115 | 30400 | 0.4029 |
-| 0.1126 | 5.0280 | 30500 | 0.4068 |
-| 0.1344 | 5.0445 | 30600 | 0.4076 |
-| 0.1202 | 5.0610 | 30700 | 0.4078 |
-| 0.1267 | 5.0775 | 30800 | 0.4077 |
-| 0.1288 | 5.0940 | 30900 | 0.4058 |
-| 0.1216 | 5.1105 | 31000 | 0.4117 |
-| 0.1142 | 5.1269 | 31100 | 0.4109 |
-| 0.1221 | 5.1434 | 31200 | 0.4053 |
-| 0.1234 | 5.1599 | 31300 | 0.4092 |
-| 0.1232 | 5.1764 | 31400 | 0.4098 |
-| 0.1269 | 5.1929 | 31500 | 0.4102 |
-| 0.1169 | 5.2094 | 31600 | 0.4068 |
-| 0.1385 | 5.2258 | 31700 | 0.4055 |
-| 0.1163 | 5.2423 | 31800 | 0.4106 |
-| 0.1233 | 5.2588 | 31900 | 0.4075 |
-| 0.116 | 5.2753 | 32000 | 0.4088 |
-| 0.1336 | 5.2918 | 32100 | 0.4045 |
-| 0.1167 | 5.3083 | 32200 | 0.4101 |
-| 0.1202 | 5.3248 | 32300 | 0.4076 |
-| 0.1229 | 5.3412 | 32400 | 0.4091 |
-| 0.1213 | 5.3577 | 32500 | 0.4078 |
-| 0.1316 | 5.3742 | 32600 | 0.4075 |
-| 0.1245 | 5.3907 | 32700 | 0.4067 |
-| 0.1208 | 5.4072 | 32800 | 0.4083 |
-| 0.1281 | 5.4237 | 32900 | 0.4089 |
-| 0.1214 | 5.4402 | 33000 | 0.4094 |
-| 0.1149 | 5.4566 | 33100 | 0.4072 |
-| 0.1218 | 5.4731 | 33200 | 0.4060 |
-| 0.1178 | 5.4896 | 33300 | 0.4079 |
-| 0.1272 | 5.5061 | 33400 | 0.4057 |
-| 0.1258 | 5.5226 | 33500 | 0.4080 |
-| 0.1213 | 5.5391 | 33600 | 0.4089 |
-| 0.1161 | 5.5556 | 33700 | 0.4121 |
-| 0.1325 | 5.5720 | 33800 | 0.4057 |
-| 0.1219 | 5.5885 | 33900 | 0.4083 |
-| 0.1247 | 5.6050 | 34000 | 0.4074 |
-| 0.1233 | 5.6215 | 34100 | 0.4084 |
-| 0.1211 | 5.6380 | 34200 | 0.4091 |
-| 0.1315 | 5.6545 | 34300 | 0.4090 |
-| 0.1183 | 5.6710 | 34400 | 0.4084 |
-| 0.1256 | 5.6874 | 34500 | 0.4088 |
-| 0.1168 | 5.7039 | 34600 | 0.4079 |
-| 0.1394 | 5.7204 | 34700 | 0.4050 |
-| 0.124 | 5.7369 | 34800 | 0.4065 |
-| 0.1299 | 5.7534 | 34900 | 0.4052 |
-| 0.1152 | 5.7699 | 35000 | 0.4039 |
-| 0.138 | 5.7864 | 35100 | 0.4050 |
-| 0.1137 | 5.8028 | 35200 | 0.4073 |
-| 0.1284 | 5.8193 | 35300 | 0.4027 |
-| 0.1192 | 5.8358 | 35400 | 0.4045 |
-| 0.1358 | 5.8523 | 35500 | 0.4051 |
-| 0.1262 | 5.8688 | 35600 | 0.4035 |
-| 0.1289 | 5.8853 | 35700 | 0.4049 |
-| 0.1296 | 5.9017 | 35800 | 0.4059 |
-| 0.1319 | 5.9182 | 35900 | 0.4051 |
-| 0.1259 | 5.9347 | 36000 | 0.4025 |
-| 0.1217 | 5.9512 | 36100 | 0.4068 |
-| 0.1127 | 5.9677 | 36200 | 0.4058 |
-| 0.1216 | 5.9842 | 36300 | 0.4020 |
-| 0.1279 | 6.0007 | 36400 | 0.4037 |
-| 0.0806 | 6.0171 | 36500 | 0.4302 |
-| 0.0795 | 6.0336 | 36600 | 0.4248 |
-| 0.0861 | 6.0501 | 36700 | 0.4310 |
-| 0.0891 | 6.0666 | 36800 | 0.4311 |
-| 0.0771 | 6.0831 | 36900 | 0.4324 |
-| 0.0757 | 6.0996 | 37000 | 0.4304 |
-| 0.0777 | 6.1161 | 37100 | 0.4297 |
-| 0.0753 | 6.1325 | 37200 | 0.4281 |
-| 0.0822 | 6.1490 | 37300 | 0.4284 |
-| 0.0799 | 6.1655 | 37400 | 0.4320 |
-| 0.0915 | 6.1820 | 37500 | 0.4298 |
-| 0.0772 | 6.1985 | 37600 | 0.4291 |
-| 0.0797 | 6.2150 | 37700 | 0.4269 |
-| 0.0854 | 6.2315 | 37800 | 0.4307 |
-| 0.0838 | 6.2479 | 37900 | 0.4309 |
-| 0.0935 | 6.2644 | 38000 | 0.4262 |
-| 0.0864 | 6.2809 | 38100 | 0.4247 |
-| 0.0847 | 6.2974 | 38200 | 0.4272 |
-| 0.08 | 6.3139 | 38300 | 0.4311 |
-| 0.0909 | 6.3304 | 38400 | 0.4327 |
-| 0.0822 | 6.3469 | 38500 | 0.4263 |
-| 0.0808 | 6.3633 | 38600 | 0.4310 |
-| 0.0867 | 6.3798 | 38700 | 0.4285 |
-| 0.0795 | 6.3963 | 38800 | 0.4298 |
-| 0.097 | 6.4128 | 38900 | 0.4301 |
-| 0.0802 | 6.4293 | 39000 | 0.4248 |
-| 0.0937 | 6.4458 | 39100 | 0.4310 |
-| 0.0808 | 6.4622 | 39200 | 0.4270 |
-| 0.0773 | 6.4787 | 39300 | 0.4314 |
-| 0.0853 | 6.4952 | 39400 | 0.4296 |
-| 0.0831 | 6.5117 | 39500 | 0.4305 |
-| 0.0935 | 6.5282 | 39600 | 0.4278 |
-| 0.0839 | 6.5447 | 39700 | 0.4269 |
-| 0.079 | 6.5612 | 39800 | 0.4299 |
-| 0.0767 | 6.5776 | 39900 | 0.4286 |
-| 0.0817 | 6.5941 | 40000 | 0.4275 |
-| 0.0795 | 6.6106 | 40100 | 0.4289 |
-| 0.0886 | 6.6271 | 40200 | 0.4236 |
-| 0.0827 | 6.6436 | 40300 | 0.4298 |
-| 0.0845 | 6.6601 | 40400 | 0.4281 |
-| 0.0817 | 6.6766 | 40500 | 0.4267 |
-| 0.0843 | 6.6930 | 40600 | 0.4258 |
-| 0.0772 | 6.7095 | 40700 | 0.4268 |
-| 0.0801 | 6.7260 | 40800 | 0.4315 |
-| 0.0828 | 6.7425 | 40900 | 0.4267 |
-| 0.087 | 6.7590 | 41000 | 0.4271 |
-| 0.0901 | 6.7755 | 41100 | 0.4280 |
-| 0.0788 | 6.7920 | 41200 | 0.4275 |
-| 0.0815 | 6.8084 | 41300 | 0.4291 |
-| 0.0774 | 6.8249 | 41400 | 0.4286 |
-| 0.0868 | 6.8414 | 41500 | 0.4275 |
-| 0.0834 | 6.8579 | 41600 | 0.4265 |
-| 0.0846 | 6.8744 | 41700 | 0.4257 |
-| 0.0798 | 6.8909 | 41800 | 0.4257 |
-| 0.0795 | 6.9074 | 41900 | 0.4277 |
-| 0.0849 | 6.9238 | 42000 | 0.4292 |
-| 0.0936 | 6.9403 | 42100 | 0.4267 |
-| 0.0715 | 6.9568 | 42200 | 0.4312 |
-| 0.0857 | 6.9733 | 42300 | 0.4291 |
-| 0.0887 | 6.9898 | 42400 | 0.4275 |
-| 0.0681 | 7.0063 | 42500 | 0.4423 |
-| 0.0567 | 7.0227 | 42600 | 0.4505 |
-| 0.067 | 7.0392 | 42700 | 0.4514 |
-| 0.0631 | 7.0557 | 42800 | 0.4559 |
-| 0.0515 | 7.0722 | 42900 | 0.4562 |
-| 0.0559 | 7.0887 | 43000 | 0.4583 |
-| 0.0501 | 7.1052 | 43100 | 0.4566 |
-| 0.0529 | 7.1217 | 43200 | 0.4567 |
-| 0.0514 | 7.1381 | 43300 | 0.4554 |
-| 0.0528 | 7.1546 | 43400 | 0.4566 |
-| 0.053 | 7.1711 | 43500 | 0.4562 |
-| 0.053 | 7.1876 | 43600 | 0.4569 |
-| 0.0531 | 7.2041 | 43700 | 0.4555 |
-| 0.0479 | 7.2206 | 43800 | 0.4595 |
-| 0.0524 | 7.2371 | 43900 | 0.4567 |
-| 0.0502 | 7.2535 | 44000 | 0.4605 |
-| 0.0488 | 7.2700 | 44100 | 0.4591 |
-| 0.0551 | 7.2865 | 44200 | 0.4603 |
-| 0.0557 | 7.3030 | 44300 | 0.4580 |
-| 0.0522 | 7.3195 | 44400 | 0.4599 |
-| 0.0583 | 7.3360 | 44500 | 0.4583 |
-| 0.0525 | 7.3525 | 44600 | 0.4585 |
-| 0.0557 | 7.3689 | 44700 | 0.4572 |
-| 0.0521 | 7.3854 | 44800 | 0.4579 |
-| 0.0523 | 7.4019 | 44900 | 0.4578 |
-| 0.0498 | 7.4184 | 45000 | 0.4585 |
-| 0.0551 | 7.4349 | 45100 | 0.4585 |
-| 0.0472 | 7.4514 | 45200 | 0.4592 |
-| 0.0511 | 7.4679 | 45300 | 0.4595 |
-| 0.0579 | 7.4843 | 45400 | 0.4593 |
-| 0.0521 | 7.5008 | 45500 | 0.4597 |
-| 0.0551 | 7.5173 | 45600 | 0.4593 |
-| 0.0539 | 7.5338 | 45700 | 0.4579 |
-| 0.0557 | 7.5503 | 45800 | 0.4571 |
-| 0.0526 | 7.5668 | 45900 | 0.4602 |
-| 0.0497 | 7.5833 | 46000 | 0.4582 |
-| 0.0487 | 7.5997 | 46100 | 0.4600 |
-| 0.0498 | 7.6162 | 46200 | 0.4586 |
-| 0.0542 | 7.6327 | 46300 | 0.4596 |
-| 0.0496 | 7.6492 | 46400 | 0.4608 |
-| 0.0467 | 7.6657 | 46500 | 0.4593 |
-| 0.0524 | 7.6822 | 46600 | 0.4597 |
-| 0.0512 | 7.6986 | 46700 | 0.4599 |
-| 0.0536 | 7.7151 | 46800 | 0.4593 |
-| 0.0483 | 7.7316 | 46900 | 0.4605 |
-| 0.0477 | 7.7481 | 47000 | 0.4593 |
-| 0.0618 | 7.7646 | 47100 | 0.4581 |
-| 0.0531 | 7.7811 | 47200 | 0.4585 |
-| 0.0561 | 7.7976 | 47300 | 0.4596 |
-| 0.0521 | 7.8140 | 47400 | 0.4594 |
-| 0.0473 | 7.8305 | 47500 | 0.4608 |
-| 0.051 | 7.8470 | 47600 | 0.4609 |
-| 0.0494 | 7.8635 | 47700 | 0.4609 |
-| 0.048 | 7.8800 | 47800 | 0.4607 |
-| 0.0533 | 7.8965 | 47900 | 0.4606 |
-| 0.0514 | 7.9130 | 48000 | 0.4607 |
-| 0.0517 | 7.9294 | 48100 | 0.4607 |
-| 0.0494 | 7.9459 | 48200 | 0.4606 |
-| 0.0517 | 7.9624 | 48300 | 0.4601 |
-| 0.0468 | 7.9789 | 48400 | 0.4601 |
-| 0.0526 | 7.9954 | 48500 | 0.4600 |
-
-
-### Framework versions
-
-
-
-
-
-
+
+# Model Card for Llama-3.2-3B-lora-rps-adapter
+
+This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="SimonMA/Llama-3.2-3B-lora-rps-adapter", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 0.12.1
+- Transformers: 4.46.3
+- Pytorch: 2.5.1+cu121
+- Datasets: 3.1.0
+- Tokenizers: 0.20.3
+
+## Citations
+
+Cite TRL as:
+
+```bibtex
+@misc{vonwerra2022trl,
+    title        = {{TRL: Transformer Reinforcement Learning}},
+    author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+    year         = 2020,
+    journal      = {GitHub repository},
+    publisher    = {GitHub},
+    howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```
adapter_config.json CHANGED
@@ -11,22 +11,25 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 8,
-  "lora_dropout": 0.
+  "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
-  "modules_to_save": null,
+  "modules_to_save": [
+    "embed_tokens",
+    "lm_head"
+  ],
   "peft_type": "LORA",
   "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "k_proj",
-    "q_proj",
-    "v_proj",
     "o_proj",
+    "q_proj",
     "up_proj",
     "gate_proj",
-    "down_proj"
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
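For reference, the updated `adapter_config.json` corresponds to a PEFT `LoraConfig` roughly like the sketch below (reconstructed from the diff, not copied from the file). Note the small `lora_alpha` relative to `r` (a scaling factor of 8/128 = 0.0625), and that `modules_to_save` now stores full, trained copies of the embedding and output head alongside the LoRA matrices.

```python
from peft import LoraConfig

# Reconstructed from the new adapter_config.json shown in the diff above.
lora_config = LoraConfig(
    r=128,
    lora_alpha=8,          # scaling = alpha / r = 0.0625
    lora_dropout=0.0,
    target_modules=["down_proj", "k_proj", "o_proj", "q_proj", "up_proj", "gate_proj", "v_proj"],
    modules_to_save=["embed_tokens", "lm_head"],  # full copies saved with the adapter
    task_type="CAUSAL_LM",
)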
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8e7044cc137419e211e98af4dc2472bb99f0cc374cd3f07a75811ec14900d6f7
+size 2354106632
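The full embedding and LM-head copies explain why this adapter weighs in at roughly 2.35 GB (2354106632 bytes) rather than the tens of megabytes typical of a pure LoRA checkpoint. To see what is inside once the LFS file has been pulled, a short sketch using the `safetensors` API (the local path is an assumption):

```python
from safetensors import safe_open

# Lists every tensor in the adapter checkpoint with its shape; expect
# lora_A/lora_B pairs plus the full embed_tokens and lm_head weights.
with safe_open("adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, tuple(f.get_slice(name).get_shape()))
```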
runs/Nov26_05-54-39_f8a27a31e4e9/events.out.tfevents.1732600486.f8a27a31e4e9.6916.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f3720fe5e87f1eadb559b829aa1a05d736d8ecfc2d1a88e57428c177a27ca3
+size 151753
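The added file under `runs/` is a TensorBoard event log. It can be browsed by pointing TensorBoard at the `runs/` directory, or read programmatically; the sketch below uses TensorBoard's event reader, and the scalar tag name is an assumption (actual tags depend on what the Trainer logged):

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Load and index the event file from this run directory.
acc = EventAccumulator("runs/Nov26_05-54-39_f8a27a31e4e9")
acc.Reload()
print(acc.Tags()["scalars"])            # e.g. ["train/loss", "eval/loss", ...]
for event in acc.Scalars("eval/loss"):  # assumed tag name
    print(event.step, event.value)
```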
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:81e4dc28a5f54d695c1667eeb187d6bb5afcae4bec7be16ed4f9beabfd6316f8
+size 5560
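`training_args.bin` is the `TrainingArguments` object that the Hugging Face `Trainer` serializes with `torch.save` alongside checkpoints, which is why it is only 5560 bytes. A sketch for restoring it for inspection (it is a pickle, so `weights_only=False` is needed on recent PyTorch):

```python
import torch

# Deserializes the TrainingArguments pickle saved by the HF Trainer.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```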