sujithatz/finbot-transofrmer-based-phi3.5_adapter
Browse files- README.md +99 -94
- adapter_config.json +4 -7
- adapter_model.safetensors +2 -2
- training_args.bin +1 -1
README.md
CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 0.
|
22 |
|
23 |
## Model description
|
24 |
|
@@ -50,99 +50,104 @@ The following hyperparameters were used during training:
|
|
50 |
|
51 |
| Training Loss | Epoch | Step | Validation Loss |
|
52 |
|:-------------:|:------:|:----:|:---------------:|
|
53 |
-
| 1.
|
54 |
-
| 1.
|
55 |
-
| 1.
|
56 |
-
|
|
57 |
-
|
|
58 |
-
| 0.
|
59 |
-
| 0.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
81 |
-
| 0.
|
82 |
-
| 0.
|
83 |
-
| 0.
|
84 |
-
| 0.
|
85 |
-
| 0.
|
86 |
-
| 0.
|
87 |
-
| 0.
|
88 |
-
| 0.
|
89 |
-
| 0.
|
90 |
-
| 0.
|
91 |
-
| 0.
|
92 |
-
| 0.
|
93 |
-
| 0.
|
94 |
-
| 0.
|
95 |
-
| 0.
|
96 |
-
| 0.
|
97 |
-
| 0.
|
98 |
-
| 0.
|
99 |
-
| 0.
|
100 |
-
| 0.
|
101 |
-
| 0.
|
102 |
-
| 0.
|
103 |
-
| 0.
|
104 |
-
| 0.
|
105 |
-
| 0.
|
106 |
-
| 0.
|
107 |
-
| 0.
|
108 |
-
| 0.
|
109 |
-
| 0.
|
110 |
-
| 0.
|
111 |
-
| 0.
|
112 |
-
| 0.
|
113 |
-
| 0.
|
114 |
-
| 0.
|
115 |
-
| 0.
|
116 |
-
| 0.
|
117 |
-
| 0.
|
118 |
-
| 0.
|
119 |
-
| 0.
|
120 |
-
| 0.
|
121 |
-
| 0.
|
122 |
-
| 0.
|
123 |
-
| 0.
|
124 |
-
| 0.
|
125 |
-
| 0.
|
126 |
-
| 0.
|
127 |
-
| 0.
|
128 |
-
| 0.
|
129 |
-
| 0.
|
130 |
-
| 0.
|
131 |
-
| 0.
|
132 |
-
| 0.
|
133 |
-
| 0.
|
134 |
-
| 0.
|
135 |
-
| 0.
|
136 |
-
| 0.
|
137 |
-
| 0.
|
138 |
-
| 0.
|
139 |
-
| 0.
|
140 |
-
| 0.
|
141 |
-
| 0.
|
142 |
-
| 0.
|
143 |
-
| 0.
|
144 |
-
| 0.
|
145 |
-
| 0.
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
|
148 |
### Framework versions
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.5655
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
50 |
|
51 |
| Training Loss | Epoch | Step | Validation Loss |
|
52 |
|:-------------:|:------:|:----:|:---------------:|
|
53 |
+
| 1.6693 | 0.0405 | 3 | 1.6168 |
|
54 |
+
| 1.3656 | 0.0811 | 6 | 1.2375 |
|
55 |
+
| 1.0964 | 0.1216 | 9 | 1.0552 |
|
56 |
+
| 0.8795 | 0.1622 | 12 | 0.9069 |
|
57 |
+
| 0.7104 | 0.2027 | 15 | 0.8512 |
|
58 |
+
| 0.7107 | 0.2432 | 18 | 0.7911 |
|
59 |
+
| 0.6494 | 0.2838 | 21 | 0.7575 |
|
60 |
+
| 0.6856 | 0.3243 | 24 | 0.7386 |
|
61 |
+
| 0.7626 | 0.3649 | 27 | 0.7219 |
|
62 |
+
| 0.6758 | 0.4054 | 30 | 0.6914 |
|
63 |
+
| 0.6051 | 0.4459 | 33 | 0.6597 |
|
64 |
+
| 0.6694 | 0.4865 | 36 | 0.6429 |
|
65 |
+
| 0.4273 | 0.5270 | 39 | 0.6305 |
|
66 |
+
| 0.6037 | 0.5676 | 42 | 0.6197 |
|
67 |
+
| 0.579 | 0.6081 | 45 | 0.6080 |
|
68 |
+
| 0.4144 | 0.6486 | 48 | 0.5822 |
|
69 |
+
| 0.5971 | 0.6892 | 51 | 0.5576 |
|
70 |
+
| 0.589 | 0.7297 | 54 | 0.5488 |
|
71 |
+
| 0.6413 | 0.7703 | 57 | 0.5471 |
|
72 |
+
| 0.5992 | 0.8108 | 60 | 0.5386 |
|
73 |
+
| 0.5468 | 0.8514 | 63 | 0.5232 |
|
74 |
+
| 0.7074 | 0.8919 | 66 | 0.5172 |
|
75 |
+
| 0.4391 | 0.9324 | 69 | 0.5085 |
|
76 |
+
| 0.6243 | 0.9730 | 72 | 0.4970 |
|
77 |
+
| 0.3784 | 1.0135 | 75 | 0.4958 |
|
78 |
+
| 0.3136 | 1.0541 | 78 | 0.4939 |
|
79 |
+
| 0.3832 | 1.0946 | 81 | 0.5047 |
|
80 |
+
| 0.3463 | 1.1351 | 84 | 0.5085 |
|
81 |
+
| 0.4151 | 1.1757 | 87 | 0.5182 |
|
82 |
+
| 0.3072 | 1.2162 | 90 | 0.5147 |
|
83 |
+
| 0.2954 | 1.2568 | 93 | 0.5210 |
|
84 |
+
| 0.3114 | 1.2973 | 96 | 0.5145 |
|
85 |
+
| 0.2628 | 1.3378 | 99 | 0.5141 |
|
86 |
+
| 0.3768 | 1.3784 | 102 | 0.5129 |
|
87 |
+
| 0.2737 | 1.4189 | 105 | 0.5058 |
|
88 |
+
| 0.3409 | 1.4595 | 108 | 0.5049 |
|
89 |
+
| 0.4281 | 1.5 | 111 | 0.4975 |
|
90 |
+
| 0.4059 | 1.5405 | 114 | 0.4848 |
|
91 |
+
| 0.4375 | 1.5811 | 117 | 0.4880 |
|
92 |
+
| 0.2794 | 1.6216 | 120 | 0.4849 |
|
93 |
+
| 0.3575 | 1.6622 | 123 | 0.4802 |
|
94 |
+
| 0.5313 | 1.7027 | 126 | 0.4834 |
|
95 |
+
| 0.2644 | 1.7432 | 129 | 0.4811 |
|
96 |
+
| 0.3878 | 1.7838 | 132 | 0.4746 |
|
97 |
+
| 0.3286 | 1.8243 | 135 | 0.4641 |
|
98 |
+
| 0.3327 | 1.8649 | 138 | 0.4564 |
|
99 |
+
| 0.2176 | 1.9054 | 141 | 0.4547 |
|
100 |
+
| 0.4059 | 1.9459 | 144 | 0.4528 |
|
101 |
+
| 0.2943 | 1.9865 | 147 | 0.4540 |
|
102 |
+
| 0.2527 | 2.0270 | 150 | 0.4554 |
|
103 |
+
| 0.4193 | 2.0676 | 153 | 0.4742 |
|
104 |
+
| 0.2857 | 2.1081 | 156 | 0.5054 |
|
105 |
+
| 0.1813 | 2.1486 | 159 | 0.5248 |
|
106 |
+
| 0.1805 | 2.1892 | 162 | 0.5251 |
|
107 |
+
| 0.0996 | 2.2297 | 165 | 0.5196 |
|
108 |
+
| 0.181 | 2.2703 | 168 | 0.5206 |
|
109 |
+
| 0.2093 | 2.3108 | 171 | 0.5134 |
|
110 |
+
| 0.1637 | 2.3514 | 174 | 0.5138 |
|
111 |
+
| 0.1239 | 2.3919 | 177 | 0.5120 |
|
112 |
+
| 0.2012 | 2.4324 | 180 | 0.5150 |
|
113 |
+
| 0.2687 | 2.4730 | 183 | 0.5152 |
|
114 |
+
| 0.1168 | 2.5135 | 186 | 0.5202 |
|
115 |
+
| 0.2365 | 2.5541 | 189 | 0.5221 |
|
116 |
+
| 0.289 | 2.5946 | 192 | 0.5174 |
|
117 |
+
| 0.1814 | 2.6351 | 195 | 0.5128 |
|
118 |
+
| 0.1923 | 2.6757 | 198 | 0.5078 |
|
119 |
+
| 0.1834 | 2.7162 | 201 | 0.5016 |
|
120 |
+
| 0.1661 | 2.7568 | 204 | 0.4995 |
|
121 |
+
| 0.1359 | 2.7973 | 207 | 0.4983 |
|
122 |
+
| 0.1327 | 2.8378 | 210 | 0.5041 |
|
123 |
+
| 0.2567 | 2.8784 | 213 | 0.5087 |
|
124 |
+
| 0.2326 | 2.9189 | 216 | 0.5074 |
|
125 |
+
| 0.2025 | 2.9595 | 219 | 0.5016 |
|
126 |
+
| 0.146 | 3.0 | 222 | 0.4954 |
|
127 |
+
| 0.1048 | 3.0405 | 225 | 0.4967 |
|
128 |
+
| 0.1218 | 3.0811 | 228 | 0.5027 |
|
129 |
+
| 0.3124 | 3.1216 | 231 | 0.5094 |
|
130 |
+
| 0.1518 | 3.1622 | 234 | 0.5186 |
|
131 |
+
| 0.2543 | 3.2027 | 237 | 0.5275 |
|
132 |
+
| 0.0982 | 3.2432 | 240 | 0.5364 |
|
133 |
+
| 0.1014 | 3.2838 | 243 | 0.5447 |
|
134 |
+
| 0.1216 | 3.3243 | 246 | 0.5548 |
|
135 |
+
| 0.0768 | 3.3649 | 249 | 0.5589 |
|
136 |
+
| 0.086 | 3.4054 | 252 | 0.5633 |
|
137 |
+
| 0.1213 | 3.4459 | 255 | 0.5654 |
|
138 |
+
| 0.1437 | 3.4865 | 258 | 0.5668 |
|
139 |
+
| 0.1341 | 3.5270 | 261 | 0.5670 |
|
140 |
+
| 0.1122 | 3.5676 | 264 | 0.5659 |
|
141 |
+
| 0.0832 | 3.6081 | 267 | 0.5662 |
|
142 |
+
| 0.0668 | 3.6486 | 270 | 0.5637 |
|
143 |
+
| 0.2023 | 3.6892 | 273 | 0.5662 |
|
144 |
+
| 0.104 | 3.7297 | 276 | 0.5640 |
|
145 |
+
| 0.1181 | 3.7703 | 279 | 0.5650 |
|
146 |
+
| 0.1242 | 3.8108 | 282 | 0.5636 |
|
147 |
+
| 0.0874 | 3.8514 | 285 | 0.5644 |
|
148 |
+
| 0.1133 | 3.8919 | 288 | 0.5637 |
|
149 |
+
| 0.073 | 3.9324 | 291 | 0.5640 |
|
150 |
+
| 0.0724 | 3.9730 | 294 | 0.5655 |
|
151 |
|
152 |
|
153 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -20,13 +20,10 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"v_proj",
|
28 |
-
"up_proj",
|
29 |
-
"o_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"gate_up_proj",
|
24 |
+
"o_proj",
|
25 |
+
"qkv_proj",
|
26 |
+
"down_proj"
|
|
|
|
|
|
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d085116f5270d42fbd3f522f7e5401ba43b2cf120ac015700e4ac0a39d893626
|
3 |
+
size 100697728
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b38ec32d6537ab3d4e404fffac63b5d9a8c23e446f15fd22f45b1947ba118c42
|
3 |
size 5432
|