sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files

Files changed (4) hide show

README.md +99 -94
adapter_config.json +4 -7
adapter_model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4642
 ## Model description
@@ -50,99 +50,104 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.675         | 0.0429 | 3    | 1.6620          |
-| 1.6239        | 0.0857 | 6    | 1.3707          |
-| 1.2962        | 0.1286 | 9    | 1.1772          |
-| 1.0739        | 0.1714 | 12   | 1.0452          |
-| 1.0342        | 0.2143 | 15   | 0.9612          |
-| 0.8244        | 0.2571 | 18   | 0.8836          |
-| 0.8398        | 0.3    | 21   | 0.8235          |
-| 0.9522        | 0.3429 | 24   | 0.7842          |
-| 0.7504        | 0.3857 | 27   | 0.7502          |
-| 0.8102        | 0.4286 | 30   | 0.7306          |
-| 0.6835        | 0.4714 | 33   | 0.7146          |
-| 0.7594        | 0.5143 | 36   | 0.6884          |
-| 0.73          | 0.5571 | 39   | 0.6722          |
-| 0.6347        | 0.6    | 42   | 0.6595          |
-| 0.7217        | 0.6429 | 45   | 0.6508          |
-| 0.5904        | 0.6857 | 48   | 0.6390          |
-| 0.6656        | 0.7286 | 51   | 0.6266          |
-| 0.5857        | 0.7714 | 54   | 0.6125          |
-| 0.6036        | 0.8143 | 57   | 0.5918          |
-| 0.6584        | 0.8571 | 60   | 0.5849          |
-| 0.4954        | 0.9    | 63   | 0.5746          |
-| 0.5165        | 0.9429 | 66   | 0.5658          |
-| 0.6184        | 0.9857 | 69   | 0.5510          |
-| 0.4926        | 1.0286 | 72   | 0.5484          |
-| 0.6871        | 1.0714 | 75   | 0.5458          |
-| 0.4718        | 1.1143 | 78   | 0.5368          |
-| 0.5862        | 1.1571 | 81   | 0.5308          |
-| 0.5909        | 1.2    | 84   | 0.5199          |
-| 0.4566        | 1.2429 | 87   | 0.5155          |
-| 0.5461        | 1.2857 | 90   | 0.5048          |
-| 0.4024        | 1.3286 | 93   | 0.5063          |
-| 0.5349        | 1.3714 | 96   | 0.5051          |
-| 0.5643        | 1.4143 | 99   | 0.4994          |
-| 0.5109        | 1.4571 | 102  | 0.4937          |
-| 0.4582        | 1.5    | 105  | 0.4988          |
-| 0.4304        | 1.5429 | 108  | 0.4992          |
-| 0.3849        | 1.5857 | 111  | 0.4972          |
-| 0.5013        | 1.6286 | 114  | 0.4979          |
-| 0.3451        | 1.6714 | 117  | 0.4944          |
-| 0.325         | 1.7143 | 120  | 0.4910          |
-| 0.5228        | 1.7571 | 123  | 0.4827          |
-| 0.4379        | 1.8    | 126  | 0.4753          |
-| 0.3186        | 1.8429 | 129  | 0.4734          |
-| 0.6398        | 1.8857 | 132  | 0.4740          |
-| 0.2963        | 1.9286 | 135  | 0.4777          |
-| 0.661         | 1.9714 | 138  | 0.4770          |
-| 0.4953        | 2.0143 | 141  | 0.4794          |
-| 0.3564        | 2.0571 | 144  | 0.4895          |
-| 0.25          | 2.1    | 147  | 0.4962          |
-| 0.4717        | 2.1429 | 150  | 0.4856          |
-| 0.3823        | 2.1857 | 153  | 0.4734          |
-| 0.3204        | 2.2286 | 156  | 0.4689          |
-| 0.2621        | 2.2714 | 159  | 0.4662          |
-| 0.2568        | 2.3143 | 162  | 0.4676          |
-| 0.3661        | 2.3571 | 165  | 0.4713          |
-| 0.5833        | 2.4    | 168  | 0.4691          |
-| 0.3607        | 2.4429 | 171  | 0.4656          |
-| 0.3806        | 2.4857 | 174  | 0.4667          |
-| 0.2769        | 2.5286 | 177  | 0.4682          |
-| 0.3407        | 2.5714 | 180  | 0.4703          |
-| 0.3535        | 2.6143 | 183  | 0.4710          |
-| 0.3801        | 2.6571 | 186  | 0.4749          |
-| 0.3686        | 2.7    | 189  | 0.4748          |
-| 0.4201        | 2.7429 | 192  | 0.4673          |
-| 0.4519        | 2.7857 | 195  | 0.4634          |
-| 0.3307        | 2.8286 | 198  | 0.4596          |
-| 0.2853        | 2.8714 | 201  | 0.4584          |
-| 0.4844        | 2.9143 | 204  | 0.4567          |
-| 0.3521        | 2.9571 | 207  | 0.4549          |
-| 0.6176        | 3.0    | 210  | 0.4519          |
-| 0.298         | 3.0429 | 213  | 0.4505          |
-| 0.3371        | 3.0857 | 216  | 0.4505          |
-| 0.2549        | 3.1286 | 219  | 0.4519          |
-| 0.3271        | 3.1714 | 222  | 0.4555          |
-| 0.3472        | 3.2143 | 225  | 0.4596          |
-| 0.3883        | 3.2571 | 228  | 0.4607          |
-| 0.285         | 3.3    | 231  | 0.4626          |
-| 0.3243        | 3.3429 | 234  | 0.4642          |
-| 0.3385        | 3.3857 | 237  | 0.4644          |
-| 0.356         | 3.4286 | 240  | 0.4663          |
-| 0.2939        | 3.4714 | 243  | 0.4669          |
-| 0.2338        | 3.5143 | 246  | 0.4677          |
-| 0.4161        | 3.5571 | 249  | 0.4660          |
-| 0.2494        | 3.6    | 252  | 0.4669          |
-| 0.2844        | 3.6429 | 255  | 0.4661          |
-| 0.2141        | 3.6857 | 258  | 0.4643          |
-| 0.2961        | 3.7286 | 261  | 0.4639          |
-| 0.2751        | 3.7714 | 264  | 0.4640          |
-| 0.288         | 3.8143 | 267  | 0.4636          |
-| 0.5415        | 3.8571 | 270  | 0.4641          |
-| 0.2016        | 3.9    | 273  | 0.4634          |
-| 0.3921        | 3.9429 | 276  | 0.4640          |
-| 0.4504        | 3.9857 | 279  | 0.4642          |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5655
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 1.6693        | 0.0405 | 3    | 1.6168          |
+| 1.3656        | 0.0811 | 6    | 1.2375          |
+| 1.0964        | 0.1216 | 9    | 1.0552          |
+| 0.8795        | 0.1622 | 12   | 0.9069          |
+| 0.7104        | 0.2027 | 15   | 0.8512          |
+| 0.7107        | 0.2432 | 18   | 0.7911          |
+| 0.6494        | 0.2838 | 21   | 0.7575          |
+| 0.6856        | 0.3243 | 24   | 0.7386          |
+| 0.7626        | 0.3649 | 27   | 0.7219          |
+| 0.6758        | 0.4054 | 30   | 0.6914          |
+| 0.6051        | 0.4459 | 33   | 0.6597          |
+| 0.6694        | 0.4865 | 36   | 0.6429          |
+| 0.4273        | 0.5270 | 39   | 0.6305          |
+| 0.6037        | 0.5676 | 42   | 0.6197          |
+| 0.579         | 0.6081 | 45   | 0.6080          |
+| 0.4144        | 0.6486 | 48   | 0.5822          |
+| 0.5971        | 0.6892 | 51   | 0.5576          |
+| 0.589         | 0.7297 | 54   | 0.5488          |
+| 0.6413        | 0.7703 | 57   | 0.5471          |
+| 0.5992        | 0.8108 | 60   | 0.5386          |
+| 0.5468        | 0.8514 | 63   | 0.5232          |
+| 0.7074        | 0.8919 | 66   | 0.5172          |
+| 0.4391        | 0.9324 | 69   | 0.5085          |
+| 0.6243        | 0.9730 | 72   | 0.4970          |
+| 0.3784        | 1.0135 | 75   | 0.4958          |
+| 0.3136        | 1.0541 | 78   | 0.4939          |
+| 0.3832        | 1.0946 | 81   | 0.5047          |
+| 0.3463        | 1.1351 | 84   | 0.5085          |
+| 0.4151        | 1.1757 | 87   | 0.5182          |
+| 0.3072        | 1.2162 | 90   | 0.5147          |
+| 0.2954        | 1.2568 | 93   | 0.5210          |
+| 0.3114        | 1.2973 | 96   | 0.5145          |
+| 0.2628        | 1.3378 | 99   | 0.5141          |
+| 0.3768        | 1.3784 | 102  | 0.5129          |
+| 0.2737        | 1.4189 | 105  | 0.5058          |
+| 0.3409        | 1.4595 | 108  | 0.5049          |
+| 0.4281        | 1.5    | 111  | 0.4975          |
+| 0.4059        | 1.5405 | 114  | 0.4848          |
+| 0.4375        | 1.5811 | 117  | 0.4880          |
+| 0.2794        | 1.6216 | 120  | 0.4849          |
+| 0.3575        | 1.6622 | 123  | 0.4802          |
+| 0.5313        | 1.7027 | 126  | 0.4834          |
+| 0.2644        | 1.7432 | 129  | 0.4811          |
+| 0.3878        | 1.7838 | 132  | 0.4746          |
+| 0.3286        | 1.8243 | 135  | 0.4641          |
+| 0.3327        | 1.8649 | 138  | 0.4564          |
+| 0.2176        | 1.9054 | 141  | 0.4547          |
+| 0.4059        | 1.9459 | 144  | 0.4528          |
+| 0.2943        | 1.9865 | 147  | 0.4540          |
+| 0.2527        | 2.0270 | 150  | 0.4554          |
+| 0.4193        | 2.0676 | 153  | 0.4742          |
+| 0.2857        | 2.1081 | 156  | 0.5054          |
+| 0.1813        | 2.1486 | 159  | 0.5248          |
+| 0.1805        | 2.1892 | 162  | 0.5251          |
+| 0.0996        | 2.2297 | 165  | 0.5196          |
+| 0.181         | 2.2703 | 168  | 0.5206          |
+| 0.2093        | 2.3108 | 171  | 0.5134          |
+| 0.1637        | 2.3514 | 174  | 0.5138          |
+| 0.1239        | 2.3919 | 177  | 0.5120          |
+| 0.2012        | 2.4324 | 180  | 0.5150          |
+| 0.2687        | 2.4730 | 183  | 0.5152          |
+| 0.1168        | 2.5135 | 186  | 0.5202          |
+| 0.2365        | 2.5541 | 189  | 0.5221          |
+| 0.289         | 2.5946 | 192  | 0.5174          |
+| 0.1814        | 2.6351 | 195  | 0.5128          |
+| 0.1923        | 2.6757 | 198  | 0.5078          |
+| 0.1834        | 2.7162 | 201  | 0.5016          |
+| 0.1661        | 2.7568 | 204  | 0.4995          |
+| 0.1359        | 2.7973 | 207  | 0.4983          |
+| 0.1327        | 2.8378 | 210  | 0.5041          |
+| 0.2567        | 2.8784 | 213  | 0.5087          |
+| 0.2326        | 2.9189 | 216  | 0.5074          |
+| 0.2025        | 2.9595 | 219  | 0.5016          |
+| 0.146         | 3.0    | 222  | 0.4954          |
+| 0.1048        | 3.0405 | 225  | 0.4967          |
+| 0.1218        | 3.0811 | 228  | 0.5027          |
+| 0.3124        | 3.1216 | 231  | 0.5094          |
+| 0.1518        | 3.1622 | 234  | 0.5186          |
+| 0.2543        | 3.2027 | 237  | 0.5275          |
+| 0.0982        | 3.2432 | 240  | 0.5364          |
+| 0.1014        | 3.2838 | 243  | 0.5447          |
+| 0.1216        | 3.3243 | 246  | 0.5548          |
+| 0.0768        | 3.3649 | 249  | 0.5589          |
+| 0.086         | 3.4054 | 252  | 0.5633          |
+| 0.1213        | 3.4459 | 255  | 0.5654          |
+| 0.1437        | 3.4865 | 258  | 0.5668          |
+| 0.1341        | 3.5270 | 261  | 0.5670          |
+| 0.1122        | 3.5676 | 264  | 0.5659          |
+| 0.0832        | 3.6081 | 267  | 0.5662          |
+| 0.0668        | 3.6486 | 270  | 0.5637          |
+| 0.2023        | 3.6892 | 273  | 0.5662          |
+| 0.104         | 3.7297 | 276  | 0.5640          |
+| 0.1181        | 3.7703 | 279  | 0.5650          |
+| 0.1242        | 3.8108 | 282  | 0.5636          |
+| 0.0874        | 3.8514 | 285  | 0.5644          |
+| 0.1133        | 3.8919 | 288  | 0.5637          |
+| 0.073         | 3.9324 | 291  | 0.5640          |
+| 0.0724        | 3.9730 | 294  | 0.5655          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "k_proj",
-    "down_proj",
-    "gate_proj",
-    "v_proj",
-    "up_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_up_proj",
+    "o_proj",
+    "qkv_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:296a557e8ac7fa73beecea698789e37f5d243c1734c4c8e77454c03a84b64de1
-size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:d085116f5270d42fbd3f522f7e5401ba43b2cf120ac015700e4ac0a39d893626
+size 100697728

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d14206dd3da4e8472af498c9b36118033550bef4ae9b4edb0f99f7664786f84
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:b38ec32d6537ab3d4e404fffac63b5d9a8c23e446f15fd22f45b1947ba118c42
 size 5432