End of training

Browse files

Files changed (10) hide show

README.md +219 -0
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
added_tokens.json +3 -0
runs/Aug04_13-36-40_0d6c6470f9b3/events.out.tfevents.1722778612.0d6c6470f9b3.3512.0 +3 -0
special_tokens_map.json +36 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +92 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,219 @@

+---
+base_model: codellama/CodeLlama-7b-Instruct-hf
+library_name: peft
+license: llama2
+tags:
+- trl
+- sft
+- generated_from_trainer
+model-index:
+- name: ECS-Codellama-7b-lora-rps-adapter
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# ECS-Codellama-7b-lora-rps-adapter
+This model is a fine-tuned version of [codellama/CodeLlama-7b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 0.2955
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.03
+- num_epochs: 4
+### Training results
+| Training Loss | Epoch  | Step  | Validation Loss |
+|:-------------:|:------:|:-----:|:---------------:|
+| 0.1784        | 2.6210 | 15000 | 0.2849          |
+| 0.2039        | 2.6297 | 15050 | 0.2825          |
+| 0.194         | 2.6385 | 15100 | 0.2842          |
+| 0.2073        | 2.6472 | 15150 | 0.2844          |
+| 0.1818        | 2.6559 | 15200 | 0.2841          |
+| 0.1858        | 2.6647 | 15250 | 0.2837          |
+| 0.191         | 2.6734 | 15300 | 0.2821          |
+| 0.2024        | 2.6822 | 15350 | 0.2814          |
+| 0.1699        | 2.6909 | 15400 | 0.2832          |
+| 0.1782        | 2.6996 | 15450 | 0.2813          |
+| 0.1971        | 2.7084 | 15500 | 0.2818          |
+| 0.1974        | 2.7171 | 15550 | 0.2811          |
+| 0.1867        | 2.7258 | 15600 | 0.2818          |
+| 0.1843        | 2.7346 | 15650 | 0.2836          |
+| 0.192         | 2.7433 | 15700 | 0.2834          |
+| 0.2191        | 2.7521 | 15750 | 0.2800          |
+| 0.1797        | 2.7608 | 15800 | 0.2797          |
+| 0.1871        | 2.7695 | 15850 | 0.2817          |
+| 0.1893        | 2.7783 | 15900 | 0.2817          |
+| 0.1845        | 2.7870 | 15950 | 0.2824          |
+| 0.1954        | 2.7957 | 16000 | 0.2828          |
+| 0.1752        | 2.8045 | 16050 | 0.2824          |
+| 0.213         | 2.8132 | 16100 | 0.2803          |
+| 0.1953        | 2.8219 | 16150 | 0.2818          |
+| 0.1959        | 2.8307 | 16200 | 0.2807          |
+| 0.1904        | 2.8394 | 16250 | 0.2814          |
+| 0.191         | 2.8482 | 16300 | 0.2806          |
+| 0.1783        | 2.8569 | 16350 | 0.2803          |
+| 0.1997        | 2.8656 | 16400 | 0.2802          |
+| 0.2195        | 2.8744 | 16450 | 0.2787          |
+| 0.189         | 2.8831 | 16500 | 0.2800          |
+| 0.1951        | 2.8918 | 16550 | 0.2788          |
+| 0.1985        | 2.9006 | 16600 | 0.2789          |
+| 0.2169        | 2.9093 | 16650 | 0.2785          |
+| 0.195         | 2.9180 | 16700 | 0.2788          |
+| 0.1744        | 2.9268 | 16750 | 0.2800          |
+| 0.1635        | 2.9355 | 16800 | 0.2800          |
+| 0.1877        | 2.9443 | 16850 | 0.2782          |
+| 0.1977        | 2.9530 | 16900 | 0.2770          |
+| 0.1808        | 2.9617 | 16950 | 0.2781          |
+| 0.1824        | 2.9705 | 17000 | 0.2784          |
+| 0.1947        | 2.9792 | 17050 | 0.2781          |
+| 0.1946        | 2.9879 | 17100 | 0.2767          |
+| 0.1742        | 2.9967 | 17150 | 0.2770          |
+| 0.1527        | 3.0054 | 17200 | 0.2886          |
+| 0.1205        | 3.0142 | 17250 | 0.2929          |
+| 0.1261        | 3.0229 | 17300 | 0.2981          |
+| 0.1122        | 3.0316 | 17350 | 0.2997          |
+| 0.1441        | 3.0404 | 17400 | 0.2979          |
+| 0.1202        | 3.0491 | 17450 | 0.3007          |
+| 0.1285        | 3.0578 | 17500 | 0.2983          |
+| 0.149         | 3.0666 | 17550 | 0.3007          |
+| 0.1369        | 3.0753 | 17600 | 0.2968          |
+| 0.1225        | 3.0840 | 17650 | 0.2994          |
+| 0.132         | 3.0928 | 17700 | 0.3007          |
+| 0.1296        | 3.1015 | 17750 | 0.3006          |
+| 0.1207        | 3.1103 | 17800 | 0.3000          |
+| 0.1385        | 3.1190 | 17850 | 0.2981          |
+| 0.1347        | 3.1277 | 17900 | 0.3000          |
+| 0.114         | 3.1365 | 17950 | 0.2994          |
+| 0.1233        | 3.1452 | 18000 | 0.2991          |
+| 0.1284        | 3.1539 | 18050 | 0.2991          |
+| 0.1222        | 3.1627 | 18100 | 0.3005          |
+| 0.1367        | 3.1714 | 18150 | 0.2988          |
+| 0.1308        | 3.1802 | 18200 | 0.2992          |
+| 0.1138        | 3.1889 | 18250 | 0.3001          |
+| 0.1259        | 3.1976 | 18300 | 0.2979          |
+| 0.1383        | 3.2064 | 18350 | 0.2993          |
+| 0.1288        | 3.2151 | 18400 | 0.2989          |
+| 0.1364        | 3.2238 | 18450 | 0.2974          |
+| 0.1232        | 3.2326 | 18500 | 0.2989          |
+| 0.1348        | 3.2413 | 18550 | 0.3012          |
+| 0.1168        | 3.2500 | 18600 | 0.2998          |
+| 0.1342        | 3.2588 | 18650 | 0.3026          |
+| 0.1385        | 3.2675 | 18700 | 0.2979          |
+| 0.1298        | 3.2763 | 18750 | 0.2962          |
+| 0.1373        | 3.2850 | 18800 | 0.2950          |
+| 0.1292        | 3.2937 | 18850 | 0.2986          |
+| 0.1329        | 3.3025 | 18900 | 0.2965          |
+| 0.1324        | 3.3112 | 18950 | 0.3016          |
+| 0.1176        | 3.3199 | 19000 | 0.2991          |
+| 0.1444        | 3.3287 | 19050 | 0.2940          |
+| 0.1395        | 3.3374 | 19100 | 0.2960          |
+| 0.1247        | 3.3461 | 19150 | 0.2975          |
+| 0.1313        | 3.3549 | 19200 | 0.2976          |
+| 0.1299        | 3.3636 | 19250 | 0.2967          |
+| 0.1339        | 3.3724 | 19300 | 0.2969          |
+| 0.128         | 3.3811 | 19350 | 0.2949          |
+| 0.1296        | 3.3898 | 19400 | 0.2978          |
+| 0.1346        | 3.3986 | 19450 | 0.2961          |
+| 0.1388        | 3.4073 | 19500 | 0.2960          |
+| 0.1236        | 3.4160 | 19550 | 0.2951          |
+| 0.1203        | 3.4248 | 19600 | 0.2952          |
+| 0.1161        | 3.4335 | 19650 | 0.2977          |
+| 0.1158        | 3.4423 | 19700 | 0.2955          |
+| 0.1292        | 3.4510 | 19750 | 0.2979          |
+| 0.1224        | 3.4597 | 19800 | 0.2976          |
+| 0.1241        | 3.4685 | 19850 | 0.2979          |
+| 0.1411        | 3.4772 | 19900 | 0.2953          |
+| 0.1337        | 3.4859 | 19950 | 0.2966          |
+| 0.1298        | 3.4947 | 20000 | 0.2964          |
+| 0.1176        | 3.5034 | 20050 | 0.2958          |
+| 0.1175        | 3.5121 | 20100 | 0.2966          |
+| 0.1409        | 3.5209 | 20150 | 0.2952          |
+| 0.1339        | 3.5296 | 20200 | 0.2951          |
+| 0.1348        | 3.5384 | 20250 | 0.2956          |
+| 0.1281        | 3.5471 | 20300 | 0.2956          |
+| 0.1293        | 3.5558 | 20350 | 0.2981          |
+| 0.1257        | 3.5646 | 20400 | 0.2969          |
+| 0.1152        | 3.5733 | 20450 | 0.2955          |
+| 0.1276        | 3.5820 | 20500 | 0.2960          |
+| 0.1366        | 3.5908 | 20550 | 0.2977          |
+| 0.1364        | 3.5995 | 20600 | 0.2982          |
+| 0.134         | 3.6082 | 20650 | 0.2967          |
+| 0.1266        | 3.6170 | 20700 | 0.2965          |
+| 0.1215        | 3.6257 | 20750 | 0.2970          |
+| 0.1253        | 3.6345 | 20800 | 0.2991          |
+| 0.116         | 3.6432 | 20850 | 0.2976          |
+| 0.1255        | 3.6519 | 20900 | 0.2972          |
+| 0.1271        | 3.6607 | 20950 | 0.2969          |
+| 0.1155        | 3.6694 | 21000 | 0.2970          |
+| 0.1223        | 3.6781 | 21050 | 0.2968          |
+| 0.1317        | 3.6869 | 21100 | 0.2956          |
+| 0.1257        | 3.6956 | 21150 | 0.2957          |
+| 0.1262        | 3.7044 | 21200 | 0.2952          |
+| 0.1215        | 3.7131 | 21250 | 0.2957          |
+| 0.1285        | 3.7218 | 21300 | 0.2955          |
+| 0.1264        | 3.7306 | 21350 | 0.2956          |
+| 0.1364        | 3.7393 | 21400 | 0.2967          |
+| 0.1213        | 3.7480 | 21450 | 0.2966          |
+| 0.1316        | 3.7568 | 21500 | 0.2972          |
+| 0.1174        | 3.7655 | 21550 | 0.2991          |
+| 0.1167        | 3.7742 | 21600 | 0.2982          |
+| 0.1274        | 3.7830 | 21650 | 0.2974          |
+| 0.1302        | 3.7917 | 21700 | 0.2960          |
+| 0.118         | 3.8005 | 21750 | 0.2958          |
+| 0.1264        | 3.8092 | 21800 | 0.2977          |
+| 0.1115        | 3.8179 | 21850 | 0.2971          |
+| 0.1128        | 3.8267 | 21900 | 0.2973          |
+| 0.1186        | 3.8354 | 21950 | 0.2965          |
+| 0.1173        | 3.8441 | 22000 | 0.2965          |
+| 0.1293        | 3.8529 | 22050 | 0.2963          |
+| 0.1226        | 3.8616 | 22100 | 0.2964          |
+| 0.1173        | 3.8703 | 22150 | 0.2964          |
+| 0.1343        | 3.8791 | 22200 | 0.2966          |
+| 0.1365        | 3.8878 | 22250 | 0.2962          |
+| 0.1187        | 3.8966 | 22300 | 0.2963          |
+| 0.1132        | 3.9053 | 22350 | 0.2963          |
+| 0.1328        | 3.9140 | 22400 | 0.2961          |
+| 0.1394        | 3.9228 | 22450 | 0.2956          |
+| 0.1312        | 3.9315 | 22500 | 0.2959          |
+| 0.1256        | 3.9402 | 22550 | 0.2958          |
+| 0.1272        | 3.9490 | 22600 | 0.2955          |
+| 0.1128        | 3.9577 | 22650 | 0.2954          |
+| 0.1193        | 3.9665 | 22700 | 0.2955          |
+| 0.1169        | 3.9752 | 22750 | 0.2954          |
+| 0.1308        | 3.9839 | 22800 | 0.2954          |
+| 0.1185        | 3.9927 | 22850 | 0.2955          |
+### Framework versions
+- PEFT 0.12.0
+- Transformers 4.43.3
+- Pytorch 2.3.1+cu121
+- Datasets 2.20.0
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "codellama/CodeLlama-7b-Instruct-hf",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj",
+    "up_proj",
+    "gate_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": true
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2712c5b37aaa5d7e34ec99b45db14dd31473a2dc10e4dfbea3c18dba19b557a4
+size 2332095256

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<PAD>": 32016
+}

runs/Aug04_13-36-40_0d6c6470f9b3/events.out.tfevents.1722778612.0d6c6470f9b3.3512.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5767ded8cd6ecef1437227db09fc3f9a3f15337f98c3d4c5b4f728eb8f029f1
+size 146631

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<PAD>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,92 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁<PRE>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁<SUF>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁<MID>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁<EOT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "<PAD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": "<s>",
+  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "eot_token": "▁<EOT>",
+  "fill_token": "<FILL_ME>",
+  "legacy": null,
+  "middle_token": "▁<MID>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<PAD>",
+  "prefix_token": "▁<PRE>",
+  "sp_model_kwargs": {},
+  "suffix_token": "▁<SUF>",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7c904905085e0075baf404f3edf4d53d0a090289d4108a987eed95d4b997698
+size 5496