falcon-7b-text-to-base

Browse files

Files changed (4) hide show

README.md +160 -0
adapter_config.json +28 -0
adapter_model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,160 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: tiiuae/falcon-7b
+model-index:
+- name: chkpts
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# chkpts
+This model is a LoRA adapter (trained with PEFT) for [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b), fine-tuned on an unknown dataset.
+It achieves the following results on the evaluation set:
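+- Loss: 0.1903 (validation loss after the final epoch, 100; the lowest validation loss in the results table is 0.1900, at epoch 96)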
+## Model description
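+A LoRA adapter for the `tiiuae/falcon-7b` causal language model (`task_type: CAUSAL_LM`), with rank `r=16`, `lora_alpha=32`, `lora_dropout=0.05`, no bias training, applied to the `query_key_value` modules. This repository holds only the adapter weights; the base model must be loaded separately. More information on the task and the intended behavior is still needed.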
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0003
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 100
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss |
+|:-------------:|:-----:|:-----:|:---------------:|
+| 0.6257        | 1.0   | 489   | 0.5854          |
+| 0.3832        | 2.0   | 978   | 0.3878          |
+| 0.3006        | 3.0   | 1467  | 0.3005          |
+| 0.2695        | 4.0   | 1956  | 0.2657          |
+| 0.246         | 5.0   | 2445  | 0.2465          |
+| 0.2243        | 6.0   | 2934  | 0.2336          |
+| 0.2258        | 7.0   | 3423  | 0.2200          |
+| 0.2259        | 8.0   | 3912  | 0.2239          |
+| 0.2189        | 9.0   | 4401  | 0.2181          |
+| 0.216         | 10.0  | 4890  | 0.2179          |
+| 0.2158        | 11.0  | 5379  | 0.2148          |
+| 0.2118        | 12.0  | 5868  | 0.2139          |
+| 0.2097        | 13.0  | 6357  | 0.2104          |
+| 0.2053        | 14.0  | 6846  | 0.2075          |
+| 0.2052        | 15.0  | 7335  | 0.2136          |
+| 0.2063        | 16.0  | 7824  | 0.2079          |
+| 0.2132        | 17.0  | 8313  | 0.2047          |
+| 0.2081        | 18.0  | 8802  | 0.2046          |
+| 0.2131        | 19.0  | 9291  | 0.2089          |
+| 0.2046        | 20.0  | 9780  | 0.2067          |
+| 0.2034        | 21.0  | 10269 | 0.2071          |
+| 0.1982        | 22.0  | 10758 | 0.2028          |
+| 0.203         | 23.0  | 11247 | 0.2005          |
+| 0.2046        | 24.0  | 11736 | 0.2056          |
+| 0.2049        | 25.0  | 12225 | 0.2033          |
+| 0.2004        | 26.0  | 12714 | 0.2020          |
+| 0.1998        | 27.0  | 13203 | 0.2015          |
+| 0.2027        | 28.0  | 13692 | 0.1996          |
+| 0.1927        | 29.0  | 14181 | 0.2013          |
+| 0.1976        | 30.0  | 14670 | 0.2004          |
+| 0.204         | 31.0  | 15159 | 0.1974          |
+| 0.1965        | 32.0  | 15648 | 0.1997          |
+| 0.1977        | 33.0  | 16137 | 0.1974          |
+| 0.1932        | 34.0  | 16626 | 0.1967          |
+| 0.1946        | 35.0  | 17115 | 0.1989          |
+| 0.1989        | 36.0  | 17604 | 0.1976          |
+| 0.199         | 37.0  | 18093 | 0.1975          |
+| 0.1938        | 38.0  | 18582 | 0.1987          |
+| 0.1982        | 39.0  | 19071 | 0.1973          |
+| 0.2016        | 40.0  | 19560 | 0.1973          |
+| 0.1971        | 41.0  | 20049 | 0.1985          |
+| 0.194         | 42.0  | 20538 | 0.1975          |
+| 0.1968        | 43.0  | 21027 | 0.1973          |
+| 0.196         | 44.0  | 21516 | 0.1965          |
+| 0.1939        | 45.0  | 22005 | 0.1965          |
+| 0.1949        | 46.0  | 22494 | 0.1957          |
+| 0.2005        | 47.0  | 22983 | 0.1965          |
+| 0.1961        | 48.0  | 23472 | 0.1948          |
+| 0.1894        | 49.0  | 23961 | 0.1949          |
+| 0.1957        | 50.0  | 24450 | 0.1960          |
+| 0.1986        | 51.0  | 24939 | 0.1969          |
+| 0.1895        | 52.0  | 25428 | 0.1947          |
+| 0.1906        | 53.0  | 25917 | 0.1945          |
+| 0.1986        | 54.0  | 26406 | 0.1939          |
+| 0.192         | 55.0  | 26895 | 0.1926          |
+| 0.1857        | 56.0  | 27384 | 0.1942          |
+| 0.1885        | 57.0  | 27873 | 0.1946          |
+| 0.1923        | 58.0  | 28362 | 0.1947          |
+| 0.1898        | 59.0  | 28851 | 0.1938          |
+| 0.2003        | 60.0  | 29340 | 0.1940          |
+| 0.1934        | 61.0  | 29829 | 0.1931          |
+| 0.1946        | 62.0  | 30318 | 0.1928          |
+| 0.205         | 63.0  | 30807 | 0.1928          |
+| 0.1951        | 64.0  | 31296 | 0.1940          |
+| 0.1926        | 65.0  | 31785 | 0.1932          |
+| 0.1986        | 66.0  | 32274 | 0.1930          |
+| 0.1918        | 67.0  | 32763 | 0.1921          |
+| 0.1893        | 68.0  | 33252 | 0.1922          |
+| 0.1879        | 69.0  | 33741 | 0.1928          |
+| 0.2012        | 70.0  | 34230 | 0.1915          |
+| 0.1934        | 71.0  | 34719 | 0.1922          |
+| 0.1915        | 72.0  | 35208 | 0.1929          |
+| 0.1967        | 73.0  | 35697 | 0.1920          |
+| 0.19          | 74.0  | 36186 | 0.1924          |
+| 0.2008        | 75.0  | 36675 | 0.1917          |
+| 0.1953        | 76.0  | 37164 | 0.1912          |
+| 0.1948        | 77.0  | 37653 | 0.1926          |
+| 0.1898        | 78.0  | 38142 | 0.1919          |
+| 0.1909        | 79.0  | 38631 | 0.1913          |
+| 0.1895        | 80.0  | 39120 | 0.1917          |
+| 0.1963        | 81.0  | 39609 | 0.1911          |
+| 0.1946        | 82.0  | 40098 | 0.1912          |
+| 0.1915        | 83.0  | 40587 | 0.1916          |
+| 0.1858        | 84.0  | 41076 | 0.1914          |
+| 0.1957        | 85.0  | 41565 | 0.1917          |
+| 0.194         | 86.0  | 42054 | 0.1907          |
+| 0.1832        | 87.0  | 42543 | 0.1913          |
+| 0.1865        | 88.0  | 43032 | 0.1912          |
+| 0.1886        | 89.0  | 43521 | 0.1913          |
+| 0.1846        | 90.0  | 44010 | 0.1906          |
+| 0.1896        | 91.0  | 44499 | 0.1905          |
+| 0.1847        | 92.0  | 44988 | 0.1905          |
+| 0.1843        | 93.0  | 45477 | 0.1907          |
+| 0.1847        | 94.0  | 45966 | 0.1909          |
+| 0.1925        | 95.0  | 46455 | 0.1905          |
+| 0.1888        | 96.0  | 46944 | 0.1900          |
+| 0.1808        | 97.0  | 47433 | 0.1904          |
+| 0.1872        | 98.0  | 47922 | 0.1903          |
+| 0.1849        | 99.0  | 48411 | 0.1902          |
+| 0.1923        | 100.0 | 48900 | 0.1903          |
+### Framework versions
+- PEFT 0.10.0
+- Transformers 4.40.0.dev0
+- Pytorch 2.2.2+cu121
+- Datasets 2.18.0
+- Tokenizers 0.15.2

adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "tiiuae/falcon-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebc1c395e1d29237592fbbffafe0e437c1fdbf2fe6b71314a01ee93ebb0e0315
+size 18883912

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0de3b39016ec2b707a0aa16e78f1699cd94a59ee0747ccb5ae0e6fc21310c1da
+size 5048