Commit eb86aa5 by pbaoo2705
Parent: c5b2fb5

QLoRA applied #2
README.md CHANGED
@@ -2,6 +2,8 @@
 base_model: ybelkada/falcon-7b-sharded-bf16
 tags:
 - generated_from_trainer
+metrics:
+- f1
 model-index:
 - name: falcon-7b-sharded-2
   results: []
@@ -13,6 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
 # falcon-7b-sharded-2
 
 This model is a fine-tuned version of [ybelkada/falcon-7b-sharded-bf16](https://huggingface.co/ybelkada/falcon-7b-sharded-bf16) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: nan
+- F1: 0.0337
 
 ## Model description
 
@@ -32,11 +37,9 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 4
+- train_batch_size: 2
 - eval_batch_size: 1
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - lr_scheduler_warmup_ratio: 0.03
@@ -44,6 +47,10 @@ The following hyperparameters were used during training:
 
 ### Training results
 
+| Training Loss | Epoch | Step | Validation Loss | F1     |
+|:-------------:|:-----:|:----:|:---------------:|:------:|
+| 7.6119        | 1.0   | 442  | nan             | 0.0337 |
+| 6.8711        | 1.13  | 500  | nan             | 0.0337 |
 
 
 ### Framework versions
@@ -51,4 +58,4 @@ The following hyperparameters were used during training:
 - Transformers 4.34.0
 - Pytorch 2.0.1+cu118
 - Datasets 2.14.5
-- Tokenizers 0.14.0
+- Tokenizers 0.14.1
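
The README's updated hyperparameters drop gradient accumulation and halve the per-device batch size. As a rough illustration, the sketch below reconstructs these settings as a `transformers.TrainingArguments`; the output directory and step count are assumptions, not values recorded in this commit (the Adam betas and epsilon listed in the README are already the `Trainer` defaults).

```python
from transformers import TrainingArguments

# A minimal sketch of the hyperparameters listed in the updated README.
# output_dir and max_steps are assumed for illustration only.
training_args = TrainingArguments(
    output_dir="falcon-7b-sharded-2",  # assumed; not recorded in the diff
    per_device_train_batch_size=2,     # train_batch_size: 2
    per_device_eval_batch_size=1,      # eval_batch_size: 1
    learning_rate=2e-4,                # learning_rate: 0.0002
    lr_scheduler_type="constant",      # lr_scheduler_type: constant
    warmup_ratio=0.03,                 # lr_scheduler_warmup_ratio: 0.03
    seed=42,                           # seed: 42
    max_steps=500,                     # assumed from the last logged step
)
```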
adapter_config.json CHANGED
@@ -3,8 +3,8 @@
   "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
   "fan_in_fan_out": false,
   "feedforward_modules": [
-    "dense_h_to_4h",
-    "dense_4h_to_h"
+    "dense_4h_to_h",
+    "dense_h_to_4h"
   ],
   "inference_mode": true,
   "init_ia3_weights": true,
@@ -14,8 +14,8 @@
   "target_modules": [
     "query_key_value",
     "dense",
-    "dense_h_to_4h",
-    "dense_4h_to_h"
+    "dense_4h_to_h",
+    "dense_h_to_4h"
   ],
-  "task_type": "CAUSAL_LM"
+  "task_type": "QUESTION_ANSWERING"
 }
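
Despite the commit message mentioning QLoRA, the `init_ia3_weights` and `feedforward_modules` fields identify this as a PEFT IA3 adapter config, retargeted here from causal LM to extractive question answering. A hedged sketch of building the equivalent config with PEFT follows; note that current PEFT releases expose the QA task as `TaskType.QUESTION_ANS`, while the committed file stores the string `"QUESTION_ANSWERING"`.

```python
from peft import IA3Config, TaskType

# Sketch of a config matching the updated adapter_config.json: IA3 scaling
# vectors on Falcon's attention and MLP projections, with the MLP
# projections additionally flagged as feedforward modules.
ia3_config = IA3Config(
    target_modules=["query_key_value", "dense", "dense_4h_to_h", "dense_h_to_4h"],
    feedforward_modules=["dense_4h_to_h", "dense_h_to_4h"],
    task_type=TaskType.QUESTION_ANS,  # stored in the repo as "QUESTION_ANSWERING"
)
```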
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffe95f8f7e1cefccd3afc34d2fa1a2ef11c4423c6656dde21eb635a3af98429d
-size 4133325
+oid sha256:20c2c7dd065f99eab629a983caa759bb943b48e1e0572a30aee8f5332a9a10bd
+size 4170325
tokenizer.json CHANGED
@@ -2,9 +2,9 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 2048,
-    "strategy": "LongestFirst",
-    "stride": 0
+    "max_length": 384,
+    "strategy": "OnlySecond",
+    "stride": 128
   },
   "padding": null,
   "added_tokens": [
tokenizer_config.json CHANGED
@@ -113,11 +113,10 @@
   ],
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
-  "max_length": 512,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
   "model_max_length": 2048,
-  "pad_token": "<|endoftext|>",
-  "stride": 0,
-  "tokenizer_class": "PreTrainedTokenizerFast",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first"
+  "tokenizer_class": "PreTrainedTokenizerFast"
 }
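
Pinning `model_input_names` makes the tokenizer return exactly the tensors the model expects, since fast tokenizers may otherwise emit extra keys such as `token_type_ids`. A quick hedged check (the repo id is assumed from the model-index name above):

```python
from transformers import AutoTokenizer

# Repo id assumed from the model-index name; adjust if different.
tok = AutoTokenizer.from_pretrained("pbaoo2705/falcon-7b-sharded-2")
print(tok.model_input_names)          # ['input_ids', 'attention_mask']
print(sorted(tok("hello").keys()))    # ['attention_mask', 'input_ids']
```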
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:939d96f0ae01a0b7f2b8336b1421e96acd46d09cd6f7390adcd9fadae6c6ac6a
+oid sha256:3029d2fc1e20eadaf5b42a829fedb51d99fc527bbdcf3e8bd5112f5bb38a3e62
 size 4091