Commit a7691b4 (1 parent: e994667), committed by lesso

End of training

README.md CHANGED
@@ -22,7 +22,6 @@ adapter: lora
  base_model: huggyllama/llama-7b
  bf16: false
  chat_template: llama3
- dataset_prepared_path: null
  datasets:
  - data_files:
    - e159c7177b77ec6f_train_data.json
@@ -42,8 +41,8 @@ deepspeed: null
  early_stopping_patience: null
  eval_max_new_tokens: 128
  eval_table_size: null
- evals_per_epoch: 4
- flash_attention: true
+ evals_per_epoch: 1
+ flash_attention: false
  fp16: true
  fsdp: null
  fsdp_config: null
@@ -66,7 +65,7 @@ lora_model_dir: null
  lora_r: 8
  lora_target_linear: true
  lr_scheduler: cosine
- max_steps: 10
+ max_steps: 1000
  micro_batch_size: 1
  mlflow_experiment_name: /tmp/e159c7177b77ec6f_train_data.json
  model_type: AutoModelForCausalLM
@@ -77,7 +76,7 @@ pad_to_sequence_len: true
  resume_from_checkpoint: null
  s2_attention: null
  sample_packing: false
- saves_per_epoch: 4
+ saves_per_epoch: 1
  sequence_len: 1024
  special_tokens:
    pad_token: </s>
@@ -93,7 +92,7 @@ wandb_name: 22b05afc-5100-4d8e-849b-39843d793e1c
  wandb_project: Gradients-On-Demand
  wandb_run: your_name
  wandb_runid: 22b05afc-5100-4d8e-849b-39843d793e1c
- warmup_steps: 10
+ warmup_steps: 0
  weight_decay: 0.0
  xformers_attention: null
 
@@ -105,7 +104,7 @@ xformers_attention: null
 
  This model is a fine-tuned version of [huggyllama/llama-7b](https://huggingface.co/huggyllama/llama-7b) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.8663
+ - Loss: 0.3320
 
  ## Model description
 
@@ -132,18 +131,14 @@ The following hyperparameters were used during training:
  - total_train_batch_size: 4
  - optimizer: Use OptimizerNames.ADAMW_HF with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
- - lr_scheduler_warmup_steps: 10
- - training_steps: 10
+ - training_steps: 1000
  - mixed_precision_training: Native AMP
 
  ### Training results
 
  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 1.0893 | 0.0001 | 1 | 1.2124 |
- | 1.2625 | 0.0003 | 3 | 1.1848 |
- | 1.0365 | 0.0006 | 6 | 1.0633 |
- | 0.8883 | 0.0010 | 9 | 0.8663 |
+ | 0.4271 | 0.1071 | 1000 | 0.3320 |
 
 
  ### Framework versions
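
The config hunks above turn the run from a 10-step smoke test into a 1000-step run with no warmup. A minimal sketch of how those scheduler settings behave, assuming a learning rate of 2e-4 (the learning_rate key is not shown in these hunks) and using the standard transformers cosine schedule rather than this repo's actual training loop:

```python
import torch
from transformers import get_cosine_schedule_with_warmup

# Stand-in module; the real run wraps huggyllama/llama-7b with a LoRA adapter.
model = torch.nn.Linear(8, 8)

# Optimizer settings mirror the README: AdamW, betas=(0.9, 0.999), eps=1e-08,
# weight_decay 0.0. lr=2e-4 is an assumption, not taken from the diff.
optimizer = torch.optim.AdamW(
    model.parameters(), lr=2e-4, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.0
)

# Cosine decay over the new max_steps: 1000 with warmup_steps: 0.
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=1000
)

for step in range(1000):
    optimizer.step()
    scheduler.step()
```

With micro_batch_size: 1 and total_train_batch_size: 4, each optimizer step accumulates gradients over 4 micro-batches (assuming a single device), so the 1000 steps above cover roughly 4000 training examples.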
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
+     "o_proj",
+     "k_proj",
+     "up_proj",
      "q_proj",
-     "gate_proj",
-     "down_proj",
      "v_proj",
-     "up_proj",
-     "o_proj",
-     "k_proj"
+     "gate_proj",
+     "down_proj"
    ],
    "task_type": "CAUSAL_LM",
    "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd87ab34ae8a609486260fb38d56115626e57fafc1714b0c2827dc2abbe1cb00
+ oid sha256:e729e80eac7ef4f85f592626fc8cbef243c2a43888fe92b292843cf966f7e0a0
  size 80115210
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:adfbbddb6d36c12289912b5fa1c4a076dc9ec72221af86dd87e0083ff3608563
+ oid sha256:719df9ddf670b10e0acd25b3b60a7525f8c19f58481945e574721fa5a6ae2056
  size 80013120
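
The weight files themselves live in Git LFS, so these diffs only swap the sha256 oid in the pointer files while the byte sizes stay identical. A small sketch, assuming the real file has been fetched with `git lfs pull`, of checking a downloaded file against the oid in its pointer:

```python
import hashlib

def sha256_of(path: str) -> str:
    """Stream the file so large checkpoints do not need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Should print the oid from the new pointer:
# 719df9ddf670b10e0acd25b3b60a7525f8c19f58481945e574721fa5a6ae2056
print(sha256_of("adapter_model.safetensors"))
```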
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ec54d060fa3c10be7a79720000a71081ddc512eda26e7a24166cfc8633b27fc3
+ oid sha256:86b878855dd3a962afd8534d40010f416a8fb1bce5a59608028eb56c6136a568
  size 6776
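
To use the retrained adapter that these pointers now reference, it can be attached to the base model with PEFT. A minimal sketch; "lesso/<adapter-repo>" is a placeholder, since the adapter repository id is not shown in this commit view:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "huggyllama/llama-7b", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")

# Placeholder repo id; replace with the actual adapter repository.
model = PeftModel.from_pretrained(base, "lesso/<adapter-repo>")
```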