willtensora committed: End of training

Files changed:
- README.md (+23 -24)
- adapter_model.bin (+2 -2)

README.md CHANGED
````diff
@@ -19,7 +19,7 @@ axolotl version: `0.4.1`
 ```yaml
 adapter: lora
 base_model: peft-internal-testing/tiny-dummy-qwen2
-bf16:
+bf16: true
 chat_template: llama3
 dataset_prepared_path: null
 datasets:
@@ -36,34 +36,34 @@ datasets:
 debug: null
 deepspeed: null
 early_stopping_patience: null
-eval_max_new_tokens:
+eval_max_new_tokens: 64
 eval_table_size: null
-evals_per_epoch:
+evals_per_epoch: 0
 flash_attention: false
-fp16:
+fp16: false
 fsdp: null
 fsdp_config: null
 gradient_accumulation_steps: 1
 gradient_checkpointing: false
-group_by_length:
+group_by_length: true
 hub_model_id: willtensora/123e4567-e89b-12d3-a456-426614174000
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
-learning_rate: 0.
+learning_rate: 0.001
 load_in_4bit: false
 load_in_8bit: false
 local_rank: null
 logging_steps: 1
-lora_alpha:
-lora_dropout: 0.
+lora_alpha: 8
+lora_dropout: 0.1
 lora_fan_in_fan_out: null
 lora_model_dir: null
-lora_r:
+lora_r: 4
 lora_target_linear: true
-lr_scheduler:
+lr_scheduler: linear
 max_steps: 1
-micro_batch_size:
+micro_batch_size: 4
 mlflow_experiment_name: argilla/databricks-dolly-15k-curated-en
 model_type: AutoModelForCausalLM
 num_epochs: 1
@@ -73,21 +73,21 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-saves_per_epoch:
-sequence_len:
+saves_per_epoch: 0
+sequence_len: 64
 strict: false
-tf32:
+tf32: true
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.
+val_set_size: 0.001
 wandb_entity: null
-wandb_mode:
+wandb_mode: disabled
 wandb_name: 123e4567-e89b-12d3-a456-426614174000
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: 123e4567-e89b-12d3-a456-426614174000
-warmup_steps:
+warmup_steps: 0
 weight_decay: 0.0
 xformers_attention: null
 
@@ -99,7 +99,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 11.
+- Loss: 11.9339
 
 ## Model description
 
@@ -118,20 +118,19 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.
-- train_batch_size:
-- eval_batch_size:
+- learning_rate: 0.001
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
-- lr_scheduler_type:
-- lr_scheduler_warmup_steps: 2
+- lr_scheduler_type: linear
 - training_steps: 1
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 11.
+| 11.9308 | 0.0003 | 1 | 11.9339 |
 
 
 ### Framework versions
````
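For context, here is a minimal sketch of how an adapter produced by this config is commonly loaded. The repo ids below come from the `base_model` and `hub_model_id` fields in the YAML above; the `peft`/`transformers` calls are the standard pattern for LoRA checkpoints, not something specified by this commit.

```python
# Minimal sketch: attach the LoRA adapter from this repo to its base model.
# Assumes `transformers` and `peft` are installed; repo ids are taken from
# the `base_model` and `hub_model_id` fields of the config above.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "peft-internal-testing/tiny-dummy-qwen2"
ADAPTER_ID = "willtensora/123e4567-e89b-12d3-a456-426614174000"

base = AutoModelForCausalLM.from_pretrained(BASE_ID, trust_remote_code=True)
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # loads adapter_model.bin
tokenizer = AutoTokenizer.from_pretrained(BASE_ID)

# Optional: fold the LoRA weights into the base model for plain inference.
model = model.merge_and_unload()
```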
adapter_model.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:72cab532378323b0cc53540e5a55a19e97e56b2c647c403587d3339078717c5e
+size 15746
```
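The `adapter_model.bin` entry above is a Git LFS pointer, so the weight file is fully identified by its SHA-256 digest and byte size. As a generic sanity check (not part of this commit), one can recompute both for a downloaded copy and compare against the new pointer:

```python
# Sketch: verify a downloaded adapter_model.bin against its LFS pointer.
import hashlib
from pathlib import Path

def lfs_fingerprint(path: str) -> tuple[str, int]:
    """Return the (sha256 hex digest, size in bytes) of the file at `path`."""
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest(), len(data)

digest, size = lfs_fingerprint("adapter_model.bin")
# Expected values from the new LFS pointer in this commit:
assert digest == "72cab532378323b0cc53540e5a55a19e97e56b2c647c403587d3339078717c5e"
assert size == 15746
```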