willtensora committed
End of training

- README.md +8 -8
- adapter_model.bin +1 -1
README.md CHANGED

@@ -38,12 +38,12 @@ deepspeed: null
 early_stopping_patience: null
 eval_max_new_tokens: 128
 eval_table_size: null
-evals_per_epoch:
+evals_per_epoch: 1
 flash_attention: false
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 1
 gradient_checkpointing: false
 group_by_length: false
 hub_model_id: willtensora/123e4567-e89b-12d3-a456-426614174000
@@ -62,8 +62,8 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps:
-micro_batch_size:
+max_steps: 2
+micro_batch_size: 1
 mlflow_experiment_name: argilla/databricks-dolly-15k-curated-en
 model_type: AutoModelForCausalLM
 num_epochs: 1
@@ -73,21 +73,21 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-saves_per_epoch:
-sequence_len:
+saves_per_epoch: 1
+sequence_len: 128
 strict: false
 tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.
+val_set_size: 0.01
 wandb_entity: null
 wandb_mode: online
 wandb_name: 123e4567-e89b-12d3-a456-426614174000
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: 123e4567-e89b-12d3-a456-426614174000
-warmup_steps:
+warmup_steps: 2
 weight_decay: 0.0
 xformers_attention: null
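The updated config fills in the previously empty training knobs (evals_per_epoch, gradient_accumulation_steps, max_steps, micro_batch_size, saves_per_epoch, sequence_len, val_set_size, warmup_steps) and publishes the resulting LoRA adapter under the hub_model_id `willtensora/123e4567-e89b-12d3-a456-426614174000`. A minimal sketch of loading that adapter with peft, assuming standard transformers/peft usage; the base checkpoint is not named in this diff, so `base-model-name` is a placeholder:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# "base-model-name" is a placeholder: the base checkpoint is not shown in this diff.
base = AutoModelForCausalLM.from_pretrained("base-model-name")
tokenizer = AutoTokenizer.from_pretrained("base-model-name")

# Attach the trained LoRA adapter from the hub_model_id in the config above.
model = PeftModel.from_pretrained(base, "willtensora/123e4567-e89b-12d3-a456-426614174000")
model.eval()
```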
adapter_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a59373a974866d944d76f095a936779dd70b9ca43d96196d5e05e9668d5584d7
 size 80115210
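The adapter_model.bin change only swaps the Git LFS pointer's object hash; the recorded size stays at 80115210 bytes. A minimal sketch of checking a locally downloaded adapter_model.bin against the pointer fields (the local path is an assumption):

```python
import hashlib
import os

EXPECTED_OID = "a59373a974866d944d76f095a936779dd70b9ca43d96196d5e05e9668d5584d7"
EXPECTED_SIZE = 80115210

path = "adapter_model.bin"  # assumed local download location

# Hash the file in chunks so large adapters need not fit in memory at once.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("size matches:", os.path.getsize(path) == EXPECTED_SIZE)
print("oid matches:", sha.hexdigest() == EXPECTED_OID)
```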