willtensora committed
End of training

- README.md +8 -8
- adapter_model.bin +1 -1
README.md CHANGED

@@ -38,12 +38,12 @@ deepspeed: null
 early_stopping_patience: null
 eval_max_new_tokens: 128
 eval_table_size: null
-evals_per_epoch:
+evals_per_epoch: 1
 flash_attention: false
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 1
 gradient_checkpointing: false
 group_by_length: false
 hub_model_id: willtensora/123e4567-e89b-12d3-a456-426614174000
@@ -62,8 +62,8 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps:
-micro_batch_size:
+max_steps: 2
+micro_batch_size: 1
 mlflow_experiment_name: argilla/databricks-dolly-15k-curated-en
 model_type: AutoModelForCausalLM
 num_epochs: 1
@@ -73,21 +73,21 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-saves_per_epoch:
-sequence_len:
+saves_per_epoch: 1
+sequence_len: 128
 strict: false
 tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.
+val_set_size: 0.01
 wandb_entity: null
 wandb_mode: online
 wandb_name: 123e4567-e89b-12d3-a456-426614174000
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: 123e4567-e89b-12d3-a456-426614174000
-warmup_steps:
+warmup_steps: 2
 weight_decay: 0.0
 xformers_attention: null
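The updated config fills in the previously empty training knobs (evals_per_epoch, gradient_accumulation_steps, max_steps, micro_batch_size, saves_per_epoch, sequence_len, val_set_size, warmup_steps) and publishes the resulting LoRA adapter under the hub_model_id `willtensora/123e4567-e89b-12d3-a456-426614174000`. A minimal sketch of loading that adapter with peft, assuming standard transformers/peft usage; the base checkpoint is not named in this diff, so `base-model-name` is a placeholder:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# "base-model-name" is a placeholder: the base checkpoint is not shown in this diff.
base = AutoModelForCausalLM.from_pretrained("base-model-name")
tokenizer = AutoTokenizer.from_pretrained("base-model-name")

# Attach the trained LoRA adapter from the hub_model_id in the config above.
model = PeftModel.from_pretrained(base, "willtensora/123e4567-e89b-12d3-a456-426614174000")
model.eval()
```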
adapter_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a59373a974866d944d76f095a936779dd70b9ca43d96196d5e05e9668d5584d7
 size 80115210
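The adapter_model.bin change only swaps the Git LFS pointer's object hash; the recorded size stays at 80115210 bytes. A minimal sketch of checking a locally downloaded adapter_model.bin against the pointer fields (the local path is an assumption):

```python
import hashlib
import os

EXPECTED_OID = "a59373a974866d944d76f095a936779dd70b9ca43d96196d5e05e9668d5584d7"
EXPECTED_SIZE = 80115210

path = "adapter_model.bin"  # assumed local download location

# Hash the file in chunks so large adapters need not fit in memory at once.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("size matches:", os.path.getsize(path) == EXPECTED_SIZE)
print("oid matches:", sha.hexdigest() == EXPECTED_OID)
```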