Nondzu committed
Commit 5f42007
1 Parent(s): 7f535a6

Upload zephyr-beta-pl-02.yml

Files changed (1)
  1. zephyr-beta-pl-02.yml +71 -0
zephyr-beta-pl-02.yml ADDED
@@ -0,0 +1,71 @@
+
+base_model: HuggingFaceH4/zephyr-7b-beta
+# model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+is_mistral_derived_model: true
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: klima7/polish-prose
+    type: completion
+  - path: Lajonbot/alpaca-dolly-chrisociepa-instruction-only-polish
+    type: alpaca
+
+dataset_prepared_path:
+val_set_size: 0.01
+output_dir: ./nondzu/Mistral-7B-codealpaca-test17
+adapter: qlora
+lora_model_dir:
+# 16384 8192 4096 2048
+sequence_len: 8192
+sample_packing: true
+pad_to_sequence_len: true
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+lora_target_linear: true
+lora_fan_in_fan_out:
+
+wandb_project: mistral-code
+wandb_entity:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+
+gradient_accumulation_steps: 1
+micro_batch_size: 2
+num_epochs: 8
+optimizer: paged_adamw_32bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+main_process_port: 0
+warmup_steps: 10
+eval_steps: 20
+save_steps:
+debug:
+# deepspeed:
+deepspeed: deepspeed/zero2.json
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  bos_token: "<s>"
+  eos_token: "</s>"
+  unk_token: "<unk>"
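
For reference, a minimal sketch (Python, assuming PyYAML is installed) of loading this config and sanity-checking its QLoRA settings before launch. The file name and keys come from the diff above; the launch command in the trailing comment follows axolotl's documented CLI entry point.

import yaml

# Load the uploaded axolotl config (file name from the commit above).
with open("zephyr-beta-pl-02.yml") as f:
    cfg = yaml.safe_load(f)

# Sanity checks that mirror values in the file; adjust if the config changes.
assert cfg["base_model"] == "HuggingFaceH4/zephyr-7b-beta"
assert cfg["adapter"] == "qlora" and cfg["load_in_4bit"] is True
print(f"seq_len={cfg['sequence_len']} lora_r={cfg['lora_r']} epochs={cfg['num_epochs']}")

# Training is typically started with axolotl's CLI, e.g.:
#   accelerate launch -m axolotl.cli.train zephyr-beta-pl-02.yml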