andreaskoepf committed on
Commit
fa3b339
1 Parent(s): f9d725d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +87 -0
README.md CHANGED
@@ -1,3 +1,90 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+ wandb run: https://wandb.ai/open-assistant/supervised-finetuning/runs/770a0t41 (exported at step 4000)
5
+
6
+ data:
7
+
8
+ ```yaml
9
+ reference-data:
10
+ datasets:
11
+ - oasst_export:
12
+ lang: "bg,ca,cs,da,de,en,es,fr,hr,hu,it,nl,pl,pt,ro,ru,sl,sr,sv,uk"
13
+ input_file_path: 2023-03-25_oasst_research_ready_synth_labels.jsonl.gz
14
+ val_split: 0.05
15
+ - alpaca
16
+ sort_by_length: false
17
+ use_custom_sampler: false
18
+ ```
19
+
20
+
21
+ pythia config:
22
+ ```yaml
23
+ reference-pythia-12b:
24
+ dtype: fp16
25
+ log_dir: "pythia_log_12b"
26
+ learning_rate: 6e-6
27
+ model_name: EleutherAI/pythia-12b-deduped
28
+ output_dir: pythia_model_12b
29
+ weight_decay: 0.0
30
+ max_length: 2048
31
+ warmup_steps: 100
32
+ gradient_checkpointing: true
33
+ gradient_accumulation_steps: 2
34
+ per_device_train_batch_size: 4
35
+ per_device_eval_batch_size: 4
36
+ eval_steps: 100
37
+ save_steps: 1000
38
+ num_train_epochs: 8
39
+ save_total_limit: 4
40
+ ```
41
+
42
+ DeepSpeed ZeRO config:
43
+ ```json
44
+ {
45
+ "fp16": {
46
+ "enabled": "auto",
47
+ "loss_scale": 0,
48
+ "loss_scale_window": 1000,
49
+ "initial_scale_power": 16,
50
+ "hysteresis": 2,
51
+ "min_loss_scale": 1
52
+ },
53
+ "bf16": {
54
+ "enabled": "auto"
55
+ },
56
+ "optimizer": {
57
+ "type": "AdamW",
58
+ "params": {
59
+ "lr": "auto",
60
+ "betas": "auto",
61
+ "eps": "auto",
62
+ "weight_decay": "auto"
63
+ }
64
+ },
65
+ "scheduler": {
66
+ "type": "WarmupDecayLR",
67
+ "params": {
68
+ "warmup_min_lr": "auto",
69
+ "warmup_max_lr": "auto",
70
+ "warmup_num_steps": "auto",
71
+ "total_num_steps": "auto"
72
+ }
73
+ },
74
+ "zero_optimization": {
75
+ "stage": 2,
76
+ "allgather_partitions": true,
77
+ "allgather_bucket_size": 1e9,
78
+ "overlap_comm": false,
79
+ "reduce_scatter": true,
80
+ "reduce_bucket_size": 1e9,
81
+ "contiguous_gradients": true
82
+ },
83
+ "gradient_accumulation_steps": "auto",
84
+ "gradient_clipping": "auto",
85
+ "steps_per_print": 2000,
86
+ "train_batch_size": "auto",
87
+ "train_micro_batch_size_per_gpu": "auto",
88
+ "wall_clock_breakdown": false
89
+ }
90
+ ```