Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
Training log: https://wandb.ai/toanbku/reward-model/runs/2ekuy6lg/overview
|
4 |
+
|
5 |
+
```bash
deepspeed --include=localhost:0 --master_port 61000 trainer_rm.py --config \
    defaults_rm oasst-rm-2.1-pythia-1.4b \
    --cache_dir /home/ubuntu/OA/model/model_training/.cache \
    --per_device_eval_batch_size 1 --per_device_train_batch_size 1 \
    --wandb_entity toanbku --deepspeed
```
|
12 |
+
|
13 |
+
|
14 |
+
```yaml
oasst-rm-2.1-pythia-1.4b:
  is_reward_model: true
  pooling: last
  datasets:
    - oasst_export:
        lang: "en"
        hf_dataset_name: toanbku/oa-df
        val_split: 0.1
  use_custom_sampler: true
  sort_by_length: false
  model_name: OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5
  learning_rate: 8e-6
  residual_dropout: 0.01
  weight_decay: 0.0
  dtype: float32
  max_length: 2048
  use_flash_attention: true
  warmup_steps: 2
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 2
  num_train_epochs: 2
  eval_steps: 50
  save_steps: 100
  use_system_tag: false
  system_property_dropout: 0.5
  system_add_length: false
```
|