|
|
|
|
|
Training log: https://wandb.ai/toanbku/reward-model/runs/2ekuy6lg/overview
|
|
|
```
deepspeed --include=localhost:0 --master_port 61000 trainer_rm.py --config \
    defaults_rm oasst-rm-2.1-pythia-1.4b \
    --cache_dir /home/ubuntu/OA/model/model_training/.cache \
    --per_device_eval_batch_size 1 --per_device_train_batch_size 1 \
    --wandb_entity toanbku --deepspeed
```
|
|
|
|
|
```
oasst-rm-2.1-pythia-1.4b:
  is_reward_model: true
  pooling: last
  datasets:
    - oasst_export:
        lang: "en"
        hf_dataset_name: toanbku/oa-df
        val_split: 0.1
  use_custom_sampler: true
  sort_by_length: false
  model_name: OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5
  learning_rate: 8e-6
  residual_dropout: 0.01
  weight_decay: 0.0
  dtype: float32
  max_length: 2048
  use_flash_attention: true
  warmup_steps: 2
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 2
  num_train_epochs: 2
  eval_steps: 50
  save_steps: 100
  use_system_tag: false
  system_property_dropout: 0.5
  system_add_length: false
```
|
|