burtenshaw
add trl script
aac30ac
raw
history blame contribute delete
444 Bytes
# Launch kto.py with a fixed set of training flags.
#
# The flags are accumulated group by group in the function's own positional
# parameters (so the script's "$@" is left untouched) and are handed to
# kto.py in exactly the order listed below. Nothing from this script's
# command line is forwarded.
#
# NOTE(review): --eval_steps is passed but no evaluation-strategy flag is.
# If kto.py parses Hugging Face TrainingArguments, evaluation is presumably
# off by default and this value is ignored -- confirm whether
# `--eval_strategy steps` was intended.
run_kto_training() {
  # Dataset and starting checkpoint.
  set -- \
    --dataset_name trl-lib/kto-mix-14k \
    --model_name_or_path=trl-lib/qwen1.5-1.8b-sft

  # Batch size, epoch count and learning-rate settings.
  set -- "$@" \
    --per_device_train_batch_size 16 \
    --num_train_epochs 1 \
    --learning_rate 5e-7 \
    --lr_scheduler_type=cosine \
    --gradient_accumulation_steps 1

  # Step intervals and output directory.
  set -- "$@" \
    --logging_steps 10 \
    --eval_steps 500 \
    --output_dir=kto-aligned-model

  # Warmup ratio, reporting target, bf16 switch and first-step logging.
  set -- "$@" \
    --warmup_ratio 0.1 \
    --report_to wandb \
    --bf16 \
    --logging_first_step

  # Exit status of the function (and of the script, when this is the last
  # command) is whatever python returns.
  python kto.py "$@"
}

run_kto_training