Kukedlc commited on
Commit
1b1cf19
1 Parent(s): 42a3e94

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +61 -1
README.md CHANGED
@@ -4,4 +4,64 @@ license: apache-2.0
4
 
5
  Modelo entrenado con DPO
6
 
7
- Merge de dos modelos
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  Modelo entrenado con DPO
6
 
7
+ Merge de dos modelos
8
+
9
+ código de entrenamiento:
10
+
11
+ # LoRA configuration
12
+ peft_config = LoraConfig(
13
+ r=16,
14
+ lora_alpha=16,
15
+ lora_dropout=0.05,
16
+ bias="none",
17
+ task_type="CAUSAL_LM",
18
+ target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
19
+ )
20
+
21
+ # Model to fine-tune
22
+ model = AutoModelForCausalLM.from_pretrained(
23
+ model_name,
24
+ torch_dtype=torch.float16,
25
+ load_in_4bit=True
26
+ )
27
+ model.config.use_cache = False
28
+
29
+ # Reference model
30
+ ref_model = AutoModelForCausalLM.from_pretrained(
31
+ model_name,
32
+ torch_dtype=torch.float16,
33
+ load_in_4bit=True
34
+ )
35
+
36
+ # Training arguments
37
+ training_args = TrainingArguments(
38
+ per_device_train_batch_size=4,
39
+ gradient_accumulation_steps=4,
40
+ gradient_checkpointing=True,
41
+ learning_rate=5e-5,
42
+ lr_scheduler_type="cosine",
43
+ max_steps=200,
44
+ save_strategy="no",
45
+ logging_steps=1,
46
+ output_dir=new_model,
47
+ optim="paged_adamw_32bit",
48
+ warmup_steps=100,
49
+ bf16=True,
50
+ report_to="wandb",
51
+ )
52
+
53
+ # Create DPO trainer
54
+ dpo_trainer = DPOTrainer(
55
+ model,
56
+ ref_model,
57
+ args=training_args,
58
+ train_dataset=dataset,
59
+ tokenizer=tokenizer,
60
+ peft_config=peft_config,
61
+ beta=0.1,
62
+ max_prompt_length=1024,
63
+ max_length=1536,
64
+ )
65
+
66
+ # Fine-tune model with DPO
67
+ dpo_trainer.train()