Upload 2 files
- finetune.py +3 -1
- optimize_lr.py +4 -1
finetune.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import json
 
 CONTEXT_WINDOW = 1024 #has to fit in 4090
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -10,6 +11,7 @@ from transformers import (
 import torch
 from datasets import load_dataset
 from huggingface_hub import login
+from optimize_lr import best_lr
 
 # setup tokenizer
 tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-1.2B-instruct", token=HF_TOKEN)
@@ -73,7 +75,7 @@ training_args = TrainingArguments(
     save_steps=500,
     save_total_limit=2,
     logging_steps=100,
-    learning_rate=
+    learning_rate=best_lr,
     weight_decay=0.01,
     fp16=False,
     bf16=True,
optimize_lr.py
CHANGED
@@ -398,4 +398,7 @@ plot_gpr_results(study, final_optimization)
 
 # Save all results
 with open("lr_optimization_results.json", "w") as f:
-    json.dump(results, f, indent=4)
+    json.dump(results, f, indent=4)
+
+# Store best learning rate as a variable for finetune.py to use
+best_lr = study.best_params["learning_rate"]