ssmits committed
Commit 90f46ea
1 Parent(s): 17db2ea

Upload 2 files

Files changed (2)
  1. finetune.py +3 -1
  2. optimize_lr.py +4 -1
finetune.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ import json

  CONTEXT_WINDOW = 1024 #has to fit in 4090
  HF_TOKEN = os.getenv("HF_TOKEN")
@@ -10,6 +11,7 @@ from transformers import (
  import torch
  from datasets import load_dataset
  from huggingface_hub import login
+ from optimize_lr import best_lr

  # setup tokenizer
  tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-1.2B-instruct", token=HF_TOKEN)
@@ -73,7 +75,7 @@ training_args = TrainingArguments(
      save_steps=500,
      save_total_limit=2,
      logging_steps=100,
-     learning_rate=2e-5,
+     learning_rate=best_lr,
      weight_decay=0.01,
      fp16=False,
      bf16=True,
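
A note on the new wiring: "from optimize_lr import best_lr" executes optimize_lr.py the first time finetune.py imports it, so the learning-rate search would run before fine-tuning begins. Below is a minimal alternative sketch that instead reads the tuned value back from the lr_optimization_results.json file the search script already writes; the load_best_lr helper and the "best_learning_rate" key are illustrative assumptions, not part of this commit.

import json
import os

def load_best_lr(path="lr_optimization_results.json", default=2e-5):
    # Fall back to the previous hard-coded 2e-5 if no optimization results exist yet.
    if not os.path.exists(path):
        return default
    with open(path) as f:
        results = json.load(f)
    # Key name is an assumption; match it to whatever optimize_lr.py stores in results.
    return results.get("best_learning_rate", default)

best_lr = load_best_lr()

Reading from the JSON keeps finetune.py from re-running the search on every import while still picking up the tuned rate via learning_rate=best_lr.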
optimize_lr.py CHANGED
@@ -398,4 +398,7 @@ plot_gpr_results(study, final_optimization)

  # Save all results
  with open("lr_optimization_results.json", "w") as f:
-     json.dump(results, f, indent=4)
+     json.dump(results, f, indent=4)
+
+ # Store best learning rate as a variable for finetune.py to use
+ best_lr = study.best_params["learning_rate"]
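
For reference, study.best_params["learning_rate"] matches Optuna's Study API, so the study object here presumably comes from an Optuna-style search. A toy, self-contained sketch of how that lookup gets populated; the objective and search range below are placeholders, not the real training loop in optimize_lr.py.

import optuna

def objective(trial):
    # Placeholder objective: the real script would train and evaluate the model here.
    lr = trial.suggest_float("learning_rate", 1e-6, 1e-3, log=True)
    return (lr - 2e-5) ** 2  # pretend loss, minimized near 2e-5

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

best_lr = study.best_params["learning_rate"]  # same lookup as the last line of the diff above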