Sebastien De Greef committed
Commit 5159911 · Parent(s): c0f4fad

chore: Refactor save_model function to support multiple quantization methods

app.py CHANGED
@@ -23,7 +23,7 @@ logger.debug('This is a debug message')
 hf_user = None
 hfApi = HfApi()
 try:
-    hf_user = hfApi.whoami()
+    hf_user = hfApi.whoami()["name"]
 except Exception as e:
     hf_user = "not logged in"

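A note on this hunk: `HfApi.whoami()` returns a dict of account metadata rather than a plain string, so indexing `["name"]` is what makes the header show just the username. A minimal sketch of the pattern (the token is whatever `huggingface_hub` finds in the environment):

```python
# Sketch of the lookup this hunk relies on. HfApi.whoami() raises when no
# valid token is available, hence the try/except fallback in app.py.
from huggingface_hub import HfApi

try:
    hf_user = HfApi().whoami()["name"]  # whoami() returns a dict; "name" is the username
except Exception:
    hf_user = "not logged in"
print(hf_user)
```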
@@ -39,7 +39,17 @@ model_options = [
     "unsloth/Phi-3-medium-4k-instruct",
     "unsloth/mistral-7b-bnb-4bit",
     "unsloth/gemma-2-9b-bnb-4bit",
+    "unsloth/gemma-2-9b-bnb-4bit-instruct",
     "unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster!
+    "unsloth/gemma-2-27b-bnb-4bit-instruct", # Gemma 2x faster!
+    "unsloth/Qwen2-1.5B-bnb-4bit",
+    "unsloth/Qwen2-1.5B-bnb-4bit-instruct",
+    "unsloth/Qwen2-7B-bnb-4bit",
+    "unsloth/Qwen2-7B-bnb-4bit-instruct",
+    "unsloth/Qwen2-72B-bnb-4bit",
+    "unsloth/Qwen2-72B-bnb-4bit-instruct",
+    "unsloth/yi-6b-bnb-4bit",
+    "unsloth/yi-34b-bnb-4bit",
 ]
 gpu_stats = torch.cuda.get_device_properties(0)
 start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
@@ -115,21 +125,23 @@ def inference(prompt, input_text):
     result = tokenizer.batch_decode(outputs)
     return result[0], gr.update(visible=True, interactive=True)

-def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub):
+def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub, progress=gr.Progress()):
     global model, tokenizer
+
+    quants = []
+
     if gguf_custom:
         gguf_custom_value = gguf_custom_value
+        quants.append(gguf_custom_value)
     else:
         gguf_custom_value = None

     if gguf_16bit:
-        gguf = "f16"
-    elif gguf_8bit:
-        gguf = "q8_0"
-    elif gguf_4bit:
-        gguf = "q4_k_m"
-    else:
-        gguf = None
+        quants.append("f16")
+    if gguf_8bit:
+        quants.append("q8_0")
+    if gguf_4bit:
+        quants.append("q4_k_m")

     if merge_16bit:
         merge = "16bit"
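The core of the refactor is visible here: the old code reduced the checkboxes to a single `gguf` value with an if/elif chain, so only one method could apply, while the new code accumulates every checked method into a `quants` list. A standalone sketch of the new selection logic (the helper name and the "q5_k_m" example are illustrative, not from the commit):

```python
# Illustrative helper mirroring the new list-based selection in save_model().
# Unlike the old if/elif chain, several methods can be chosen at once.
def collect_quants(gguf_16bit: bool, gguf_8bit: bool, gguf_4bit: bool,
                   custom: str | None = None) -> list[str]:
    quants = []
    if custom:
        quants.append(custom)      # e.g. a hand-typed method such as "q5_k_m"
    if gguf_16bit:
        quants.append("f16")
    if gguf_8bit:
        quants.append("q8_0")
    if gguf_4bit:
        quants.append("q4_k_m")
    return quants

assert collect_quants(True, False, True) == ["f16", "q4_k_m"]
```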
@@ -142,14 +154,23 @@ def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, ggu

     #model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")
     if push_to_hub:
-        model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=gguf, token=hub_token)
+        current_quant = 0
+        for q in quants:
+            progress(current_quant/len(quants), desc=f"Pushing model {model_name} with {q} to HuggingFace Hub")
+            model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=q, token=hub_token)
+            current_quant += 1
     return "Model saved", gr.update(visible=True, interactive=True)

+def username(profile: gr.OAuthProfile | None):
+    return profile["name"] if profile else "not logged in"
+
 # Create the Gradio interface
 with gr.Blocks(title="Unsloth fine-tuning") as demo:
+    gr.LoginButton()
+    logged_user = gr.Markdown(f"**User:** {hf_user}")
+    demo.load(username, inputs=None, outputs=logged_user)
     with gr.Column():
         gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
-        gr.LoginButton()
     with gr.Column():
         gr.Markdown(f"**User:** {hf_user}\n\n**GPU Information:** {gpu_stats.name} ({max_memory} GB)\n\n[Unsloth Docs](http://docs.unsloth.com/)\n\n[Unsloth GitHub](https://github.com/unslothai/unsloth)")
     with gr.Tab("Base Model Parameters"):
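The push branch now iterates over `quants` and reports progress per method. Gradio injects a tracker when a handler declares a `progress=gr.Progress()` parameter; calling it with a fraction and a `desc` updates the UI. A trimmed sketch of the pattern (the push call is stubbed out; note that the fraction, as in the commit, peaks at (n-1)/n rather than 1.0):

```python
import gradio as gr

# Trimmed sketch of the new loop: one upload per selected quantization,
# with a UI progress update before each push.
def push_all(quants, progress=gr.Progress()):
    for i, q in enumerate(quants):
        progress(i / len(quants), desc=f"Pushing {q} to the Hugging Face Hub")
        # model.push_to_hub_gguf(hub_model_name, tokenizer,
        #                        quantization_method=q, token=hub_token)
    return "Model saved"
```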
@@ -282,8 +303,6 @@ with gr.Blocks(title="Unsloth fine-tuning") as demo:
     train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])

     with gr.Tab("Save & Push Options"):
-
-

         with gr.Row():
             gr.Markdown("### Merging Options")
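The login wiring added in the fourth hunk leans on Gradio's OAuth support: when a callback parameter is annotated with `gr.OAuthProfile | None`, Gradio fills it with the logged-in Hub user's profile (or `None` when logged out), and `demo.load` re-renders the Markdown on page load. This requires a Space with OAuth enabled to return real profiles. A minimal self-contained sketch, assuming the profile exposes the display name under the "name" key as the commit does:

```python
import gradio as gr

def username(profile: gr.OAuthProfile | None) -> str:
    # Gradio passes the Hub profile automatically because of the annotation;
    # "name" holding the display name is an assumption carried over from the commit.
    return profile["name"] if profile else "not logged in"

with gr.Blocks() as demo:
    gr.LoginButton()                      # "Sign in with Hugging Face" button
    logged_user = gr.Markdown()
    demo.load(username, inputs=None, outputs=logged_user)

demo.launch()
```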
|