Sebastien De Greef committed
Commit 5159911
Parent: c0f4fad

chore: Refactor save_model function to support multiple quantization methods

Files changed (1): app.py (+32, -13)
app.py CHANGED
@@ -23,7 +23,7 @@ logger.debug('This is a debug message')
 hf_user = None
 hfApi = HfApi()
 try:
-    hf_user = hfApi.whoami()
+    hf_user = hfApi.whoami()["name"]
 except Exception as e:
     hf_user = "not logged in"
 
@@ -39,7 +39,17 @@ model_options = [
     "unsloth/Phi-3-medium-4k-instruct",
     "unsloth/mistral-7b-bnb-4bit",
     "unsloth/gemma-2-9b-bnb-4bit",
+    "unsloth/gemma-2-9b-bnb-4bit-instruct",
     "unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster!
+    "unsloth/gemma-2-27b-bnb-4bit-instruct", # Gemma 2x faster!
+    "unsloth/Qwen2-1.5B-bnb-4bit",
+    "unsloth/Qwen2-1.5B-bnb-4bit-instruct",
+    "unsloth/Qwen2-7B-bnb-4bit",
+    "unsloth/Qwen2-7B-bnb-4bit-instruct",
+    "unsloth/Qwen2-72B-bnb-4bit",
+    "unsloth/Qwen2-72B-bnb-4bit-instruct",
+    "unsloth/yi-6b-bnb-4bit",
+    "unsloth/yi-34b-bnb-4bit",
 ]
 gpu_stats = torch.cuda.get_device_properties(0)
 start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
@@ -115,21 +125,23 @@ def inference(prompt, input_text):
     result = tokenizer.batch_decode(outputs)
     return result[0], gr.update(visible=True, interactive=True)
 
-def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub):
+def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub, progress=gr.Progress()):
     global model, tokenizer
+
+    quants = []
+
     if gguf_custom:
         gguf_custom_value = gguf_custom_value
+        quants.append(gguf_custom_value)
     else:
         gguf_custom_value = None
 
     if gguf_16bit:
-        gguf = "f16"
-    elif gguf_8bit:
-        gguf = "q8_0"
-    elif gguf_4bit:
-        gguf = "q4_k_m"
-    else:
-        gguf = None
+        quants.append("f16")
+    if gguf_8bit:
+        quants.append("q8_0")
+    if gguf_4bit:
+        quants.append("q4_k_m")
 
     if merge_16bit:
         merge = "16bit"
@@ -142,14 +154,23 @@ def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, ggu
 
     #model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")
     if push_to_hub:
-        model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=gguf, token=hub_token)
+        current_quant = 0
+        for q in quants:
+            progress(current_quant/len(quants), desc=f"Pushing model {model_name} with {q} to HuggingFace Hub")
+            model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=q, token=hub_token)
+            current_quant += 1
     return "Model saved", gr.update(visible=True, interactive=True)
 
+def username(profile: gr.OAuthProfile | None):
+    return profile["name"] if profile else "not logged in"
+
 # Create the Gradio interface
 with gr.Blocks(title="Unsloth fine-tuning") as demo:
+    gr.LoginButton()
+    logged_user = gr.Markdown(f"**User:** {hf_user}")
+    demo.load(username, inputs=None, outputs=logged_user)
     with gr.Column():
         gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
-        gr.LoginButton()
     with gr.Column():
         gr.Markdown(f"**User:** {hf_user}\n\n**GPU Information:** {gpu_stats.name} ({max_memory} GB)\n\n[Unsloth Docs](http://docs.unsloth.com/)\n\n[Unsloth GitHub](https://github.com/unslothai/unsloth)")
     with gr.Tab("Base Model Parameters"):
@@ -282,8 +303,6 @@ with gr.Blocks(title="Unsloth fine-tuning") as demo:
     train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])
 
     with gr.Tab("Save & Push Options"):
-
-
 
     with gr.Row():
         gr.Markdown("### Merging Options")
 
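The core of the refactor replaces the mutually exclusive `gguf` string with a `quants` list, so any combination of the GGUF checkboxes (plus a custom method) can be exported in a single save. A standalone sketch of that selection logic, mirroring the diff (`collect_quants` is an illustrative name, not a function in app.py):

```python
def collect_quants(gguf_16bit, gguf_8bit, gguf_4bit,
                   gguf_custom=False, gguf_custom_value=None):
    """Map independent checkboxes to a list of GGUF quantization methods."""
    quants = []
    if gguf_custom:
        quants.append(gguf_custom_value)  # e.g. a custom method such as "q5_k_m"
    if gguf_16bit:
        quants.append("f16")
    if gguf_8bit:
        quants.append("q8_0")
    if gguf_4bit:
        quants.append("q4_k_m")
    return quants

print(collect_quants(True, False, True))  # ['f16', 'q4_k_m']
```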