Sebastien De Greef committed
Commit 5159911
Parent: c0f4fad

chore: Refactor save_model function to support multiple quantization methods

Files changed (1): app.py (+32, -13)
app.py CHANGED
@@ -23,7 +23,7 @@ logger.debug('This is a debug message')
 hf_user = None
 hfApi = HfApi()
 try:
-    hf_user = hfApi.whoami()
+    hf_user = hfApi.whoami()["name"]
 except Exception as e:
     hf_user = "not logged in"
 
@@ -39,7 +39,17 @@ model_options = [
     "unsloth/Phi-3-medium-4k-instruct",
     "unsloth/mistral-7b-bnb-4bit",
     "unsloth/gemma-2-9b-bnb-4bit",
+    "unsloth/gemma-2-9b-bnb-4bit-instruct",
     "unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster!
+    "unsloth/gemma-2-27b-bnb-4bit-instruct", # Gemma 2x faster!
+    "unsloth/Qwen2-1.5B-bnb-4bit",
+    "unsloth/Qwen2-1.5B-bnb-4bit-instruct",
+    "unsloth/Qwen2-7B-bnb-4bit",
+    "unsloth/Qwen2-7B-bnb-4bit-instruct",
+    "unsloth/Qwen2-72B-bnb-4bit",
+    "unsloth/Qwen2-72B-bnb-4bit-instruct",
+    "unsloth/yi-6b-bnb-4bit",
+    "unsloth/yi-34b-bnb-4bit",
 ]
 gpu_stats = torch.cuda.get_device_properties(0)
 start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
@@ -115,21 +125,23 @@ def inference(prompt, input_text):
     result = tokenizer.batch_decode(outputs)
     return result[0], gr.update(visible=True, interactive=True)
 
-def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub):
+def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, gguf_4bit, gguf_custom, gguf_custom_value, merge_16bit, merge_4bit, just_lora, push_to_hub, progress=gr.Progress()):
     global model, tokenizer
+
+    quants = []
+
     if gguf_custom:
         gguf_custom_value = gguf_custom_value
+        quants.append(gguf_custom_value)
     else:
         gguf_custom_value = None
 
     if gguf_16bit:
-        gguf = "f16"
-    elif gguf_8bit:
-        gguf = "q8_0"
-    elif gguf_4bit:
-        gguf = "q4_k_m"
-    else:
-        gguf = None
+        quants.append("f16")
+    if gguf_8bit:
+        quants.append("q8_0")
+    if gguf_4bit:
+        quants.append("q4_k_m")
 
     if merge_16bit:
         merge = "16bit"
@@ -142,14 +154,23 @@ def save_model(model_name, hub_model_name, hub_token, gguf_16bit, gguf_8bit, ggu
 
     #model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")
     if push_to_hub:
-        model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=gguf, token=hub_token)
+        current_quant = 0
+        for q in quants:
+            progress(current_quant/len(quants), desc=f"Pushing model {model_name} with {q} to HuggingFace Hub")
+            model.push_to_hub_gguf(hub_model_name, tokenizer, quantization_method=q, token=hub_token)
+            current_quant += 1
     return "Model saved", gr.update(visible=True, interactive=True)
 
+def username(profile: gr.OAuthProfile | None):
+    return profile["name"] if profile else "not logged in"
+
 # Create the Gradio interface
 with gr.Blocks(title="Unsloth fine-tuning") as demo:
+    gr.LoginButton()
+    logged_user = gr.Markdown(f"**User:** {hf_user}")
+    demo.load(username, inputs=None, outputs=logged_user)
     with gr.Column():
         gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
-        gr.LoginButton()
     with gr.Column():
         gr.Markdown(f"**User:** {hf_user}\n\n**GPU Information:** {gpu_stats.name} ({max_memory} GB)\n\n[Unsloth Docs](http://docs.unsloth.com/)\n\n[Unsloth GitHub](https://github.com/unslothai/unsloth)")
     with gr.Tab("Base Model Parameters"):
@@ -282,8 +303,6 @@ with gr.Blocks(title="Unsloth fine-tuning") as demo:
     train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])
 
     with gr.Tab("Save & Push Options"):
-
-
 
     with gr.Row():
         gr.Markdown("### Merging Options")
 
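The core of the refactor replaces the mutually exclusive `gguf` string with a `quants` list, so any combination of the GGUF checkboxes (plus a custom method) can be exported in a single save. A standalone sketch of that selection logic, mirroring the diff (`collect_quants` is an illustrative name, not a function in app.py):

```python
def collect_quants(gguf_16bit, gguf_8bit, gguf_4bit,
                   gguf_custom=False, gguf_custom_value=None):
    """Map independent checkboxes to a list of GGUF quantization methods."""
    quants = []
    if gguf_custom:
        quants.append(gguf_custom_value)  # e.g. a custom method such as "q5_k_m"
    if gguf_16bit:
        quants.append("f16")
    if gguf_8bit:
        quants.append("q8_0")
    if gguf_4bit:
        quants.append("q4_k_m")
    return quants

print(collect_quants(True, False, True))  # ['f16', 'q4_k_m']
```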