AMKCode committed on
Commit
8c9d2de
1 Parent(s): 4e3cb72

added progress, quit btn, check for compatibility

Browse files
Files changed (1) hide show
  1. app.py +85 -4
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import os
2
  import shutil
3
  import subprocess
 
4
  import signal
5
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
6
  import gradio as gr
7
 
 
8
  from huggingface_hub import HfApi
9
  from huggingface_hub import ModelCard
10
 
@@ -70,8 +72,44 @@ QUANTIZATIONS = ["q0f16",
70
  "q4f32_1",
71
  "q4f16_awq"]
72
 
73
- def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuthToken | None):
74
- if oauth_token.token == None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  return "Log in to Huggingface to use this"
76
  elif not hf_model_id:
77
  return "Enter a Huggingface model ID"
@@ -80,6 +118,8 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
80
  elif not quantization:
81
  return "Select a quantization method"
82
 
 
 
83
  api = HfApi(token=oauth_token.token)
84
  model_dir_name = hf_model_id.split("/")[1]
85
  mlc_model_name = model_dir_name + "-" + quantization + "-" + "MLC"
@@ -87,7 +127,28 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
87
  os.system("mkdir -p dist/models")
88
  os.system("git lfs install")
89
 
90
- api.snapshot_download(repo_id=hf_model_id, local_dir=f"./dist/models/{model_dir_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  convert_weight_result = subprocess.run(["mlc_llm convert_weight ./dist/models/" + model_dir_name + "/" + \
93
  " --quantization " + quantization + \
@@ -96,6 +157,13 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
96
  os.system("rm -rf dist/")
97
  return convert_weight_result.stderr
98
 
 
 
 
 
 
 
 
99
  gen_config_result = subprocess.run(["mlc_llm gen_config ./dist/models/" + model_dir_name + "/" + \
100
  " --quantization " + quantization + " --conv-template " + conv_template + \
101
  " -o dist/" + mlc_model_name + "/"], shell=True, capture_output=True, text=True)
@@ -103,6 +171,13 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
103
  os.system("rm -rf dist/")
104
  return gen_config_result.stderr
105
 
 
 
 
 
 
 
 
106
  # push to HF
107
  user_name = api.whoami()["name"]
108
  created_repo_url = api.create_repo(repo_id=f"{user_name}/{mlc_model_name}", private=True)
@@ -144,9 +219,13 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
144
  repo_type="model")
145
 
146
  os.system("rm -rf dist/")
147
-
148
  return "Successful, please find your compiled LLM model on your personal account"
149
 
 
 
 
 
 
150
  with gr.Blocks() as demo:
151
  gr.LoginButton()
152
  gr.Markdown(
@@ -162,7 +241,9 @@ with gr.Blocks() as demo:
162
  conv = gr.Dropdown(CONV_TEMPLATES, label="Conversation Template")
163
  quant = gr.Dropdown(QUANTIZATIONS, label="Quantization Method", info="The format of the code is qAfB(_id), where A represents the number of bits for storing weights and B represents the number of bits for storing activations. The _id is an integer identifier to distinguish different quantization algorithms (e.g. symmetric, non-symmetric, AWQ, etc).")
164
  btn = gr.Button("Convert to MLC")
 
165
  out = gr.Textbox(label="Conversion Result")
166
  btn.click(fn=button_click , inputs=[model_id, conv, quant], outputs=out)
 
167
 
168
  demo.launch()
 
1
  import os
2
  import shutil
3
  import subprocess
4
+ import sys
5
  import signal
6
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
7
  import gradio as gr
8
 
9
+ import huggingface_hub
10
  from huggingface_hub import HfApi
11
  from huggingface_hub import ModelCard
12
 
 
72
  "q4f32_1",
73
  "q4f16_awq"]
74
 
75
+ SUPPORTED_MODEL_TYPES = ['llama',
76
+ 'mistral',
77
+ 'gemma',
78
+ 'gemma2',
79
+ 'gpt2',
80
+ 'mixtral',
81
+ 'gpt_neox',
82
+ 'gpt_bigcode',
83
+ 'phi-msft',
84
+ 'phi',
85
+ 'phi3',
86
+ 'phi3_v',
87
+ 'qwen',
88
+ 'qwen2',
89
+ 'qwen2_moe',
90
+ 'stablelm',
91
+ 'baichuan',
92
+ 'internlm',
93
+ 'internlm2',
94
+ 'rwkv5',
95
+ 'orion',
96
+ 'llava',
97
+ 'rwkv6',
98
+ 'chatglm',
99
+ 'eagle',
100
+ 'bert',
101
+ 'medusa',
102
+ 'starcoder2',
103
+ 'cohere',
104
+ 'minicpm']
105
+
106
+ global is_cancelled
107
+
108
+ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuthToken | None, progress=gr.Progress()):
109
+ global is_cancelled
110
+ is_cancelled = False
111
+
112
+ if oauth_token.token is None:
113
  return "Log in to Huggingface to use this"
114
  elif not hf_model_id:
115
  return "Enter a Huggingface model ID"
 
118
  elif not quantization:
119
  return "Select a quantization method"
120
 
121
+ progress(0, desc="Verifying inputs...")
122
+
123
  api = HfApi(token=oauth_token.token)
124
  model_dir_name = hf_model_id.split("/")[1]
125
  mlc_model_name = model_dir_name + "-" + quantization + "-" + "MLC"
 
127
  os.system("mkdir -p dist/models")
128
  os.system("git lfs install")
129
 
130
+ model_info = api.repo_info(hf_model_id)
131
+ if type(model_info) != huggingface_hub.hf_api.ModelInfo:
132
+ os.system("rm -rf dist/")
133
+ return "Entered Huggingface model ID is not a model repository"
134
+ if model_info.config['model_type'] not in SUPPORTED_MODEL_TYPES:
135
+ os.system("rm -rf dist/")
136
+ return f"Model type ({model_info.config['model_type']}) currently not supported by MLC-LLM"
137
+
138
+ progress(0.1, desc="Downloading weights from Huggingface...")
139
+
140
+ try:
141
+ api.snapshot_download(repo_id=hf_model_id, local_dir=f"./dist/models/{model_dir_name}")
142
+ except BaseException as error:
143
+ os.system("rm -rf dist/")
144
+ return error
145
+
146
+ if is_cancelled:
147
+ is_cancelled = False
148
+ os.system("rm -rf dist/")
149
+ return "Conversion cancelled"
150
+
151
+ progress(0.5, desc="Converting weight to MLC")
152
 
153
  convert_weight_result = subprocess.run(["mlc_llm convert_weight ./dist/models/" + model_dir_name + "/" + \
154
  " --quantization " + quantization + \
 
157
  os.system("rm -rf dist/")
158
  return convert_weight_result.stderr
159
 
160
+ if is_cancelled:
161
+ is_cancelled = False
162
+ os.system("rm -rf dist/")
163
+ return "Conversion cancelled"
164
+
165
+ progress(0.8, desc="Generating config...")
166
+
167
  gen_config_result = subprocess.run(["mlc_llm gen_config ./dist/models/" + model_dir_name + "/" + \
168
  " --quantization " + quantization + " --conv-template " + conv_template + \
169
  " -o dist/" + mlc_model_name + "/"], shell=True, capture_output=True, text=True)
 
171
  os.system("rm -rf dist/")
172
  return gen_config_result.stderr
173
 
174
+ if is_cancelled:
175
+ is_cancelled = False
176
+ os.system("rm -rf dist/")
177
+ return "Conversion cancelled"
178
+
179
+ progress(0.9, desc="Creating your Huggingface repo...")
180
+
181
  # push to HF
182
  user_name = api.whoami()["name"]
183
  created_repo_url = api.create_repo(repo_id=f"{user_name}/{mlc_model_name}", private=True)
 
219
  repo_type="model")
220
 
221
  os.system("rm -rf dist/")
 
222
  return "Successful, please find your compiled LLM model on your personal account"
223
 
224
+ def quit_button_click():
225
+ global is_cancelled
226
+ is_cancelled = True
227
+
228
+
229
  with gr.Blocks() as demo:
230
  gr.LoginButton()
231
  gr.Markdown(
 
241
  conv = gr.Dropdown(CONV_TEMPLATES, label="Conversation Template")
242
  quant = gr.Dropdown(QUANTIZATIONS, label="Quantization Method", info="The format of the code is qAfB(_id), where A represents the number of bits for storing weights and B represents the number of bits for storing activations. The _id is an integer identifier to distinguish different quantization algorithms (e.g. symmetric, non-symmetric, AWQ, etc).")
243
  btn = gr.Button("Convert to MLC")
244
+ btn2 = gr.Button("Quit")
245
  out = gr.Textbox(label="Conversion Result")
246
  btn.click(fn=button_click , inputs=[model_id, conv, quant], outputs=out)
247
+ btn2.click(fn=quit_button_click)
248
 
249
  demo.launch()