Spaces:

mlc-ai
/

MLC-Weight-Conversion

Running on L4

App Files Files Community

AMKCode committed on Oct 3

Commit

8c9d2de

•

1 Parent(s): 4e3cb72

added progress, quit btn, check for compatibility

Browse files

Files changed (1) hide show

app.py +85 -4

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import os
 import shutil
 import subprocess
 import signal
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 import gradio as gr
 from huggingface_hub import HfApi
 from huggingface_hub import ModelCard
@@ -70,8 +72,44 @@ QUANTIZATIONS = ["q0f16",
                  "q4f32_1",
                  "q4f16_awq"]
-def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuthToken | None):
-    if oauth_token.token == None:
         return "Log in to Huggingface to use this"
     elif not hf_model_id:
         return "Enter a Huggingface model ID"
@@ -80,6 +118,8 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
     elif not quantization:
         return "Select a quantization method"
     api = HfApi(token=oauth_token.token)
     model_dir_name = hf_model_id.split("/")[1]
     mlc_model_name = model_dir_name + "-" + quantization + "-" + "MLC"
@@ -87,7 +127,28 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
     os.system("mkdir -p dist/models")
     os.system("git lfs install")
-    api.snapshot_download(repo_id=hf_model_id, local_dir=f"./dist/models/{model_dir_name}")
     convert_weight_result = subprocess.run(["mlc_llm convert_weight ./dist/models/" + model_dir_name + "/" + \
               " --quantization " + quantization + \
@@ -96,6 +157,13 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
         os.system("rm -rf dist/")
         return convert_weight_result.stderr
     gen_config_result = subprocess.run(["mlc_llm gen_config ./dist/models/" + model_dir_name + "/" + \
               " --quantization " + quantization + " --conv-template " + conv_template + \
               " -o dist/" + mlc_model_name + "/"], shell=True, capture_output=True, text=True)
@@ -103,6 +171,13 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
         os.system("rm -rf dist/")
         return gen_config_result.stderr
     # push to HF
     user_name = api.whoami()["name"]
     created_repo_url = api.create_repo(repo_id=f"{user_name}/{mlc_model_name}", private=True)
@@ -144,9 +219,13 @@ def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuth
                     repo_type="model")
     os.system("rm -rf dist/")
     return "Successful, please find your compiled LLM model on your personal account"
 with gr.Blocks() as demo:
     gr.LoginButton()
     gr.Markdown(
@@ -162,7 +241,9 @@ with gr.Blocks() as demo:
     conv = gr.Dropdown(CONV_TEMPLATES, label="Conversation Template")
     quant = gr.Dropdown(QUANTIZATIONS, label="Quantization Method", info="The format of the code is qAfB(_id), where A represents the number of bits for storing weights and B represents the number of bits for storing activations. The _id is an integer identifier to distinguish different quantization algorithms (e.g. symmetric, non-symmetric, AWQ, etc).")
     btn = gr.Button("Convert to MLC")
     out = gr.Textbox(label="Conversion Result")
     btn.click(fn=button_click , inputs=[model_id, conv, quant], outputs=out)
 demo.launch()

 import os
 import shutil
 import subprocess
+import sys
 import signal
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 import gradio as gr
+import huggingface_hub
 from huggingface_hub import HfApi
 from huggingface_hub import ModelCard
                  "q4f32_1",
                  "q4f16_awq"]
+SUPPORTED_MODEL_TYPES = ['llama',  # allow-list that button_click compares model_info.config['model_type'] against before downloading weights
+                         'mistral',
+                         'gemma',
+                         'gemma2',
+                         'gpt2',
+                         'mixtral',
+                         'gpt_neox',
+                         'gpt_bigcode',
+                         'phi-msft',
+                         'phi',
+                         'phi3',
+                         'phi3_v',
+                         'qwen',
+                         'qwen2',
+                         'qwen2_moe',
+                         'stablelm',
+                         'baichuan',
+                         'internlm',
+                         'internlm2',
+                         'rwkv5',
+                         'orion',
+                         'llava',
+                         'rwkv6',
+                         'chatglm',
+                         'eagle',
+                         'bert',
+                         'medusa',
+                         'starcoder2',
+                         'cohere',
+                         'minicpm']
+global is_cancelled  # NOTE(review): a module-level `global` statement has no effect and does not create the name; the flag is first assigned inside button_click / quit_button_click
+def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuthToken | None, progress=gr.Progress()):
+    global is_cancelled
+    is_cancelled = False
+    if oauth_token.token is None:
         return "Log in to Huggingface to use this"
     elif not hf_model_id:
         return "Enter a Huggingface model ID"
     elif not quantization:
         return "Select a quantization method"
+    progress(0, desc="Verifying inputs...")
     api = HfApi(token=oauth_token.token)
     model_dir_name = hf_model_id.split("/")[1]
     mlc_model_name = model_dir_name + "-" + quantization + "-" + "MLC"
     os.system("mkdir -p dist/models")
     os.system("git lfs install")
+    model_info = api.repo_info(hf_model_id)
+    if type(model_info) != huggingface_hub.hf_api.ModelInfo:
+        os.system("rm -rf dist/")
+        return "Entered Huggingface model ID is not a model repository"
+    if model_info.config['model_type'] not in SUPPORTED_MODEL_TYPES:
+        os.system("rm -rf dist/")
+        return f"Model type ({model_info.config['model_type']}) currently not supported by MLC-LLM"
+    progress(0.1, desc="Downloading weights from Huggingface...")
+    try:
+        api.snapshot_download(repo_id=hf_model_id, local_dir=f"./dist/models/{model_dir_name}")
+    except BaseException as error:
+        os.system("rm -rf dist/")
+        return error
+    if is_cancelled:
+        is_cancelled = False
+        os.system("rm -rf dist/")
+        return "Conversion cancelled"
+    progress(0.5, desc="Converting weight to MLC")
     convert_weight_result = subprocess.run(["mlc_llm convert_weight ./dist/models/" + model_dir_name + "/" + \
               " --quantization " + quantization + \
         os.system("rm -rf dist/")
         return convert_weight_result.stderr
+    if is_cancelled:
+        is_cancelled = False
+        os.system("rm -rf dist/")
+        return "Conversion cancelled"
+    progress(0.8, desc="Generating config...")
     gen_config_result = subprocess.run(["mlc_llm gen_config ./dist/models/" + model_dir_name + "/" + \
               " --quantization " + quantization + " --conv-template " + conv_template + \
               " -o dist/" + mlc_model_name + "/"], shell=True, capture_output=True, text=True)
         os.system("rm -rf dist/")
         return gen_config_result.stderr
+    if is_cancelled:
+        is_cancelled = False
+        os.system("rm -rf dist/")
+        return "Conversion cancelled"
+    progress(0.9, desc="Creating your Huggingface repo...")
     # push to HF
     user_name = api.whoami()["name"]
     created_repo_url = api.create_repo(repo_id=f"{user_name}/{mlc_model_name}", private=True)
                     repo_type="model")
     os.system("rm -rf dist/")
     return "Successful, please find your compiled LLM model on your personal account"
+def quit_button_click():  # click handler for the "Quit" button (btn2); takes no inputs and returns nothing
+    global is_cancelled
+    is_cancelled = True  # button_click checks this flag only between stages (after download, convert_weight, gen_config), then cleans up dist/ and returns "Conversion cancelled"
 with gr.Blocks() as demo:
     gr.LoginButton()
     gr.Markdown(
     conv = gr.Dropdown(CONV_TEMPLATES, label="Conversation Template")
     quant = gr.Dropdown(QUANTIZATIONS, label="Quantization Method", info="The format of the code is qAfB(_id), where A represents the number of bits for storing weights and B represents the number of bits for storing activations. The _id is an integer identifier to distinguish different quantization algorithms (e.g. symmetric, non-symmetric, AWQ, etc).")
     btn = gr.Button("Convert to MLC")
+    btn2 = gr.Button("Quit")
     out = gr.Textbox(label="Conversion Result")
     btn.click(fn=button_click , inputs=[model_id, conv, quant], outputs=out)
+    btn2.click(fn=quit_button_click)
 demo.launch()