Spaces:
Running
Running
File size: 2,946 Bytes
7c11711 2678ef5 61b2179 2678ef5 863856d 1a8deb0 c7f3542 4688574 1a8deb0 61b2179 7c11711 a793e12 61b2179 863856d 2678ef5 863856d 6bedbda 863856d 7c11711 e20ecc6 c7f3542 4688574 a793e12 863856d a793e12 1a8deb0 7c11711 6160b72 7c11711 6160b72 c7f3542 7c11711 2678ef5 61b2179 a793e12 7c11711 a793e12 6eed107 c7f3542 4688574 863856d a793e12 7c11711 4688574 7c11711 6160b72 a793e12 c7f3542 7c11711 2678ef5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import json
import os
import subprocess
from pathlib import Path

import gradio as gr

from app.devices import Device
from app.models import GgufParser
from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
from app.utils import cleanup_url
# Version of the gguf-parser-go release to download; overridable via env var.
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
gguf_parser = Path("gguf-parser-linux-amd64")
gguf_parser_url = (
    "https://github.com/gpustack/gguf-parser-go/releases/download/"
    f"{GGUF_PARSER_VERSION}/{gguf_parser}"
)
# Default GGUF shown in the URL textbox so the demo works out of the box.
DEFAULT_URL = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"

# Load the known-device catalog shipped next to the app.
data = json.loads(Path("devices.json").read_text(encoding="utf-8"))
devices = {name: Device(**spec) for name, spec in data.items()}

# Human-readable dropdown labels; process_url() recovers the key by
# splitting on " (" — keep that format in sync.
device_options = [
    f"{name} (Memory: {dev.memory_size}GB, FLOPS: {dev.FLOPS},"
    f" Bandwidth: {dev.memory_bandwidth}GB/s)"
    for name, dev in devices.items()
]
def process_url(url, context_length, device_selection):
    """Run gguf-parser against a GGUF URL and build the result tables.

    Args:
        url: User-supplied GGUF file URL (cleaned via cleanup_url).
        context_length: Context size passed to the parser's --ctx-size flag.
        device_selection: Dropdown label; the device key is everything
            before the first " (" (see device_options format).

    Returns:
        (model_info, estimate_df, gpus_info_df) dataframes on success, or
        the raised exception object on failure (Gradio renders it as text).
    """
    try:
        device_name = device_selection.split(" (")[0]
        selected_device = devices[device_name]
        url = cleanup_url(url)
        # Use an argument list with shell=False instead of os.popen:
        # the URL comes from a web form, and interpolating it into a
        # shell string allowed command injection.
        proc = subprocess.run(
            [
                f"./{gguf_parser}",
                f"--ctx-size={context_length}",
                "-url",
                url,
                "--device-metric",
                f"{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps",
                "--json",
            ],
            capture_output=True,
            text=True,
            check=True,  # nonzero exit -> CalledProcessError -> returned below
        )
        parser_result = GgufParser.model_validate_json(proc.stdout)
        model_info = get_model_info_df(
            parser_result.metadata, parser_result.architecture, parser_result.tokenizer
        )
        estimate_df = get_estimate_df(parser_result.estimate)
        gpus_info_df = get_gpus_df(parser_result.estimate, device_name, selected_device)
        return model_info, estimate_df, gpus_info_df
    except Exception as e:
        # Intentional broad catch: surface any failure in the UI rather
        # than crashing the request handler.
        return e
if __name__ == "__main__":
    # First run: fetch the parser binary and make it executable.
    if not gguf_parser.exists():
        os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")

    # Build the Gradio UI: one URL/context/device form feeding three tables.
    with gr.Blocks(title="GGUF Parser") as demo:
        gr.Markdown(
            "This Space is a web GUI for the [gpustack/gguf-parser-go](https://github.com/gpustack/gguf-parser-go) package, designed for users who are not familiar with CLI. For more detailed output results, please consider using the original tool. If you find this GUI helpful, please give that a star."
        )
        url_box = gr.Textbox(
            label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
        )
        ctx_box = gr.Number(label="Context Length", value=8192)
        device_box = gr.Dropdown(label="Select Device", choices=device_options)
        send_btn = gr.Button("Send")

        outputs = [
            gr.DataFrame(label="Model Info"),
            gr.DataFrame(label="ESTIMATE"),
            gr.DataFrame(label="GPUs INFO"),
        ]
        send_btn.click(
            fn=process_url,
            inputs=[url_box, ctx_box, device_box],
            outputs=outputs,
        )
    demo.launch()
|