File size: 2,946 Bytes
7c11711
2678ef5
61b2179
2678ef5
 
 
863856d
1a8deb0
c7f3542
4688574
1a8deb0
61b2179
 
7c11711
a793e12
61b2179
863856d
 
 
2678ef5
863856d
6bedbda
863856d
 
 
 
 
7c11711
e20ecc6
c7f3542
4688574
a793e12
863856d
a793e12
1a8deb0
7c11711
6160b72
 
7c11711
 
6160b72
 
c7f3542
 
 
7c11711
 
 
2678ef5
 
61b2179
a793e12
7c11711
a793e12
6eed107
 
 
c7f3542
 
 
4688574
863856d
a793e12
7c11711
 
 
4688574
7c11711
6160b72
a793e12
c7f3542
7c11711
 
2678ef5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import os
import subprocess
from pathlib import Path

import gradio as gr

from app.devices import Device
from app.models import GgufParser
from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
from app.utils import cleanup_url

# Release tag of gguf-parser-go to download; the GGUF_PARSER_VERSION
# environment variable overrides the built-in default.
GGUF_PARSER_VERSION = os.environ.get("GGUF_PARSER_VERSION", "v0.12.0")

# File name of the parser binary, as a path relative to the working directory.
gguf_parser = Path("gguf-parser-linux-amd64")

# Download location of that binary for the selected release tag.
gguf_parser_url = (
    "https://github.com/gpustack/gguf-parser-go/releases/download/"
    f"{GGUF_PARSER_VERSION}/{gguf_parser}"
)

# URL pre-filled in the "GGUF File URL" text box.
DEFAULT_URL = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"

# Load the device catalogue. Every top-level key in devices.json becomes a
# key of ``devices``; its value supplies the keyword arguments for ``Device``.
with open("devices.json", encoding="utf-8") as f:
    data = json.loads(f.read())
devices = {name: Device(**spec) for name, spec in data.items()}

def _device_label(name, device):
    """Return the dropdown label for one device: its key followed by its specs."""
    return f"{name} (Memory: {device.memory_size}GB, FLOPS: {device.FLOPS}, Bandwidth: {device.memory_bandwidth}GB/s)"


# Choices offered by the device dropdown, in the order of ``devices``.
# process_url recovers the device key from the text before the first " (".
device_options = [_device_label(name, device) for name, device in devices.items()]


def process_url(url, context_length, device_selection):
    """Run gguf-parser on a GGUF file URL and build the three result tables.

    Args:
        url: URL of the GGUF file; passed through ``cleanup_url`` before use.
        context_length: Context size forwarded to the parser as ``--ctx-size``.
            Coerced with ``int()`` so a float from the UI becomes an integer
            flag value.
        device_selection: Label picked in the device dropdown, in the form
            ``"<device key> (Memory: ...)"``.

    Returns:
        A ``(model_info, estimate_df, gpus_info_df)`` tuple, one entry for
        each output table wired to the submit button.

    Raises:
        gr.Error: If no device is selected, the parser exits with a non-zero
            status, or anything else fails while producing the tables. Raising
            (rather than returning the exception) lets Gradio show the message
            instead of receiving one value where three outputs are expected.
    """
    if not device_selection:
        # The dropdown has no default value, so the callback can receive None.
        raise gr.Error("Please select a device.")

    try:
        # The device key is the part of the label before the first " (".
        device_name = device_selection.split(" (")[0]
        selected_device = devices[device_name]
        url = cleanup_url(url)

        # Pass an argument list and no shell: the URL is user input and must
        # never be interpreted as shell syntax.
        completed = subprocess.run(
            [
                f"./{gguf_parser}",
                f"--ctx-size={int(context_length)}",
                "-url",
                url,
                "--device-metric",
                f"{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps",
                "--json",
            ],
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode != 0:
            # Prefer the parser's own diagnostics over a JSON validation error
            # on empty output.
            raise RuntimeError(
                completed.stderr.strip()
                or f"gguf-parser exited with status {completed.returncode}"
            )
        parser_result = GgufParser.model_validate_json(completed.stdout)

        model_info = get_model_info_df(
            parser_result.metadata, parser_result.architecture, parser_result.tokenizer
        )

        estimate_df = get_estimate_df(parser_result.estimate)

        gpus_info_df = get_gpus_df(parser_result.estimate, device_name, selected_device)

        return model_info, estimate_df, gpus_info_df
    except Exception as e:
        # UI boundary: turn any failure into a message Gradio can display.
        raise gr.Error(str(e)) from e


if __name__ == "__main__":
    # Fetch the parser binary on first start. The download runs without a
    # shell and its exit status is checked, so a failed download stops the
    # app here instead of producing errors on every later request.
    if not gguf_parser.exists():
        try:
            subprocess.run(
                ["wget", "-O", str(gguf_parser), gguf_parser_url], check=True
            )
        except (OSError, subprocess.CalledProcessError):
            # Remove any partial file; otherwise the exists() check above
            # would skip the download on the next start.
            gguf_parser.unlink(missing_ok=True)
            raise
        # Same effect as `chmod +x`: add the execute bits to the current mode.
        gguf_parser.chmod(gguf_parser.stat().st_mode | 0o111)

    # Build the UI: URL, context length and device inputs, and a button that
    # fills three result tables from process_url.
    with gr.Blocks(title="GGUF Parser") as iface:
        gr.Markdown(
            "This Space is a web GUI for the [gpustack/gguf-parser-go](https://github.com/gpustack/gguf-parser-go) package, designed for users who are not familiar with CLI. For more detailed output results, please consider using the original tool. If you find this GUI helpful, please give that a star."
        )
        url_input = gr.Textbox(
            label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
        )
        context_length_input = gr.Number(label="Context Length", value=8192)
        device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
        submit_btn = gr.Button("Send")

        # The output tables are created here, so they appear below the button.
        submit_btn.click(
            fn=process_url,
            inputs=[url_input, context_length_input, device_dropdown],
            outputs=[
                gr.DataFrame(label="Model Info"),
                gr.DataFrame(label="ESTIMATE"),
                gr.DataFrame(label="GPUs INFO"),
            ],
        )
    iface.launch()