Spaces:
Running
Running
[add] gpu info
Browse files
- app/models.py +1 -0
- app/tables.py +25 -3
- main.py +10 -5
app/models.py
CHANGED
@@ -74,6 +74,7 @@ class Ram(BaseModel):
|
|
74 |
class Item(BaseModel):
|
75 |
offload_layers: int = Field(alias="offloadLayers")
|
76 |
full_offloaded: bool = Field(alias="fullOffloaded")
|
|
|
77 |
ram: "Ram"
|
78 |
vrams: list["Ram"]
|
79 |
|
|
|
74 |
class Item(BaseModel):
|
75 |
offload_layers: int = Field(alias="offloadLayers")
|
76 |
full_offloaded: bool = Field(alias="fullOffloaded")
|
77 |
+
maximum_tokens_per_second: float = Field(None, alias="maximumTokensPerSecond")
|
78 |
ram: "Ram"
|
79 |
vrams: list["Ram"]
|
80 |
|
app/tables.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import pandas as pd
|
2 |
|
|
|
3 |
from app.models import Architecture, Estimate, Metadata, Tokenizer
|
4 |
from app.utils import abbreviate_number, human_readable_size
|
5 |
|
@@ -26,13 +27,34 @@ def get_model_info_df(
|
|
26 |
|
27 |
|
28 |
def get_estimate_df(estimate: Estimate):
|
|
|
29 |
return pd.DataFrame(
|
30 |
[
|
31 |
{
|
|
|
|
|
|
|
32 |
"Context Size": estimate.context_size,
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
}
|
|
|
37 |
]
|
38 |
)
|
|
|
1 |
import pandas as pd
|
2 |
|
3 |
+
from app.devices import Device
|
4 |
from app.models import Architecture, Estimate, Metadata, Tokenizer
|
5 |
from app.utils import abbreviate_number, human_readable_size
|
6 |
|
|
|
27 |
|
28 |
|
29 |
def get_estimate_df(estimate: Estimate):
|
30 |
+
|
31 |
return pd.DataFrame(
|
32 |
[
|
33 |
{
|
34 |
+
"Max Token per Sec.": round(
|
35 |
+
estimate.items[0].maximum_tokens_per_second, 2
|
36 |
+
),
|
37 |
"Context Size": estimate.context_size,
|
38 |
+
"Offload Layers": estimate.items[0].offload_layers,
|
39 |
+
"Full Offloaded": estimate.items[0].full_offloaded,
|
40 |
+
"CPU Handle Layers": estimate.items[0].ram.handle_layers,
|
41 |
+
"CPU UMA": human_readable_size(estimate.items[0].ram.uma),
|
42 |
+
"CPU NONUMA": human_readable_size(estimate.items[0].ram.nonuma),
|
43 |
+
}
|
44 |
+
]
|
45 |
+
)
|
46 |
+
|
47 |
+
|
48 |
+
def get_gpus_df(estimate: Estimate, gpu_name: str, selected_device: Device):
|
49 |
+
return pd.DataFrame(
|
50 |
+
[
|
51 |
+
{
|
52 |
+
"GPU": gpu_name,
|
53 |
+
"GPU Memory Size": selected_device.memory_size,
|
54 |
+
"Handle Layers": gpu.handle_layers,
|
55 |
+
"UMA": human_readable_size(gpu.uma),
|
56 |
+
"NONUMA": human_readable_size(gpu.nonuma),
|
57 |
}
|
58 |
+
for gpu in estimate.items[0].vrams
|
59 |
]
|
60 |
)
|
main.py
CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
|
|
7 |
|
8 |
from app.devices import Device
|
9 |
from app.models import GgufParser
|
10 |
-
from app.tables import get_estimate_df, get_model_info_df
|
11 |
|
12 |
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
|
13 |
gguf_parser = Path("gguf-parser-linux-amd64")
|
@@ -27,8 +27,8 @@ device_options = [
|
|
27 |
def process_url(url, context_length, device_selection):
|
28 |
try:
|
29 |
# 取得選擇的裝置鍵值
|
30 |
-
|
31 |
-
selected_device = devices[
|
32 |
res = os.popen(
|
33 |
f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
|
34 |
).read()
|
@@ -40,7 +40,9 @@ def process_url(url, context_length, device_selection):
|
|
40 |
|
41 |
estimate_df = get_estimate_df(parser_result.estimate)
|
42 |
|
43 |
-
|
|
|
|
|
44 |
except Exception as e:
|
45 |
return e
|
46 |
|
@@ -50,7 +52,9 @@ if __name__ == "__main__":
|
|
50 |
os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
|
51 |
|
52 |
with gr.Blocks(title="GGUF Parser") as iface:
|
53 |
-
url_input = gr.Textbox(
|
|
|
|
|
54 |
context_length = gr.Number(label="Context Length", value=8192)
|
55 |
device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
|
56 |
submit_btn = gr.Button("Send")
|
@@ -61,6 +65,7 @@ if __name__ == "__main__":
|
|
61 |
outputs=[
|
62 |
gr.DataFrame(label="Model Info"),
|
63 |
gr.DataFrame(label="ESTIMATE"),
|
|
|
64 |
],
|
65 |
)
|
66 |
iface.launch()
|
|
|
7 |
|
8 |
from app.devices import Device
|
9 |
from app.models import GgufParser
|
10 |
+
from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
|
11 |
|
12 |
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
|
13 |
gguf_parser = Path("gguf-parser-linux-amd64")
|
|
|
27 |
def process_url(url, context_length, device_selection):
|
28 |
try:
|
29 |
# 取得選擇的裝置鍵值
|
30 |
+
device_name = device_selection.split(" ")[0]
|
31 |
+
selected_device = devices[device_name]
|
32 |
res = os.popen(
|
33 |
f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
|
34 |
).read()
|
|
|
40 |
|
41 |
estimate_df = get_estimate_df(parser_result.estimate)
|
42 |
|
43 |
+
gpus_info_df = get_gpus_df(parser_result.estimate, device_name, selected_device)
|
44 |
+
|
45 |
+
return model_info, estimate_df, gpus_info_df
|
46 |
except Exception as e:
|
47 |
return e
|
48 |
|
|
|
52 |
os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
|
53 |
|
54 |
with gr.Blocks(title="GGUF Parser") as iface:
|
55 |
+
url_input = gr.Textbox(
|
56 |
+
label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
|
57 |
+
)
|
58 |
context_length = gr.Number(label="Context Length", value=8192)
|
59 |
device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
|
60 |
submit_btn = gr.Button("Send")
|
|
|
65 |
outputs=[
|
66 |
gr.DataFrame(label="Model Info"),
|
67 |
gr.DataFrame(label="ESTIMATE"),
|
68 |
+
gr.DataFrame(label="GPUs INFO"),
|
69 |
],
|
70 |
)
|
71 |
iface.launch()
|