phate334 committed on
Commit
c7f3542
1 Parent(s): 863856d

[add] gpu info

Browse files
Files changed (3) hide show
  1. app/models.py +1 -0
  2. app/tables.py +25 -3
  3. main.py +10 -5
app/models.py CHANGED
@@ -74,6 +74,7 @@ class Ram(BaseModel):
74
  class Item(BaseModel):
75
  offload_layers: int = Field(alias="offloadLayers")
76
  full_offloaded: bool = Field(alias="fullOffloaded")
 
77
  ram: "Ram"
78
  vrams: list["Ram"]
79
 
 
74
class Item(BaseModel):
    """One estimate entry produced by gguf-parser (camelCase JSON via aliases)."""

    # Number of model layers offloaded to the GPU(s).
    offload_layers: int = Field(alias="offloadLayers")
    # True when every layer fits on the GPU(s).
    full_offloaded: bool = Field(alias="fullOffloaded")
    # Estimated throughput; absent from the parser output in some modes.
    # NOTE(review): default is None but the annotation is plain `float` —
    # should be `Optional[float]` / `float | None` so type checkers and
    # pydantic validation agree with the default; confirm target Python
    # version before changing (requires a typing import or 3.10+ syntax).
    maximum_tokens_per_second: float = Field(None, alias="maximumTokensPerSecond")
    # CPU-side memory estimate.
    ram: "Ram"
    # One entry per GPU, mirroring the Ram schema.
    vrams: list["Ram"]
80
 
app/tables.py CHANGED
@@ -1,5 +1,6 @@
1
  import pandas as pd
2
 
 
3
  from app.models import Architecture, Estimate, Metadata, Tokenizer
4
  from app.utils import abbreviate_number, human_readable_size
5
 
@@ -26,13 +27,34 @@ def get_model_info_df(
26
 
27
 
28
  def get_estimate_df(estimate: Estimate):
 
29
  return pd.DataFrame(
30
  [
31
  {
 
 
 
32
  "Context Size": estimate.context_size,
33
- "Flash Attention": estimate.flash_attention,
34
- "Logical Batch Size": estimate.logical_batch_size,
35
- "Physical Batch Size": estimate.physical_batch_size,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
 
37
  ]
38
  )
 
1
  import pandas as pd
2
 
3
+ from app.devices import Device
4
  from app.models import Architecture, Estimate, Metadata, Tokenizer
5
  from app.utils import abbreviate_number, human_readable_size
6
 
 
27
 
28
 
29
def get_estimate_df(estimate: Estimate):
    """Build a one-row DataFrame summarising the first estimate item.

    Columns cover throughput, context size, offload status and the CPU-side
    (RAM) memory requirements. Only ``estimate.items[0]`` is reported;
    per-GPU details live in ``get_gpus_df``.
    """
    # Hoist the repeated items[0] lookup; assumes the parser always emits
    # at least one item — TODO confirm against gguf-parser output.
    item = estimate.items[0]
    # maximum_tokens_per_second defaults to None when the parser omits it
    # (see Item model); round(None, 2) would raise TypeError, so guard it.
    mtps = item.maximum_tokens_per_second
    return pd.DataFrame(
        [
            {
                "Max Token per Sec.": round(mtps, 2) if mtps is not None else None,
                "Context Size": estimate.context_size,
                "Offload Layers": item.offload_layers,
                "Full Offloaded": item.full_offloaded,
                "CPU Handle Layers": item.ram.handle_layers,
                "CPU UMA": human_readable_size(item.ram.uma),
                "CPU NONUMA": human_readable_size(item.ram.nonuma),
            }
        ]
    )
46
+
47
+
48
def get_gpus_df(estimate: Estimate, gpu_name: str, selected_device: Device):
    """Build a DataFrame with one row per VRAM entry of the first estimate item.

    Each row carries the selected device's name and memory size alongside
    that entry's handled layers and UMA/NONUMA memory requirements.
    NOTE(review): every row repeats the same gpu_name/memory size even though
    vrams may describe several GPUs — confirm this is intended.
    """
    rows = []
    for vram in estimate.items[0].vrams:
        rows.append(
            {
                "GPU": gpu_name,
                "GPU Memory Size": selected_device.memory_size,
                "Handle Layers": vram.handle_layers,
                "UMA": human_readable_size(vram.uma),
                "NONUMA": human_readable_size(vram.nonuma),
            }
        )
    return pd.DataFrame(rows)
main.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
7
 
8
  from app.devices import Device
9
  from app.models import GgufParser
10
- from app.tables import get_estimate_df, get_model_info_df
11
 
12
  GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
13
  gguf_parser = Path("gguf-parser-linux-amd64")
@@ -27,8 +27,8 @@ device_options = [
27
  def process_url(url, context_length, device_selection):
28
  try:
29
  # 取得選擇的裝置鍵值
30
- device_key = device_selection.split(" ")[0]
31
- selected_device = devices[device_key]
32
  res = os.popen(
33
  f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
34
  ).read()
@@ -40,7 +40,9 @@ def process_url(url, context_length, device_selection):
40
 
41
  estimate_df = get_estimate_df(parser_result.estimate)
42
 
43
- return model_info, estimate_df
 
 
44
  except Exception as e:
45
  return e
46
 
@@ -50,7 +52,9 @@ if __name__ == "__main__":
50
  os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
51
 
52
  with gr.Blocks(title="GGUF Parser") as iface:
53
- url_input = gr.Textbox(placeholder="Enter GGUF URL", value=DEFAULT_URL)
 
 
54
  context_length = gr.Number(label="Context Length", value=8192)
55
  device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
56
  submit_btn = gr.Button("Send")
@@ -61,6 +65,7 @@ if __name__ == "__main__":
61
  outputs=[
62
  gr.DataFrame(label="Model Info"),
63
  gr.DataFrame(label="ESTIMATE"),
 
64
  ],
65
  )
66
  iface.launch()
 
7
 
8
  from app.devices import Device
9
  from app.models import GgufParser
10
+ from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
11
 
12
  GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
13
  gguf_parser = Path("gguf-parser-linux-amd64")
 
27
  def process_url(url, context_length, device_selection):
28
  try:
29
  # 取得選擇的裝置鍵值
30
+ device_name = device_selection.split(" ")[0]
31
+ selected_device = devices[device_name]
32
  res = os.popen(
33
  f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
34
  ).read()
 
40
 
41
  estimate_df = get_estimate_df(parser_result.estimate)
42
 
43
+ gpus_info_df = get_gpus_df(parser_result.estimate, device_name, selected_device)
44
+
45
+ return model_info, estimate_df, gpus_info_df
46
  except Exception as e:
47
  return e
48
 
 
52
  os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
53
 
54
  with gr.Blocks(title="GGUF Parser") as iface:
55
+ url_input = gr.Textbox(
56
+ label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
57
+ )
58
  context_length = gr.Number(label="Context Length", value=8192)
59
  device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
60
  submit_btn = gr.Button("Send")
 
65
  outputs=[
66
  gr.DataFrame(label="Model Info"),
67
  gr.DataFrame(label="ESTIMATE"),
68
+ gr.DataFrame(label="GPUs INFO"),
69
  ],
70
  )
71
  iface.launch()