import pandas as pd
from app.devices import Device
from app.models import Architecture, Estimate, Metadata, Tokenizer
from app.utils import abbreviate_number, human_readable_size


def get_model_info_df(
    metadata: Metadata, architecture: Architecture, tokenizer: Tokenizer
) -> pd.DataFrame:
    """Summarize the parsed model's metadata as a single-row DataFrame."""
    return pd.DataFrame(
        [
            {
                "Type": metadata.type_,
                "Name": metadata.name,
                "Architecture": metadata.architecture,
                "File Size": human_readable_size(metadata.file_size),
                "Parameters": abbreviate_number(metadata.parameters),
                "Bits Per Weight": round(metadata.bits_per_weight, 2),
                "Maximum Context Length": architecture.maximum_context_length,
                "Vocabulary Length": architecture.vocabulary_length,
                "Tokenizer Model": tokenizer.model,
                "Tokens Size": human_readable_size(tokenizer.tokens_size),
            }
        ]
    )


def get_estimate_df(estimate: Estimate) -> pd.DataFrame:
    """Summarize the CPU/RAM side of a resource estimate as a single-row DataFrame."""
    return pd.DataFrame(
        [
            {
                "Max Tokens per Sec.": round(
                    estimate.items[0].maximum_tokens_per_second, 2
                ),
                "Context Size": estimate.context_size,
                "Offload Layers": estimate.items[0].offload_layers,
                "Fully Offloaded": estimate.items[0].full_offloaded,
                "CPU Handle Layers": estimate.items[0].ram.handle_layers,
                "CPU UMA": human_readable_size(estimate.items[0].ram.uma),
                "CPU NONUMA": human_readable_size(estimate.items[0].ram.nonuma),
            }
        ]
    )


def get_gpus_df(
    estimate: Estimate, gpu_name: str, selected_device: Device
) -> pd.DataFrame:
    """Build one row per GPU allocation in the estimate, with its VRAM requirements."""
    return pd.DataFrame(
        [
            {
                "GPU": gpu_name,
                "GPU Memory Size": selected_device.memory_size,
                "Handle Layers": gpu.handle_layers,
                "UMA": human_readable_size(gpu.uma),
                "NONUMA": human_readable_size(gpu.nonuma),
            }
            for gpu in estimate.items[0].vrams
        ]
    )
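

if __name__ == "__main__":
    # Minimal usage sketch for the model-info table. The field values and the
    # keyword-constructor calls below are assumptions for illustration (in the
    # app, the Metadata/Architecture/Tokenizer instances come from parsing a
    # GGUF file); they are not the app's confirmed API.
    metadata = Metadata(
        type_="model",  # hypothetical placeholder values throughout
        name="example-7b",
        architecture="llama",
        file_size=4_500_000_000,  # bytes
        parameters=7_000_000_000,
        bits_per_weight=4.5,
    )
    architecture = Architecture(
        maximum_context_length=4096,
        vocabulary_length=32_000,
    )
    tokenizer = Tokenizer(model="gpt2", tokens_size=2_000_000)

    print(get_model_info_df(metadata, architecture, tokenizer))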