<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<script>
function strToHtml(str) {
let parser = new DOMParser();
return parser.parseFromString(str, "text/html");
}
// Short, jQuery-independent function to read an HTML table and write it into an array.
// Kudos to RobG at StackOverflow
function tableToObj(table) {
var rows = table.rows;
var propCells = rows[0].cells;
var propNames = [];
var results = [];
var obj, row, cells;
// Use the first row for the property names
// Could use a header section but result is the same if
// there is only one header row
for (var i = 0, iLen = propCells.length; i < iLen; i++) {
propNames.push(
(propCells[i].textContent || propCells[i].innerText).trim()
);
}
// Use the rows for data
// Could use tbody rows here to exclude header & footer
// but starting from 1 gives required result
for (var j = 1, jLen = rows.length; j < jLen; j++) {
cells = rows[j].cells;
obj = {};
for (var k = 0; k < iLen; k++) {
obj[propNames[k]] = (
cells[k].textContent || cells[k].innerText
).trim();
}
results.push(obj);
}
return results;
}
function formatGpu(gpus) {
return gpus.map(
(g) => `${g["Product Name"]} - ${g["Memory"].split(",")[0]}`
);
}
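// Approximate effective bits per weight for each llama.cpp GGUF quant type;
// the values include quantization overhead (scales and block metadata), so they
// sit above the nominal bit width. The selected value is passed to modelSize() as bpw.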
const gguf_quants = {
"IQ1_S": 1.56,
"IQ2_XXS": 2.06,
"IQ2_XS": 2.31,
"IQ2_S": 2.5,
"IQ2_M": 2.7,
"IQ3_XXS": 3.06,
"IQ3_XS": 3.3,
"Q2_K": 3.35,
"Q3_K_S": 3.5,
"IQ3_S": 3.5,
"IQ3_M": 3.7,
"Q3_K_M": 3.91,
"Q3_K_L": 4.27,
"IQ4_XS": 4.25,
"IQ4_NL": 4.5,
"Q4_0": 4.55,
"Q4_K_S": 4.58,
"Q4_K_M": 4.85,
"Q5_0": 5.54,
"Q5_K_S": 5.54,
"Q5_K_M": 5.69,
"Q6_K": 6.59,
"Q8_0": 8.5,
}
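// Fetches the model's config.json from Hugging Face (optionally authenticated)
// and determines its parameter count: first from the safetensors weight index,
// then from the pytorch weight index, and finally by scraping the model page
// through corsproxy.io. The count is attached to the config as `parameters`.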
async function modelConfig(hf_model, hf_token) {
const auth = hf_token == "" ? {} : {
headers: {
'Authorization': `Bearer ${hf_token}`
}
}
let config = await fetch(
`https://huggingface.co/${hf_model}/raw/main/config.json`, auth
).then(r => r.json())
let model_size = 0
try {
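// The weight index reports total_size in bytes; the weights are assumed to be
// stored in a 16-bit dtype (2 bytes per parameter), so dividing by 2 gives the
// parameter count.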
model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`, auth).then(r => r.json()))["metadata"]["total_size"] / 2
if (isNaN(model_size)) {
throw new Error("no size in safetensors metadata")
}
} catch (e) {
try {
model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`, auth).then(r => r.json()))["metadata"]["total_size"] / 2
if (isNaN(model_size)) {
throw new Error("no size in pytorch metadata")
}
} catch {
let model_page = await fetch(
"https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
).then(r => r.text())
let el = document.createElement('html');
el.innerHTML = model_page
let params_el = el.querySelector('div[data-target="ModelSafetensorsParams"]')
if (params_el !== null) {
model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["safetensors"]["total"]
} else {
params_el = el.querySelector('div[data-target="ModelHeader"]')
model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["model"]["safetensors"]["total"]
}
}
}
config.parameters = model_size
return config
}
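// Sums the element counts of llama.cpp's per-batch input tensors (see the
// comment below); contextSize() treats the result as bytes, which is only a
// rough approximation but is small next to the KV cache and compute buffer.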
function inputBuffer(context=8192, model_config, bsz=512) {
/* Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248
ctx->inp_tokens = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
ctx->inp_embd = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch);
ctx->inp_pos = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch);
ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx);
ctx->inp_sum = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch);
n_embd is hidden size (github:ggerganov/llama.cpp/convert.py:248)
*/
const inp_tokens = bsz
const inp_embd = model_config["hidden_size"] * bsz
const inp_pos = bsz
const inp_KQ_mask = context * bsz
const inp_K_shift = context
const inp_sum = bsz
return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum
}
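// Heuristic estimate (in bytes) of llama.cpp's compute/scratch buffer at batch
// size 512, growing with context length and the number of attention heads.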
function computeBuffer(context=8192, model_config, bsz=512) {
if (bsz != 512) {
alert("Batch sizes other than 512 are currently not supported for the compute buffer; batch size 512 is used for the compute buffer calculation instead, so the end result will be an overestimation.")
}
return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
}
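// KV cache size in bytes: keys and values (the factor of 2) for every layer at
// every context position, with n_embd_gqa accounting for grouped-query attention
// (fewer KV heads than attention heads), at cache_bit bits per element.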
function kvCache(context=8192, model_config, cache_bit=16) {
const n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
const n_embd_gqa = model_config["hidden_size"] / n_gqa
const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
const size = 2 * n_elements
return size * (cache_bit / 8)
}
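// Example: a Llama-2-7B-style config (32 layers, hidden size 4096, 32 attention
// heads, 32 KV heads) at 8192 context with a 16-bit cache gives
// 2 * 4096 * 32 * 8192 elements * 2 bytes = 4,294,967,296 bytes = 4 GiB.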
function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
return Number.parseFloat((inputBuffer(context, model_config, bsz) + kvCache(context, model_config, cache_bit) + computeBuffer(context, model_config, bsz)).toFixed(2))
}
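// Model weights in bytes: parameter count times bits per weight, divided by 8.
// For example, a 7.24B-parameter model at Q4_K_S (4.58 bpw) works out to
// 7.24e9 * 4.58 / 8 ≈ 4.14e9 bytes ≈ 3.9 GiB.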
function modelSize(model_config, bpw=4.5) {
return Number.parseFloat((model_config["parameters"] * bpw / 8).toFixed(2))
}
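// Reads the form inputs, computes the model and context sizes in bytes, converts
// them to GiB for display, and colors the total green, yellow, or red depending
// on whether the selected GPU's VRAM leaves more than 0.5 GB, some, or no headroom.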
async function calculateSizes(format) {
try {
const model_config = await modelConfig(document.getElementById("modelsearch").value, document.getElementById("hf_token").value)
const context = parseInt(document.getElementById("contextsize").value)
let bsz = 512
let cache_bit = 16
let bpw = 0
if (format === "gguf") {
bsz = parseInt(document.getElementById("batchsize").value)
bpw = gguf_quants[document.getElementById("quantsize").innerText]
} else if (format == "exl2") {
cache_bit = Number.parseInt(document.getElementById("kvCache").value)
bpw = Number.parseFloat(document.getElementById("bpw").value)
}
const model_size = modelSize(model_config, bpw)
const context_size = contextSize(context, model_config, bsz, cache_bit)
const total_size = ((model_size + context_size) / 2**30)
document.getElementById("resultmodel").innerText = (model_size / 2**30).toFixed(2)
document.getElementById("resultcontext").innerText = (context_size / 2**30).toFixed(2)
const result_total_el = document.getElementById("resulttotal");
result_total_el.innerText = total_size.toFixed(2)
const gpu = document.getElementById("gpusearch").value
if (gpu !== "") {
const vram = parseFloat(gpu.split("-")[1].replace("GB", "").trim())
if (vram - total_size > 0.5) {
result_total_el.style.backgroundColor = "#bef264"
} else if (vram - total_size > 0) {
result_total_el.style.backgroundColor = "#facc15"
} else {
result_total_el.style.backgroundColor = "#ef4444"
}
}
} catch(e) {
alert(e);
}
}
</script>
<link href="./styles.css" rel="stylesheet">
<title>Can I run it? - LLM VRAM Calculator</title>
</head>
<body class="p-8">
<div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
<h1 class="text-xl font-semibold leading-6 text-gray-900">
LLM Model, Can I run it?
</h1>
<p>
To access gated or private repos, <a href="https://huggingface.co/settings/tokens" style="color: #4444ff"><b>create an authentication token</b></a>, check the box <span style="color: #6e1818"><b>"Read access to contents of all public gated repos you can access"</b></span>, and then enter the token in the field below.
</p>
<div class="flex flex-col gap-10">
<div class="w-auto flex flex-col gap-4">
<!-- Huggingface Authentication Token -->
<div
class="relative"
x-data="{
results: null,
query: null
}"
>
<label
for="hf_token"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>Huggingface Token (optional)</label
>
<input
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
id="hf_token"
/>
</div>
<!-- GPU Selector -->
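<!-- Typeahead that queries TechPowerUp's GPU database via corsproxy.io and fills
the datalist below with "Product Name - Memory" entries; calculateSizes() later
parses the VRAM figure back out of the selected value. -->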
<div
class="relative"
x-data="{
results: null,
query: null
}"
>
<label
for="gpusearch"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>GPU (optional)</label
>
<input
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
placeholder="GeForce RTX 3090 - 24 GB"
id="gpusearch"
name="gpusearch"
list="gpulist"
x-model="query"
@keypress.debounce.150ms="results = query === '' ? [] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))"
/>
<datalist id="gpulist">
<template x-for="item in results">
<option :value="item" x-text="item"></option>
</template>
</datalist>
</div>
<!-- Model Selector -->
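<!-- Typeahead backed by the Hugging Face quicksearch API; repos whose names contain
GGUF/AWQ/GPTQ/exl2 are filtered out because the calculator needs the unquantized
model's config.json and parameter count. -->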
<div class="flex flex-row gap-4 relative"> | |
<label | |
for="contextsize" | |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" | |
> | |
Model (unquantized) | |
</label> | |
<div | |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" | |
x-data="{ | |
open: false, | |
value: 'Nexusflow/Starling-LM-7B-beta', | |
results: null, | |
toggle() { | |
if (this.open) { | |
return this.close() | |
} | |
this.$refs.input.focus() | |
this.open = true | |
}, | |
close(focusAfter) { | |
if (! this.open) return | |
this.open = false | |
focusAfter && focusAfter.focus() | |
} | |
}" | |
x-on:keydown.escape.prevent.stop="close($refs.input)" | |
x-id="['model-typeahead']" | |
class="relative" | |
> | |
<!-- Input --> | |
<input | |
id="modelsearch" | |
x-ref="input" | |
x-on:click="toggle()" | |
@keypress.debounce.150ms="results = (await | |
fetch('https://huggingface.co/api/quicksearch?type=model&q=' + | |
encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));" | |
:aria-expanded="open" | |
:aria-controls="$id('model-typeahead')" | |
x-model="value" | |
class="flex justify-between items-center gap-2 w-full" | |
/> | |
<!-- Panel --> | |
<div | |
x-ref="panel" | |
x-show="open" | |
x-transition.origin.top.left | |
x-on:click.outside="close($refs.input)" | |
:id="$id('model-typeahead')" | |
style="display: none" | |
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" | |
> | |
<template x-for="result in results"> | |
<a | |
@click="value = result.id; close($refs.input)" | |
x-text="result.id" | |
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" | |
></a> | |
</template> | |
</div> | |
</div> | |
</div> | |
<!-- Context Size Selector -->
<div class="relative">
<label
for="contextsize"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Context Size
</label>
<input
value="8192"
type="number"
name="contextsize"
id="contextsize"
step="1024"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
/>
</div>
<!-- Quant Format Selector -->
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>Quant Format</label
>
<fieldset
x-model="format"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>
<legend class="sr-only">Quant format</legend>
<div
class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0"
>
<div class="flex items-center">
<input
id="gguf-format"
name="quant-format"
type="radio"
value="gguf"
checked
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
/>
<label
for="gguf-format"
class="ml-3 block text-sm font-medium leading-6 text-gray-900"
>GGUF</label
>
</div>
<div class="flex items-center">
<input
id="exl2-format"
name="quant-format"
type="radio"
value="exl2"
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
/>
<label
for="exl2-format"
class="ml-3 block text-sm font-medium leading-6 text-gray-900"
>EXL2</label
>
</div>
<div class="flex items-center">
<input
id="gptq-format"
name="quant-format"
type="radio"
disabled
value="gptq"
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
/>
<label
for="gptq-format"
class="ml-3 block text-sm font-medium leading-6 text-gray-900"
>GPTQ (coming soon)</label
>
</div>
</div>
</fieldset>
</div>
<!-- EXL2 Options -->
<div x-show="format === 'exl2'" class="flex flex-row gap-4">
<div class="relative flex-grow">
<label
for="bpw"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
BPW
</label>
<input
value="4.5"
type="number"
step="0.01"
id="bpw"
name="bpw"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
/>
</div>
<div
class="flex-shrink relative rounded-md"
>
<div
class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>
<label
for="kvCache"
class="inline-block bg-white text-xs font-medium text-gray-900"
>
KV Cache
</label>
<select id="kvCache" name="kvCache">
<option value="16">16 bit</option>
<option value="8">8 bit</option>
<option value="4">4 bit</option>
</select>
</div>
</div>
</div>
<!-- GGUF Options -->
<div x-show="format === 'gguf'" class="relative">
<div class="flex flex-row gap-4">
<label
for="quantsize"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Quantization Size
</label>
<div
class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
x-data="{
open: false,
value: '',
toggle() {
if (this.open) {
return this.close()
}
this.$refs.button.focus()
this.open = true
},
close(focusAfter) {
if (! this.open) return
this.open = false
focusAfter && focusAfter.focus()
}
}"
x-on:keydown.escape.prevent.stop="close($refs.button)"
x-id="['dropdown-button']"
>
<!-- Button -->
<button
x-ref="button"
x-on:click="toggle()"
:aria-expanded="open"
:aria-controls="$id('dropdown-button')"
type="button"
id="quantsize"
x-text="value.length === 0 ? 'Q4_K_S' : value"
class="flex justify-between items-center gap-2 w-full"
>
Q4_K_S
<!-- Heroicon: chevron-down -->
<svg
xmlns="http://www.w3.org/2000/svg"
class="h-5 w-5 text-gray-400"
viewBox="0 0 20 20"
fill="currentColor"
>
<path
fill-rule="evenodd"
d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
clip-rule="evenodd"
/>
</svg>
</button>
<!-- Panel -->
<div
x-data="{ quants: [
'IQ1_S',
'IQ2_XXS',
'IQ2_XS',
'IQ2_S',
'IQ2_M',
'IQ3_XXS',
'IQ3_XS',
'Q2_K',
'Q3_K_S',
'IQ3_S',
'IQ3_M',
'Q3_K_M',
'Q3_K_L',
'IQ4_XS',
'IQ4_NL',
'Q4_0',
'Q4_K_S',
'Q4_K_M',
'Q5_0',
'Q5_K_S',
'Q5_K_M',
'Q6_K',
'Q8_0'
]}"
x-ref="panel"
x-show="open"
x-transition.origin.top.left
x-on:click.outside="close($refs.button)"
:id="$id('dropdown-button')"
style="display: none"
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
>
<template x-for="quant in quants">
<a
@click="value = quant; close($refs.button)"
x-text="quant"
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
></a>
</template>
</div>
</div>
<div class="relative">
<label
for="batchsize"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Batch Size
</label>
<input
value="512"
type="number"
step="128"
id="batchsize"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
/>
</div>
</div>
</div>
<button
type="button"
class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
@click="calculateSizes(format)"
>
Submit
</button>
</div>
<div class="w-auto flex flex-col gap-4">
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Model Size (GB)
</label>
<div
id="resultmodel"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>4.20</div>
</div>
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Context Size (GB)
</label>
<div
id="resultcontext"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>6.90</div>
</div>
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Total Size (GB)
</label>
<div
id="resulttotal"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>420.69</div>
</div>
</div>
</div>
</div>
<script
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<script defer>
calculateSizes("gguf")
</script>
</body>
</html>