Commit e16200c · Update app.py
Parent: 4db8e8b

app.py CHANGED
@@ -64,7 +64,7 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
 # ---- FLOP Calculation ---- #
 def calc_flops(vocab_size, hidden_size, sequence_length, num_layers, kv_size_ratio, topk, moe, num_experts, expert_interval, batch_size, tokens, checkpoint_activations, ffn_expansion_factor, infer):
     # An A_(m x k) X B_(k x n) matrix multiplication requires 2m x k x n FLOPs (factor of 2 needed to account for multiplies and adds)
-
+    tokens = 1e9 * tokens
     # determine the flops factor.
     iter_factor = 3
     if checkpoint_activations:
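The one functional change in this hunk: calc_flops now treats its tokens argument as GigaTokens and rescales it to raw tokens before any FLOP math. Below is a minimal sketch of the idea, assuming the common 6*N*D-style training-FLOP approximation; the real calc_flops also handles MoE, attention, and activation checkpointing, and estimate_training_flops / num_params are illustrative names, not from app.py.

# Sketch only: the commit's unit conversion plus the usual 6*N*D estimate.
# Per the diff's comment, an (m x k) @ (k x n) matmul costs 2*m*k*n FLOPs,
# i.e. ~2 FLOPs per parameter per token in the forward pass; the diff's
# iter_factor = 3 then folds in the backward pass.
def estimate_training_flops(num_params, gigatokens):
    tokens = 1e9 * gigatokens   # the conversion this commit adds at line 67
    iter_factor = 3             # forward pass + ~2x for backward
    return iter_factor * 2 * num_params * tokens

# e.g. a 1.3B-parameter model on the UI default of 300 GigaTokens:
# 3 * 2 * 1.3e9 * 3e11 ~= 2.3e21 FLOPs
print(f"{estimate_training_flops(1.3e9, 300):.2e}")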
@@ -370,9 +370,9 @@ with gr.Blocks() as demo:
         info="Global batch size in units of samples"
     )
     tokens = gr.Number(
-        label="Number of
-        value=
-        info="Total number of
+        label="Number of GigaTokens",
+        value=300,
+        info="Total number of GigaTokens for training"
     )
     checkpoint_activations = gr.Checkbox(
         label="Checkpoint Activations",
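On the UI side, the gr.Number field now collects GigaTokens (default 300), keeping the label, default value, and the new 1e9 scaling in calc_flops consistent. Below is a hedged, self-contained sketch of how such a widget plausibly wires into the conversion; flops_readout, the Textbox, and the flat layout are illustrative, not copied from app.py.

import gradio as gr

def flops_readout(gigatokens):
    # Mirror of the conversion this commit adds inside calc_flops.
    tokens = 1e9 * gigatokens
    return f"{tokens:.3e} raw tokens"

with gr.Blocks() as demo:
    tokens = gr.Number(
        label="Number of GigaTokens",
        value=300,
        info="Total number of GigaTokens for training",
    )
    out = gr.Textbox(label="Converted token count")
    tokens.change(flops_readout, inputs=tokens, outputs=out)

if __name__ == "__main__":
    demo.launch()

Keeping the conversion inside the compute function means the interface works in human-scale units while every downstream formula sees raw token counts.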