derek-thomas (HF staff) committed
Commit e16200c
1 parent: 4db8e8b

Update app.py

Files changed (1): app.py (+4 −4)
app.py CHANGED
@@ -64,7 +64,7 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
 # ---- FLOP Calculation ---- #
 def calc_flops(vocab_size, hidden_size, sequence_length, num_layers, kv_size_ratio, topk, moe, num_experts, expert_interval, batch_size, tokens, checkpoint_activations, ffn_expansion_factor, infer):
     # An A_(m x k) X B_(k x n) matrix multiplication requires 2m x k x n FLOPs (factor of 2 needed to account for multiplies and adds)
-
+    tokens = 1e9 * tokens
     # determine the flops factor.
     iter_factor = 3
     if checkpoint_activations:
@@ -370,9 +370,9 @@ with gr.Blocks() as demo:
         info="Global batch size in units of samples"
     )
     tokens = gr.Number(
-        label="Number of Tokens",
-        value=300e9,
-        info="Total number of tokens for training"
+        label="Number of GigaTokens",
+        value=300,
+        info="Total number of GigaTokens for training"
     )
     checkpoint_activations = gr.Checkbox(
         label="Checkpoint Activations",