derek-thomas (HF staff) committed
Commit
d65669a
1 Parent(s): dcb01bb

Update app.py

Files changed (1)
  1. app.py +36 -50
app.py CHANGED
@@ -13,19 +13,18 @@ def convert_params(params):
     return "%s %s" % (s, size_name[i])
 
 # Get Hugging Face model configuration and update the parameters
-def get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length):
-    if hf_model_name_or_path:
-        try:
-            config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
-        except Exception as e:
-            return None, f"Error fetching Hugging Face model: {str(e)}"
-
-        # Update parameters with the Hugging Face model config values
-        num_layers = config.get("num_hidden_layers", num_layers)
-        hidden_size = config.get("hidden_size", hidden_size)
-        num_attention_heads = config.get("num_attention_heads", num_attention_heads)
-        vocab_size = config.get("vocab_size", vocab_size)
-        sequence_length = config.get("max_position_embeddings", sequence_length)
+def get_hf_model_args(hf_model_name_or_path):
+    try:
+        config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
+    except Exception as e:
+        return None, f"Error fetching Hugging Face model: {str(e)}"
+
+    # Extract relevant values from the config
+    num_layers = config.get("num_hidden_layers", None)
+    hidden_size = config.get("hidden_size", None)
+    num_attention_heads = config.get("num_attention_heads", None)
+    vocab_size = config.get("vocab_size", None)
+    sequence_length = config.get("max_position_embeddings", None)
 
     return {
         "num_layers": num_layers,
@@ -37,16 +36,17 @@ def get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attent
 
 # ---- Memory Calculation ---- #
 def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
-    model_params, hf_error = get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length)
+    model_params, hf_error = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else (None, None)
 
     if hf_error:
         return hf_error
 
-    num_layers = model_params["num_layers"]
-    hidden_size = model_params["hidden_size"]
-    num_attention_heads = model_params["num_attention_heads"]
-    vocab_size = model_params["vocab_size"]
-    sequence_length = model_params["sequence_length"]
+    if model_params:
+        num_layers = model_params["num_layers"] or num_layers
+        hidden_size = model_params["hidden_size"] or hidden_size
+        num_attention_heads = model_params["num_attention_heads"] or num_attention_heads
+        vocab_size = model_params["vocab_size"] or vocab_size
+        sequence_length = model_params["sequence_length"] or sequence_length
 
     dp_degree = num_gpus / (tensor_parallel_size * pipeline_parallel_size)
     embed_params = 2 * vocab_size * hidden_size
@@ -62,37 +62,19 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
 
     return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
 
-# ---- Parameter Calculation ---- #
-def calc_params(vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio):
-    if tied_embeddings:
-        embedding_params = hidden_size * vocab_size
-    else:
-        embedding_params = 2 * hidden_size * vocab_size
-    position_embedding_params = hidden_size * sequence_length
-    attention_params = int(2 * (1 + kv_size_ratio) * num_layers * hidden_size * hidden_size)
-    layernorm_params = 13 * num_layers * hidden_size
-
-    if moe:
-        num_expert_layers = num_layers / expert_interval
-        ffn_expert_params = num_mlp_linears * ffn_expansion_factor * num_expert_layers * num_experts * hidden_size * hidden_size
-        ffn_dense_params = num_mlp_linears * ffn_expansion_factor * (num_layers - num_expert_layers) * hidden_size * hidden_size
-        ffn_params = ffn_expert_params + ffn_dense_params
-        gating_params = num_expert_layers * hidden_size * num_experts
-    else:
-        ffn_params = num_mlp_linears * ffn_expansion_factor * num_layers * hidden_size * hidden_size
-
-    total_params = embedding_params + attention_params + ffn_params + position_embedding_params + layernorm_params
-
-    if moe:
-        total_params += gating_params
-
-    return f"""
-    Embedding parameters: {convert_params(embedding_params)}
-    Attention parameters: {convert_params(attention_params)}
-    FFN parameters: {convert_params(ffn_params)}
-    {'Gating parameters: ' + convert_params(gating_params) if moe else ''}
-    Total Params in the Model: {convert_params(total_params)}
-    """
+# ---- Update Gradio inputs with Hugging Face model config ---- #
+def update_from_hf_model(hf_model_name_or_path):
+    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
+
+    if hf_error:
+        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), hf_error
+
+    return (gr.update(value=model_params["num_layers"]),
+            gr.update(value=model_params["hidden_size"]),
+            gr.update(value=model_params["num_attention_heads"]),
+            gr.update(value=model_params["vocab_size"]),
+            gr.update(value=model_params["sequence_length"]),
+            "")
 
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
@@ -119,6 +101,10 @@ with gr.Blocks() as demo:
                                      inputs=[hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib],
                                      outputs=memory_result)
 
+            hf_model_name_or_path.change(fn=update_from_hf_model,
+                                         inputs=[hf_model_name_or_path],
+                                         outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result])
+
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
             vocab_size = gr.Number(label="Vocab Size", value=51200)
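
A quick way to sanity-check the reworked helper outside the app (a minimal sketch, assuming transformers is installed; EleutherAI/pythia-160m is only an example checkpoint, and the trailing ", None" on the success path is assumed so the two-value unpacking in calc_mem works, since the hunk cuts off before the end of the return):

from transformers import AutoConfig

def get_hf_model_args(hf_model_name_or_path):
    # Same behavior as the helper in this commit: fetch the config and
    # surface failures as a message instead of raising into Gradio.
    try:
        config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
    except Exception as e:
        return None, f"Error fetching Hugging Face model: {str(e)}"
    return {
        "num_layers": config.get("num_hidden_layers", None),
        "hidden_size": config.get("hidden_size", None),
        "num_attention_heads": config.get("num_attention_heads", None),
        "vocab_size": config.get("vocab_size", None),
        "sequence_length": config.get("max_position_embeddings", None),
    }, None  # assumed tail so `model_params, hf_error = ...` unpacks

model_params, err = get_hf_model_args("EleutherAI/pythia-160m")
print(err or model_params)
# e.g. {'num_layers': 12, 'hidden_size': 768, 'num_attention_heads': 12,
#       'vocab_size': 50304, 'sequence_length': 2048}

Models whose configs use non-standard key names (GPT-2's "n_layer"/"n_embd", for instance) come back as None for those fields, which is why calc_mem keeps the "or" fallbacks to the values already in the UI.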
 
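The new .change wiring, reduced to a self-contained sketch (two fields instead of five; the component labels, defaults, and layout here are illustrative, not the app's actual components):

import gradio as gr
from transformers import AutoConfig

def update_from_hf_model(hf_model_name_or_path):
    # Same shape as the app's callback: one gr.update per output
    # component, plus a status string routed to the result textbox.
    try:
        config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
    except Exception as e:
        return gr.update(), gr.update(), f"Error fetching Hugging Face model: {str(e)}"
    return (gr.update(value=config.get("num_hidden_layers")),
            gr.update(value=config.get("hidden_size")),
            "")

with gr.Blocks() as demo:
    hf_model_name_or_path = gr.Textbox(label="HF Model Name or Path")
    num_layers = gr.Number(label="Number of Layers")
    hidden_size = gr.Number(label="Hidden Size")
    result = gr.Textbox(label="Result")
    # .change fires whenever the textbox value changes, so each edit can
    # trigger a Hub fetch; a real app might prefer .submit or .blur.
    hf_model_name_or_path.change(fn=update_from_hf_model,
                                 inputs=[hf_model_name_or_path],
                                 outputs=[num_layers, hidden_size, result])

if __name__ == "__main__":
    demo.launch()

Returning the error string (or "") as the last output is what lets update_from_hf_model reuse the memory tab's result textbox for its own status messages.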