Spaces:

derek-thomas
/

transformer_calculator

Running

App Files Files Community

derek-thomas HF staff committed on Sep 13

Commit

18ddd47

•

1 Parent(s): e16200c

Update app.py

Browse files

Files changed (1) hide show

app.py +258 -256

app.py CHANGED Viewed

@@ -111,19 +111,19 @@ def calc_flops(vocab_size, hidden_size, sequence_length, num_layers, kv_size_rat
 # ---- Gradio Interface ---- #
-with gr.Blocks() as demo:
-    with gr.Tabs():
         gr.Markdown("""
         This app is a re-creation of [this calculator](https://github.com/EleutherAI/cookbook/tree/main/calc) from EleutherAI.
         Before training or inference even begins, common practical questions about potential models must be answered such as:
         1. How many parameters are we targeting? How should those parameters be allocated within the model?
         1. How many FLOPs does the model from step 1 take to train on t tokens? How about inference?
         1. How much memory does the model from step 1 take to train/infer on d devices? What memory-saving strategies (e.g. parallelism, quantization, etc) are necessary to fit the model on device memory?
         """)
-        with gr.TabItem("Memory Calculation"):
             gr.Markdown("""
             ## Memory Calculation
@@ -131,293 +131,295 @@ with gr.Blocks() as demo:
             Take this estimation with a grain of salt, because every implementation is different and these calculations were written to match the GPT-NeoX library as close as possible.
             Even for other training and inference libraries, however, we expect our script to give approximate memory estimations within acceptable error.
             (Please see [LLM finetuning memory requirements](https://blog.scottlogic.com/2023/11/24/llm-mem.html) for a treatment of how specific memory costs may vary framework-to-framework). Other good resources that we consulted are the [ZeRO Paper](https://arxiv.org/abs/1910.02054) and [Reducing Activation Recomputation in Large Transformer Models](https://arxiv.org/pdf/2205.05198.pdf).
             ## To Use
             Fill in the required details below and click 'Calculate Memory' to get a result.
             """)
-            with gr.Row():
-                with gr.Column("Generatable"):
-                    with gr.Group():
-                        hf_model_name_or_path = gr.Textbox(
-                            label="HuggingFace Model Name or Path",
-                            info="Name of the HuggingFace Hub repository or the local file path for it"
-                        )
-                        sequence_length = gr.Number(
-                            label="Sequence Length",
-                            value=2048,
-                            info="Sequence length used for training"
-                        )
-                        vocab_size = gr.Number(
-                            label="Vocab Size",
-                            value=51200,
-                            info="How many tokens are in the embedding layer"
-                        )
-                        hidden_size = gr.Number(
-                            label="Hidden Size",
-                            value=6144,
-                            info="Dimension of the model's hidden size"
-                        )
-                        num_attention_heads = gr.Number(
-                            label="Number of Attention Heads",
-                            value=64,
-                            info="Number of attention heads used in the model"
-                        )
-                        num_layers = gr.Number(
-                            label="Number of Layers",
-                            value=44,
-                            info="Number of transformer layers used in the model"
-                        )
-                with gr.Column("User Defined"):
-                    num_gpus = gr.Number(
-                        label="Number of GPUs",
-                        value=1,
-                        info="Number of GPUs used for training"
                     )
-                    tensor_parallel_size = gr.Number(
-                        label="Tensor Parallel Size",
-                        value=1,
-                        info="Tensor parallel degree (1 if not used)"
-                    )
-                    pipeline_parallel_size = gr.Number(
-                        label="Pipeline Parallel Size",
-                        value=1,
-                        info="Pipeline parallel degree (1 if not used)"
-                    )
-                    batch_size_per_gpu = gr.Number(
-                        label="Batch Size per GPU",
-                        value=8,
-                        info="Batch size per GPU"
                     )
-                    ffn_expansion_factor = gr.Number(
-                        label="FFN Expansion Factor",
-                        value=4,
-                        info="How much the MLP hidden size expands"
                     )
-                    is_mixed_precision = gr.Checkbox(
-                        label="Mixed Precision",
-                        value=True,
-                        info="Whether mixed precision is enabled"
                     )
-                    misc_mem_gib = gr.Number(
-                        label="Miscellaneous Memory Overhead (GiB)",
-                        value=5,
-                        info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
                     )
-            calc_memory_button = gr.Button("Calculate Memory")
-            memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
-            calc_memory_button.click(
                 calc_mem,
                 inputs=[
                     hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib
                 ],
                 outputs=memory_result
-            )
-            hf_model_name_or_path.change(
                 fn=update_from_hf_model,
                 inputs=[hf_model_name_or_path],
                 outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result]
-            )
         # Parameter Calculation Tab
-        with gr.TabItem("Parameter Calculation"):
-            gr.Markdown("""
-            ## Parameter Calculation
-            Parameter Calculation calculates the number of parameters present in a given model based on its hyperparams.
-            Such calculations are important to determine memory overheads, FLOPs, or to determine the size of an unknown transformer model.
-            We also found the following resources helpful:
-            [How does GPT-3 spend its 175B parameters?](https://www.lesswrong.com/posts/3duR8CrvcHywrnhLo/how-does-gpt-3-spend-its-175b-parameters)
-            and [LLM Parameter Counting](https://kipp.ly/transformer-param-count/).
-            ## How To Use
-            Simply input the model details, such as the hidden size, number of layers, and attention heads, and press 'Calculate Parameters' to get a result.
-            """)
-            with gr.Row():
-                with gr.Column("Generatable"):
-                    with gr.Group():
-                        hf_model_name_or_path = gr.Textbox(
-                            label="HuggingFace Model Name or Path",
-                            info="Name of the HuggingFace Hub repository or the local file path for it"
-                        )
-                        vocab_size = gr.Number(
-                            label="Vocab Size",
-                            value=51200,
-                            info="How many tokens are in the embedding layer"
-                        )
-                        hidden_size = gr.Number(
-                            label="Hidden Size",
-                            value=6144,
-                            info="Dimension of the model's hidden size"
-                        )
-                        sequence_length = gr.Number(
-                            label="Sequence Length",
-                            value=2048,
-                            info="Sequence length used for training"
-                        )
-                        num_layers = gr.Number(
-                            label="Number of Layers",
-                            value=44,
-                            info="Number of transformer layers used in the model"
-                        )
-                with gr.Column("User Defined"):
-                    tied_embeddings = gr.Checkbox(
-                        label="Tied Embeddings",
                         value=False,
-                        info="Whether embeddings are tied (shared between input and output)"
                     )
-                    ffn_expansion_factor = gr.Number(
-                        label="FFN Expansion Factor",
-                        value=4,
-                        info="How much the MLP hidden size expands"
                     )
-                    num_mlp_linears = gr.Number(
-                        label="Number of Linear Layers per MLP Block",
-                        value=2,
-                        info="How many linear layers per MLP block"
                     )
-                    kv_size_ratio = gr.Number(
-                        label="KV Size Ratio",
-                        value=1.0,
-                        info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
                     )
-                    with gr.Accordion("MoE Parameters", open=False):
-                        moe = gr.Checkbox(
-                            label="MoE",
-                            value=False,
-                            info="Whether the model is MoE"
-                        )
-                        num_experts = gr.Number(
-                            label="Number of Experts",
-                            value=8,
-                            info="Number of experts for MoE"
-                        )
-                        expert_interval = gr.Number(
-                            label="Expert Interval",
-                            value=1,
-                            info="Expert interval for MoE"
-                        )
-                        topk = gr.Number(
-                            label="Top k Routing",
-                            value=1,
-                            info="Top k routing for MoE"
-                        )
-            calc_param_button = gr.Button("Calculate Parameters")
-            param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
-            calc_param_button.click(calc_params,
-                inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
-                outputs=param_result)
-            hf_model_name_or_path.change(fn=update_from_hf_model,
-                inputs=[hf_model_name_or_path],
-                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length])
-        # New FLOP Calculation Tab
-        with gr.TabItem("FLOP Calculation"):
-            gr.Markdown("""
-            ## FLOP Calculation
-            FLOP Calculation calculates the number of theoretical FLOPs required to train a model on t tokens.
-            See [Transformers Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how FLOPs are calculated.
-            Other good resources that we consulted are the [Chinchilla Paper](https://arxiv.org/abs/2203.15556) and
-            [Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf).
-            """)
-            with gr.Row():
-                with gr.Column("Generatable"):
-                    with gr.Group():
-                        hf_model_name_or_path = gr.Textbox(
-                            label="HuggingFace Model Name or Path",
-                            info="Name of the HuggingFace Hub repository or the local file path for it"
-                        )
-                        vocab_size = gr.Number(
-                            label="Vocab Size",
-                            value=51200,
-                            info="How many tokens are in the embedding layer"
-                        )
-                        hidden_size = gr.Number(
-                            label="Hidden Size",
-                            value=6144,
-                            info="Dimension of the model's hidden size"
-                        )
-                        sequence_length = gr.Number(
-                            label="Sequence Length",
-                            value=2048,
-                            info="Sequence length used for training"
-                        )
-                        num_layers = gr.Number(
-                            label="Number of Layers",
-                            value=44,
-                            info="Number of transformer layers used in the model"
-                        )
-                with gr.Column("Generatable"):
-                    kv_size_ratio = gr.Number(
-                        label="KV Size Ratio",
-                        value=1.0,
-                        info="Ratio of kv heads to query heads used in model. 1.0 for MHA"
                     )
-                    ffn_expansion_factor = gr.Number(
-                        label="FFN Expansion Factor",
-                        value=4,
-                        info="How much the MLP hidden size expands"
                     )
-                    batch_size = gr.Number(
-                        label="Batch Size",
-                        value=1,
-                        info="Global batch size in units of samples"
                     )
-                    tokens = gr.Number(
-                        label="Number of GigaTokens",
-                        value=300,
-                        info="Total number of GigaTokens for training"
                     )
-                    checkpoint_activations = gr.Checkbox(
-                        label="Checkpoint Activations",
-                        value=True,
-                        info="Whether Megatron-style activation checkpointing is being used"
                     )
-                    infer = gr.Checkbox(
-                        label="Inference-Only",
                         value=False,
-                        info="Whether the model is being used for inference-only"
                     )
-                    # MoE parameters hidden in accordion
-                    with gr.Accordion("Mixture of Experts (MoE)", open=False):
-                        moe = gr.Checkbox(
-                            label="Mixture of Experts (MoE)",
-                            value=False,
-                            info="Whether the model uses Mixture of Experts"
-                        )
-                        num_experts = gr.Number(
-                            label="Number of Experts",
-                            value=128,
-                            info="Number of experts for Mixture of Experts (MoE)"
-                        )
-                        expert_interval = gr.Number(
-                            label="Expert Interval",
-                            value=2,
-                            info="Expert interval for Mixture of Experts (MoE)"
-                        )
-                        topk = gr.Number(
-                            label="Top K Routing for MoE",
-                            value=1,
-                            info="Top k routing for Mixture of Experts (MoE)"
-                        )
-            calc_flops_button = gr.Button("Calculate FLOPs")
-            flops_result = gr.JSON(label="FLOP Calculation Result")
-            calc_flops_button.click(
-                calc_flops,
-                inputs=[vocab_size, hidden_size, sequence_length, num_layers, kv_size_ratio, topk, moe, num_experts, expert_interval, batch_size, tokens, checkpoint_activations, ffn_expansion_factor, infer],
-                outputs=flops_result
-            )
-            hf_model_name_or_path.change(fn=update_from_hf_model,
-                inputs=[hf_model_name_or_path],
-                outputs=[num_layers, hidden_size, vocab_size, sequence_length])
 demo.launch()

 # ---- Gradio Interface ---- #
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    with gr.Accordion("Credits and General Idea", open=False):
         gr.Markdown("""
         This app is a re-creation of [this calculator](https://github.com/EleutherAI/cookbook/tree/main/calc) from EleutherAI.
         Before training or inference even begins, common practical questions about potential models must be answered such as:
         1. How many parameters are we targeting? How should those parameters be allocated within the model?
         1. How many FLOPs does the model from step 1 take to train on t tokens? How about inference?
         1. How much memory does the model from step 1 take to train/infer on d devices? What memory-saving strategies (e.g. parallelism, quantization, etc) are necessary to fit the model on device memory?
         """)
+    with gr.Tab("Memory Calculation"):
+        #with gr.TabItem("Memory Calculation"):
+        with gr.Accordion("About Memory Calculation", open=False):
             gr.Markdown("""
             ## Memory Calculation
             Take this estimation with a grain of salt, because every implementation is different and these calculations were written to match the GPT-NeoX library as close as possible.
             Even for other training and inference libraries, however, we expect our script to give approximate memory estimations within acceptable error.
             (Please see [LLM finetuning memory requirements](https://blog.scottlogic.com/2023/11/24/llm-mem.html) for a treatment of how specific memory costs may vary framework-to-framework). Other good resources that we consulted are the [ZeRO Paper](https://arxiv.org/abs/1910.02054) and [Reducing Activation Recomputation in Large Transformer Models](https://arxiv.org/pdf/2205.05198.pdf).
+            """)
+        with gr.Accordion("How to use it?", open=False):
+            gr.Markdown("""
             ## To Use
             Fill in the required details below and click 'Calculate Memory' to get a result.
             """)
+        with gr.Row():
+            with gr.Column("Generatable"):
+                gr.Markdown("## Generatable")
+                with gr.Group():
+                    hf_model_name_or_path = gr.Textbox(
+                        label="HuggingFace Model Name or Path",
+                        info="Name of the HuggingFace Hub repository or the local file path for it"
                     )
+                    sequence_length = gr.Number(
+                        label="Sequence Length",
+                        value=2048,
+                        info="Sequence length used for training"
                     )
+                    vocab_size = gr.Number(
+                        label="Vocab Size",
+                        value=51200,
+                        info="How many tokens are in the embedding layer"
                     )
+                    hidden_size = gr.Number(
+                        label="Hidden Size",
+                        value=6144,
+                        info="Dimension of the model's hidden size"
                     )
+                    num_attention_heads = gr.Number(
+                        label="Number of Attention Heads",
+                        value=64,
+                        info="Number of attention heads used in the model"
                     )
+                    num_layers = gr.Number(
+                        label="Number of Layers",
+                        value=44,
+                        info="Number of transformer layers used in the model"
+                    )
+            with gr.Column("User Defined"):
+                gr.Markdown("## User Defined")
+                num_gpus = gr.Number(
+                    label="Number of GPUs",
+                    value=1,
+                    info="Number of GPUs used for training"
+                )
+                tensor_parallel_size = gr.Number(
+                    label="Tensor Parallel Size",
+                    value=1,
+                    info="Tensor parallel degree (1 if not used)"
+                )
+                pipeline_parallel_size = gr.Number(
+                    label="Pipeline Parallel Size",
+                    value=1,
+                    info="Pipeline parallel degree (1 if not used)"
+                )
+                batch_size_per_gpu = gr.Number(
+                    label="Batch Size per GPU",
+                    value=8,
+                    info="Batch size per GPU"
+                )
+                ffn_expansion_factor = gr.Number(
+                    label="FFN Expansion Factor",
+                    value=4,
+                    info="How much the MLP hidden size expands"
+                )
+                is_mixed_precision = gr.Checkbox(
+                    label="Mixed Precision",
+                    value=True,
+                    info="Whether mixed precision is enabled"
+                )
+                misc_mem_gib = gr.Number(
+                    label="Miscellaneous Memory Overhead (GiB)",
+                    value=5,
+                    info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
+                )
+        calc_memory_button = gr.Button("Calculate Memory")
+        memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
+        calc_memory_button.click(
                 calc_mem,
                 inputs=[
                     hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib
                 ],
                 outputs=memory_result
+        )
+        hf_model_name_or_path.change(
                 fn=update_from_hf_model,
                 inputs=[hf_model_name_or_path],
                 outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result]
+        )
         # Parameter Calculation Tab
+    with gr.TabItem("Parameter Calculation"):
+        gr.Markdown("""
+        ## Parameter Calculation
+        Parameter Calculation calculates the number of parameters present in a given model based on its hyperparams.
+        Such calculations are important to determine memory overheads, FLOPs, or to determine the size of an unknown transformer model.
+        We also found the following resources helpful:
+        [How does GPT-3 spend its 175B parameters?](https://www.lesswrong.com/posts/3duR8CrvcHywrnhLo/how-does-gpt-3-spend-its-175b-parameters)
+        and [LLM Parameter Counting](https://kipp.ly/transformer-param-count/).
+        ## How To Use
+        Simply input the model details, such as the hidden size, number of layers, and attention heads, and press 'Calculate Parameters' to get a result.
+        """)
+        with gr.Row():
+            with gr.Column("Generatable"):
+                with gr.Group():
+                    hf_model_name_or_path = gr.Textbox(
+                        label="HuggingFace Model Name or Path",
+                        info="Name of the HuggingFace Hub repository or the local file path for it"
+                    )
+                    vocab_size = gr.Number(
+                        label="Vocab Size",
+                        value=51200,
+                        info="How many tokens are in the embedding layer"
+                    )
+                    hidden_size = gr.Number(
+                        label="Hidden Size",
+                        value=6144,
+                        info="Dimension of the model's hidden size"
+                    )
+                    sequence_length = gr.Number(
+                        label="Sequence Length",
+                        value=2048,
+                        info="Sequence length used for training"
+                    )
+                    num_layers = gr.Number(
+                        label="Number of Layers",
+                        value=44,
+                        info="Number of transformer layers used in the model"
+                    )
+            with gr.Column("User Defined"):
+                tied_embeddings = gr.Checkbox(
+                    label="Tied Embeddings",
+                    value=False,
+                    info="Whether embeddings are tied (shared between input and output)"
+                )
+                ffn_expansion_factor = gr.Number(
+                    label="FFN Expansion Factor",
+                    value=4,
+                    info="How much the MLP hidden size expands"
+                )
+                num_mlp_linears = gr.Number(
+                    label="Number of Linear Layers per MLP Block",
+                    value=2,
+                    info="How many linear layers per MLP block"
+                )
+                kv_size_ratio = gr.Number(
+                    label="KV Size Ratio",
+                    value=1.0,
+                    info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
+                )
+                with gr.Accordion("MoE Parameters", open=False):
+                    moe = gr.Checkbox(
+                        label="MoE",
                         value=False,
+                        info="Whether the model is MoE"
                     )
+                    num_experts = gr.Number(
+                        label="Number of Experts",
+                        value=8,
+                        info="Number of experts for MoE"
                     )
+                    expert_interval = gr.Number(
+                        label="Expert Interval",
+                        value=1,
+                        info="Expert interval for MoE"
                     )
+                    topk = gr.Number(
+                        label="Top k Routing",
+                        value=1,
+                        info="Top k routing for MoE"
                     )
+        calc_param_button = gr.Button("Calculate Parameters")
+        param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
+        calc_param_button.click(calc_params,
+            inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
+            outputs=param_result)
+        hf_model_name_or_path.change(fn=update_from_hf_model,
+            inputs=[hf_model_name_or_path],
+            outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length])
+    # New FLOP Calculation Tab
+    with gr.TabItem("FLOP Calculation"):
+        gr.Markdown("""
+        ## FLOP Calculation
+        FLOP Calculation calculates the number of theoretical FLOPs required to train a model on t tokens.
+        See [Transformers Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how FLOPs are calculated.
+        Other good resources that we consulted are the [Chinchilla Paper](https://arxiv.org/abs/2203.15556) and
+        [Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf).
+        """)
+        with gr.Row():
+            with gr.Column("Generatable"):
+                with gr.Group():
+                    hf_model_name_or_path = gr.Textbox(
+                        label="HuggingFace Model Name or Path",
+                        info="Name of the HuggingFace Hub repository or the local file path for it"
                     )
+                    vocab_size = gr.Number(
+                        label="Vocab Size",
+                        value=51200,
+                        info="How many tokens are in the embedding layer"
                     )
+                    hidden_size = gr.Number(
+                        label="Hidden Size",
+                        value=6144,
+                        info="Dimension of the model's hidden size"
                     )
+                    sequence_length = gr.Number(
+                        label="Sequence Length",
+                        value=2048,
+                        info="Sequence length used for training"
                     )
+                    num_layers = gr.Number(
+                        label="Number of Layers",
+                        value=44,
+                        info="Number of transformer layers used in the model"
                     )
+            with gr.Column("Generatable"):
+                kv_size_ratio = gr.Number(
+                    label="KV Size Ratio",
+                    value=1.0,
+                    info="Ratio of kv heads to query heads used in model. 1.0 for MHA"
+                )
+                ffn_expansion_factor = gr.Number(
+                    label="FFN Expansion Factor",
+                    value=4,
+                    info="How much the MLP hidden size expands"
+                )
+                batch_size = gr.Number(
+                    label="Batch Size",
+                    value=1,
+                    info="Global batch size in units of samples"
+                )
+                tokens = gr.Number(
+                    label="Number of GigaTokens",
+                    value=300,
+                    info="Total number of GigaTokens for training"
+                )
+                checkpoint_activations = gr.Checkbox(
+                    label="Checkpoint Activations",
+                    value=True,
+                    info="Whether Megatron-style activation checkpointing is being used"
+                )
+                infer = gr.Checkbox(
+                    label="Inference-Only",
+                    value=False,
+                    info="Whether the model is being used for inference-only"
+                )
+                # MoE parameters hidden in accordion
+                with gr.Accordion("Mixture of Experts (MoE)", open=False):
+                    moe = gr.Checkbox(
+                        label="Mixture of Experts (MoE)",
                         value=False,
+                        info="Whether the model uses Mixture of Experts"
+                    )
+                    num_experts = gr.Number(
+                        label="Number of Experts",
+                        value=128,
+                        info="Number of experts for Mixture of Experts (MoE)"
+                    )
+                    expert_interval = gr.Number(
+                        label="Expert Interval",
+                        value=2,
+                        info="Expert interval for Mixture of Experts (MoE)"
+                    )
+                    topk = gr.Number(
+                        label="Top K Routing for MoE",
+                        value=1,
+                        info="Top k routing for Mixture of Experts (MoE)"
                     )
+        calc_flops_button = gr.Button("Calculate FLOPs")
+        flops_result = gr.JSON(label="FLOP Calculation Result")
+        calc_flops_button.click(
+            calc_flops,
+            inputs=[vocab_size, hidden_size, sequence_length, num_layers, kv_size_ratio, topk, moe, num_experts, expert_interval, batch_size, tokens, checkpoint_activations, ffn_expansion_factor, infer],
+            outputs=flops_result
+        )
+        hf_model_name_or_path.change(fn=update_from_hf_model,
+            inputs=[hf_model_name_or_path],
+            outputs=[num_layers, hidden_size, vocab_size, sequence_length])
 demo.launch()