Commit fc57cfc
Parent(s): 24632bb

Update app.py

app.py CHANGED
```diff
@@ -62,9 +62,29 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
 
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
+
     with gr.Tabs():
-
+        gr.Markdown("""
+        This app is a re-creation of [this calculator](https://github.com/EleutherAI/cookbook/tree/main/calc) from EleutherAI.
+
+        Before training or inference even begins, common practical questions about potential models must be answered, such as:
+
+        1. How many parameters are we targeting? How should those parameters be allocated within the model?
+        1. How many FLOPs does the model from step 1 take to train on t tokens? How about inference?
+        1. How much memory does the model from step 1 take to train/infer on d devices? What memory-saving strategies (e.g. parallelism, quantization, etc.) are necessary to fit the model in device memory?
+        """)
         with gr.TabItem("Memory Calculation"):
+            gr.Markdown("""
+            ## Memory Calculation
+
+            Memory Calculation estimates the amount of device memory required to train or infer a model. See [Transformers Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how memory overhead is calculated.
+            Take this estimation with a grain of salt, because every implementation is different and these calculations were written to match the GPT-NeoX library as closely as possible.
+            Even for other training and inference libraries, however, we expect our script to give memory estimates within an acceptable margin of error.
+            (Please see [LLM finetuning memory requirements](https://blog.scottlogic.com/2023/11/24/llm-mem.html) for a treatment of how specific memory costs may vary from framework to framework.) Other good resources that we consulted are the [ZeRO paper](https://arxiv.org/abs/1910.02054) and [Reducing Activation Recomputation in Large Transformer Models](https://arxiv.org/pdf/2205.05198.pdf).
+
+            ## To Use
+            Fill in the required details below and click 'Calculate Memory' to get a result.
+            """)
             with gr.Row():
                 with gr.Column("Generatable"):
                     with gr.Group():
```
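The intro and Memory Calculation text added here ask how many FLOPs a model takes and how much memory it needs. As a rough illustration of the arithmetic behind both questions, here is a minimal sketch following the Transformers Math 101 breakdown; it is not the app's `calc_mem`, and the function names and mixed-precision byte counts are assumptions noted in the comments:

```python
# A minimal sketch of the estimates described above, following the
# Transformers Math 101 breakdown. NOT the app's calc_mem: the function
# names and byte counts below are illustrative assumptions.

def train_memory_gib(num_params: float, num_gpus: int = 1) -> float:
    """Per-GPU training memory for mixed-precision Adam, ignoring activations."""
    weights = 2 * num_params     # fp16/bf16 weights: 2 bytes per parameter
    gradients = 2 * num_params   # fp16/bf16 gradients: 2 bytes per parameter
    optimizer = 12 * num_params  # fp32 master weights + Adam momentum + variance
    # Assumes ZeRO-3-style sharding of all three states across ranks;
    # without sharding, drop the division by num_gpus.
    return (weights + gradients + optimizer) / num_gpus / 2**30

def train_flops(num_params: float, num_tokens: float) -> float:
    """Standard C ~ 6 * N * D approximation for training compute."""
    return 6 * num_params * num_tokens

# Example: a 1.4B-parameter model trained on 300B tokens across 8 GPUs.
print(f"{train_memory_gib(1.4e9, num_gpus=8):.1f} GiB per GPU")  # ~2.6 GiB
print(f"{train_flops(1.4e9, 300e9):.2e} training FLOPs")         # ~2.52e+21
```

Activation memory, KV caches (for inference), and communication buffers come on top of this figure, which is why the added text points to the activation-recomputation paper for the full accounting.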
```diff
@@ -152,6 +172,19 @@ with gr.Blocks() as demo:
 
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
+            gr.Markdown("""
+            ## Parameter Calculation
+
+            Parameter Calculation calculates the number of parameters present in a given model based on its hyperparameters.
+            Such calculations are important for determining memory overheads and FLOPs, or for working out the size of an unknown transformer model.
+            We also found the following resources helpful:
+            [How does GPT-3 spend its 175B parameters?](https://www.lesswrong.com/posts/3duR8CrvcHywrnhLo/how-does-gpt-3-spend-its-175b-parameters)
+            and [LLM Parameter Counting](https://kipp.ly/transformer-param-count/).
+
+            ## How To Use
+            Simply input the model details, such as the hidden size, number of layers, and attention heads, and press 'Calculate Parameters' to get a result.
+
+            """)
             with gr.Row():
                 with gr.Column("Generatable"):
                     with gr.Group():
```
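For intuition about what the Parameter Calculation tab computes, here is a minimal sketch of the standard GPT-style count described in the linked resources. The helper is hypothetical (not the app's code) and assumes a 4x MLP, learned position embeddings, and a tied unembedding:

```python
# A minimal sketch of the standard GPT-style parameter count, per the
# resources linked above. A hypothetical helper, not the app's code; it
# assumes a 4x MLP, learned position embeddings, and a tied unembedding.

def count_params(hidden: int, layers: int, vocab: int, positions: int = 0) -> int:
    attn = 4 * hidden**2 + 4 * hidden  # Q, K, V, and output projections (+ biases)
    mlp = 8 * hidden**2 + 5 * hidden   # up/down projections at 4x width (+ biases)
    norms = 4 * hidden                 # two LayerNorms per block (scale + bias)
    per_layer = attn + mlp + norms     # = 12 * hidden^2 + 13 * hidden
    embeddings = vocab * hidden + positions * hidden
    return layers * per_layer + embeddings

# GPT-2 small's shape: 12 layers, hidden size 768, vocab 50257, 1024 positions.
print(f"{count_params(768, 12, 50257, 1024):,}")  # 124,438,272 -- ~GPT-2's 124M
```

The 12 * layers * hidden^2 term dominates at scale, which is why hidden size and layer count are the key inputs for this tab.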