Spaces:
Runtime error
Runtime error
justheuristic
committed on
Commit
•
06a624f
1
Parent(s):
4ee0173
plot tweaks
Browse files
- app.py +11 -7
- charts.py +2 -1
- st_helpers.py +1 -0
app.py
CHANGED
@@ -34,7 +34,7 @@ All it takes is for a bunch of us to come together. In fact, we're doing it righ
|
|
34 |
draw_current_progress()
|
35 |
|
36 |
content_text(f"""
|
37 |
-
|
38 |
that is, a transformer "language model" that generates images from text description.
|
39 |
It is trained on {cite("LAION-400M", "https://laion.ai/laion-400-open-dataset/")},
|
40 |
the world's largest openly available image-text-pair dataset with 400 million samples. Our model is based on
|
@@ -47,12 +47,12 @@ with st.expander("How to train efficiently over the internet?"):
|
|
47 |
content_text(f"""
|
48 |
Modern distributed training algorithms are designed for HPC networks with 10-100 gigabit per second bandwidth.
|
49 |
In turn, a typical Internet connection runs at 10-100 megabits per second: that’s three orders of magnitude slower.
|
50 |
-
To make distributed training
|
|
|
51 |
""")
|
52 |
content_text(f"""
|
53 |
-
This may seem daunting at first, but in reality, DL researchers have already made all the necessary pieces for solving this puzzle:
|
54 |
<table style="border: 0px;"><tbody style="border: 0px;">
|
55 |
-
<tr><td> Speed
|
56 |
<tr><td class=centered><strong>4-16x</strong></td><td>
|
57 |
<strong>Large-batch training:</strong> {cite("You et al. (2019)", "https://arxiv.org/abs/1904.00962")} proposed a way for training neural networks efficiently with larger batches, and hence, fewer communication rounds.
|
58 |
</td></tr>
|
@@ -77,12 +77,16 @@ This may seem daunting at first, but in reality, DL researchers have already mad
|
|
77 |
</td></tr>
|
78 |
</tbody></table>
|
79 |
""")
|
80 |
-
|
|
|
|
|
|
|
|
|
81 |
|
82 |
content_title("How do I join?")
|
83 |
|
84 |
-
content_text("""
|
85 |
-
That's easy. First, make sure you're logged in at Hugging Face. If you don't have an account, create one
|
86 |
|
87 |
<ul style="text-align: left; list-style-position: inside; margin-top: 12px; margin-left: -24px;">
|
88 |
<li style="margin-top: 4px;">
|
|
|
34 |
draw_current_progress()
|
35 |
|
36 |
content_text(f"""
|
37 |
+
For this demo we train a model similar to {cite("OpenAI DALL-E", "https://openai.com/blog/dall-e/")},
|
38 |
that is, a transformer "language model" that generates images from text description.
|
39 |
It is trained on {cite("LAION-400M", "https://laion.ai/laion-400-open-dataset/")},
|
40 |
the world's largest openly available image-text-pair dataset with 400 million samples. Our model is based on
|
|
|
47 |
content_text(f"""
|
48 |
Modern distributed training algorithms are designed for HPC networks with 10-100 gigabit per second bandwidth.
|
49 |
In turn, a typical Internet connection runs at 10-100 megabits per second: that’s three orders of magnitude slower.
|
50 |
+
To make distributed training efficient, you need to win back these three orders of magnitude.
|
51 |
+
This may seem daunting at first, but in reality, DL researchers have already made all the necessary pieces for solving this puzzle:
|
52 |
""")
|
53 |
content_text(f"""
|
|
|
54 |
<table style="border: 0px;"><tbody style="border: 0px;">
|
55 |
+
<tr><td> Speed‑up <br> </td> <td>How to achieve</td></tr>
|
56 |
<tr><td class=centered><strong>4-16x</strong></td><td>
|
57 |
<strong>Large-batch training:</strong> {cite("You et al. (2019)", "https://arxiv.org/abs/1904.00962")} proposed a way for training neural networks efficiently with larger batches, and hence, fewer communication rounds.
|
58 |
</td></tr>
|
|
|
77 |
</td></tr>
|
78 |
</tbody></table>
|
79 |
""")
|
80 |
+
content_text("""
|
81 |
+
These techniques are already more than enough to cover 1000x slower communication (totalling to 655.
|
82 |
+
and choose which techniques to use. In this demo, we use parameter sharing to reduce the number of parameters by
|
83 |
+
roughly 12x. If you don’t want parameter sharing, you can instead use more advanced gradient compression or larger batches.
|
84 |
+
""")
|
85 |
|
86 |
content_title("How do I join?")
|
87 |
|
88 |
+
content_text(f"""
|
89 |
+
That's easy. First, make sure you're logged in at Hugging Face. If you don't have an account, create one {cite("here", "https://huggingface.co/join")}.<br>
|
90 |
|
91 |
<ul style="text-align: left; list-style-position: inside; margin-top: 12px; margin-left: -24px;">
|
92 |
<li style="margin-top: 4px;">
|
charts.py
CHANGED
@@ -11,6 +11,7 @@ def draw_current_progress():
|
|
11 |
st.vega_lite_chart(
|
12 |
source, {
|
13 |
"height": 200,
|
|
|
14 |
"title": {
|
15 |
"text": "Training DALL-E with volunteers (updated every few minutes during NeurIPS 2021)",
|
16 |
"dy": 6,
|
@@ -36,7 +37,7 @@ def draw_current_progress():
|
|
36 |
},
|
37 |
],
|
38 |
},
|
39 |
-
use_container_width=
|
40 |
)
|
41 |
|
42 |
|
|
|
11 |
st.vega_lite_chart(
|
12 |
source, {
|
13 |
"height": 200,
|
14 |
+
"width": 600,
|
15 |
"title": {
|
16 |
"text": "Training DALL-E with volunteers (updated every few minutes during NeurIPS 2021)",
|
17 |
"dy": 6,
|
|
|
37 |
},
|
38 |
],
|
39 |
},
|
40 |
+
use_container_width=False, # breaks on <600px screens
|
41 |
)
|
42 |
|
43 |
|
st_helpers.py
CHANGED
@@ -50,5 +50,6 @@ def content_text(text: str, vspace_before: int = 0, vspace_after: int = 0):
|
|
50 |
f'{text}</div><center>',
|
51 |
unsafe_allow_html=True)
|
52 |
|
|
|
53 |
def cite(tag, link):
|
54 |
return f"""<a target="_blank" rel="noopener noreferrer" href="{link}">{tag}</a>"""
|
|
|
50 |
f'{text}</div><center>',
|
51 |
unsafe_allow_html=True)
|
52 |
|
53 |
+
|
54 |
def cite(tag, link):
|
55 |
return f"""<a target="_blank" rel="noopener noreferrer" href="{link}">{tag}</a>"""
|