Spaces:
Sleeping
Sleeping
add app
Browse files- app.py +33 -0
- variables.py +99 -0
app.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from turtle import width
|
2 |
+
import streamlit as st
|
3 |
+
from pathlib import Path
|
4 |
+
from PIL import Image
|
5 |
+
|
6 |
+
from variables import MAPPING_LANG_CODE_TO_TEXT, PLOT_SIZES_PER_LANG
|
7 |
+
|
8 |
+
# Only need to set these here as we are adding controls outside of Hydralit, to customise a Hydralit run!
|
9 |
+
# Directory holding one box-plot image per language, plus a shared colorbar.
plot_dir = Path("data/boxplot_per_ds_per_lang")


def lang_id_from_plot_path(plot_path):
    """Return the language code embedded in a plot file name.

    Plot files are expected to be named ``<prefix>_<lang_id>.png``; the part
    of the stem after the first underscore is returned.

    Args:
        plot_path: Path (or path string) of a plot image.

    Returns:
        The language code, e.g. ``"indic-ta"`` for ``boxplot_indic-ta.png``.

    Raises:
        IndexError: If the file name contains no underscore.
    """
    return Path(plot_path).stem.split("_", 1)[1]


def anchor_for_title(title):
    """Return the in-page anchor used to link to a section titled *title*.

    Spaces become hyphens and the result is lower-cased, e.g.
    ``"Northern Sotho"`` -> ``"northern-sotho"``.
    """
    return title.replace(' ', '-').lower()


def main():
    """Render the page: a sidebar of links, then one titled plot per language."""
    st.set_page_config(page_title="Documents sizes", layout="wide")

    # Only real plot images are considered; the colorbar image carries no
    # language code and is skipped. Sorting keeps the sidebar and the page
    # body in the same order.
    plot_paths = sorted(
        path for path in plot_dir.glob("*.png") if path.name != "colorbar.png"
    )
    # Parse each file name once and reuse the result for both passes.
    plots = [(lang_id_from_plot_path(path), path) for path in plot_paths]

    with st.sidebar:
        st.write("Go to plot")
        for lang_id, _ in plots:
            title = MAPPING_LANG_CODE_TO_TEXT[lang_id]
            st.markdown(f"[{title}](#{anchor_for_title(title)})", unsafe_allow_html=True)

    for lang_id, plot_path in plots:
        st.title(MAPPING_LANG_CODE_TO_TEXT[lang_id])
        # The recorded width is a numeric string; it is scaled by 50 to get
        # the display width passed to Streamlit.
        display_width = int(round(float(PLOT_SIZES_PER_LANG[lang_id]['width'])*50))
        # Close the underlying file handle as soon as the image is rendered.
        with Image.open(plot_path) as image:
            st.image(image, width=display_width)


if __name__ == "__main__":
    main()
|
variables.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Maps a language code (as it appears in plot file names and as the key of
# PLOT_SIZES_PER_LANG) to the human-readable name shown in the UI.
MAPPING_LANG_CODE_TO_TEXT = {
    "ar": "Arabic",
    "ca": "Catalan",
    "code": "code",
    "en": "English",
    "es": "Spanish",
    "eu": "Basque",
    "fr": "French",
    "id": "Indonesian",
    "indic-as": "Assamese",
    "indic-bn": "Bengali",
    "indic-gu": "Gujarati",
    "indic-hi": "Hindi",
    "indic-kn": "Kannada",
    "indic-ml": "Malayalam",
    "indic-mr": "Marathi",
    "indic-ne": "Nepali",
    "indic-or": "Odia",
    "indic-pa": "Punjabi",
    "indic-ta": "Tamil",
    "indic-te": "Telugu",
    "indic-ur": "Urdu",
    "nigercongo-ak": "Akan",
    "nigercongo-bm": "Bambara",
    "nigercongo-fon": "Fon",
    "nigercongo-ig": "Igbo",
    "nigercongo-ki": "Kikuyu",
    "nigercongo-lg": "Luganda",
    "nigercongo-ln": "Lingala",
    "nigercongo-nso": "Northern Sotho",
    "nigercongo-ny": "Chi Chewa",
    "nigercongo-rn": "Kirundi",
    "nigercongo-rw": "Kinyarwanda",
    "nigercongo-sn": "Chi Shona",
    "nigercongo-st": "Sesotho",
    "nigercongo-sw": "Swahili",
    "nigercongo-tn": "Setswana",
    "nigercongo-ts": "Xitsonga",
    "nigercongo-tum": "Chi Tumbuka",
    "nigercongo-tw": "Twi",
    "nigercongo-wo": "Wolof",
    "nigercongo-xh": "Xhosa",
    "nigercongo-yo": "Yoruba",
    "nigercongo-zu": "Isi Zulu",
    "pt": "Portuguese",
    "vi": "Vietnamese",
    "zhs": "Simplified Chinese",
    "zht": "Traditional Chinese",
}
|
50 |
+
|
51 |
+
# Per-language plot metadata, keyed by the same language codes as
# MAPPING_LANG_CODE_TO_TEXT. Both fields are stored as strings and must be
# converted by the consumer:
#   "width"  - numeric string; in this table every value equals
#              2.0 + 0.4 * num_ds (up to float rounding noise, which is kept
#              verbatim, e.g. "7.6000000000000005").
#   "num_ds" - integer string; presumably the number of datasets shown in the
#              plot for that language (TODO confirm against the generator).
PLOT_SIZES_PER_LANG = {
    "indic-ta": {"width": "7.2", "num_ds": "13"},
    "en": {"width": "19.6", "num_ds": "44"},
    "es": {"width": "52.0", "num_ds": "125"},
    "indic-kn": {"width": "5.2", "num_ds": "8"},
    "zht": {"width": "2.8", "num_ds": "2"},
    "nigercongo-ki": {"width": "2.4", "num_ds": "1"},
    "indic-pa": {"width": "5.6", "num_ds": "9"},
    "vi": {"width": "10.0", "num_ds": "20"},
    "zhs": {"width": "8.8", "num_ds": "17"},
    "fr": {"width": "10.8", "num_ds": "22"},
    "eu": {"width": "7.6000000000000005", "num_ds": "14"},
    "indic-te": {"width": "6.800000000000001", "num_ds": "12"},
    "indic-hi": {"width": "10.0", "num_ds": "20"},
    "pt": {"width": "9.600000000000001", "num_ds": "19"},
    "indic-bn": {"width": "8.4", "num_ds": "16"},
    "indic-mr": {"width": "6.4", "num_ds": "11"},
    "indic-gu": {"width": "6.0", "num_ds": "10"},
    "ca": {"width": "10.0", "num_ds": "20"},
    "id": {"width": "12.4", "num_ds": "26"},
    "ar": {"width": "12.0", "num_ds": "25"},
    "indic-or": {"width": "5.6", "num_ds": "9"},
    "indic-ur": {"width": "7.2", "num_ds": "13"},
    "nigercongo-ig": {"width": "2.4", "num_ds": "1"},
    "indic-as": {"width": "4.4", "num_ds": "6"},
    "indic-ml": {"width": "6.800000000000001", "num_ds": "12"},
    "nigercongo-ny": {"width": "2.4", "num_ds": "1"},
    "nigercongo-tw": {"width": "2.4", "num_ds": "1"},
    "nigercongo-rn": {"width": "2.4", "num_ds": "1"},
    "nigercongo-st": {"width": "2.4", "num_ds": "1"},
    "nigercongo-yo": {"width": "2.4", "num_ds": "1"},
    "nigercongo-ak": {"width": "2.4", "num_ds": "1"},
    "nigercongo-lg": {"width": "2.4", "num_ds": "1"},
    "nigercongo-bm": {"width": "2.4", "num_ds": "1"},
    "nigercongo-wo": {"width": "2.4", "num_ds": "1"},
    "nigercongo-ln": {"width": "2.4", "num_ds": "1"},
    "nigercongo-nso": {"width": "2.4", "num_ds": "1"},
    "code": {"width": "2.8", "num_ds": "2"},
    "indic-ne": {"width": "2.4", "num_ds": "1"},
    "nigercongo-ts": {"width": "2.4", "num_ds": "1"},
    "nigercongo-zu": {"width": "2.4", "num_ds": "1"},
    "nigercongo-sn": {"width": "2.4", "num_ds": "1"},
    "nigercongo-sw": {"width": "2.4", "num_ds": "1"},
    "nigercongo-tum": {"width": "2.4", "num_ds": "1"},
    "nigercongo-tn": {"width": "2.4", "num_ds": "1"},
    "nigercongo-xh": {"width": "2.4", "num_ds": "1"},
    "nigercongo-rw": {"width": "2.4", "num_ds": "1"},
    "nigercongo-fon": {"width": "2.4", "num_ds": "1"},
}
|