SaulLu committed on
Commit
cfcb110
1 Parent(s): ac870a5
Files changed (2) hide show
  1. app.py +33 -0
  2. variables.py +99 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit page that shows one box plot image per language.

The sidebar lists a link per language; the main area shows, for each
language, a title followed by its plot image scaled to a width taken from
``PLOT_SIZES_PER_LANG``.
"""
# NOTE(review): `width` is never used in this script and looks like an
# accidental IDE auto-import. `turtle` pulls in tkinter, so this line can fail
# on hosts without a Tk installation -- confirm and remove.
from turtle import width
import streamlit as st
from pathlib import Path
from PIL import Image

from variables import MAPPING_LANG_CODE_TO_TEXT, PLOT_SIZES_PER_LANG

# Configure the page before any other Streamlit element is emitted.
st.set_page_config(page_title="Documents sizes", layout="wide")

# File in the plot directory that is not a per-language plot.
COLORBAR_FILE_NAME = "colorbar.png"
# Factor applied to the stored "width" value to obtain the displayed pixel width.
PIXELS_PER_WIDTH_UNIT = 50


def _lang_id_from_plot(plot_path):
    """Return the language code taken from a plot file name.

    The code is the second underscore-separated field of the file name with
    the extension removed. Using ``Path.stem`` (instead of slicing off
    ``len(".png")`` characters from the second field) keeps the result correct
    when the name contains further underscores.

    Raises:
        IndexError: if the file name contains no underscore.
    """
    return plot_path.stem.split("_")[1]


def _collect_language_plots(directory):
    """Return ``(lang_id, path)`` pairs for the language plots in *directory*.

    Entries are visited in sorted path order. The colorbar image and anything
    that is not a ``.png`` file (e.g. hidden OS files) are skipped so that a
    stray entry cannot break the page.
    """
    return [
        (_lang_id_from_plot(plot_path), plot_path)
        for plot_path in sorted(directory.iterdir())
        if plot_path.name != COLORBAR_FILE_NAME
        and plot_path.suffix.lower() == ".png"
    ]


plot_dir = Path("data/boxplot_per_ds_per_lang")
plot_paths = sorted(plot_dir.iterdir())

# Parse the directory once; both the sidebar and the main area use the result.
language_plots = _collect_language_plots(plot_dir)

with st.sidebar:
    st.write("Go to plot")
    for lang_id, _ in language_plots:
        title = MAPPING_LANG_CODE_TO_TEXT[lang_id]
        # Assumes the anchor Streamlit generates for each title below is the
        # lower-cased title with spaces replaced by hyphens -- TODO confirm
        # for the Streamlit version in use.
        st.markdown(f"[{title}](#{title.replace(' ', '-').lower()})", unsafe_allow_html=True)

for lang_id, plot_path in language_plots:
    st.title(MAPPING_LANG_CODE_TO_TEXT[lang_id])
    # The stored width is a string; convert it and scale to whole pixels.
    width_px = int(round(float(PLOT_SIZES_PER_LANG[lang_id]["width"]) * PIXELS_PER_WIDTH_UNIT))
    # Close the image file handle as soon as Streamlit has consumed the image.
    with Image.open(plot_path) as image:
        st.image(image, width=width_px)
variables.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Display names for the keys that share the "indic-" prefix, keyed by the
# part of the code that follows the prefix.
_INDIC_NAMES = (
    ("as", "Assamese"),
    ("bn", "Bengali"),
    ("gu", "Gujarati"),
    ("hi", "Hindi"),
    ("kn", "Kannada"),
    ("ml", "Malayalam"),
    ("mr", "Marathi"),
    ("ne", "Nepali"),
    ("or", "Odia"),
    ("pa", "Punjabi"),
    ("ta", "Tamil"),
    ("te", "Telugu"),
    ("ur", "Urdu"),
)

# Display names for the keys that share the "nigercongo-" prefix.
_NIGERCONGO_NAMES = (
    ("ak", "Akan"),
    ("bm", "Bambara"),
    ("fon", "Fon"),
    ("ig", "Igbo"),
    ("ki", "Kikuyu"),
    ("lg", "Luganda"),
    ("ln", "Lingala"),
    ("nso", "Northern Sotho"),
    ("ny", "Chi Chewa"),
    ("rn", "Kirundi"),
    ("rw", "Kinyarwanda"),
    ("sn", "Chi Shona"),
    ("st", "Sesotho"),
    ("sw", "Swahili"),
    ("tn", "Setswana"),
    ("ts", "Xitsonga"),
    ("tum", "Chi Tumbuka"),
    ("tw", "Twi"),
    ("wo", "Wolof"),
    ("xh", "Xhosa"),
    ("yo", "Yoruba"),
    ("zu", "Isi Zulu"),
)

# Maps a language code to the human-readable name shown for it.
# Insertion order is kept identical to the flat literal this replaces:
# unprefixed codes "ar".."id", then "indic-*", then "nigercongo-*", then the
# remaining unprefixed codes.
MAPPING_LANG_CODE_TO_TEXT = {
    "ar": "Arabic",
    "ca": "Catalan",
    "code": "code",
    "en": "English",
    "es": "Spanish",
    "eu": "Basque",
    "fr": "French",
    "id": "Indonesian",
    **{f"indic-{suffix}": name for suffix, name in _INDIC_NAMES},
    **{f"nigercongo-{suffix}": name for suffix, name in _NIGERCONGO_NAMES},
    "pt": "Portuguese",
    "vi": "Vietnamese",
    "zhs": "Simplified Chinese",
    "zht": "Traditional Chinese",
}
50
+
# One row per language code: (code, "width" value, "num_ds" value).
# Both values are kept as strings, exactly as stored in the mapping below.
# NOTE(review): "num_ds" presumably counts the datasets in the plot and
# "width" the figure width -- confirm against whatever generated this table.
_PLOT_SIZE_ROWS = (
    ("indic-ta", "7.2", "13"),
    ("en", "19.6", "44"),
    ("es", "52.0", "125"),
    ("indic-kn", "5.2", "8"),
    ("zht", "2.8", "2"),
    ("nigercongo-ki", "2.4", "1"),
    ("indic-pa", "5.6", "9"),
    ("vi", "10.0", "20"),
    ("zhs", "8.8", "17"),
    ("fr", "10.8", "22"),
    ("eu", "7.6000000000000005", "14"),
    ("indic-te", "6.800000000000001", "12"),
    ("indic-hi", "10.0", "20"),
    ("pt", "9.600000000000001", "19"),
    ("indic-bn", "8.4", "16"),
    ("indic-mr", "6.4", "11"),
    ("indic-gu", "6.0", "10"),
    ("ca", "10.0", "20"),
    ("id", "12.4", "26"),
    ("ar", "12.0", "25"),
    ("indic-or", "5.6", "9"),
    ("indic-ur", "7.2", "13"),
    ("nigercongo-ig", "2.4", "1"),
    ("indic-as", "4.4", "6"),
    ("indic-ml", "6.800000000000001", "12"),
    ("nigercongo-ny", "2.4", "1"),
    ("nigercongo-tw", "2.4", "1"),
    ("nigercongo-rn", "2.4", "1"),
    ("nigercongo-st", "2.4", "1"),
    ("nigercongo-yo", "2.4", "1"),
    ("nigercongo-ak", "2.4", "1"),
    ("nigercongo-lg", "2.4", "1"),
    ("nigercongo-bm", "2.4", "1"),
    ("nigercongo-wo", "2.4", "1"),
    ("nigercongo-ln", "2.4", "1"),
    ("nigercongo-nso", "2.4", "1"),
    ("code", "2.8", "2"),
    ("indic-ne", "2.4", "1"),
    ("nigercongo-ts", "2.4", "1"),
    ("nigercongo-zu", "2.4", "1"),
    ("nigercongo-sn", "2.4", "1"),
    ("nigercongo-sw", "2.4", "1"),
    ("nigercongo-tum", "2.4", "1"),
    ("nigercongo-tn", "2.4", "1"),
    ("nigercongo-xh", "2.4", "1"),
    ("nigercongo-rw", "2.4", "1"),
    ("nigercongo-fon", "2.4", "1"),
)

# Maps a language code to a dict with the string entries "width" and "num_ds".
# Each language gets its own inner dict, in the row order above.
PLOT_SIZES_PER_LANG = {
    lang_code: {"width": plot_width, "num_ds": dataset_count}
    for lang_code, plot_width, dataset_count in _PLOT_SIZE_ROWS
}