huseinzol05 committed on
Commit
7963463
•
1 Parent(s): 958587e
Files changed (1) hide show
  1. app.py +147 -141
app.py CHANGED
@@ -1,152 +1,158 @@
1
- import gradio as gr
2
- import pandas as pd
3
- from css_html_js import custom_css
4
 
5
- TITLE = """<h1 align="center" id="space-title">🇲🇾 Malay LLM Leaderboard</h1>"""
6
 
7
- INTRODUCTION_TEXT = """
8
- πŸ“ The πŸ‡²πŸ‡Ύ Malay LLM Leaderboard aims to track, rank and evaluate open LLMs on Malay tasks. All notebooks at https://github.com/mesolitica/llm-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard/discussions with link to the notebook.
9
 
10
- ## Dataset
11
 
12
- 📈 We evaluate models based on 3 datasets,
13
 
14
- 1. BM-PT3 Paper 1, contains 54 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/BM-pt3
15
- - This test is for 15 years old Malaysia student, it is about reading comprehension and general knowledge for malay language.
16
- 2. Tatabahasa, contains 349 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/tatabahasabm.tripod.com
17
- - This test is general test for malay grammar.
18
- 3. Translated IndoNLI to Malay, tested on `test_expert` dataset, https://huggingface.co/datasets/mesolitica/translated-indonli
19
- - This test is general test to language reasoning.
20
- 4. HumanEval, https://github.com/openai/human-eval
21
- - This test is for programming language understanding.
22
- """
23
 
24
- close_source = [
25
- {
26
- 'model': 'gpt-4-1106-preview',
27
- 'BM-PT3 0-shot': 51.85185185185185,
28
- 'BM-PT3 1-shot': 66.66666666666666,
29
- 'BM-PT3 3-shots': 55.55555555555556,
30
- 'Tatabahasa 0-shot': 75.64469914040114,
31
- 'Tatabahasa 1-shot': 73.63896848137536,
32
- 'Tatabahasa 3-shots': 75.64469914040114,
33
- },
34
- {
35
- 'model': 'gpt-3.5-turbo-0613',
36
- 'BM-PT3 0-shot': 36.53846153846153,
37
- 'BM-PT3 1-shot': 28.846153846153843,
38
- 'BM-PT3 3-shots': 24.528301886792452,
39
- 'Tatabahasa 0-shot': 59.530791788856305,
40
- 'Tatabahasa 1-shot': 60.80691642651297,
41
- 'Tatabahasa 3-shots': 63.03724928366762,
42
- },
43
- {
44
- 'model': 'Antrophic Claude 2',
45
- 'Tatabahasa 0-shot': 61,
46
- 'Tatabahasa 3-shots': 57.8,
47
- },
48
- {
49
- 'model': 'Antrophic Claude 1',
50
- 'Tatabahasa 3-shots': 67,
51
- },
52
- ]
53
 
54
- open_source = [
55
- {
56
- 'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
57
- 'Tatabahasa 0-shot': 24.355300859598856,
58
- 'Tatabahasa 1-shot': 28.08022922636103,
59
- 'Tatabahasa 3-shots': 24.641833810888254,
60
- },
61
- {
62
- 'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
63
- 'BM-PT3 0-shot': 20.37037037037037,
64
- 'BM-PT3 1-shot': 20.37037037037037,
65
- 'BM-PT3 3-shots': 29.629629629629626,
66
- 'Tatabahasa 0-shot': 17.765042979942695,
67
- 'Tatabahasa 1-shot': 24.068767908309454,
68
- 'Tatabahasa 3-shots': 27.507163323782237,
69
- },
70
- {
71
- 'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
72
- 'BM-PT3 0-shot': 35.294117647058826,
73
- 'BM-PT3 1-shot': 21.153846153846153,
74
- 'BM-PT3 3-shots': 28.30188679245283,
75
- },
76
- {
77
- 'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
78
- 'BM-PT3 0-shot': 33.33333333333333,
79
- 'BM-PT3 1-shot': 20.37037037037037,
80
- 'BM-PT3 3-shots': 31.48148148148148,
81
- 'Tatabahasa 0-shot': 26.07449856733524,
82
- 'Tatabahasa 1-shot': 25.214899713467048,
83
- 'Tatabahasa 3-shots': 24.355300859598856,
84
- },
85
- {
86
- 'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
87
- 'BM-PT3 0-shot': 28.57142857142857,
88
- 'BM-PT3 1-shot': 12.244897959183673,
89
- 'BM-PT3 3-shots': 17.307692307692307,
90
- },
91
- {
92
- 'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
93
- 'Tatabahasa 0-shot': 28.939828080229223,
94
- 'Tatabahasa 1-shot': 34.38395415472779,
95
- 'Tatabahasa 3-shots': 32.95128939828081,
96
- },
97
- {
98
- 'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
99
- 'BM-PT3 0-shot': 20.37037037037037,
100
- 'BM-PT3 1-shot': 22.22222222222222,
101
- 'BM-PT3 3-shots': 33.33333333333333,
102
- 'Tatabahasa 0-shot': 21.48997134670487,
103
- 'Tatabahasa 1-shot': 28.939828080229223,
104
- 'Tatabahasa 3-shots': 24.641833810888254,
105
- },
106
- {
107
- 'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
108
- 'BM-PT3 0-shot': 16.666666666666664,
109
- 'BM-PT3 1-shot': 16.666666666666664,
110
- 'BM-PT3 3-shots': 25.925925925925924,
111
- 'Tatabahasa 0-shot': 18.624641833810887,
112
- 'Tatabahasa 1-shot': 24.355300859598856,
113
- 'Tatabahasa 3-shots': 28.653295128939828,
114
- },
115
- {
116
- 'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
117
- 'BM-PT3 0-shot': 35.18518518518518,
118
- 'BM-PT3 1-shot': 33.33333333333333,
119
- 'BM-PT3 3-shots': 37.03703703703704,
120
- 'Tatabahasa 0-shot': 55.014326647564474,
121
- 'Tatabahasa 1-shot': 42.693409742120345,
122
- 'Tatabahasa 3-shots': 33.33333333333333,
123
- },
124
- {
125
- 'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
126
- 'BM-PT3 0-shot': 20.37037037037037,
127
- 'BM-PT3 1-shot': 25.925925925925924,
128
- 'BM-PT3 3-shots': 31.48148148148148,
129
- 'Tatabahasa 0-shot': 21.776504297994272,
130
- 'Tatabahasa 1-shot': 21.776504297994272,
131
- 'Tatabahasa 3-shots': 24.641833810888254,
132
- },
133
- {
134
- 'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
135
- 'BM-PT3 0-shot': 20.37037037037037,
136
- 'BM-PT3 1-shot': 24.074074074074073,
137
- 'BM-PT3 3-shots': 33.33333333333333,
138
- 'Tatabahasa 0-shot': 25.787965616045845,
139
- 'Tatabahasa 1-shot': 27.507163323782237,
140
- 'Tatabahasa 3-shots': 26.07449856733524,
141
- }
142
- ]
143
 
144
- data = pd.DataFrame(close_source + open_source)
145
 
146
- demo = gr.Blocks(css=custom_css)
147
- with demo:
148
- gr.HTML(TITLE)
149
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
150
- gr.DataFrame(data, datatype = 'markdown')
 
 
151
 
 
 
 
 
152
  demo.launch()
 
1
+ # import gradio as gr
2
+ # import pandas as pd
3
+ # from css_html_js import custom_css
4
 
5
+ # TITLE = """<h1 align="center" id="space-title">🇲🇾 Malay LLM Leaderboard</h1>"""
6
 
7
+ # INTRODUCTION_TEXT = """
8
+ # πŸ“ The πŸ‡²πŸ‡Ύ Malay LLM Leaderboard aims to track, rank and evaluate open LLMs on Malay tasks. All notebooks at https://github.com/mesolitica/llm-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard/discussions with link to the notebook.
9
 
10
+ # ## Dataset
11
 
12
+ # 📈 We evaluate models based on 3 datasets,
13
 
14
+ # 1. BM-PT3 Paper 1, contains 54 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/BM-pt3
15
+ # - This test is for 15 years old Malaysia student, it is about reading comprehension and general knowledge for malay language.
16
+ # 2. Tatabahasa, contains 349 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/tatabahasabm.tripod.com
17
+ # - This test is general test for malay grammar.
18
+ # 3. Translated IndoNLI to Malay, tested on `test_expert` dataset, https://huggingface.co/datasets/mesolitica/translated-indonli
19
+ # - This test is general test to language reasoning.
20
+ # 4. HumanEval, https://github.com/openai/human-eval
21
+ # - This test is for programming language understanding.
22
+ # """
23
 
24
+ # close_source = [
25
+ # {
26
+ # 'model': 'gpt-4-1106-preview',
27
+ # 'BM-PT3 0-shot': 51.85185185185185,
28
+ # 'BM-PT3 1-shot': 66.66666666666666,
29
+ # 'BM-PT3 3-shots': 55.55555555555556,
30
+ # 'Tatabahasa 0-shot': 75.64469914040114,
31
+ # 'Tatabahasa 1-shot': 73.63896848137536,
32
+ # 'Tatabahasa 3-shots': 75.64469914040114,
33
+ # },
34
+ # {
35
+ # 'model': 'gpt-3.5-turbo-0613',
36
+ # 'BM-PT3 0-shot': 36.53846153846153,
37
+ # 'BM-PT3 1-shot': 28.846153846153843,
38
+ # 'BM-PT3 3-shots': 24.528301886792452,
39
+ # 'Tatabahasa 0-shot': 59.530791788856305,
40
+ # 'Tatabahasa 1-shot': 60.80691642651297,
41
+ # 'Tatabahasa 3-shots': 63.03724928366762,
42
+ # },
43
+ # {
44
+ # 'model': 'Antrophic Claude 2',
45
+ # 'Tatabahasa 0-shot': 61,
46
+ # 'Tatabahasa 3-shots': 57.8,
47
+ # },
48
+ # {
49
+ # 'model': 'Antrophic Claude 1',
50
+ # 'Tatabahasa 3-shots': 67,
51
+ # },
52
+ # ]
53
 
54
+ # open_source = [
55
+ # {
56
+ # 'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
57
+ # 'Tatabahasa 0-shot': 24.355300859598856,
58
+ # 'Tatabahasa 1-shot': 28.08022922636103,
59
+ # 'Tatabahasa 3-shots': 24.641833810888254,
60
+ # },
61
+ # {
62
+ # 'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
63
+ # 'BM-PT3 0-shot': 20.37037037037037,
64
+ # 'BM-PT3 1-shot': 20.37037037037037,
65
+ # 'BM-PT3 3-shots': 29.629629629629626,
66
+ # 'Tatabahasa 0-shot': 17.765042979942695,
67
+ # 'Tatabahasa 1-shot': 24.068767908309454,
68
+ # 'Tatabahasa 3-shots': 27.507163323782237,
69
+ # },
70
+ # {
71
+ # 'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
72
+ # 'BM-PT3 0-shot': 35.294117647058826,
73
+ # 'BM-PT3 1-shot': 21.153846153846153,
74
+ # 'BM-PT3 3-shots': 28.30188679245283,
75
+ # },
76
+ # {
77
+ # 'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
78
+ # 'BM-PT3 0-shot': 33.33333333333333,
79
+ # 'BM-PT3 1-shot': 20.37037037037037,
80
+ # 'BM-PT3 3-shots': 31.48148148148148,
81
+ # 'Tatabahasa 0-shot': 26.07449856733524,
82
+ # 'Tatabahasa 1-shot': 25.214899713467048,
83
+ # 'Tatabahasa 3-shots': 24.355300859598856,
84
+ # },
85
+ # {
86
+ # 'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
87
+ # 'BM-PT3 0-shot': 28.57142857142857,
88
+ # 'BM-PT3 1-shot': 12.244897959183673,
89
+ # 'BM-PT3 3-shots': 17.307692307692307,
90
+ # },
91
+ # {
92
+ # 'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
93
+ # 'Tatabahasa 0-shot': 28.939828080229223,
94
+ # 'Tatabahasa 1-shot': 34.38395415472779,
95
+ # 'Tatabahasa 3-shots': 32.95128939828081,
96
+ # },
97
+ # {
98
+ # 'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
99
+ # 'BM-PT3 0-shot': 20.37037037037037,
100
+ # 'BM-PT3 1-shot': 22.22222222222222,
101
+ # 'BM-PT3 3-shots': 33.33333333333333,
102
+ # 'Tatabahasa 0-shot': 21.48997134670487,
103
+ # 'Tatabahasa 1-shot': 28.939828080229223,
104
+ # 'Tatabahasa 3-shots': 24.641833810888254,
105
+ # },
106
+ # {
107
+ # 'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
108
+ # 'BM-PT3 0-shot': 16.666666666666664,
109
+ # 'BM-PT3 1-shot': 16.666666666666664,
110
+ # 'BM-PT3 3-shots': 25.925925925925924,
111
+ # 'Tatabahasa 0-shot': 18.624641833810887,
112
+ # 'Tatabahasa 1-shot': 24.355300859598856,
113
+ # 'Tatabahasa 3-shots': 28.653295128939828,
114
+ # },
115
+ # {
116
+ # 'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
117
+ # 'BM-PT3 0-shot': 35.18518518518518,
118
+ # 'BM-PT3 1-shot': 33.33333333333333,
119
+ # 'BM-PT3 3-shots': 37.03703703703704,
120
+ # 'Tatabahasa 0-shot': 55.014326647564474,
121
+ # 'Tatabahasa 1-shot': 42.693409742120345,
122
+ # 'Tatabahasa 3-shots': 33.33333333333333,
123
+ # },
124
+ # {
125
+ # 'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
126
+ # 'BM-PT3 0-shot': 20.37037037037037,
127
+ # 'BM-PT3 1-shot': 25.925925925925924,
128
+ # 'BM-PT3 3-shots': 31.48148148148148,
129
+ # 'Tatabahasa 0-shot': 21.776504297994272,
130
+ # 'Tatabahasa 1-shot': 21.776504297994272,
131
+ # 'Tatabahasa 3-shots': 24.641833810888254,
132
+ # },
133
+ # {
134
+ # 'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
135
+ # 'BM-PT3 0-shot': 20.37037037037037,
136
+ # 'BM-PT3 1-shot': 24.074074074074073,
137
+ # 'BM-PT3 3-shots': 33.33333333333333,
138
+ # 'Tatabahasa 0-shot': 25.787965616045845,
139
+ # 'Tatabahasa 1-shot': 27.507163323782237,
140
+ # 'Tatabahasa 3-shots': 26.07449856733524,
141
+ # }
142
+ # ]
143
 
144
+ # data = pd.DataFrame(close_source + open_source)
145
 
146
+ # demo = gr.Blocks(css=custom_css)
147
+ # with demo:
148
+ # gr.HTML(TITLE)
149
+ # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
150
+ # gr.DataFrame(data, datatype = 'markdown')
151
+
152
+ # demo.launch()
153
 
154
+ import gradio as gr
155
+ demo = gr.Blocks()
156
+ with demo:
157
+ gr.HTML('helo')
158
  demo.launch()