Spaces:

mesolitica
/

malay-llm-leaderboard

Running

App Files Files Community

huseinzol05 committed on Nov 22, 2023

Commit

e22d664

•

1 Parent(s): 869a523

added more scores

Browse files

Files changed (1) hide show

app.py +25 -10

app.py CHANGED Viewed

@@ -2,25 +2,27 @@ import gradio as gr
 import pandas as pd
 from css_html_js import custom_css
-demo = gr.Blocks(css=custom_css)
 TITLE = """<h1 align="center" id="space-title">🇲🇾 Malay LLM Leaderboard</h1>"""
 INTRODUCTION_TEXT = """
-📐 The 🇲🇾 Malay LLM Leaderboard aims to track, rank and evaluate open LLMs on Malay tasks.\n
-🤗 All notebooks at https://github.com/mesolitica/llm-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard/discussions with link to the notebook.
 ## Dataset
 📈 We evaluate models based on 3 datasets,
 1. BM-PT3 Paper 1, contains 54 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/BM-pt3
 2. Tatabahasa, contains 349 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/tatabahasabm.tripod.com
 3. Translated IndoNLI to Malay, tested on `test_expert` dataset, https://huggingface.co/datasets/mesolitica/translated-indonli
 """
-data = [
-    {
         'model': 'gpt-4-1106-preview',
         'BM-PT3 0-shot': 51.85185185185185,
         'BM-PT3 1-shot': 66.66666666666666,
@@ -38,6 +40,18 @@ data = [
         'Tatabahasa 1-shot': 60.80691642651297,
         'Tatabahasa 3-shots': 63.03724928366762,
     },
     {
         'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
         'Tatabahasa 0-shot': 24.355300859598856,
@@ -103,9 +117,9 @@ data = [
         'BM-PT3 0-shot': 35.18518518518518,
         'BM-PT3 1-shot': 33.33333333333333,
         'BM-PT3 3-shots': 37.03703703703704,
-        'Tatabahasa 0-shot': 45.845272206303726,
-        'Tatabahasa 1-shot': 37.249283667621775,
-        'Tatabahasa 3-shots': 34.097421203438394,
     },
     {
         'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
@@ -127,8 +141,9 @@ data = [
     }
 ]
-data = pd.DataFrame(data)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

 import pandas as pd
 from css_html_js import custom_css
 TITLE = """<h1 align="center" id="space-title">🇲🇾 Malay LLM Leaderboard</h1>"""
 INTRODUCTION_TEXT = """
+📐 The 🇲🇾 Malay LLM Leaderboard aims to track, rank and evaluate open LLMs on Malay tasks. All notebooks at https://github.com/mesolitica/llm-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard/discussions with link to the notebook.
 ## Dataset
 📈 We evaluate models based on 3 datasets,
 1. BM-PT3 Paper 1, contains 54 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/BM-pt3
+- This test is for 15 years old Malaysia student, it is about reading comprehension and general knowledge for malay language.
 2. Tatabahasa, contains 349 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/tatabahasabm.tripod.com
+- This test is general test for malay grammar.
 3. Translated IndoNLI to Malay, tested on `test_expert` dataset, https://huggingface.co/datasets/mesolitica/translated-indonli
+- This test is general test to language reasoning.
+4. HumanEval, https://github.com/openai/human-eval
+- This test is for programming language understanding.
 """
+close_source = [
+        {
         'model': 'gpt-4-1106-preview',
         'BM-PT3 0-shot': 51.85185185185185,
         'BM-PT3 1-shot': 66.66666666666666,
         'Tatabahasa 1-shot': 60.80691642651297,
         'Tatabahasa 3-shots': 63.03724928366762,
     },
+    {
+        'model': 'Antrophic Claude 2',
+        'Tatabahasa 0-shot': 61,
+        'Tatabahasa 3-shots': 57.8,
+    },
+    {
+        'model': 'Antrophic Claude 1',
+        'Tatabahasa 3-shots': 67,
+    },
+]
+open_source = [
     {
         'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
         'Tatabahasa 0-shot': 24.355300859598856,
         'BM-PT3 0-shot': 35.18518518518518,
         'BM-PT3 1-shot': 33.33333333333333,
         'BM-PT3 3-shots': 37.03703703703704,
+        'Tatabahasa 0-shot': 48.13753581661891,
+        'Tatabahasa 1-shot': 38.96848137535817,
+        'Tatabahasa 3-shots': 33.2378223495702,
     },
     {
         'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
     }
 ]
+data = pd.DataFrame(close_source + open_source)
+demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")