Bram Vanroy committed
Commit 8e901a2 · Parent(s): 575d1cf
update display

Files changed:
- README.md +1 -2
- app.py +200 -116
- content.py +5 -3
- evals/models.json +46 -16
- requirements.txt +1 -0
README.md CHANGED
@@ -4,8 +4,7 @@ emoji: 🐨
 colorFrom: purple
 colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 4.8.0
 app_file: app.py
 pinned: false
 ---
-
app.py CHANGED
@@ -1,8 +1,9 @@
 import json
 from collections import defaultdict
-from dataclasses import dataclass, field
+from dataclasses import dataclass, field, fields
 from functools import cached_property
 from pathlib import Path
+from typing import Literal
 
 import numpy as np
 import pandas as pd
@@ -12,41 +13,159 @@ from pandas.io.formats.style import Styler
 
 from content import *
 
-ARC = "arc"
-HELLASWAG = "hellaswag"
-MMLU = "mmlu"
-TRUTHFULQA = "truthfulqa"
-BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
-
-…
-TRAIN_TYPE_COL = "Training type"
-TRAIN_TYPE_COL = "Training type"
-NUM_PARAMETERS = "Num. parameters"
+TASK_METRICS = {
+    "arc": "acc_norm",
+    "hellaswag": "acc_norm",
+    "mmlu": "acc_norm",
+    "truthfulqa": "mc2",
+}
+
+MODEL_TYPE_EMOJIS = {
+    "pretrained": "🟢",
+    "fine-tuned": "🔶",
+    "instruction-tuned": "⭕",
+    "RL-tuned": "🟦",
+}
 
 
 @dataclass
 class Result:
-…
+    model_name: str
+    short_name: str
+    model_type: Literal["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned"]
+    dutch_coverage: Literal["none", "pretrained", "fine-tuned"]
     num_parameters: int
-    arc: float = field(default=0.)
-…
+    arc: float = field(default=0.0)
+    average: float = field(default=0.0, init=False)
+    hellaswag: float = field(default=0.0)
+    mmlu: float = field(default=0.0)
+    truthfulqa: float = field(default=0.0)
+    num_parameters_kmb: str = field(init=False)
+
+    def __post_init__(self):
+        if self.model_type not in ["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned"]:
+            raise ValueError(
+                f"Model type {self.model_type} must be one of 'pretrained', 'fine-tuned', 'instruction-tuned', 'RL-tuned'"
+            )
+        if self.dutch_coverage not in ["none", "pretrained", "fine-tuned"]:
+            raise ValueError(f"Dutch coverage {self.dutch_coverage} must be one of 'none', 'pretrained', 'fine-tuned'")
+
+        field_names = {f.name for f in fields(self)}
+        for task_name in TASK_METRICS:
+            if task_name not in field_names:
+                raise ValueError(f"Task name {task_name} not found in Result class fields so cannot create DataFrame")
+
+        self.average = (self.arc + self.hellaswag + self.mmlu + self.truthfulqa) / 4
+        self.num_parameters_kmb = convert_number_to_kmb(self.num_parameters)
+
+
+@dataclass
+class ResultSet:
+    results: list[Result]
+    column_names: dict[str, str] = field(default_factory=dict)
+    column_types: dict[str, str] = field(default_factory=dict)
+
+    def __post_init__(self):
+        if not self.column_names:
+            # Order will be the order of the columns in the DataFrame
+            self.column_names = {
+                "short_name": "Model",
+                "model_type": "T",
+                "dutch_coverage": "🇳🇱",
+                "num_parameters": "Size",
+                "average": "Avg.",
+                "arc": "ARC (25-shot)",
+                "hellaswag": "HellaSwag (10-shot)️",
+                "mmlu": "MMLU (5-shot)",
+                "truthfulqa": "TruthfulQA (0-shot)",
+            }
+            self.column_types = {
+                "Model": "markdown",
+                "T": "str",
+                "🇳🇱": "str",
+                "Size": "str",
+                "Avg.": "number",
+                "ARC (25-shot)": "number",
+                "HellaSwag (10-shot)️": "number",
+                "MMLU (5-shot)": "number",
+                "TruthfulQA (0-shot)": "number",
+            }
+
+        for column_type in self.column_types:
+            if column_type not in set(self.column_names.values()):
+                raise ValueError(
+                    f"Column names specified in column_types must be values in column_names."
+                    f" {column_type} not found."
+                )
+
+        if "average" not in self.column_names:
+            raise ValueError("Column names must contain 'average' column name")
+
+        field_names = [f.name for f in fields(Result)]
+        for column_name in self.column_names:
+            if column_name not in field_names:
+                raise ValueError(f"Column name {column_name} not found in Result class so cannot create DataFrame")
+
+    @cached_property
+    def df(self) -> DataFrame:
+        data = [
+            {
+                col_name: getattr(result, attr)
+                for attr, col_name in self.column_names.items()
+            }
+            for result in self.results
+        ]
+
+        df = pd.DataFrame(data)
+        df = df.sort_values(by=self.column_names["average"], ascending=False)
+        return df
 
     @cached_property
-    def …
+    def styled_df(self) -> Styler:
+        data = [
+            {
+                col_name: (f"<a target='_blank' href='https://huggingface.co/{result.model_name}'"
+                           f" style='color: var(--link-text-color); text-decoration: underline;text-decoration-style:"
+                           f" dotted;'>{result.short_name}</a>")
+                if attr == "short_name"
+                else MODEL_TYPE_EMOJIS[result.model_type]
+                if attr == "model_type"
+                else getattr(result, attr)
+                for attr, col_name in self.column_names.items()
+            }
+            for result in self.results
+        ]
+
+        df = pd.DataFrame(data)
+        df = df.sort_values(by=self.column_names["average"], ascending=False)
+        number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
+        styler = df.style.format("{:.2f}", subset=number_cols)
+
+        def highlight_max(col):
+            return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
+
+        styler = styler.apply(highlight_max, axis=0, subset=number_cols)
+
+        num_params_col = self.column_names["num_parameters"]
+        styler = styler.format(convert_number_to_kmb, subset=num_params_col)
+
+        styler = styler.hide()
+        return styler
 
     @cached_property
-    def …
+    def latex_df(self) -> Styler:
+        number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
+        styler = self.df.style.format("{:.2f}", subset=number_cols)
+
+        def highlight_max(col):
+            return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
+
+        styler = styler.apply(highlight_max, axis=1, subset=number_cols)
+        num_params_col = self.column_names["num_parameters"]
+        styler = styler.format(convert_number_to_kmb, subset=num_params_col)
+        styler = styler.hide()
+        return styler
 
 
 def convert_number_to_kmb(number: int) -> str:
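For reference, a minimal usage sketch of the reworked `Result` dataclass above (not part of the commit; the four task scores below are made-up placeholders):

from app import Result  # assumes app.py is importable; importing it also builds the Gradio demo

res = Result(
    model_name="mistralai/Mistral-7B-v0.1",
    short_name="mistral-7b-v0.1",
    model_type="pretrained",   # validated in __post_init__
    dutch_coverage="none",     # validated in __post_init__
    num_parameters=7241732096,
    arc=0.43, hellaswag=0.58, mmlu=0.37, truthfulqa=0.45,  # placeholder scores
)
print(res.average)             # 0.4575: mean of the four task scores, set in __post_init__
print(res.num_parameters_kmb)  # a human-readable size such as "7.2B", via convert_number_to_kmb

Note that `average` and `num_parameters_kmb` are declared with `init=False`, so they are never passed to the constructor; `__post_init__` fills them in.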
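The diff shows only the signature of `convert_number_to_kmb` and its final `return str(number)` fallback; the rest of the body is lost in this view. A plausible sketch of such a helper, as an assumption rather than the committed code:

def convert_number_to_kmb(number: int) -> str:
    """Render a count like 125198592 as "125.2M" and 7241732096 as "7.2B"."""
    for threshold, suffix in ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K")):
        if number >= threshold:
            return f"{number / threshold:.1f}{suffix}"
    return str(number)  # the fallback that is visible in the diff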
@@ -65,121 +184,86 @@ def convert_number_to_kmb(number: int) -> str:
     return str(number)
 
 
-…
-def collect_results() -> dict[tuple[str, str], dict[str, float]]:
+def collect_results() -> ResultSet:
     """
     Collects results from the evals folder and returns a dictionary of results
    :return: a dictionary of results where the keys are typles of (model_name, language) and the values are
     dictionaries of the form {benchmark_name: performance_score}
     """
-…
+    evals_dir = Path(__file__).parent.joinpath("evals")
+    pf_overview = evals_dir.joinpath("models.json")
+    if not pf_overview.exists():
+        raise ValueError(
+            f"Overview file {pf_overview} not found. Make sure to generate it first with `generate_overview_json.py`."
+        )
+
+    model_info = json.loads(pf_overview.read_text(encoding="utf-8"))
+    model_results = {}
+    for pfin in evals_dir.rglob("*.json"):
         data = json.loads(pfin.read_text(encoding="utf-8"))
-        if "results" not in data or "config" not in data:
-            continue
-        results = data["results"]
-        config = data["config"]
-        if "model_args" not in config:
-            continue
 
-…
-        pretrained = [x for x in model_args if x.startswith("pretrained=")]
-        if len(pretrained) != 1:
+        if "results" not in data:
             continue
-…
-        performance_dict[(pretrained, lang)][task] = p
-
-    return dict(performance_dict)
-
-
-def build_performance_df(performance_dict: dict[tuple[str, str], dict[str, float]]) -> DataFrame:
-    """
-    Builds a dataframe from the performance dictionary
-    :param performance_dict: a dictionary of results where the keys are typles of (model_name, language) and the values are
-    dictionaries of the form {benchmark_name: performance_score}
-    :return: a pd.DataFrame that has as rows the model names and as columns the benchmarks
-    """
-    data = []
-    dutch_training_info = json.loads(Path(__file__).parent.joinpath("evals/dutch_models.json").read_text(encoding="utf-8"))
-
-    for (pretrained, lang), perfs in performance_dict.items():
-        arc_perf = perfs.get(ARC, 0.0)
-        hellaswag_perf = perfs.get(HELLASWAG, 0.0)
-        mmlu_perf = perfs.get(MMLU, 0.0)
-        truthfulqa_perf = perfs.get(TRUTHFULQA, 0.0)
-        training_type = dutch_training_info.get(pretrained, "NA")
-
-        avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1)
-        row = [pretrained, training_type, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf]
-        data.append(row)
-
-    df = pd.DataFrame.from_records(data, columns=COLS)
-    df = df.sort_values(by=[AVERAGE_COL], ascending=False)
-
-    return df
-
-
-def style_df(df: DataFrame) -> Styler:
-    """
-    Styles the dataframe by rounding to two decimals and putting the max value in bold per column
-    :param df: the dataframe to style
-    :return: the Styler
-    """
-    styler = df.style.format("{:.2f}", subset=df.columns[2:])
-
-…
-
-    styler = styler.hide()
-    return styler
+        task_results = data["results"]
+        short_name = pfin.stem.split("_", 2)[2].lower()
+        if short_name not in model_results:
+            model_results[short_name] = {
+                "short_name": short_name,
+                "model_name": model_info[short_name]["model_name"],
+                "model_type": model_info[short_name]["model_type"],
+                "dutch_coverage": model_info[short_name]["dutch_coverage"],
+                "num_parameters": model_info[short_name]["num_parameters"],
+            }
+
+        for task_name, task_result in task_results.items():
+            task_name = task_name.rsplit("_", 1)[0]
+            metric = TASK_METRICS[task_name]
+            model_results[short_name][task_name] = task_result[metric]
+
+    model_results = ResultSet([Result(**res) for short_name, res in model_results.items()])
+
+    return model_results
 
 
-MODEL_COL = "Model"
-AVERAGE_COL = "Average"
-ARC_COL = "ARC (25-shot)"
-HELLASWAG_COL = "HellaSwag (10-shot)️"
-MMLU_COL = "MMLU (5-shot)"
-TRUTHFULQA_COL = "TruthfulQA (0-shot)"
-TRAIN_TYPE_COL = "Training type"
-TRAIN_TYPE_COL = "Training type"
-NUM_PARAMETERS = "Num. parameters"
-
-COLS = [MODEL_COL, TRAIN_TYPE_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL]
-TYPES = ["str", "number", "number", "number", "number", "number"]
-
-results = collect_results()
-original_df = build_performance_df(results)
-styled_df = style_df(original_df)
 with gr.Blocks() as demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRO_TEXT)
 
-    gr.Markdown("## Leaderboard\nOnly representative for the Dutch version (`*_nl`) of the benchmarks!")
+    gr.Markdown(f"## Leaderboard\nOnly representative for the Dutch version (`*_nl`) of the benchmarks!")
+
+    results = collect_results()
+
     gr.components.Dataframe(
-…
-        headers=…
-        datatype=…
+        results.styled_df,
+        headers=list(results.df.columns),
+        datatype=[results.column_types[col] for col in results.df.columns],  # To ensure same order as headers
+        interactive=False,
         elem_id="leaderboard-table",
     )
-…
+
+    with gr.Row():
+        with gr.Column():
+            modeltypes_str = "<br>".join([f"- {emoji}: {modeltype}" for modeltype, emoji in MODEL_TYPE_EMOJIS.items()])
+            gr.Markdown(f"Model types:<br>{modeltypes_str}")
+
+        with gr.Column():
+            gr.Markdown(
+                f"Language coverage ({results.column_names['dutch_coverage']}):"
+                f"<br>- `none`: no explicit/deliverate Dutch coverage,"
+                f"<br>- `pretrained`: pretrained on Dutch data,"
+                f"<br>- `fine-tuned`: fine-tuned on Dutch data"
+            )
+
+        with gr.Column():
+            metrics_str = "<br>".join([f"- {task}: `{metric}`" for task, metric in TASK_METRICS.items()])
+            gr.Markdown(f"Reported metrics:<br>{metrics_str}")
 
     gr.Markdown("## LaTeX")
-    gr.Code(…
+    gr.Code(results.latex_df.to_latex(convert_css=True))
 
     gr.Markdown(CREDIT, elem_classes="markdown-text")
     gr.Markdown(CITATION, elem_classes="markdown-text")
 
-if __name__ == '__main__':
-    demo.launch()
+
+if __name__ == "__main__":
+    demo.launch()
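`collect_results` above derives a model's short name from the eval file name via `pfin.stem.split("_", 2)[2]` and recovers the task key via `rsplit("_", 1)`. This implies eval files named like `<task>_<lang>_<model>.json` (an inference from the code, not stated in the commit); `models.json` itself is skipped by the `"results" not in data` guard. A small illustration with a hypothetical file name:

from pathlib import Path

pfin = Path("evals/arc/arc_nl_llama-2-7b-hf.json")  # hypothetical eval result file

short_name = pfin.stem.split("_", 2)[2].lower()
print(short_name)  # "llama-2-7b-hf", used as the key into evals/models.json

task_name = "arc_nl".rsplit("_", 1)[0]
print(task_name)   # "arc", used as the key into TASK_METRICS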
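The `highlight_max` helper used by `styled_df` and `latex_df` writes a CSS declaration that `Styler.to_latex(convert_css=True)` converts to `\bfseries`, which is what the `gr.Code` block above ends up displaying. Note that `styled_df` applies it with `axis=0` (bold maximum per column) while `latex_df` uses `axis=1` (per row). A self-contained illustration of the pattern on toy data:

import numpy as np
import pandas as pd

df = pd.DataFrame({"ARC (25-shot)": [0.31, 0.43], "MMLU (5-shot)": [0.29, 0.37]})  # toy scores

def highlight_max(col):
    # CSS for the per-column maximum; None leaves the other cells unstyled
    return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)

styler = df.style.format("{:.2f}").apply(highlight_max, axis=0).hide()
print(styler.to_latex(convert_css=True))  # bold cells come out as \bfseries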
content.py CHANGED
@@ -1,8 +1,10 @@
-TITLE = '<h1 align="center" id="space-title">Open …
+TITLE = '<h1 align="center" id="space-title">Open Dutch LLM Evaluation Leaderboard</h1>'
 
 INTRO_TEXT = f"""
 ## About
 
+This is a leaderboard for Dutch benchmarks for large language models.
+
 This is a fork of the [Open Multilingual LLM Evaluation Leaderboard](https://huggingface.co/spaces/uonlp/open_multilingual_llm_leaderboard), but restricted to only Dutch models and augmented with additional model results.
 We test the models on the following benchmarks **for the Dutch version only!!**, which have been translated into Dutch automatically by the original authors of the Open Multilingual LLM Evaluation Leaderboard with `gpt-35-turbo`.
 
@@ -13,7 +15,7 @@ We test the models on the following benchmarks **for the Dutch version only!!**,
 
 I do not maintain those datasets, I only run benchmarks and add the results to this space. For questions regarding the test sets or running them yourself, see [the original Github repository](https://github.com/laiviet/lm-evaluation-harness).
 
-…
+Disclaimer: I am aware that benchmarking models on *translated* data is not ideal. However, for Dutch there are no other options for generative models at the moment. If you have any suggestions for other Dutch benchmarks, please let me know so I can add them!
 """
 
 CREDIT = f"""
@@ -47,4 +49,4 @@ If you use the multilingual benchmarks, please cite the following paper:
 year={{2023}}
 }}
 ```
-"""
+"""
evals/models.json CHANGED
@@ -3,90 +3,120 @@
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt-neo-1.3B-dutch",
     "num_parameters": 1315575808,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "gpt-neo-125m-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt-neo-125M-dutch",
     "num_parameters": 125198592,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "gpt2-large-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt2-large-dutch",
     "num_parameters": 774030080,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "gpt2-medium-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt2-medium-dutch",
     "num_parameters": 354823168,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "llama-2-13b-chat-dutch": {
     "compute_dtype": "bfloat16",
    "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "instruction-tuned",
+    "dutch_coverage": "fine-tuned"
   },
   "llama-2-13b-chat-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-13b-chat-hf",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "instruction-tuned",
+    "dutch_coverage": "none"
   },
   "llama-2-13b-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-13b-hf",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "none"
   },
   "llama-2-7b-chat-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-7b-chat-hf",
     "num_parameters": 6738415616,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "instruction-tuned",
+    "dutch_coverage": "none"
   },
   "llama-2-7b-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-7b-hf",
     "num_parameters": 6738415616,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "none"
   },
-  "llama2-13b-ft-…
+  "llama2-13b-ft-mc4_nl_cleaned_tiny": {
     "compute_dtype": "bfloat16",
     "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "fine-tuned",
+    "dutch_coverage": "fine-tuned"
   },
   "mistral-7b-v0.1": {
     "compute_dtype": "bfloat16",
     "model_name": "mistralai/Mistral-7B-v0.1",
     "num_parameters": 7241732096,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "none"
  },
   "neural-chat-7b-v3-1": {
     "compute_dtype": "bfloat16",
     "model_name": "Intel/neural-chat-7b-v3-1",
     "num_parameters": 7241732096,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "RL-tuned",
+    "dutch_coverage": "none"
   },
   "orca-2-13b": {
     "compute_dtype": "bfloat16",
     "model_name": "microsoft/Orca-2-13b",
     "num_parameters": 13015895040,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "fine-tuned",
+    "dutch_coverage": "none"
   },
   "orca-2-7b": {
     "compute_dtype": "bfloat16",
     "model_name": "microsoft/Orca-2-7b",
     "num_parameters": 6738440192,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "fine-tuned",
+    "dutch_coverage": "none"
   },
   "zephyr-7b-beta": {
     "compute_dtype": "bfloat16",
     "model_name": "HuggingFaceH4/zephyr-7b-beta",
     "num_parameters": 7241732096,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "RL-tuned",
+    "dutch_coverage": "none"
   }
 }
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gradio==4.8.0
|