Bram Vanroy committed
Commit 8e901a2
Parent: 575d1cf

update display

Files changed (5):
  1. README.md +1 -2
  2. app.py +200 -116
  3. content.py +5 -3
  4. evals/models.json +46 -16
  5. requirements.txt +1 -0
README.md CHANGED
@@ -4,8 +4,7 @@ emoji: 🐨
 colorFrom: purple
 colorTo: blue
 sdk: gradio
-sdk_version: 3.33.1
+sdk_version: 4.8.0
 app_file: app.py
 pinned: false
 ---
-
app.py CHANGED
@@ -1,8 +1,9 @@
 import json
 from collections import defaultdict
-from dataclasses import dataclass, field
+from dataclasses import dataclass, field, fields
 from functools import cached_property
 from pathlib import Path
+from typing import Literal
 
 import numpy as np
 import pandas as pd
@@ -12,41 +13,159 @@ from pandas.io.formats.style import Styler
 
 from content import *
 
-ARC = "arc"
-HELLASWAG = "hellaswag"
-MMLU = "mmlu"
-TRUTHFULQA = "truthfulqa"
-BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
 
-METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
+TASK_METRICS = {
+    "arc": "acc_norm",
+    "hellaswag": "acc_norm",
+    "mmlu": "acc_norm",
+    "truthfulqa": "mc2",
+}
 
-MODEL_COL = "Model"
-AVERAGE_COL = "Average"
-ARC_COL = "ARC (25-shot)"
-HELLASWAG_COL = "HellaSwag (10-shot)️"
-MMLU_COL = "MMLU (5-shot)"
-TRUTHFULQA_COL = "TruthfulQA (0-shot)"
-TRAIN_TYPE_COL = "Training type"
-TRAIN_TYPE_COL = "Training type"
-NUM_PARAMETERS = "Num. parameters"
+MODEL_TYPE_EMOJIS = {
+    "pretrained": "🟢",
+    "fine-tuned": "🔶",
+    "instruction-tuned": "⭕",
+    "RL-tuned": "🟦",
+}
 
 
 @dataclass
 class Result:
-    train_type: str
+    model_name: str
+    short_name: str
+    model_type: Literal["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned"]
+    dutch_coverage: Literal["none", "pretrained", "fine-tuned"]
     num_parameters: int
-    arc: float = field(default=0.)
-    hellaswag: float = field(default=0.)
-    mmlu: float = field(default=0.)
-    truthfulqa: float = field(default=0.)
+    arc: float = field(default=0.0)
+    average: float = field(default=0.0, init=False)
+    hellaswag: float = field(default=0.0)
+    mmlu: float = field(default=0.0)
+    truthfulqa: float = field(default=0.0)
+    num_parameters_kmb: str = field(init=False)
+
+    def __post_init__(self):
+        if self.model_type not in ["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned"]:
+            raise ValueError(
+                f"Model type {self.model_type} must be one of 'pretrained', 'fine-tuned', 'instruction-tuned', 'RL-tuned'"
+            )
+        if self.dutch_coverage not in ["none", "pretrained", "fine-tuned"]:
+            raise ValueError(f"Dutch coverage {self.dutch_coverage} must be one of 'none', 'pretrained', 'fine-tuned'")
+
+        field_names = {f.name for f in fields(self)}
+        for task_name in TASK_METRICS:
+            if task_name not in field_names:
+                raise ValueError(f"Task name {task_name} not found in Result class fields so cannot create DataFrame")
+
+        self.average = (self.arc + self.hellaswag + self.mmlu + self.truthfulqa) / 4
+        self.num_parameters_kmb = convert_number_to_kmb(self.num_parameters)
+
+
+@dataclass
+class ResultSet:
+    results: list[Result]
+    column_names: dict[str, str] = field(default_factory=dict)
+    column_types: dict[str, str] = field(default_factory=dict)
+
+    def __post_init__(self):
+        if not self.column_names:
+            # Order will be the order of the columns in the DataFrame
+            self.column_names = {
+                "short_name": "Model",
+                "model_type": "T",
+                "dutch_coverage": "🇳🇱",
+                "num_parameters": "Size",
+                "average": "Avg.",
+                "arc": "ARC (25-shot)",
+                "hellaswag": "HellaSwag (10-shot)️",
+                "mmlu": "MMLU (5-shot)",
+                "truthfulqa": "TruthfulQA (0-shot)",
+            }
+            self.column_types = {
+                "Model": "markdown",
+                "T": "str",
+                "🇳🇱": "str",
+                "Size": "str",
+                "Avg.": "number",
+                "ARC (25-shot)": "number",
+                "HellaSwag (10-shot)️": "number",
+                "MMLU (5-shot)": "number",
+                "TruthfulQA (0-shot)": "number",
+            }
+
+        for column_type in self.column_types:
+            if column_type not in set(self.column_names.values()):
+                raise ValueError(
+                    f"Column names specified in column_types must be values in column_names."
+                    f" {column_type} not found."
+                )
+
+        if "average" not in self.column_names:
+            raise ValueError("Column names must contain 'average' column name")
+
+        field_names = [f.name for f in fields(Result)]
+        for column_name in self.column_names:
+            if column_name not in field_names:
+                raise ValueError(f"Column name {column_name} not found in Result class so cannot create DataFrame")
+
+    @cached_property
+    def df(self) -> DataFrame:
+        data = [
+            {
+                col_name: getattr(result, attr)
+                for attr, col_name in self.column_names.items()
+            }
+            for result in self.results
+        ]
+
+        df = pd.DataFrame(data)
+        df = df.sort_values(by=self.column_names["average"], ascending=False)
+        return df
 
     @cached_property
-    def num_parameters_kmb(self) -> str:
-        return convert_number_to_kmb(self.num_parameters)
+    def styled_df(self) -> Styler:
+        data = [
+            {
+                col_name: (f"<a target='_blank' href='https://huggingface.co/{result.model_name}'"
+                           f" style='color: var(--link-text-color); text-decoration: underline;text-decoration-style:"
+                           f" dotted;'>{result.short_name}</a>")
+                if attr == "short_name"
+                else MODEL_TYPE_EMOJIS[result.model_type]
+                if attr == "model_type"
+                else getattr(result, attr)
+                for attr, col_name in self.column_names.items()
+            }
+            for result in self.results
+        ]
+
+        df = pd.DataFrame(data)
+        df = df.sort_values(by=self.column_names["average"], ascending=False)
+        number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
+        styler = df.style.format("{:.2f}", subset=number_cols)
+
+        def highlight_max(col):
+            return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
+
+        styler = styler.apply(highlight_max, axis=0, subset=number_cols)
+
+        num_params_col = self.column_names["num_parameters"]
+        styler = styler.format(convert_number_to_kmb, subset=num_params_col)
+
+        styler = styler.hide()
+        return styler
 
     @cached_property
-    def average(self) -> float:
-        return self.arc + self.hellaswag + self.mmlu + self.truthfulqa / 4
+    def latex_df(self) -> Styler:
+        number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
+        styler = self.df.style.format("{:.2f}", subset=number_cols)
+
+        def highlight_max(col):
+            return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
+
+        styler = styler.apply(highlight_max, axis=1, subset=number_cols)
+        num_params_col = self.column_names["num_parameters"]
+        styler = styler.format(convert_number_to_kmb, subset=num_params_col)
+        styler = styler.hide()
+        return styler
 
 
 def convert_number_to_kmb(number: int) -> str:
@@ -65,121 +184,86 @@ def convert_number_to_kmb(number: int) -> str:
     return str(number)
 
 
-
-def collect_results() -> dict[tuple[str, str], dict[str, float]]:
+def collect_results() -> ResultSet:
     """
     Collects results from the evals folder and returns a dictionary of results
    :return: a dictionary of results where the keys are typles of (model_name, language) and the values are
     dictionaries of the form {benchmark_name: performance_score}
     """
-    performance_dict = defaultdict(dict)
-    for pfin in Path("evals").rglob("*.json"):
+    evals_dir = Path(__file__).parent.joinpath("evals")
+    pf_overview = evals_dir.joinpath("models.json")
+    if not pf_overview.exists():
+        raise ValueError(
+            f"Overview file {pf_overview} not found. Make sure to generate it first with `generate_overview_json.py`."
+        )
+
+    model_info = json.loads(pf_overview.read_text(encoding="utf-8"))
+    model_results = {}
+    for pfin in evals_dir.rglob("*.json"):
         data = json.loads(pfin.read_text(encoding="utf-8"))
-        if "results" not in data or "config" not in data:
-            continue
-        results = data["results"]
-        config = data["config"]
-        if "model_args" not in config:
-            continue
 
-        model_args = config["model_args"].split(",")
-        pretrained = [x for x in model_args if x.startswith("pretrained=")]
-        if len(pretrained) != 1:
+        if "results" not in data:
             continue
-        pretrained = pretrained[0].split("=")[1]
-        pretrained = pretrained.split("/")[-1]
-
-        for lang_task, perfs in results.items():
-            task, lang = lang_task.split("_")
-            assert task in BENCHMARKS
-
-            if lang and task:
-                metric = METRICS[BENCHMARKS.index(task)]
-                p = round(perfs[metric] * 100, 1)
-                performance_dict[(pretrained, lang)][task] = p
-
-    return dict(performance_dict)
-
-
-def build_performance_df(performance_dict: dict[tuple[str, str], dict[str, float]]) -> DataFrame:
-    """
-    Builds a dataframe from the performance dictionary
-    :param performance_dict: a dictionary of results where the keys are typles of (model_name, language) and the values are
-    dictionaries of the form {benchmark_name: performance_score}
-    :return: a pd.DataFrame that has as rows the model names and as columns the benchmarks
-    """
-    data = []
-    dutch_training_info = json.loads(Path(__file__).parent.joinpath("evals/dutch_models.json").read_text(encoding="utf-8"))
-
-    for (pretrained, lang), perfs in performance_dict.items():
-        arc_perf = perfs.get(ARC, 0.0)
-        hellaswag_perf = perfs.get(HELLASWAG, 0.0)
-        mmlu_perf = perfs.get(MMLU, 0.0)
-        truthfulqa_perf = perfs.get(TRUTHFULQA, 0.0)
-        training_type = dutch_training_info.get(pretrained, "NA")
-
-        avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1)
-        row = [pretrained, training_type, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf]
-        data.append(row)
-
-    df = pd.DataFrame.from_records(data, columns=COLS)
-    df = df.sort_values(by=[AVERAGE_COL], ascending=False)
-
-    return df
-
-
-def style_df(df: DataFrame) -> Styler:
-    """
-    Styles the dataframe by rounding to two decimals and putting the max value in bold per column
-    :param df: the dataframe to style
-    :return: the Styler
-    """
-    styler = df.style.format("{:.2f}", subset=df.columns[2:])
+        task_results = data["results"]
+        short_name = pfin.stem.split("_", 2)[2].lower()
+        if short_name not in model_results:
+            model_results[short_name] = {
+                "short_name": short_name,
+                "model_name": model_info[short_name]["model_name"],
+                "model_type": model_info[short_name]["model_type"],
+                "dutch_coverage": model_info[short_name]["dutch_coverage"],
+                "num_parameters": model_info[short_name]["num_parameters"],
+            }
 
-    def highlight_max(col):
-        return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
+        for task_name, task_result in task_results.items():
+            task_name = task_name.rsplit("_", 1)[0]
+            metric = TASK_METRICS[task_name]
+            model_results[short_name][task_name] = task_result[metric]
 
-    styler = styler.apply(highlight_max, axis=1, subset=df.columns[2:])
-    styler = styler.hide()
-    return styler
+    model_results = ResultSet([Result(**res) for short_name, res in model_results.items()])
 
+    return model_results
 
-MODEL_COL = "Model"
-AVERAGE_COL = "Average"
-ARC_COL = "ARC (25-shot)"
-HELLASWAG_COL = "HellaSwag (10-shot)️"
-MMLU_COL = "MMLU (5-shot)"
-TRUTHFULQA_COL = "TruthfulQA (0-shot)"
-TRAIN_TYPE_COL = "Training type"
-TRAIN_TYPE_COL = "Training type"
-NUM_PARAMETERS = "Num. parameters"
 
-COLS = [MODEL_COL, TRAIN_TYPE_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL]
-TYPES = ["str", "number", "number", "number", "number", "number"]
-
-results = collect_results()
-original_df = build_performance_df(results)
-styled_df = style_df(original_df)
 with gr.Blocks() as demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRO_TEXT)
 
-    gr.Markdown("## Leaderboard\nOnly representative for the Dutch version (`*_nl`) of the benchmarks!")
+    gr.Markdown(f"## Leaderboard\nOnly representative for the Dutch version (`*_nl`) of the benchmarks!")
+
+    results = collect_results()
+
     gr.components.Dataframe(
-        value=original_df,
-        headers=COLS,
-        datatype=TYPES,
+        results.styled_df,
+        headers=list(results.df.columns),
+        datatype=[results.column_types[col] for col in results.df.columns],  # To ensure same order as headers
+        interactive=False,
        elem_id="leaderboard-table",
     )
-    gr.Markdown("Training type: <code>PT</code>: pretrained on only/mostly Dutch; <code>FT</code>: **only** finetuned on"
-                " Dutch; <code>NA</code> not specifically pretrained nor finetuned on Dutch but Dutch data may have been a (small) portion of the training data")
+
+    with gr.Row():
+        with gr.Column():
+            modeltypes_str = "<br>".join([f"- {emoji}: {modeltype}" for modeltype, emoji in MODEL_TYPE_EMOJIS.items()])
+            gr.Markdown(f"Model types:<br>{modeltypes_str}")
+
+        with gr.Column():
+            gr.Markdown(
+                f"Language coverage ({results.column_names['dutch_coverage']}):"
+                f"<br>- `none`: no explicit/deliverate Dutch coverage,"
+                f"<br>- `pretrained`: pretrained on Dutch data,"
+                f"<br>- `fine-tuned`: fine-tuned on Dutch data"
+            )
+
+        with gr.Column():
+            metrics_str = "<br>".join([f"- {task}: `{metric}`" for task, metric in TASK_METRICS.items()])
+            gr.Markdown(f"Reported metrics:<br>{metrics_str}")
 
     gr.Markdown("## LaTeX")
-    gr.Code(styled_df.to_latex(convert_css=True))
+    gr.Code(results.latex_df.to_latex(convert_css=True))
 
     gr.Markdown(CREDIT, elem_classes="markdown-text")
     gr.Markdown(CITATION, elem_classes="markdown-text")
 
-if __name__ == '__main__':
-    demo.launch()
-
+
+if __name__ == "__main__":
+    demo.launch()
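
A note on the refactor above: `Result` now computes `average` and `num_parameters_kmb` in `__post_init__` (both are `init=False` fields), so callers only pass metadata and raw benchmark scores. A minimal usage sketch, assuming the `Result` dataclass and `convert_number_to_kmb` helper from the new app.py are in scope; the scores below are hypothetical placeholders, not leaderboard values:

```python
# Hypothetical scores; assumes Result and convert_number_to_kmb from app.py above.
res = Result(
    model_name="mistralai/Mistral-7B-v0.1",
    short_name="mistral-7b-v0.1",
    model_type="pretrained",
    dutch_coverage="none",
    num_parameters=7_241_732_096,
    arc=0.43, hellaswag=0.58, mmlu=0.37, truthfulqa=0.46,
)
print(res.average)             # (0.43 + 0.58 + 0.37 + 0.46) / 4 = 0.46
print(res.num_parameters_kmb)  # formatted by convert_number_to_kmb, e.g. "7.2B"
```

Invalid `model_type` or `dutch_coverage` values raise a `ValueError` in `__post_init__`, which guards against typos in evals/models.json.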
content.py CHANGED
@@ -1,8 +1,10 @@
-TITLE = '<h1 align="center" id="space-title">Open Multilingual LLM Evaluation Leaderboard (Dutch only)</h1>'
+TITLE = '<h1 align="center" id="space-title">Open Dutch LLM Evaluation Leaderboard</h1>'
 
 INTRO_TEXT = f"""
 ## About
 
+This is a leaderboard for Dutch benchmarks for large language models.
+
 This is a fork of the [Open Multilingual LLM Evaluation Leaderboard](https://huggingface.co/spaces/uonlp/open_multilingual_llm_leaderboard), but restricted to only Dutch models and augmented with additional model results.
 We test the models on the following benchmarks **for the Dutch version only!!**, which have been translated into Dutch automatically by the original authors of the Open Multilingual LLM Evaluation Leaderboard with `gpt-35-turbo`.
 
@@ -13,7 +15,7 @@ We test the models on the following benchmarks **for the Dutch version only!!**,
 
 I do not maintain those datasets, I only run benchmarks and add the results to this space. For questions regarding the test sets or running them yourself, see [the original Github repository](https://github.com/laiviet/lm-evaluation-harness).
 
-All models are benchmarked in 8-bit precision.
+Disclaimer: I am aware that benchmarking models on *translated* data is not ideal. However, for Dutch there are no other options for generative models at the moment. If you have any suggestions for other Dutch benchmarks, please let me know so I can add them!
 """
 
 CREDIT = f"""
@@ -47,4 +49,4 @@ If you use the multilingual benchmarks, please cite the following paper:
   year={{2023}}
 }}
 ```
-"""
+"""
evals/models.json CHANGED
@@ -3,90 +3,120 @@
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt-neo-1.3B-dutch",
     "num_parameters": 1315575808,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "gpt-neo-125m-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt-neo-125M-dutch",
     "num_parameters": 125198592,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "gpt2-large-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt2-large-dutch",
     "num_parameters": 774030080,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "gpt2-medium-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "yhavinga/gpt2-medium-dutch",
     "num_parameters": 354823168,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "pretrained"
   },
   "llama-2-13b-chat-dutch": {
     "compute_dtype": "bfloat16",
     "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "instruction-tuned",
+    "dutch_coverage": "fine-tuned"
   },
   "llama-2-13b-chat-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-13b-chat-hf",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "instruction-tuned",
+    "dutch_coverage": "none"
   },
   "llama-2-13b-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-13b-hf",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "none"
   },
   "llama-2-7b-chat-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-7b-chat-hf",
     "num_parameters": 6738415616,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "instruction-tuned",
+    "dutch_coverage": "none"
   },
   "llama-2-7b-hf": {
     "compute_dtype": "bfloat16",
     "model_name": "meta-llama/Llama-2-7b-hf",
     "num_parameters": 6738415616,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "none"
   },
-  "llama2-13b-ft-mc4": {
+  "llama2-13b-ft-mc4_nl_cleaned_tiny": {
     "compute_dtype": "bfloat16",
     "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
     "num_parameters": 13015864320,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "fine-tuned",
+    "dutch_coverage": "fine-tuned"
   },
   "mistral-7b-v0.1": {
     "compute_dtype": "bfloat16",
     "model_name": "mistralai/Mistral-7B-v0.1",
     "num_parameters": 7241732096,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "pretrained",
+    "dutch_coverage": "none"
   },
   "neural-chat-7b-v3-1": {
     "compute_dtype": "bfloat16",
     "model_name": "Intel/neural-chat-7b-v3-1",
     "num_parameters": 7241732096,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "RL-tuned",
+    "dutch_coverage": "none"
   },
   "orca-2-13b": {
     "compute_dtype": "bfloat16",
     "model_name": "microsoft/Orca-2-13b",
     "num_parameters": 13015895040,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "fine-tuned",
+    "dutch_coverage": "none"
   },
   "orca-2-7b": {
     "compute_dtype": "bfloat16",
     "model_name": "microsoft/Orca-2-7b",
     "num_parameters": 6738440192,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "fine-tuned",
+    "dutch_coverage": "none"
   },
   "zephyr-7b-beta": {
     "compute_dtype": "bfloat16",
     "model_name": "HuggingFaceH4/zephyr-7b-beta",
     "num_parameters": 7241732096,
-    "quantization": "8-bit"
+    "quantization": "8-bit",
+    "model_type": "RL-tuned",
+    "dutch_coverage": "none"
   }
 }
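
The top-level keys here (e.g. `zephyr-7b-beta`) have to match the `short_name` that `collect_results()` in app.py derives from each eval filename via `pfin.stem.split("_", 2)[2].lower()`. A small sketch of that linkage; the `<task>_<lang>_<model>.json` filename scheme is inferred from that split and is an assumption, not something stated in this commit:

```python
from pathlib import Path

# Hypothetical eval result filename following the inferred <task>_<lang>_<model> scheme.
pfin = Path("evals/truthfulqa_nl_zephyr-7b-beta.json")
short_name = pfin.stem.split("_", 2)[2].lower()
print(short_name)  # -> "zephyr-7b-beta", which must be a key in evals/models.json
```

Note that models.json itself is picked up by the `rglob("*.json")` loop in `collect_results()` but skipped by the `if "results" not in data` guard.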
requirements.txt ADDED
@@ -0,0 +1 @@
+gradio==4.8.0