Bram Vanroy committed on
Commit
351f9fe
1 Parent(s): b1e6575

add correct evals:

Browse files
Files changed (1) hide show
  1. evals/models.json +53 -45
evals/models.json CHANGED
@@ -1,122 +1,130 @@
1
  {
2
  "gpt-neo-1.3b-dutch": {
3
  "compute_dtype": "bfloat16",
 
4
  "model_name": "yhavinga/gpt-neo-1.3B-dutch",
5
- "num_parameters": 1315575808,
6
- "quantization": "8-bit",
7
  "model_type": "pretrained",
8
- "dutch_coverage": "pretrained"
 
9
  },
10
  "gpt-neo-125m-dutch": {
11
  "compute_dtype": "bfloat16",
 
12
  "model_name": "yhavinga/gpt-neo-125M-dutch",
13
- "num_parameters": 125198592,
14
- "quantization": "8-bit",
15
  "model_type": "pretrained",
16
- "dutch_coverage": "pretrained"
 
17
  },
18
  "gpt2-large-dutch": {
19
  "compute_dtype": "bfloat16",
 
20
  "model_name": "yhavinga/gpt2-large-dutch",
21
- "num_parameters": 774030080,
22
- "quantization": "8-bit",
23
  "model_type": "pretrained",
24
- "dutch_coverage": "pretrained"
 
25
  },
26
  "gpt2-medium-dutch": {
27
  "compute_dtype": "bfloat16",
 
28
  "model_name": "yhavinga/gpt2-medium-dutch",
29
- "num_parameters": 354823168,
30
- "quantization": "8-bit",
31
  "model_type": "pretrained",
32
- "dutch_coverage": "pretrained"
 
33
  },
34
  "llama-2-13b-chat-dutch": {
35
  "compute_dtype": "bfloat16",
 
36
  "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
37
- "num_parameters": 13015864320,
38
- "quantization": "8-bit",
39
  "model_type": "instruction-tuned",
40
- "dutch_coverage": "fine-tuned"
 
41
  },
42
  "llama-2-13b-chat-hf": {
43
  "compute_dtype": "bfloat16",
 
44
  "model_name": "meta-llama/Llama-2-13b-chat-hf",
45
- "num_parameters": 13015864320,
46
- "quantization": "8-bit",
47
  "model_type": "instruction-tuned",
48
- "dutch_coverage": "none"
 
49
  },
50
  "llama-2-13b-hf": {
51
  "compute_dtype": "bfloat16",
 
52
  "model_name": "meta-llama/Llama-2-13b-hf",
53
- "num_parameters": 13015864320,
54
- "quantization": "8-bit",
55
  "model_type": "pretrained",
56
- "dutch_coverage": "none"
 
57
  },
58
  "llama-2-7b-chat-hf": {
59
  "compute_dtype": "bfloat16",
 
60
  "model_name": "meta-llama/Llama-2-7b-chat-hf",
61
- "num_parameters": 6738415616,
62
- "quantization": "8-bit",
63
  "model_type": "instruction-tuned",
64
- "dutch_coverage": "none"
 
65
  },
66
  "llama-2-7b-hf": {
67
  "compute_dtype": "bfloat16",
 
68
  "model_name": "meta-llama/Llama-2-7b-hf",
69
- "num_parameters": 6738415616,
70
- "quantization": "8-bit",
71
  "model_type": "pretrained",
72
- "dutch_coverage": "none"
 
73
  },
74
  "llama2-13b-ft-mc4_nl_cleaned_tiny": {
75
  "compute_dtype": "bfloat16",
 
76
  "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
77
- "num_parameters": 13015864320,
78
- "quantization": "8-bit",
79
  "model_type": "fine-tuned",
80
- "dutch_coverage": "fine-tuned"
 
81
  },
82
  "mistral-7b-v0.1": {
83
  "compute_dtype": "bfloat16",
 
84
  "model_name": "mistralai/Mistral-7B-v0.1",
85
- "num_parameters": 7241732096,
86
- "quantization": "8-bit",
87
  "model_type": "pretrained",
88
- "dutch_coverage": "none"
 
 
 
 
 
 
 
 
 
89
  },
90
  "neural-chat-7b-v3-1": {
91
  "compute_dtype": "bfloat16",
 
92
  "model_name": "Intel/neural-chat-7b-v3-1",
93
- "num_parameters": 7241732096,
94
- "quantization": "8-bit",
95
  "model_type": "RL-tuned",
96
- "dutch_coverage": "none"
 
97
  },
98
  "orca-2-13b": {
99
  "compute_dtype": "bfloat16",
 
100
  "model_name": "microsoft/Orca-2-13b",
101
- "num_parameters": 13015895040,
102
- "quantization": "8-bit",
103
  "model_type": "fine-tuned",
104
- "dutch_coverage": "none"
 
105
  },
106
  "orca-2-7b": {
107
  "compute_dtype": "bfloat16",
 
108
  "model_name": "microsoft/Orca-2-7b",
109
- "num_parameters": 6738440192,
110
- "quantization": "8-bit",
111
  "model_type": "fine-tuned",
112
- "dutch_coverage": "none"
 
113
  },
114
  "zephyr-7b-beta": {
115
  "compute_dtype": "bfloat16",
 
116
  "model_name": "HuggingFaceH4/zephyr-7b-beta",
117
- "num_parameters": 7241732096,
118
- "quantization": "8-bit",
119
  "model_type": "RL-tuned",
120
- "dutch_coverage": "none"
 
121
  }
122
  }
 
1
  {
2
  "gpt-neo-1.3b-dutch": {
3
  "compute_dtype": "bfloat16",
4
+ "dutch_coverage": "pretrained",
5
  "model_name": "yhavinga/gpt-neo-1.3B-dutch",
 
 
6
  "model_type": "pretrained",
7
+ "num_parameters": 1315575808,
8
+ "quantization": "8-bit"
9
  },
10
  "gpt-neo-125m-dutch": {
11
  "compute_dtype": "bfloat16",
12
+ "dutch_coverage": "pretrained",
13
  "model_name": "yhavinga/gpt-neo-125M-dutch",
 
 
14
  "model_type": "pretrained",
15
+ "num_parameters": 125198592,
16
+ "quantization": "8-bit"
17
  },
18
  "gpt2-large-dutch": {
19
  "compute_dtype": "bfloat16",
20
+ "dutch_coverage": "pretrained",
21
  "model_name": "yhavinga/gpt2-large-dutch",
 
 
22
  "model_type": "pretrained",
23
+ "num_parameters": 774030080,
24
+ "quantization": "8-bit"
25
  },
26
  "gpt2-medium-dutch": {
27
  "compute_dtype": "bfloat16",
28
+ "dutch_coverage": "pretrained",
29
  "model_name": "yhavinga/gpt2-medium-dutch",
 
 
30
  "model_type": "pretrained",
31
+ "num_parameters": 354823168,
32
+ "quantization": "8-bit"
33
  },
34
  "llama-2-13b-chat-dutch": {
35
  "compute_dtype": "bfloat16",
36
+ "dutch_coverage": "fine-tuned",
37
  "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
 
 
38
  "model_type": "instruction-tuned",
39
+ "num_parameters": 13015864320,
40
+ "quantization": "8-bit"
41
  },
42
  "llama-2-13b-chat-hf": {
43
  "compute_dtype": "bfloat16",
44
+ "dutch_coverage": "none",
45
  "model_name": "meta-llama/Llama-2-13b-chat-hf",
 
 
46
  "model_type": "instruction-tuned",
47
+ "num_parameters": 13015864320,
48
+ "quantization": "8-bit"
49
  },
50
  "llama-2-13b-hf": {
51
  "compute_dtype": "bfloat16",
52
+ "dutch_coverage": "none",
53
  "model_name": "meta-llama/Llama-2-13b-hf",
 
 
54
  "model_type": "pretrained",
55
+ "num_parameters": 13015864320,
56
+ "quantization": "8-bit"
57
  },
58
  "llama-2-7b-chat-hf": {
59
  "compute_dtype": "bfloat16",
60
+ "dutch_coverage": "none",
61
  "model_name": "meta-llama/Llama-2-7b-chat-hf",
 
 
62
  "model_type": "instruction-tuned",
63
+ "num_parameters": 6738415616,
64
+ "quantization": "8-bit"
65
  },
66
  "llama-2-7b-hf": {
67
  "compute_dtype": "bfloat16",
68
+ "dutch_coverage": "none",
69
  "model_name": "meta-llama/Llama-2-7b-hf",
 
 
70
  "model_type": "pretrained",
71
+ "num_parameters": 6738415616,
72
+ "quantization": "8-bit"
73
  },
74
  "llama2-13b-ft-mc4_nl_cleaned_tiny": {
75
  "compute_dtype": "bfloat16",
76
+ "dutch_coverage": "fine-tuned",
77
  "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
 
 
78
  "model_type": "fine-tuned",
79
+ "num_parameters": 13015864320,
80
+ "quantization": "8-bit"
81
  },
82
  "mistral-7b-v0.1": {
83
  "compute_dtype": "bfloat16",
84
+ "dutch_coverage": "none",
85
  "model_name": "mistralai/Mistral-7B-v0.1",
 
 
86
  "model_type": "pretrained",
87
+ "num_parameters": 7241732096,
88
+ "quantization": "8-bit"
89
+ },
90
+ "mixtral-8x7b-v0.1": {
91
+ "compute_dtype": "auto",
92
+ "dutch_coverage": "not-given",
93
+ "model_name": "mistralai/Mixtral-8x7B-v0.1",
94
+ "model_type": "not-given",
95
+ "num_parameters": 46702792704,
96
+ "quantization": null
97
  },
98
  "neural-chat-7b-v3-1": {
99
  "compute_dtype": "bfloat16",
100
+ "dutch_coverage": "none",
101
  "model_name": "Intel/neural-chat-7b-v3-1",
 
 
102
  "model_type": "RL-tuned",
103
+ "num_parameters": 7241732096,
104
+ "quantization": "8-bit"
105
  },
106
  "orca-2-13b": {
107
  "compute_dtype": "bfloat16",
108
+ "dutch_coverage": "none",
109
  "model_name": "microsoft/Orca-2-13b",
 
 
110
  "model_type": "fine-tuned",
111
+ "num_parameters": 13015895040,
112
+ "quantization": "8-bit"
113
  },
114
  "orca-2-7b": {
115
  "compute_dtype": "bfloat16",
116
+ "dutch_coverage": "none",
117
  "model_name": "microsoft/Orca-2-7b",
 
 
118
  "model_type": "fine-tuned",
119
+ "num_parameters": 6738440192,
120
+ "quantization": "8-bit"
121
  },
122
  "zephyr-7b-beta": {
123
  "compute_dtype": "bfloat16",
124
+ "dutch_coverage": "none",
125
  "model_name": "HuggingFaceH4/zephyr-7b-beta",
 
 
126
  "model_type": "RL-tuned",
127
+ "num_parameters": 7241732096,
128
+ "quantization": "8-bit"
129
  }
130
  }