open_dutch_llm_leaderboard

Running

App Files Community

Bram Vanroy committed on Nov 24, 2023

Commit

863e074

•

1 Parent(s): 095087c

update with only Dutch

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +54 -60
css.py +2 -2
evals/arc/arc_ca-bloom-7b1.json +0 -23
evals/arc/arc_ca-llama-7B.json +0 -23
evals/arc/arc_da-bloom-7b1.json +0 -23
evals/arc/arc_da-llama-7B.json +0 -23
evals/arc/arc_de-bloom-7b1.json +0 -23
evals/arc/arc_de-llama-7B.json +0 -23
evals/arc/arc_es-bloom-7b1.json +0 -23
evals/arc/arc_es-llama-7B.json +0 -23
evals/arc/arc_eu-bloom-7b1.json +0 -23
evals/arc/arc_eu-llama-7B.json +0 -23
evals/arc/arc_fr-bloom-7b1.json +0 -23
evals/arc/arc_fr-llama-7B.json +0 -23
evals/arc/arc_gu-bloom-7b1.json +0 -23
evals/arc/arc_gu-llama-7B.json +0 -23
evals/arc/arc_hi-bloom-7b1.json +0 -23
evals/arc/arc_hi-llama-7B.json +0 -23
evals/arc/arc_hr-bloom-7b1.json +0 -23
evals/arc/arc_hr-llama-7B.json +0 -23
evals/arc/arc_hu-bloom-7b1.json +0 -23
evals/arc/arc_hu-llama-7B.json +0 -23
evals/arc/arc_hy-bloom-7b1.json +0 -23
evals/arc/arc_hy-llama-7B.json +0 -23
evals/arc/arc_id-bloom-7b1.json +0 -23
evals/arc/arc_id-llama-7B.json +0 -23
evals/arc/arc_it-bloom-7b1.json +0 -23
evals/arc/arc_it-llama-7B.json +0 -23
evals/arc/arc_kn-bloom-7b1.json +0 -23
evals/arc/arc_kn-llama-7B.json +0 -23
evals/arc/arc_ml-bloom-7b1.json +0 -23
evals/arc/arc_ml-llama-7B.json +0 -23
evals/arc/arc_mr-bloom-7b1.json +0 -23
evals/arc/arc_mr-llama-7B.json +0 -23
evals/arc/arc_ne-bloom-7b1.json +0 -23
evals/arc/arc_ne-llama-7B.json +0 -23
evals/arc/arc_nl_Llama-2-7b-chat-hf.json +23 -0
evals/{mmlu/mmlu_gu-bloom-7b1.json → arc/arc_nl_Llama-2-7b-hf.json} +8 -8
evals/arc/{arc_ar-llama-7B.json → arc_nl_Mistral-7B-v0.1.json} +8 -8
evals/arc/{arc_bn-bloom-7b1.json → arc_nl_zephyr-7b-beta.json} +8 -8
evals/arc/arc_pt-bloom-7b1.json +0 -23
evals/arc/arc_pt-llama-7B.json +0 -23
evals/arc/arc_ro-bloom-7b1.json +0 -23
evals/arc/arc_ro-llama-7B.json +0 -23
evals/arc/arc_ru-bloom-7b1.json +0 -23
evals/arc/arc_ru-llama-7B.json +0 -23
evals/arc/arc_sk-bloom-7b1.json +0 -23
evals/arc/arc_sk-llama-7B.json +0 -23
evals/arc/arc_sr-bloom-7b1.json +0 -23
evals/arc/arc_sr-llama-7B.json +0 -23

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
-import os
 import json
-import glob
 from collections import defaultdict
 import pandas as pd
 import gradio as gr
 from content import *
 from css import *
 import glob
@@ -16,74 +17,74 @@ BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
 METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
-LANGS = 'ar,bn,ca,da,de,es,eu,fr,gu,hi,hr,hu,hy,id,it,kn,ml,mr,ne,nl,pt,ro,ru,sk,sr,sv,ta,te,uk,vi,zh'.split(',')
 LANG_NAME = {
-    'ar': 'Arabic',
-    'bn': 'Bengali',
-    'ca': 'Catalan',
-    'da': 'Danish',
-    'de': 'German',
-    'es': 'Spanish',
-    'eu': 'Basque',
-    'fr': 'French',
-    'gu': 'Gujarati',
-    'hi': 'Hindi',
-    'hr': 'Croatian',
-    'hu': 'Hungarian',
-    'hy': 'Armenian',
-    'id': 'Indonesian',
-    'it': 'Italian',
-    'kn': 'Kannada',
-    'ml': 'Malayalam',
-    'mr': 'Marathi',
-    'ne': 'Nepali',
-    'nl': 'Dutch',
-    'pt': 'Portuguese',
-    'ro': 'Romanian',
-    'ru': 'Russian',
-    'sk': 'Slovak',
-    'sr': 'Serbian',
-    'sv': 'Swedish',
-    'ta': 'Tamil',
-    'te': 'Telugu',
-    'uk': 'Ukrainian',
-    'vi': 'Vietnamese',
-    'zh': 'Chinese'
 }
 def collect_results():
     performance_dict = defaultdict(dict)
     pretrained_models = set()
-    for file in glob.glob('evals/*/*.json'):
-        with open(file, 'r') as f:
-            data = json.load(f)
-        if 'results' not in data:
             continue
-        if 'config' not in data:
             continue
-        results = data['results']
-        config = data['config']
-        if 'model_args' not in config:
             continue
-        model_args = config['model_args'].split(',')
-        pretrained = [x for x in model_args if x.startswith('pretrained=')]
         if len(pretrained) != 1:
             continue
-        pretrained = pretrained[0].split('=')[1]
-        pretrained = pretrained.split('/')[-1]
         pretrained_models.add(pretrained)
         for lang_task, perfs in results.items():
-            task, lang = lang_task.split('_')
             assert task in BENCHMARKS
             if lang and task:
                 metric = METRICS[BENCHMARKS.index(task)]
                 p = round(perfs[metric] * 100, 1)
                 performance_dict[(pretrained, lang)][task] = p
     return performance_dict, pretrained_models
@@ -96,15 +97,13 @@ def get_leaderboard_df(performance_dict, pretrained_models):
         mmlu_perf = perfs.get(MMLU, 0.0)
         truthfulqa_perf = perfs.get(TRUTHFULQA, 0.0)
-        if arc_perf * hellaswag_perf * mmlu_perf * truthfulqa_perf == 0:
-            continue
         avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1)
-        notes = ' '.join([pretrained, lang_name])
-        row = [pretrained, lang_name, lang, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf, notes]
         df.append(row)
     df = pd.DataFrame.from_records(df, columns=COLS)
-    df = df.sort_values(by=[LANG_COL, AVERAGE_COL], ascending=False)
     df = df[COLS]
     return df
@@ -115,10 +114,7 @@ def search_table(df, query):
     return filtered_df
 MODEL_COL = "Model"
-LANG_COL = "Language"
-CODE_COL = "Code"
 AVERAGE_COL = "Average"
 ARC_COL = "ARC (25-shot)"
 HELLASWAG_COL = "HellaSwag (10-shot)️"
@@ -126,8 +122,8 @@ MMLU_COL = "MMLU (5-shot)"
 TRUTHFULQA_COL = "TruthfulQA (0-shot)"
 NOTES_COL = "Notes"  # For search only
-COLS = [MODEL_COL, LANG_COL, CODE_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL, NOTES_COL]
-TYPES = ["str", "str", "str", "number", "number", "number", "number", "number", "str"]
 args = collect_results()
 original_df = get_leaderboard_df(*args)
@@ -139,9 +135,7 @@ with demo:
     gr.Markdown(HOW_TO, elem_classes="markdown-text")
     with gr.Box():
-        search_bar = gr.Textbox(
-            placeholder="Search models and languages...", show_label=False, elem_id="search-bar"
-        )
         leaderboard_table = gr.components.Dataframe(
             value=original_df,

 import json
 from collections import defaultdict
+from pathlib import Path
 import pandas as pd
 import gradio as gr
 from content import *
 from css import *
 import glob
 METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
+LANGS = "ar,bn,ca,da,de,es,eu,fr,gu,hi,hr,hu,hy,id,it,kn,ml,mr,ne,nl,pt,ro,ru,sk,sr,sv,ta,te,uk,vi,zh".split(",")
 LANG_NAME = {
+    "ar": "Arabic",
+    "bn": "Bengali",
+    "ca": "Catalan",
+    "da": "Danish",
+    "de": "German",
+    "es": "Spanish",
+    "eu": "Basque",
+    "fr": "French",
+    "gu": "Gujarati",
+    "hi": "Hindi",
+    "hr": "Croatian",
+    "hu": "Hungarian",
+    "hy": "Armenian",
+    "id": "Indonesian",
+    "it": "Italian",
+    "kn": "Kannada",
+    "ml": "Malayalam",
+    "mr": "Marathi",
+    "ne": "Nepali",
+    "nl": "Dutch",
+    "pt": "Portuguese",
+    "ro": "Romanian",
+    "ru": "Russian",
+    "sk": "Slovak",
+    "sr": "Serbian",
+    "sv": "Swedish",
+    "ta": "Tamil",
+    "te": "Telugu",
+    "uk": "Ukrainian",
+    "vi": "Vietnamese",
+    "zh": "Chinese",
 }
 def collect_results():
     performance_dict = defaultdict(dict)
     pretrained_models = set()
+    for pfin in Path("evals").rglob("*.json"):
+        data = json.loads(pfin.read_text(encoding="utf-8"))
+        if "results" not in data:
             continue
+        if "config" not in data:
             continue
+        results = data["results"]
+        config = data["config"]
+        if "model_args" not in config:
             continue
+        model_args = config["model_args"].split(",")
+        pretrained = [x for x in model_args if x.startswith("pretrained=")]
         if len(pretrained) != 1:
             continue
+        pretrained = pretrained[0].split("=")[1]
+        pretrained = pretrained.split("/")[-1]
         pretrained_models.add(pretrained)
         for lang_task, perfs in results.items():
+            task, lang = lang_task.split("_")
             assert task in BENCHMARKS
             if lang and task:
                 metric = METRICS[BENCHMARKS.index(task)]
                 p = round(perfs[metric] * 100, 1)
                 performance_dict[(pretrained, lang)][task] = p
     return performance_dict, pretrained_models
         mmlu_perf = perfs.get(MMLU, 0.0)
         truthfulqa_perf = perfs.get(TRUTHFULQA, 0.0)
         avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1)
+        notes = " ".join([pretrained, lang_name])
+        row = [pretrained, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf, notes]
         df.append(row)
     df = pd.DataFrame.from_records(df, columns=COLS)
+    df = df.sort_values(by=[AVERAGE_COL], ascending=False)
     df = df[COLS]
     return df
     return filtered_df
 MODEL_COL = "Model"
 AVERAGE_COL = "Average"
 ARC_COL = "ARC (25-shot)"
 HELLASWAG_COL = "HellaSwag (10-shot)️"
 TRUTHFULQA_COL = "TruthfulQA (0-shot)"
 NOTES_COL = "Notes"  # For search only
+COLS = [MODEL_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL, NOTES_COL]
+TYPES = ["str", "number", "number", "number", "number", "number", "str"]
 args = collect_results()
 original_df = get_leaderboard_df(*args)
     gr.Markdown(HOW_TO, elem_classes="markdown-text")
     with gr.Box():
+        search_bar = gr.Textbox(placeholder="Search models and languages...", show_label=False, elem_id="search-bar")
         leaderboard_table = gr.components.Dataframe(
             value=original_df,

css.py CHANGED Viewed

@@ -1,4 +1,4 @@
-CUSTOM_CSS= """
 /* Hides the final column */
 table td:last-child,
 table th:last-child {
@@ -10,4 +10,4 @@ table th:last-child {
 #     overflow: auto;
 #     white-space: nowrap;
 # }
-"""

+CUSTOM_CSS = """
 /* Hides the final column */
 table td:last-child,
 table th:last-child {
 #     overflow: auto;
 #     white-space: nowrap;
 # }
+"""

evals/arc/arc_ca-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ca": {
-      "acc": 0.31989708404802747,
-      "acc_stderr": 0.01366562491926326,
-      "acc_norm": 0.34734133790737565,
-      "acc_norm_stderr": 0.013949489903701517
-    }
-  },
-  "versions": {
-    "arc_ca": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ca-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ca": {
-      "acc": 0.3276157804459691,
-      "acc_stderr": 0.01375080741597368,
-      "acc_norm": 0.3507718696397942,
-      "acc_norm_stderr": 0.013981316936172217
-    }
-  },
-  "versions": {
-    "arc_ca": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_da-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_da": {
-      "acc": 0.20137103684661525,
-      "acc_stderr": 0.011744154502532795,
-      "acc_norm": 0.24592973436161097,
-      "acc_norm_stderr": 0.012611366681285752
-    }
-  },
-  "versions": {
-    "arc_da": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_da-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_da": {
-      "acc": 0.286203941730934,
-      "acc_stderr": 0.013236574332463879,
-      "acc_norm": 0.3273350471293916,
-      "acc_norm_stderr": 0.013741887176251822
-    }
-  },
-  "versions": {
-    "arc_da": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_de-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_de": {
-      "acc": 0.22241231822070145,
-      "acc_stderr": 0.012168377742629776,
-      "acc_norm": 0.262617621899059,
-      "acc_norm_stderr": 0.01287617552045283
-    }
-  },
-  "versions": {
-    "arc_de": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_de-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_de": {
-      "acc": 0.2951240376390077,
-      "acc_stderr": 0.013345572865502645,
-      "acc_norm": 0.35072711719418304,
-      "acc_norm_stderr": 0.013962940383743043
-    }
-  },
-  "versions": {
-    "arc_de": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_es-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_es": {
-      "acc": 0.3316239316239316,
-      "acc_stderr": 0.013769752111910177,
-      "acc_norm": 0.3811965811965812,
-      "acc_norm_stderr": 0.01420507709573084
-    }
-  },
-  "versions": {
-    "arc_es": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_es-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_es": {
-      "acc": 0.3606837606837607,
-      "acc_stderr": 0.014044746572948867,
-      "acc_norm": 0.3683760683760684,
-      "acc_norm_stderr": 0.014108074259155369
-    }
-  },
-  "versions": {
-    "arc_es": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_eu-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_eu": {
-      "acc": 0.22056239015817222,
-      "acc_stderr": 0.01229634886589257,
-      "acc_norm": 0.2521968365553603,
-      "acc_norm_stderr": 0.012879032347922939
-    }
-  },
-  "versions": {
-    "arc_eu": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_eu-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_eu": {
-      "acc": 0.20738137082601055,
-      "acc_stderr": 0.012023662461166562,
-      "acc_norm": 0.2451669595782074,
-      "acc_norm_stderr": 0.012757811738008544
-    }
-  },
-  "versions": {
-    "arc_eu": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_fr-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_fr": {
-      "acc": 0.32677502138579984,
-      "acc_stderr": 0.01372407602199982,
-      "acc_norm": 0.3669803250641574,
-      "acc_norm_stderr": 0.014102904772197396
-    }
-  },
-  "versions": {
-    "arc_fr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_fr-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_fr": {
-      "acc": 0.3473053892215569,
-      "acc_stderr": 0.013931226499492353,
-      "acc_norm": 0.3729683490162532,
-      "acc_norm_stderr": 0.014150093168782438
-    }
-  },
-  "versions": {
-    "arc_fr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_gu-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_gu": {
-      "acc": 0.2206896551724138,
-      "acc_stderr": 0.012181604374453973,
-      "acc_norm": 0.2336206896551724,
-      "acc_norm_stderr": 0.012428989430945793
-    }
-  },
-  "versions": {
-    "arc_gu": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_gu-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_gu": {
-      "acc": 0.2120689655172414,
-      "acc_stderr": 0.012007177871292825,
-      "acc_norm": 0.23189655172413792,
-      "acc_norm_stderr": 0.012396962423413033
-    }
-  },
-  "versions": {
-    "arc_gu": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hi-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hi": {
-      "acc": 0.2363013698630137,
-      "acc_stderr": 0.012435369590403731,
-      "acc_norm": 0.2919520547945205,
-      "acc_norm_stderr": 0.013309191484613488
-    }
-  },
-  "versions": {
-    "arc_hi": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hi-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hi": {
-      "acc": 0.21232876712328766,
-      "acc_stderr": 0.011971304657273123,
-      "acc_norm": 0.25,
-      "acc_norm_stderr": 0.012675503164084846
-    }
-  },
-  "versions": {
-    "arc_hi": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hr-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hr": {
-      "acc": 0.19332763045337895,
-      "acc_stderr": 0.011555111310342437,
-      "acc_norm": 0.2369546621043627,
-      "acc_norm_stderr": 0.012441890624187792
-    }
-  },
-  "versions": {
-    "arc_hr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hr-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hr": {
-      "acc": 0.2754491017964072,
-      "acc_stderr": 0.01307174925264165,
-      "acc_norm": 0.330196749358426,
-      "acc_norm_stderr": 0.013760638974726852
-    }
-  },
-  "versions": {
-    "arc_hr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hu-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hu": {
-      "acc": 0.1969178082191781,
-      "acc_stderr": 0.011640913614197496,
-      "acc_norm": 0.2585616438356164,
-      "acc_norm_stderr": 0.0128169339627777
-    }
-  },
-  "versions": {
-    "arc_hu": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hu-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hu": {
-      "acc": 0.2517123287671233,
-      "acc_stderr": 0.012704310825494622,
-      "acc_norm": 0.2979452054794521,
-      "acc_norm_stderr": 0.013388079339102703
-    }
-  },
-  "versions": {
-    "arc_hu": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hy-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hy": {
-      "acc": 0.21181818181818182,
-      "acc_stderr": 0.01232525683396216,
-      "acc_norm": 0.26181818181818184,
-      "acc_norm_stderr": 0.013261197012809796
-    }
-  },
-  "versions": {
-    "arc_hy": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_hy-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_hy": {
-      "acc": 0.19454545454545455,
-      "acc_stderr": 0.011940766785664334,
-      "acc_norm": 0.2718181818181818,
-      "acc_norm_stderr": 0.013420241182110736
-    }
-  },
-  "versions": {
-    "arc_hy": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_id-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_id": {
-      "acc": 0.3128205128205128,
-      "acc_stderr": 0.013560492090917607,
-      "acc_norm": 0.3598290598290598,
-      "acc_norm_stderr": 0.014037469945597791
-    }
-  },
-  "versions": {
-    "arc_id": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_id-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_id": {
-      "acc": 0.19316239316239317,
-      "acc_stderr": 0.011546413314069014,
-      "acc_norm": 0.26666666666666666,
-      "acc_norm_stderr": 0.012933850109759573
-    }
-  },
-  "versions": {
-    "arc_id": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_it-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_it": {
-      "acc": 0.24037639007698888,
-      "acc_stderr": 0.01250327289928353,
-      "acc_norm": 0.28999144568006846,
-      "acc_norm_stderr": 0.01327709194338097
-    }
-  },
-  "versions": {
-    "arc_it": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_it-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_it": {
-      "acc": 0.31736526946107785,
-      "acc_stderr": 0.013619227292898307,
-      "acc_norm": 0.3575705731394354,
-      "acc_norm_stderr": 0.014024008839912006
-    }
-  },
-  "versions": {
-    "arc_it": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_kn-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_kn": {
-      "acc": 0.2221254355400697,
-      "acc_stderr": 0.012273607270054452,
-      "acc_norm": 0.24738675958188153,
-      "acc_norm_stderr": 0.012740675198098838
-    }
-  },
-  "versions": {
-    "arc_kn": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_kn-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_kn": {
-      "acc": 0.20470383275261325,
-      "acc_stderr": 0.011913674295957856,
-      "acc_norm": 0.24738675958188153,
-      "acc_norm_stderr": 0.012740675198098834
-    }
-  },
-  "versions": {
-    "arc_kn": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ml-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ml": {
-      "acc": 0.2075306479859895,
-      "acc_stderr": 0.01200575665793095,
-      "acc_norm": 0.2635726795096322,
-      "acc_norm_stderr": 0.013042844591075362
-    }
-  },
-  "versions": {
-    "arc_ml": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ml-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ml": {
-      "acc": 0.21628721541155868,
-      "acc_stderr": 0.012188522634632977,
-      "acc_norm": 0.27845884413309985,
-      "acc_norm_stderr": 0.013269918016014967
-    }
-  },
-  "versions": {
-    "arc_ml": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_mr-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_mr": {
-      "acc": 0.23376623376623376,
-      "acc_stderr": 0.012458582396003653,
-      "acc_norm": 0.2727272727272727,
-      "acc_norm_stderr": 0.013110221561502926
-    }
-  },
-  "versions": {
-    "arc_mr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_mr-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_mr": {
-      "acc": 0.2051948051948052,
-      "acc_stderr": 0.011888050053276677,
-      "acc_norm": 0.2545454545454545,
-      "acc_norm_stderr": 0.012823020964319998
-    }
-  },
-  "versions": {
-    "arc_mr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ne-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ne": {
-      "acc": 0.21300256629597947,
-      "acc_stderr": 0.01198002307808546,
-      "acc_norm": 0.223267750213858,
-      "acc_norm_stderr": 0.012185048029719049
-    }
-  },
-  "versions": {
-    "arc_ne": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ne-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ne": {
-      "acc": 0.2172797262617622,
-      "acc_stderr": 0.012066782166932105,
-      "acc_norm": 0.24294268605645852,
-      "acc_norm_stderr": 0.012548588352773893
-    }
-  },
-  "versions": {
-    "arc_ne": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_nl_Llama-2-7b-chat-hf.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "arc_nl": {
+      "acc": 0.3609923011120616,
+      "acc_stderr": 0.014053373664144792,
+      "acc_norm": 0.3618477331052181,
+      "acc_norm_stderr": 0.014060593893704966
+    }
+  },
+  "versions": {
+    "arc_nl": 0
+  },
+  "config": {
+    "model": "hf-auto",
+    "model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16",
+    "batch_size": 8,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

evals/{mmlu/mmlu_gu-bloom-7b1.json → arc/arc_nl_Llama-2-7b-hf.json} RENAMED Viewed

@@ -1,19 +1,19 @@
 {
   "results": {
-    "mmlu_gu": {
-      "acc": 0.24933390631714655,
-      "acc_stderr": 0.004010971174274014,
-      "acc_norm": 0.26566394499355395,
-      "acc_norm_stderr": 0.004094955673385403
     }
   },
   "versions": {
-    "mmlu_gu": 0
   },
   "config": {
     "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
     "device": "cuda",
     "no_cache": false,
     "limit": null,

 {
   "results": {
+    "arc_nl": {
+      "acc": 0.33704020530367834,
+      "acc_stderr": 0.013831300903580639,
+      "acc_norm": 0.3567151411462789,
+      "acc_norm_stderr": 0.014016546277185005
     }
   },
   "versions": {
+    "arc_nl": 0
   },
   "config": {
     "model": "hf-auto",
+    "model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16",
+    "batch_size": 8,
     "device": "cuda",
     "no_cache": false,
     "limit": null,

evals/arc/{arc_ar-llama-7B.json → arc_nl_Mistral-7B-v0.1.json} RENAMED Viewed

@@ -1,19 +1,19 @@
 {
   "results": {
-    "arc_ar": {
-      "acc": 0.19760479041916168,
-      "acc_stderr": 0.011651221980953499,
-      "acc_norm": 0.24636441402908468,
-      "acc_norm_stderr": 0.012608059960468694
     }
   },
   "versions": {
-    "arc_ar": 0
   },
   "config": {
     "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
     "device": "cuda",
     "no_cache": false,
     "limit": null,

 {
   "results": {
+    "arc_nl": {
+      "acc": 0.42087254063301965,
+      "acc_stderr": 0.014445778557368833,
+      "acc_norm": 0.4294268605645851,
+      "acc_norm_stderr": 0.014483677397351059
     }
   },
   "versions": {
+    "arc_nl": 0
   },
   "config": {
     "model": "hf-auto",
+    "model_args": "pretrained=mistralai/Mistral-7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=bfloat16",
+    "batch_size": 8,
     "device": "cuda",
     "no_cache": false,
     "limit": null,

evals/arc/{arc_bn-bloom-7b1.json → arc_nl_zephyr-7b-beta.json} RENAMED Viewed

@@ -1,19 +1,19 @@
 {
   "results": {
-    "arc_bn": {
-      "acc": 0.22412318220701455,
-      "acc_stderr": 0.012201644195165715,
-      "acc_norm": 0.2617621899059025,
-      "acc_norm_stderr": 0.012862641889254466
     }
   },
   "versions": {
-    "arc_bn": 0
   },
   "config": {
     "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
     "device": "cuda",
     "no_cache": false,
     "limit": null,

 {
   "results": {
+    "arc_nl": {
+      "acc": 0.43798118049615054,
+      "acc_stderr": 0.01451716231691793,
+      "acc_norm": 0.4328485885372113,
+      "acc_norm_stderr": 0.01449759923259859
     }
   },
   "versions": {
+    "arc_nl": 0
   },
   "config": {
     "model": "hf-auto",
+    "model_args": "pretrained=HuggingFaceH4/zephyr-7b-beta,use_accelerate=True,device_map_option=auto,dtype=bfloat16",
+    "batch_size": 8,
     "device": "cuda",
     "no_cache": false,
     "limit": null,

evals/arc/arc_pt-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_pt": {
-      "acc": 0.3401709401709402,
-      "acc_stderr": 0.013856612397310694,
-      "acc_norm": 0.4,
-      "acc_norm_stderr": 0.014328422047021531
-    }
-  },
-  "versions": {
-    "arc_pt": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_pt-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_pt": {
-      "acc": 0.3367521367521368,
-      "acc_stderr": 0.01382247630777062,
-      "acc_norm": 0.37777777777777777,
-      "acc_norm_stderr": 0.014180244103534094
-    }
-  },
-  "versions": {
-    "arc_pt": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ro-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ro": {
-      "acc": 0.2099400171379606,
-      "acc_stderr": 0.011926921791273557,
-      "acc_norm": 0.26906598114824337,
-      "acc_norm_stderr": 0.012987310039914976
-    }
-  },
-  "versions": {
-    "arc_ro": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ro-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ro": {
-      "acc": 0.30077120822622105,
-      "acc_stderr": 0.013430077114209907,
-      "acc_norm": 0.32390745501285345,
-      "acc_norm_stderr": 0.013704533924425027
-    }
-  },
-  "versions": {
-    "arc_ro": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ru-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ru": {
-      "acc": 0.21043627031650983,
-      "acc_stderr": 0.01192703439080346,
-      "acc_norm": 0.2754491017964072,
-      "acc_norm_stderr": 0.01307174925264165
-    }
-  },
-  "versions": {
-    "arc_ru": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_ru-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_ru": {
-      "acc": 0.2934131736526946,
-      "acc_stderr": 0.013322973103306575,
-      "acc_norm": 0.32078699743370404,
-      "acc_norm_stderr": 0.013658089444975752
-    }
-  },
-  "versions": {
-    "arc_ru": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_sk-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_sk": {
-      "acc": 0.20359281437125748,
-      "acc_stderr": 0.011782227020010716,
-      "acc_norm": 0.24893071000855432,
-      "acc_norm_stderr": 0.012651960282598879
-    }
-  },
-  "versions": {
-    "arc_sk": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_sk-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_sk": {
-      "acc": 0.23609923011120615,
-      "acc_stderr": 0.012426371635795894,
-      "acc_norm": 0.28999144568006846,
-      "acc_norm_stderr": 0.013277091943380979
-    }
-  },
-  "versions": {
-    "arc_sk": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_sr-bloom-7b1.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_sr": {
-      "acc": 0.2172797262617622,
-      "acc_stderr": 0.012066782166932079,
-      "acc_norm": 0.25149700598802394,
-      "acc_norm_stderr": 0.01269526466186626
-    }
-  },
-  "versions": {
-    "arc_sr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}

evals/arc/arc_sr-llama-7B.json DELETED Viewed

@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_sr": {
-      "acc": 0.25748502994011974,
-      "acc_stderr": 0.012794024494042348,
-      "acc_norm": 0.30795551753635586,
-      "acc_norm_stderr": 0.013507954174822524
-    }
-  },
-  "versions": {
-    "arc_sr": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}