Update src/about.py
Browse files- src/about.py +3 -3
src/about.py
CHANGED
@@ -15,12 +15,12 @@ class Task:
|
|
15 |
class Tasks(Enum):
|
16 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
17 |
task1 = Task("PeKA", "acc", "PeKA*")
|
18 |
-
task2 = Task("
|
19 |
task3 = Task("khayyam_challenge", "acc", "Khayyam Challenge")
|
20 |
task4 = Task("parsinlu_mc", "acc", "ParsiNLU MCQA")
|
21 |
task5 = Task("parsinlu_nli", "acc", "ParsiNLU NLI")
|
22 |
task6 = Task("parsinlu_qqp", "acc", "ParsiNLU QQP")
|
23 |
-
|
24 |
|
25 |
NUM_FEWSHOT = 0 # Change with your few shot
|
26 |
# ---------------------------------------------------
|
@@ -59,7 +59,7 @@ This benchmark can also be used by multilingual researchers to measure how well
|
|
59 |
We use our own framework to evaluate the models on the following benchmarks (TO BE RELEASED SOON).
|
60 |
### Tasks
|
61 |
- PeKA: Persian Knowledge Assessment (0-shot) - a set of multiple-choice questions that tests the level of native knowledge in the Persian language in more than 15 domains and categories: from art to history and geography, cinema, tv, sports, law and medicine, and much more.
|
62 |
-
- PK-BETS: Persian Bias Ethics Toxicity and Skills (0-shot) - a test of the model's knowledge in Persian and its capability in linguistic skills such as Grammar and Paraphrasing, and also questions examining the bias, ethics, and toxicity of the model.
|
63 |
- <a href="https://arxiv.org/abs/2404.06644" target="_blank"> Khayyam Challenge (Persian MMLU) </a> (0-shot) - comprising 20,805 four-choice questions (of which we use 20,776, removing questions that are longer than 200 words) sourced from 38 diverse tasks extracted from Persian examinations, spanning a wide spectrum of subjects, complexities, and ages
|
64 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU MCQA </a> (0-shot) - a series of multiple-choice questions in domains of *literature*, *math & logic*, and *common knowledge*.
|
65 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU NLI </a> (max[0,3,5,10]-shot) - a 3-way classification to determine whether a hypothesis sentence entails, contradicts, or is neutral with respect to a given premise sentence.
|
|
|
15 |
class Tasks(Enum):
|
16 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
17 |
task1 = Task("PeKA", "acc", "PeKA*")
|
18 |
+
task2 = Task("PKBETS MCQA", "acc", "PKBETS MCQA*")
|
19 |
task3 = Task("khayyam_challenge", "acc", "Khayyam Challenge")
|
20 |
task4 = Task("parsinlu_mc", "acc", "ParsiNLU MCQA")
|
21 |
task5 = Task("parsinlu_nli", "acc", "ParsiNLU NLI")
|
22 |
task6 = Task("parsinlu_qqp", "acc", "ParsiNLU QQP")
|
23 |
+
task7 = Task("persian_ARC", "acc", "Persian ARC")
|
24 |
|
25 |
NUM_FEWSHOT = 0 # Change with your few shot
|
26 |
# ---------------------------------------------------
|
|
|
59 |
We use our own framework to evaluate the models on the following benchmarks (TO BE RELEASED SOON).
|
60 |
### Tasks
|
61 |
- PeKA: Persian Knowledge Assessment (0-shot) - a set of multiple-choice questions that tests the level of native knowledge in the Persian language in more than 15 domains and categories: from art to history and geography, cinema, tv, sports, law and medicine, and much more.
|
62 |
+
- PK-BETS: Persian Knowledge: Bias Ethics Toxicity and Skills (0-shot) - a test of the model's knowledge in Persian and its capability in linguistic skills such as Grammar and Paraphrasing, and also questions examining the bias, ethics, and toxicity of the model.
|
63 |
- <a href="https://arxiv.org/abs/2404.06644" target="_blank"> Khayyam Challenge (Persian MMLU) </a> (0-shot) - comprising 20,805 four-choice questions (of which we use 20,776, removing questions that are longer than 200 words) sourced from 38 diverse tasks extracted from Persian examinations, spanning a wide spectrum of subjects, complexities, and ages
|
64 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU MCQA </a> (0-shot) - a series of multiple-choice questions in domains of *literature*, *math & logic*, and *common knowledge*.
|
65 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU NLI </a> (max[0,3,5,10]-shot) - a 3-way classification to determine whether a hypothesis sentence entails, contradicts, or is neutral with respect to a given premise sentence.
|