Spaces:

ml-energy
/

leaderboard

Running

App Files Community

Jae-Won Chung committed on Sep 4, 2023

Commit

b19efcb

•

1 Parent(s): 8a69511

Add Salesforce/xgen-7b-8k-inst to model list

Browse files

Files changed (2) hide show

pegasus/benchmark.yaml +1 -0
pegasus/nlp-eval.yaml +3 -0

pegasus/benchmark.yaml CHANGED Viewed

@@ -25,6 +25,7 @@
     - project-baize/baize-v2-7B
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
   task:
     - chat
     - chat-concise

     - project-baize/baize-v2-7B
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
+    - Salesforce/xgen-7b-8k-inst
   task:
     - chat
     - chat-concise

pegasus/nlp-eval.yaml CHANGED Viewed

@@ -20,6 +20,7 @@
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
     - RWKV/rwkv-raven-7b
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
@@ -43,6 +44,7 @@
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
     - RWKV/rwkv-raven-7b
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
@@ -66,6 +68,7 @@
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
     - RWKV/rwkv-raven-7b
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json

     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
     - RWKV/rwkv-raven-7b
+    - Salesforce/xgen-7b-8k-inst
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
     - RWKV/rwkv-raven-7b
+    - Salesforce/xgen-7b-8k-inst
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
     - RWKV/rwkv-raven-7b
+    - Salesforce/xgen-7b-8k-inst
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json