Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
•
b19efcb
1
Parent(s):
8a69511
Add Salesforce/xgen-7b-8k-inst to model list
Browse files
- pegasus/benchmark.yaml +1 -0
- pegasus/nlp-eval.yaml +3 -0
pegasus/benchmark.yaml
CHANGED
@@ -25,6 +25,7 @@
|
|
25 |
- project-baize/baize-v2-7B
|
26 |
- StabilityAI/stablelm-tuned-alpha-7b
|
27 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
|
|
28 |
task:
|
29 |
- chat
|
30 |
- chat-concise
|
|
|
25 |
- project-baize/baize-v2-7B
|
26 |
- StabilityAI/stablelm-tuned-alpha-7b
|
27 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
28 |
+
- Salesforce/xgen-7b-8k-inst
|
29 |
task:
|
30 |
- chat
|
31 |
- chat-concise
|
pegasus/nlp-eval.yaml
CHANGED
@@ -20,6 +20,7 @@
|
|
20 |
- StabilityAI/stablelm-tuned-alpha-7b
|
21 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
22 |
- RWKV/rwkv-raven-7b
|
|
|
23 |
|
24 |
- command:
|
25 |
- docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
|
@@ -43,6 +44,7 @@
|
|
43 |
- StabilityAI/stablelm-tuned-alpha-7b
|
44 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
45 |
- RWKV/rwkv-raven-7b
|
|
|
46 |
|
47 |
- command:
|
48 |
- docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
|
@@ -66,6 +68,7 @@
|
|
66 |
- StabilityAI/stablelm-tuned-alpha-7b
|
67 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
68 |
- RWKV/rwkv-raven-7b
|
|
|
69 |
|
70 |
- command:
|
71 |
- docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json
|
|
|
20 |
- StabilityAI/stablelm-tuned-alpha-7b
|
21 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
22 |
- RWKV/rwkv-raven-7b
|
23 |
+
- Salesforce/xgen-7b-8k-inst
|
24 |
|
25 |
- command:
|
26 |
- docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
|
|
|
44 |
- StabilityAI/stablelm-tuned-alpha-7b
|
45 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
46 |
- RWKV/rwkv-raven-7b
|
47 |
+
- Salesforce/xgen-7b-8k-inst
|
48 |
|
49 |
- command:
|
50 |
- docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
|
|
|
68 |
- StabilityAI/stablelm-tuned-alpha-7b
|
69 |
- togethercomputer/RedPajama-INCITE-7B-Chat
|
70 |
- RWKV/rwkv-raven-7b
|
71 |
+
- Salesforce/xgen-7b-8k-inst
|
72 |
|
73 |
- command:
|
74 |
- docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json
|