Muennighoff
committed on
Commit
•
899c702
1
Parent(s):
f5692b9
Add files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- config.json +31 -0
- evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xwinograd/zh/Replace_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json +9 -0
- evaluation_copawinostoryht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json +9 -0
- evaluation_copawinostoryht/merged.csv +20 -0
- evaluation_copawinostoryht/merged.json +1 -0
- evaluation_copawinostoryht/xcopa/zh/C1_or_C2?_premise_zhht/results.json +9 -0
- evaluation_copawinostoryht/xcopa/zh/best_option_zhht/results.json +9 -0
- evaluation_copawinostoryht/xcopa/zh/cause_effect_zhht/results.json +9 -0
- evaluation_copawinostoryht/xcopa/zh/i_am_hesitating_zhht/results.json +9 -0
- evaluation_copawinostoryht/xcopa/zh/plausible_alternatives_zhht/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json +9 -0
- evaluation_copawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json +9 -0
.gitattributes
CHANGED
@@ -30,3 +30,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
33 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"apply_residual_connection_post_layernorm": false,
|
3 |
+
"architectures": [
|
4 |
+
"BloomModel"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"attention_softmax_in_fp32": true,
|
8 |
+
"bias_dropout_fusion": true,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"eos_token_id": 2,
|
11 |
+
"hidden_dropout": 0.0,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"layer_norm_epsilon": 1e-05,
|
14 |
+
"masked_softmax_fusion": true,
|
15 |
+
"model_type": "bloom",
|
16 |
+
"n_embed": 4096,
|
17 |
+
"n_inner": null,
|
18 |
+
"n_layer": 30,
|
19 |
+
"num_attention_heads": 32,
|
20 |
+
"offset_alibi": 100,
|
21 |
+
"pad_token_id": 3,
|
22 |
+
"pretraining_tp": 4,
|
23 |
+
"seq_length": 2048,
|
24 |
+
"skip_bias_add": true,
|
25 |
+
"skip_bias_add_qkv": false,
|
26 |
+
"slow_but_exact": false,
|
27 |
+
"transformers_version": "4.21.0.dev0",
|
28 |
+
"unk_token_id": 0,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 250880
|
31 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Answer Given options_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7299801455989411
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Choose Story Ending_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8537392455327598
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Generate Ending_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.6082064857710126
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Novel Correct Ending_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8246194573130378
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Story Continuation and Options_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8166776968894772
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xwinograd/zh/Replace_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xwinograd",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Replace_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5972222222222222
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xwinograd",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "True or False_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5218253968253969
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xwinograd",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "does underscore refer to_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5059523809523809
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xwinograd",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "stand for_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5059523809523809
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xwinograd",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "underscore refer to_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5099206349206349
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/merged.csv
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset,prompt,metric,value
|
2 |
+
xcopa_zh,C1 or C2? premise_zhht,accuracy,0.65
|
3 |
+
xcopa_zh,best_option_zhht,accuracy,0.77
|
4 |
+
xcopa_zh,cause_effect_zhht,accuracy,0.76
|
5 |
+
xcopa_zh,i_am_hesitating_zhht,accuracy,0.73
|
6 |
+
xcopa_zh,plausible_alternatives_zhht,accuracy,0.78
|
7 |
+
xcopa_zh,median,accuracy,0.76
|
8 |
+
xstory_cloze_zh,Answer Given options_zhht,accuracy,0.7299801455989411
|
9 |
+
xstory_cloze_zh,Choose Story Ending_zhht,accuracy,0.8537392455327598
|
10 |
+
xstory_cloze_zh,Generate Ending_zhht,accuracy,0.6082064857710126
|
11 |
+
xstory_cloze_zh,Novel Correct Ending_zhht,accuracy,0.8246194573130378
|
12 |
+
xstory_cloze_zh,Story Continuation and Options_zhht,accuracy,0.8166776968894772
|
13 |
+
xstory_cloze_zh,median,accuracy,0.8166776968894772
|
14 |
+
xwinograd_zh,Replace_zhht,accuracy,0.5972222222222222
|
15 |
+
xwinograd_zh,True or False_zhht,accuracy,0.5218253968253969
|
16 |
+
xwinograd_zh,does underscore refer to_zhht,accuracy,0.5059523809523809
|
17 |
+
xwinograd_zh,stand for_zhht,accuracy,0.5059523809523809
|
18 |
+
xwinograd_zh,underscore refer to_zhht,accuracy,0.5099206349206349
|
19 |
+
xwinograd_zh,median,accuracy,0.5099206349206349
|
20 |
+
multiple,average,multiple,0.6955327772700374
|
evaluation_copawinostoryht/merged.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"Muennighoff/xstory_cloze_zh": {"Answer Given options_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7299801455989411}, "template_name": "Answer Given options_zhht"}, "Choose Story Ending_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8537392455327598}, "template_name": "Choose Story Ending_zhht"}, "Generate Ending_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', 
max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6082064857710126}, "template_name": "Generate Ending_zhht"}, "Novel Correct Ending_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8246194573130378}, "template_name": "Novel Correct Ending_zhht"}, "Story Continuation and Options_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8166776968894772}, "template_name": "Story Continuation and Options_zhht"}}, "Muennighoff/xwinograd_zh": {"Replace_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5972222222222222}, "template_name": "Replace_zhht"}, "True or False_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5218253968253969}, "template_name": "True or False_zhht"}, "does underscore refer to_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5059523809523809}, "template_name": "does underscore refer to_zhht"}, "stand for_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", 
"dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5059523809523809}, "template_name": "stand for_zhht"}, "underscore refer to_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5099206349206349}, "template_name": "underscore refer to_zhht"}}, "xcopa_zh": {"C1 or C2? premise_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.65}, "template_name": "C1 or C2? 
premise_zhht"}, "best_option_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.77}, "template_name": "best_option_zhht"}, "cause_effect_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.76}, "template_name": "cause_effect_zhht"}, "i_am_hesitating_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.73}, "template_name": "i_am_hesitating_zhht"}, "plausible_alternatives_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.78}, "template_name": "plausible_alternatives_zhht"}}}
|
evaluation_copawinostoryht/xcopa/zh/C1_or_C2?_premise_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "xcopa",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "C1 or C2? premise_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.65
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/xcopa/zh/best_option_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "xcopa",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "best_option_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.77
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/xcopa/zh/cause_effect_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "xcopa",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "cause_effect_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.76
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/xcopa/zh/i_am_hesitating_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "xcopa",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "i_am_hesitating_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.73
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostoryht/xcopa/zh/plausible_alternatives_zhht/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "xcopa",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "plausible_alternatives_zhht",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.78
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "ar",
|
4 |
+
"template_name": "Answer Given options_armt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7412309728656519
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "ar",
|
4 |
+
"template_name": "Choose Story Ending_armt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8305757776307081
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "ar",
|
4 |
+
"template_name": "Generate Ending_armt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5506287227001986
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "ar",
|
4 |
+
"template_name": "Novel Correct Ending_armt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.800132362673726
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "ar",
|
4 |
+
"template_name": "Story Continuation and Options_armt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8087359364659166
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "es",
|
4 |
+
"template_name": "Answer Given options_esmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8385175380542687
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "es",
|
4 |
+
"template_name": "Choose Story Ending_esmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8894771674387822
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "es",
|
4 |
+
"template_name": "Generate Ending_esmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.6479152878888154
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "es",
|
4 |
+
"template_name": "Novel Correct Ending_esmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8497683653209794
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "es",
|
4 |
+
"template_name": "Story Continuation and Options_esmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8815354070152217
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "eu",
|
4 |
+
"template_name": "Answer Given options_eumt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5724685638649901
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "eu",
|
4 |
+
"template_name": "Choose Story Ending_eumt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7081403044341495
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "eu",
|
4 |
+
"template_name": "Generate Ending_eumt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5016545334215751
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "eu",
|
4 |
+
"template_name": "Novel Correct Ending_eumt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.598941098610192
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "eu",
|
4 |
+
"template_name": "Story Continuation and Options_eumt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7035076108537393
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "hi",
|
4 |
+
"template_name": "Answer Given options_himt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.6982131039046989
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "hi",
|
4 |
+
"template_name": "Choose Story Ending_himt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8060886829913965
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "hi",
|
4 |
+
"template_name": "Generate Ending_himt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5651886168100596
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "hi",
|
4 |
+
"template_name": "Novel Correct Ending_himt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7332892124420913
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "hi",
|
4 |
+
"template_name": "Story Continuation and Options_himt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7948378557246857
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "id",
|
4 |
+
"template_name": "Answer Given options_idmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.6823295830575777
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "id",
|
4 |
+
"template_name": "Choose Story Ending_idmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7974851091992058
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "id",
|
4 |
+
"template_name": "Generate Ending_idmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5473196558570483
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "id",
|
4 |
+
"template_name": "Novel Correct Ending_idmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.6896095301125083
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "id",
|
4 |
+
"template_name": "Story Continuation and Options_idmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8332230311052283
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Answer Given options_zhmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.7703507610853739
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Choose Story Ending_zhmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8510919920582396
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Generate Ending_zhmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5804103242885507
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Novel Correct Ending_zhmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8120450033090668
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
+
"dataset_config_name": "zh",
|
4 |
+
"template_name": "Story Continuation and Options_zhmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.8471211118464593
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|
evaluation_copawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_name": "Muennighoff/xwinograd",
|
3 |
+
"dataset_config_name": "fr",
|
4 |
+
"template_name": "Replace_frmt",
|
5 |
+
"evaluation": {
|
6 |
+
"accuracy": 0.5180722891566265
|
7 |
+
},
|
8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b2-xp3capmixnewcodelonglossseq/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
|
9 |
+
}
|