File fixes and cleaning #17
by OSainz - opened

Files changed:
- README.md +2 -2
- app.py +1 -6
- contamination_report.csv +0 -41
- markdown.py +4 -3
- postprocessing.py +0 -4
README.md
CHANGED
@@ -4,8 +4,8 @@ emoji: π
 colorFrom: green
 colorTo: blue
 sdk: gradio
-python_version: 3.
-sdk_version: 4.
+python_version: 3.10
+sdk_version: 4.19.1
 app_file: app.py
 app_port: 7860
 fullWidth: true
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import pandas as pd
 
 from dataset import get_dataframe
-from markdown import
+from markdown import GUIDELINES, PANEL_MARKDOWN
 
 df = get_dataframe()
 
@@ -101,11 +101,6 @@ with gr.Blocks(
     fill_height=True,
 ) as demo:
     gr.Markdown(PANEL_MARKDOWN)
-    with gr.Accordion("Column descriptions (See details)", open=False) as accordion:
-        gr.Markdown(COLUMN_DESC_MARKDOWN)
-
-    gr.Markdown(f"### Total contributions: {len(df)}")
-
     with gr.Tab("Corpus contamination") as tab_corpus:
         with gr.Row(variant="compact"):
             with gr.Column():
contamination_report.csv
CHANGED
@@ -6,9 +6,6 @@ Anagrams 1;;GPT-3;;model;;3.0;;data-based;https://arxiv.org/abs/2005.14165;13
 
 Anagrams 2;;GPT-3;;model;;7.0;;data-based;https://arxiv.org/abs/2005.14165;13
 
-CodeForces2305;;GPT-3.5-turbo;0613;model;;;0.0;model-based;https://arxiv.org/abs/2402.15938;28
-CodeForces2305;;GPT-3.5-turbo;1106;model;;;0.0;model-based;https://arxiv.org/abs/2402.15938;28
-
 Cycled Letters;;GPT-3;;model;;1.0;;data-based;https://arxiv.org/abs/2005.14165;13
 
 EdinburghNLP/xsum;;GPT-3.5;;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
@@ -20,9 +17,6 @@ EdinburghNLP/xsum;;allenai/c4;;corpus;;;15.49;data-based;https://arxiv.org/abs/2
 
 EleutherAI/hendrycks_math;;GPT-4;;model;100.0;;;data-based;https://arxiv.org/abs/2303.08774;11
 
-HumanEval_R;;GPT-3.5-turbo;0613;model;;;9.76;model-based;https://arxiv.org/abs/2402.15938;28
-HumanEval_R;;GPT-3.5-turbo;1106;model;;;10.97;model-based;https://arxiv.org/abs/2402.15938;28
-
 RadNLI;;GPT-3.5;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
 RadNLI;;GPT-4;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
 
@@ -149,34 +143,13 @@ facebook/anli;test_r2;GPT-3;;model;;;18.0;data-based;https://arxiv.org/abs/2005.
 
 facebook/anli;test_r3;GPT-3;;model;;;16.0;data-based;https://arxiv.org/abs/2005.14165;13
 
-facebook/flores;;Claude 3 Opus;;model;;100.0;;model-based;https://arxiv.org/abs/2404.13813;29
-facebook/flores;;bigscience/bloomz;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/bloomz-1b1;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/bloomz-1b7;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/bloomz-3b;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/bloomz-560m;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/bloomz-7b1;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/mt0-base;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/mt0-large;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/mt0-small;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/mt0-xl;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/mt0-xxl;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-facebook/flores;;bigscience/xP3;;corpus;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
-
 gigaword;;EleutherAI/pile;;corpus;;;1.18;data-based;https://arxiv.org/abs/2310.20707;2
 gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;2
 gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
 gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
 
-gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
-gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
-gsm8k;;EleutherAI/llemma_34b;;model;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
-gsm8k;;EleutherAI/llemma_7b;;model;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
-gsm8k;;EleutherAI/proof-pile-2;;corpus;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
 gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
 gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
-gsm8k;;Qwen/Qwen-14B;;model;0.5;;;model-based;https://arxiv.org/abs/2404.18824;27
-gsm8k;;Qwen/Qwen-1_8B;;model;12.8;;0.075;model-based;https://arxiv.org/abs/2404.18824;27
 
 head_qa;en;EleutherAI/pile;;corpus;;;5.11;data-based;https://arxiv.org/abs/2310.20707;2
 head_qa;en;allenai/c4;;corpus;;;5.22;data-based;https://arxiv.org/abs/2310.20707;2
@@ -188,18 +161,6 @@ health_fact;;allenai/c4;;corpus;;;7.53;data-based;https://arxiv.org/abs/2310.207
 health_fact;;oscar-corpus/OSCAR-2301;;corpus;;;3.4;data-based;https://arxiv.org/abs/2310.20707;2
 health_fact;;togethercomputer/RedPajama-Data-V2;;corpus;;;18.7;data-based;https://arxiv.org/abs/2310.20707;2
 
-hendrycks/competition_math;;BAAI/Aquila2-34B;;model;3.366;;1.166;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;BAAI/Aquila2-7B;;model;1;;0.133;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;EleutherAI/llemma_34b;;model;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
-hendrycks/competition_math;;EleutherAI/llemma_7b;;model;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
-hendrycks/competition_math;;EleutherAI/proof-pile-2;;corpus;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
-hendrycks/competition_math;;Qwen/Qwen-14B;;model;1.766;;1.6;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;Qwen/Qwen-1_8B;;model;4.533;;1.70;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;Qwen/Qwen-7B;;model;1.266;;0.766;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;THUDM/chatglm3-6b;;model;0.70;;0.4;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;internlm/internlm2-20b;;model;4.733;;0.666;model-based;https://arxiv.org/abs/2404.18824;27
-hendrycks/competition_math;;internlm/internlm2-7b;;model;3.033;;0.433;model-based;https://arxiv.org/abs/2404.18824;27
-
 hlgd;;EleutherAI/pile;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
 hlgd;;allenai/c4;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
 hlgd;;oscar-corpus/OSCAR-2301;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
@@ -703,8 +664,6 @@ wmt/wmt16;fr-en;GPT-3;;model;;;14.0;data-based;https://arxiv.org/abs/2005.14165;
 wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
 wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
 
-xlangai/spider;;GPT-3.5;;model;;11.3;;model-based;https://arxiv.org/abs/2402.08100;18
-
 xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
 xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
 xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2
markdown.py
CHANGED
@@ -79,9 +79,9 @@ The Data Contamination Database is a community-driven project and we welcome con
 We are organizing a community effort on centralized data contamination evidence collection. While the problem of data contamination is prevalent and serious, the breadth and depth of this contamination are still largely unknown. The concrete evidence of contamination is scattered across papers, blog posts, and social media, and it is suspected that the true scope of data contamination in NLP is significantly larger than reported. With this shared task we aim to provide a structured, centralized platform for contamination evidence collection to help the community understand the extent of the problem and to help researchers avoid repeating the same mistakes.
 
 If you wish to contribute to the project by reporting a data contamination case, please read the Contribution Guidelines tab.
-""".strip()
 
-
+Here is a description of each column in the table below:
+
 - **Evaluation Dataset:** Name of the evaluation dataset that has (not) been compromised.
 - **Contaminated Source:** Name of the model that has been trained with the evaluation dataset or name of the pre-training corpora that contains the evaluation dataset.
 - **Train Split:** Percentage of the train split contaminated. 0 means no contamination; 100 means that the dataset has been fully compromised.
@@ -90,4 +90,5 @@ COLUMN_DESC_MARKDOWN = """
 - **Approach:** Data-based or model-based approach. Data-based approaches search in publicly available data instances of evaluation benchmarks. Model-based approaches attempt to detect data contamination in already pre-trained models.
 - **Reference:** Paper or any other resource describing how this contamination case has been detected.
 - **PR Link:** Link to the PR in which the contamination case was described.
-
+
+""".strip()
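For anyone who wants to slice the cleaned report locally, here is a minimal loading sketch with pandas. The column names and their order are assumptions inferred from the semicolon-separated rows and the column descriptions above; the Space itself loads the data through dataset.get_dataframe, which may name things differently.

```python
# Minimal sketch, not the Space's own loader. Column names and ordering are
# assumed from the row layout and the descriptions in markdown.py.
import pandas as pd

COLUMNS = [
    "Evaluation Dataset", "Subset", "Contaminated Source", "Version", "Type",
    "Train Split", "Dev Split", "Test Split", "Approach", "Reference", "PR Link",
]

df = pd.read_csv("contamination_report.csv", sep=";", header=None, names=COLUMNS)

# Example: keep only model-based evidence, highest test-split contamination first.
model_based = (
    df[df["Approach"] == "model-based"]
    .sort_values("Test Split", ascending=False)
)
print(model_based[["Evaluation Dataset", "Contaminated Source", "Test Split"]].head())
```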
postprocessing.py
CHANGED
@@ -17,9 +17,6 @@ def remove_duplicates(data):
 def fix_arxiv_links(data):
     return [[*item[:-2], item[-2].replace("arxiv.org/pdf", "arxiv.org/abs"), item[-1]] for item in data]
 
-def fix_openreview_links(data):
-    return [[*item[:-2], item[-2].replace("openreview.net/pdf", "openreview.net/forum"), item[-1]] for item in data]
-
 def sort_data(data):
     return sorted(data, key=lambda x: (x[0], x[1], x[2], x[3], x[-1]))
 
@@ -28,7 +25,6 @@ def main():
     data = sort_data(data)
     data = remove_duplicates(data)
     data = fix_arxiv_links(data)
-    data = fix_openreview_links(data)
     print("Total datapoints:", len(data))
 
     with open("contamination_report.csv", 'w') as f:
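As a quick sanity check of the link normalization that remains in the pipeline, the snippet below exercises the fix_arxiv_links definition from the diff above on a single illustrative row (the row values are made up for the example):

```python
# fix_arxiv_links as defined in postprocessing.py: rewrite the second-to-last
# field (the Reference URL) from arxiv.org/pdf to arxiv.org/abs.
def fix_arxiv_links(data):
    return [[*item[:-2], item[-2].replace("arxiv.org/pdf", "arxiv.org/abs"), item[-1]] for item in data]

# Illustrative row only; real rows come from contamination_report.csv.
rows = [["gsm8k", "", "GPT-4", "", "model", "100.0", "", "1.0", "data-based",
         "https://arxiv.org/pdf/2303.08774", "11"]]

print(fix_arxiv_links(rows)[0][-2])  # -> https://arxiv.org/abs/2303.08774
```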