Add more functionalities
Browse files
app.py
CHANGED
@@ -4,6 +4,9 @@ from typing import AnyStr
|
|
4 |
import streamlit as st
|
5 |
from bs4 import BeautifulSoup
|
6 |
|
|
|
|
|
|
|
7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
8 |
from transformers import pipeline
|
9 |
import os
|
@@ -98,15 +101,20 @@ def format_explainer_html(html_string):
|
|
98 |
|
99 |
return p
|
100 |
|
101 |
-
def
|
102 |
filenames = []
|
103 |
for file in os.listdir('./sample-articles/'):
|
104 |
if file.endswith('.txt'):
|
105 |
filenames.append(file.replace('.txt', ''))
|
106 |
return filenames
|
107 |
|
108 |
-
def
|
109 |
-
with open(f'./sample-
|
|
|
|
|
|
|
|
|
|
|
110 |
data = f.read()
|
111 |
return data
|
112 |
|
@@ -141,25 +149,25 @@ if 'results' not in st.session_state:
|
|
141 |
st.session_state.results = []
|
142 |
|
143 |
# Page
|
144 |
-
st.title('π€¬ Dutch Toxic Comment Detection')
|
145 |
-
st.markdown("""This demo showcases two Dutch toxic comment detection models.""")
|
146 |
-
|
147 |
-
# Introduction
|
148 |
-
st.markdown(f"""Both models were trained using a sequence classification task on a translated [Jigsaw Toxicity dataset](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge) which contains toxic online comments.
|
149 |
-
|
150 |
-
st.markdown(f"""For a more comprehensive overview of the models check out their model card on π€ Model Hub: [distilbert-base-dutch-toxic-comments]({model_names_to_URLs['ml6team/distilbert-base-dutch-cased-toxic-comments']}) and [RobBERT-dutch-base-toxic-comments]({model_names_to_URLs['ml6team/robbert-dutch-base-toxic-comments']}).
|
151 |
-
""")
|
152 |
-
st.markdown("""Enter a comment that you want to classify below. The model will determine the probability that it is toxic and highlights how much each token contributes to its decision:
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
Try it yourself! π""",
|
162 |
-
|
163 |
|
164 |
|
165 |
# Demo
|
@@ -172,24 +180,37 @@ Try it yourself! π""",
|
|
172 |
# submitted = rightmost_col.form_submit_button("Classify",
|
173 |
# help="Classify comment")
|
174 |
|
175 |
-
with st.form("article-
|
176 |
# TODO: should probably set a minimum length of article or something
|
177 |
-
selected_article = st.selectbox('Select an article or provide your own:',
|
178 |
)#index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
|
179 |
-
st.session_state.article_text =
|
180 |
article_text = st.text_area(
|
181 |
-
|
182 |
value = st.session_state.article_text)
|
183 |
_, rightmost_col = st.columns([6,1])
|
184 |
get_summary = rightmost_col.form_submit_button("Generate summary",
|
185 |
help="Generate summary for the given article text")
|
186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
# Listener
|
188 |
if get_summary:
|
189 |
if article_text:
|
190 |
-
with st.spinner('
|
191 |
#classify_comment(article_text, selected_model)
|
192 |
-
|
193 |
else:
|
194 |
st.error('**Error**: No comment to classify. Please provide a comment.')
|
195 |
|
|
|
4 |
import streamlit as st
|
5 |
from bs4 import BeautifulSoup
|
6 |
|
7 |
+
import spacy
|
8 |
+
from spacy import displacy
|
9 |
+
|
10 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
11 |
from transformers import pipeline
|
12 |
import os
|
|
|
101 |
|
102 |
return p
|
103 |
|
104 |
+
def list_all_article_names() -> list:
    """Return the names of the sample articles found on disk.

    Scans ``./sample-articles/`` for files ending in ``.txt`` and returns
    their names without that extension, sorted alphabetically.

    Returns:
        list: Article names (file names minus the trailing ``.txt``).

    Raises:
        FileNotFoundError: If ``./sample-articles/`` does not exist.
    """
    suffix = '.txt'
    # Slice off only the trailing extension: ``str.replace`` would also
    # remove a '.txt' that appears in the middle of a file name.
    # ``os.listdir`` returns entries in arbitrary order, so sort them to
    # keep the resulting option list stable between runs.
    return sorted(
        entry[:-len(suffix)]
        for entry in os.listdir('./sample-articles/')
        if entry.endswith(suffix)
    )
|
110 |
|
111 |
+
def fetch_article_contents(filename: str) -> str:
    """Return the full text of a sample article.

    Args:
        filename: Article name without extension. It is lower-cased before
            the lookup, so the file on disk must have a lower-case name.

    Returns:
        The contents of ``./sample-articles/<filename>.txt``.

    Raises:
        FileNotFoundError: If no such article file exists.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # default locale encoding of the machine running the app.
    with open(f'./sample-articles/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return f.read()
|
115 |
+
|
116 |
+
def fetch_summary_contents(filename: str) -> str:
    """Return the stored summary text for a sample article.

    Args:
        filename: Article name without extension. It is lower-cased before
            the lookup, so the file on disk must have a lower-case name.

    Returns:
        The contents of ``./sample-summaries/<filename>.txt``.

    Raises:
        FileNotFoundError: If no summary file exists for that article.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # default locale encoding of the machine running the app.
    with open(f'./sample-summaries/{filename.lower()}.txt', 'r', encoding='utf-8') as f:
        return f.read()
|
120 |
|
|
|
149 |
st.session_state.results = []
|
150 |
|
151 |
# Page
|
152 |
+
# st.title('🤬 Dutch Toxic Comment Detection')
|
153 |
+
# st.markdown("""This demo showcases two Dutch toxic comment detection models.""")
|
154 |
+
#
|
155 |
+
# # Introduction
|
156 |
+
# st.markdown(f"""Both models were trained using a sequence classification task on a translated [Jigsaw Toxicity dataset](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge) which contains toxic online comments.
|
157 |
+
# The first model is a fine-tuned multilingual [DistilBERT](https://huggingface.co/distilbert-base-multilingual-cased) model whereas the second is a fine-tuned Dutch RoBERTa-based model called [RobBERT](https://huggingface.co/pdelobelle/robbert-v2-dutch-base).""")
|
158 |
+
# st.markdown(f"""For a more comprehensive overview of the models check out their model card on 🤗 Model Hub: [distilbert-base-dutch-toxic-comments]({model_names_to_URLs['ml6team/distilbert-base-dutch-cased-toxic-comments']}) and [RobBERT-dutch-base-toxic-comments]({model_names_to_URLs['ml6team/robbert-dutch-base-toxic-comments']}).
|
159 |
+
# """)
|
160 |
+
# st.markdown("""Enter a comment that you want to classify below. The model will determine the probability that it is toxic and highlights how much each token contributes to its decision:
|
161 |
+
# <font color="black">
|
162 |
+
# <span style="background-color: rgb(250, 219, 219); opacity: 1;">r</span><span style="background-color: rgb(244, 179, 179); opacity: 1;">e</span><span style="background-color: rgb(238, 135, 135); opacity: 1;">d</span>
|
163 |
+
# </font>
|
164 |
+
# tokens indicate toxicity whereas
|
165 |
+
# <font color="black">
|
166 |
+
# <span style="background-color: rgb(224, 251, 224); opacity: 1;">g</span><span style="background-color: rgb(197, 247, 197); opacity: 1;">re</span><span style="background-color: rgb(121, 236, 121); opacity: 1;">en</span>
|
167 |
+
# </font> tokens indicate the opposite.
|
168 |
+
#
|
169 |
+
# Try it yourself! 👇""",
|
170 |
+
# unsafe_allow_html=True)
|
171 |
|
172 |
|
173 |
# Demo
|
|
|
180 |
# submitted = rightmost_col.form_submit_button("Classify",
|
181 |
# help="Classify comment")
|
182 |
|
183 |
+
# Input form: pick one of the bundled sample articles (or edit the text) and
# submit it for summarisation.
with st.form("article-input"):
    # TODO: should probably set a minimum length of article or something
    selected_article = st.selectbox(
        'Select an article or provide your own:',
        list_all_article_names(),
    )
    # Pre-fill the text area with the chosen sample. Guard against an empty
    # selection (presumably what the select box yields when there are no
    # sample articles) so the lookup does not fail on a missing name.
    st.session_state.article_text = (
        fetch_article_contents(selected_article) if selected_article else ''
    )
    article_text = st.text_area(
        # The label previously still referred to classifying a Dutch comment,
        # a leftover from the demo this app was derived from.
        label='Enter the article you want to summarize below:',
        value=st.session_state.article_text)
    # Narrow right-hand column keeps the submit button aligned to the right.
    _, rightmost_col = st.columns([6, 1])
    get_summary = rightmost_col.form_submit_button(
        "Generate summary",
        help="Generate summary for the given article text")
|
194 |
|
195 |
+
|
196 |
+
def display_summary(article_name: str):
    """Render the stored summary of an article with its entities highlighted.

    Loads the summary text for *article_name*, runs spaCy's
    ``en_core_web_sm`` pipeline over it, and writes displaCy's entity markup
    to the page, followed by the plain summary text. If no summary file
    exists for the article, an error message is shown instead.

    Args:
        article_name: Name of the sample article whose summary is displayed.
    """
    try:
        summary_content = fetch_summary_contents(article_name)
    except FileNotFoundError:
        # Not every sample article has a matching summary file; report that
        # in the page instead of letting the exception abort the script run.
        st.error(f'**Error**: No summary available for "{article_name}".')
        return

    st.subheader("GENERATED SUMMARY")
    st.markdown("######")

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(summary_content)
    # displaCy returns HTML markup; newlines are flattened to spaces before
    # the markup is embedded in the wrapper below.
    html = displacy.render(doc, style="ent")
    html = html.replace("\n", " ")
    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
    st.markdown(summary_content)
|
207 |
+
|
208 |
# Listener
|
209 |
# React to the form submission: show the summary when there is article text,
# otherwise tell the user what is missing.
if get_summary:
    if article_text:
        with st.spinner('Generating summary...'):
            display_summary(selected_article)
    else:
        # The message previously referred to classifying a comment, which
        # does not match this summarisation form.
        st.error('**Error**: No article to summarize. Please provide an article.')
|
216 |
|
requirements.txt
CHANGED
@@ -2,3 +2,5 @@ beautifulsoup4==4.10.0
|
|
2 |
streamlit==1.2.0
|
3 |
transformers==4.15.0
|
4 |
transformers-interpret==0.5.2
|
|
|
|
|
|
2 |
streamlit==1.2.0
|
3 |
transformers==4.15.0
|
4 |
transformers-interpret==0.5.2
|
5 |
+
spacy==3.0.0
|
6 |
+
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
|
sample-articles/{Biden.txt → biden.txt}
RENAMED
File without changes
|
sample-articles/{Protestors.txt → protestors.txt}
RENAMED
File without changes
|
sample-summaries/protestors.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Protestors demonstrate at Boise State University after professor's remarks. Professor said men should be prioritized for fields of study such as engineering, medicine and law. Boise State issues statement saying it doesn't endorse comments, but "cannot infringe" on First Amendment rights. State representative: Professor's beliefs "completely sexist and reflect a society that no longer exists.")."CNN has reached out to the professor for comment but has not heard back; he didn't respond to CNN's request for comment. "Women shouldn't have to spend time today defending our value in society," she says.
|