Spaces:
Runtime error
Runtime error
Adding eng stopwords
Browse files
Adding the # Get English stopwords
en_stopwords = stopwords.words('english')
app.py
CHANGED
@@ -9,6 +9,12 @@ from sklearn.pipeline import Pipeline
|
|
9 |
from sklearn.base import BaseEstimator, TransformerMixin
|
10 |
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
#--------------------------------------------------------------------------------------
|
14 |
#------------------------ NEWS DATA RETRIEVER------------------------------------------
|
@@ -144,20 +150,6 @@ def call_functions(domain):
|
|
144 |
|
145 |
|
146 |
|
147 |
-
#----------------------------GRADIO APP--------------------------------------#
|
148 |
-
# # GRADIO APP USING INTERFACE
|
149 |
-
# # Create a Gradio interface
|
150 |
-
# iface = gr.Interface(
|
151 |
-
# fn=call_functions,
|
152 |
-
# inputs=gr.components.Textbox(label="Directory Path"),
|
153 |
-
# outputs=gr.components.Dataframe(type="pandas")
|
154 |
-
# )
|
155 |
-
# # Launch the Gradio app
|
156 |
-
# iface.launch(debug=True)
|
157 |
-
|
158 |
-
# GRADIO APP USING BLOCKS
|
159 |
-
|
160 |
-
|
161 |
|
162 |
|
163 |
#--------------------------------------------------------------------------------------
|
@@ -202,6 +194,11 @@ def re_whitespaces(text_list):
|
|
202 |
return white_spaces_end
|
203 |
|
204 |
# Class for regular expressions application
|
|
|
|
|
|
|
|
|
|
|
205 |
class ApplyRegex(BaseEstimator, TransformerMixin):
|
206 |
|
207 |
def __init__(self, regex_transformers):
|
|
|
9 |
from sklearn.base import BaseEstimator, TransformerMixin
|
10 |
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
|
11 |
|
12 |
+
import nltk
|
13 |
+
nltk.download('stopwords')
|
14 |
+
from nltk.corpus import stopwords
|
15 |
+
|
16 |
+
nltk.download('rslp')
|
17 |
+
from nltk.stem import RSLPStemmer
|
18 |
|
19 |
#--------------------------------------------------------------------------------------
|
20 |
#------------------------ NEWS DATA RETRIEVER------------------------------------------
|
|
|
150 |
|
151 |
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
|
155 |
#--------------------------------------------------------------------------------------
|
|
|
194 |
return white_spaces_end
|
195 |
|
196 |
# Class for regular expressions application
|
197 |
+
|
198 |
+
|
199 |
+
# Get English stopwords
|
200 |
+
en_stopwords = stopwords.words('english')
|
201 |
+
|
202 |
class ApplyRegex(BaseEstimator, TransformerMixin):
|
203 |
|
204 |
def __init__(self, regex_transformers):
|