Spaces:
Runtime error
Runtime error
refactoring the code
Browse files — Replacing the en_stopwords all over the code
app.py
CHANGED
@@ -160,6 +160,9 @@ def call_functions(domain):
|
|
160 |
#------------------------ SENTIMENT ANALYZER------------------------------------------
|
161 |
#--------------------------------------------------------------------------------------
|
162 |
|
|
|
|
|
|
|
163 |
#---------------- Data Preprocessing ----------
|
164 |
def re_breakline(text_list):
|
165 |
return [re.sub('[\n\r]', ' ', r) for r in text_list]
|
@@ -197,14 +200,13 @@ def re_whitespaces(text_list):
|
|
197 |
white_spaces_end = [re.sub('[ \t]+$', '', r) for r in white_spaces]
|
198 |
return white_spaces_end
|
199 |
|
200 |
-
def stopwords_removal(text, cached_stopwords=
|
201 |
return [c.lower() for c in text.split() if c.lower() not in cached_stopwords]
|
202 |
|
203 |
def stemming_process(text, stemmer=RSLPStemmer()):
|
204 |
return [stemmer.stem(c) for c in text.split()]
|
205 |
|
206 |
-
|
207 |
-
en_stopwords = stopwords.words('english')
|
208 |
|
209 |
class ApplyRegex(BaseEstimator, TransformerMixin):
|
210 |
|
@@ -276,7 +278,7 @@ vectorizer = TfidfVectorizer(max_features=300, min_df=7, max_df=0.8, stop_words=
|
|
276 |
# Building the Pipeline
|
277 |
text_pipeline = Pipeline([
|
278 |
('regex', ApplyRegex(regex_transformers)),
|
279 |
-
('stopwords', StopWordsRemoval(
|
280 |
('stemming', StemmingProcess(RSLPStemmer())),
|
281 |
('text_features', TextFeatureExtraction(vectorizer))
|
282 |
])
|
|
|
160 |
#------------------------ SENTIMENT ANALYZER------------------------------------------
|
161 |
#--------------------------------------------------------------------------------------
|
162 |
|
163 |
+
# Get English stopwords
|
164 |
+
en_stopwords = stopwords.words('english')
|
165 |
+
|
166 |
#---------------- Data Preprocessing ----------
|
167 |
def re_breakline(text_list):
|
168 |
return [re.sub('[\n\r]', ' ', r) for r in text_list]
|
|
|
200 |
white_spaces_end = [re.sub('[ \t]+$', '', r) for r in white_spaces]
|
201 |
return white_spaces_end
|
202 |
|
203 |
+
def stopwords_removal(text, cached_stopwords=en_stopwords):
|
204 |
return [c.lower() for c in text.split() if c.lower() not in cached_stopwords]
|
205 |
|
206 |
def stemming_process(text, stemmer=RSLPStemmer()):
|
207 |
return [stemmer.stem(c) for c in text.split()]
|
208 |
|
209 |
+
|
|
|
210 |
|
211 |
class ApplyRegex(BaseEstimator, TransformerMixin):
|
212 |
|
|
|
278 |
# Building the Pipeline
|
279 |
text_pipeline = Pipeline([
|
280 |
('regex', ApplyRegex(regex_transformers)),
|
281 |
+
('stopwords', StopWordsRemoval(en_stopwords)),
|
282 |
('stemming', StemmingProcess(RSLPStemmer())),
|
283 |
('text_features', TextFeatureExtraction(vectorizer))
|
284 |
])
|