pknayak committed on
Commit c1f7d31
Parent: fc57df2

refactoring the code


Define en_stopwords once and reuse it throughout the code in place of the inline stopwords.words(...) calls (a standalone sketch of the pattern follows the diff).

Files changed (1): app.py +6 -4
app.py CHANGED
@@ -160,6 +160,9 @@ def call_functions(domain):
 #------------------------ SENTIMENT ANALYZER------------------------------------------
 #--------------------------------------------------------------------------------------
 
+# Get English stopwords
+en_stopwords = stopwords.words('english')
+
 #---------------- Data Prepocessing ----------
 def re_breakline(text_list):
     return [re.sub('[\n\r]', ' ', r) for r in text_list]
@@ -197,14 +200,13 @@ def re_whitespaces(text_list):
     white_spaces_end = [re.sub('[ \t]+$', '', r) for r in white_spaces]
     return white_spaces_end
 
-def stopwords_removal(text, cached_stopwords=stopwords.words('english')):
+def stopwords_removal(text, cached_stopwords=en_stopwords):
     return [c.lower() for c in text.split() if c.lower() not in cached_stopwords]
 
 def stemming_process(text, stemmer=RSLPStemmer()):
     return [stemmer.stem(c) for c in text.split()]
 
-# Get English stopwords
-en_stopwords = stopwords.words('english')
+
 
 class ApplyRegex(BaseEstimator, TransformerMixin):
 
@@ -276,7 +278,7 @@ vectorizer = TfidfVectorizer(max_features=300, min_df=7, max_df=0.8, stop_words=
 # Building the Pipeline
 text_pipeline = Pipeline([
     ('regex', ApplyRegex(regex_transformers)),
-    ('stopwords', StopWordsRemoval(stopwords.words('portuguese'))),
+    ('stopwords', StopWordsRemoval(en_stopwords)),
     ('stemming', StemmingProcess(RSLPStemmer())),
     ('text_features', TextFeatureExtraction(vectorizer))
 ])
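
For context, here is a minimal, self-contained sketch of the pattern this commit applies: build the stopword list once at module level and reuse it both as the helper's default argument and in the pipeline. The StopWordsRemoval class below is a simplified stand-in for the transformer defined in app.py (its real implementation is not part of this diff), and the nltk.download call is an assumed setup step:

# Sketch only: StopWordsRemoval is a simplified stand-in, not the class from app.py.
import nltk
from nltk.corpus import stopwords
from sklearn.base import BaseEstimator, TransformerMixin

nltk.download('stopwords', quiet=True)  # assumed setup step; app.py may handle this elsewhere

# Built once at import time and shared by every consumer below.
en_stopwords = stopwords.words('english')

def stopwords_removal(text, cached_stopwords=en_stopwords):
    # Lowercase each token and keep it only if it is not a stopword.
    return [c.lower() for c in text.split() if c.lower() not in cached_stopwords]

class StopWordsRemoval(BaseEstimator, TransformerMixin):
    # Simplified stand-in: applies stopwords_removal to every document in X.
    def __init__(self, text_stopwords):
        self.text_stopwords = text_stopwords

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return [' '.join(stopwords_removal(doc, self.text_stopwords)) for doc in X]

print(stopwords_removal('This is a very simple example sentence'))
# ['simple', 'example', 'sentence']

Note that a default argument such as cached_stopwords=en_stopwords is evaluated once, when the function is defined, so the old inline stopwords.words('english') default was already computed only once. The practical change in this commit is that the pipeline now shares the same English list with the helper, where it previously built a separate Portuguese one.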