pknayak committed on
Commit
177de08
1 Parent(s): 76e8a43

adding the missing methods

Browse files

adding the `stopwords_removal` and `stemming_process` functions

Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -195,8 +195,11 @@ def re_whitespaces(text_list):
195
  white_spaces_end = [re.sub('[ \t]+$', '', r) for r in white_spaces]
196
  return white_spaces_end
197
 
198
- # Class for regular expressions application
 
199
 
 
 
200
 
201
  # Get English stopwords
202
  en_stopwords = stopwords.words('english')
 
195
  white_spaces_end = [re.sub('[ \t]+$', '', r) for r in white_spaces]
196
  return white_spaces_end
197
 
198
def stopwords_removal(text, cached_stopwords=frozenset(stopwords.words('english'))):
    """Lowercase the whitespace-separated tokens of ``text`` and drop stopwords.

    Args:
        text: String to tokenize with ``str.split()``.
        cached_stopwords: Container of stopwords to exclude; tokens are
            compared after lowercasing, so entries should be lowercase.
            The default is built once, when the function is defined, from
            ``stopwords.words('english')``. It is a frozenset so that each
            membership test is a hash lookup and the shared default cannot
            be mutated between calls. Any container supporting ``in`` is
            accepted.

    Returns:
        list of the lowercased tokens that are not in ``cached_stopwords``,
        in their original order.
    """
    # Lowercase each token once and reuse it for both the filter and the output.
    lowered_tokens = (token.lower() for token in text.split())
    return [token for token in lowered_tokens if token not in cached_stopwords]
200
 
201
def stemming_process(text, stemmer=RSLPStemmer()):
    """Stem every whitespace-separated token of ``text``.

    Args:
        text: String to tokenize with ``str.split()``.
        stemmer: Object exposing a ``stem(token)`` method. The default
            ``RSLPStemmer`` instance is created once, when the function is
            defined, and is shared by every call that omits this argument.

    Returns:
        list with the result of ``stemmer.stem`` for each token, in order.
    """
    # NOTE(review): RSLPStemmer appears to be NLTK's Portuguese stemmer, while
    # this file loads English stopwords — confirm the language is intended.
    stems = []
    for token in text.split():
        stems.append(stemmer.stem(token))
    return stems
203
 
204
  # Get English stopwords
205
  en_stopwords = stopwords.words('english')