pknayak committed on
Commit c1f7d31
Parent: fc57df2

refactoring the code


Define en_stopwords once and reuse it throughout the code in place of the inline stopwords.words(...) calls (a standalone sketch of the pattern follows the diff).

Files changed (1): app.py +6 -4
app.py CHANGED
@@ -160,6 +160,9 @@ def call_functions(domain):
 #------------------------ SENTIMENT ANALYZER------------------------------------------
 #--------------------------------------------------------------------------------------
 
+# Get English stopwords
+en_stopwords = stopwords.words('english')
+
 #---------------- Data Prepocessing ----------
 def re_breakline(text_list):
     return [re.sub('[\n\r]', ' ', r) for r in text_list]
@@ -197,14 +200,13 @@ def re_whitespaces(text_list):
     white_spaces_end = [re.sub('[ \t]+$', '', r) for r in white_spaces]
     return white_spaces_end
 
-def stopwords_removal(text, cached_stopwords=stopwords.words('english')):
+def stopwords_removal(text, cached_stopwords=en_stopwords):
     return [c.lower() for c in text.split() if c.lower() not in cached_stopwords]
 
 def stemming_process(text, stemmer=RSLPStemmer()):
     return [stemmer.stem(c) for c in text.split()]
 
-# Get English stopwords
-en_stopwords = stopwords.words('english')
+
 
 class ApplyRegex(BaseEstimator, TransformerMixin):
 
@@ -276,7 +278,7 @@ vectorizer = TfidfVectorizer(max_features=300, min_df=7, max_df=0.8, stop_words=
 # Building the Pipeline
 text_pipeline = Pipeline([
     ('regex', ApplyRegex(regex_transformers)),
-    ('stopwords', StopWordsRemoval(stopwords.words('portuguese'))),
+    ('stopwords', StopWordsRemoval(en_stopwords)),
     ('stemming', StemmingProcess(RSLPStemmer())),
     ('text_features', TextFeatureExtraction(vectorizer))
 ])
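
For context, here is a minimal, self-contained sketch of the pattern this commit applies: build the stopword list once at module level and reuse it both as the helper's default argument and in the pipeline. The StopWordsRemoval class below is a simplified stand-in for the transformer defined in app.py (its real implementation is not part of this diff), and the nltk.download call is an assumed setup step:

# Sketch only: StopWordsRemoval is a simplified stand-in, not the class from app.py.
import nltk
from nltk.corpus import stopwords
from sklearn.base import BaseEstimator, TransformerMixin

nltk.download('stopwords', quiet=True)  # assumed setup step; app.py may handle this elsewhere

# Built once at import time and shared by every consumer below.
en_stopwords = stopwords.words('english')

def stopwords_removal(text, cached_stopwords=en_stopwords):
    # Lowercase each token and keep it only if it is not a stopword.
    return [c.lower() for c in text.split() if c.lower() not in cached_stopwords]

class StopWordsRemoval(BaseEstimator, TransformerMixin):
    # Simplified stand-in: applies stopwords_removal to every document in X.
    def __init__(self, text_stopwords):
        self.text_stopwords = text_stopwords

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return [' '.join(stopwords_removal(doc, self.text_stopwords)) for doc in X]

print(stopwords_removal('This is a very simple example sentence'))
# ['simple', 'example', 'sentence']

Note that a default argument such as cached_stopwords=en_stopwords is evaluated once, when the function is defined, so the old inline stopwords.words('english') default was already computed only once. The practical change in this commit is that the pipeline now shares the same English list with the helper, where it previously built a separate Portuguese one.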