zhuolisam committed
Commit b068d77
1 Parent(s): 8eba77e

fix nltk resources

Files changed (1):
  1. preprocessing.py +5 -4
preprocessing.py CHANGED
@@ -8,10 +8,11 @@ from nltk import word_tokenize, sent_tokenize
 from nltk.corpus import stopwords
 from nltk.stem import LancasterStemmer, WordNetLemmatizer
 
-download_path = os.path.join(os.getcwd(), 'nltk_packages')
-nltk.data.path.append(download_path)
-nltk.download('wordnet', download_dir=download_path)
-nltk.download('stopwords', download_dir=download_path)
+# download_path = os.path.join(os.getcwd(), 'nltk_packages')
+# nltk.data.path.append(download_path)
+nltk.download('wordnet')
+nltk.download('stopwords')
+nltk.download('punkt')
 
 def remove_non_ascii(words):
     """Remove non-ASCII characters from list of tokenized words"""