# Download the spaCy model used for document tokenization.
# Installation / usage guide: https://spacy.io/usage
#
# en_core_web_trf is used because it is the best-performing model for
# tokenization.
#
# NOTE: the command must be on its own line; when it shares a line with the
# leading '#', the shell treats it as part of the comment and nothing runs.
python -m spacy download en_core_web_trf