Spaces:
Sleeping
Sleeping
File size: 570 Bytes
6ed21b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import re
from MHGTagger.rnn_annotate import annotate
from Tagset_Mappings.tag_mapping import map_tags
from parsing.src.parse import run_parse
from nltk import word_tokenize
def parse_text(text):
    """Run the tokenize -> annotate -> map tags -> parse pipeline on ``text``.

    Returns a 4-tuple ``(tokens, tags, probs, parse_tree)`` where:

    * ``tokens`` and ``probs`` are the first and third values returned by
      ``annotate`` (the tokens may differ from the raw tokenizer output if
      ``annotate`` alters them -- TODO confirm against MHGTagger),
    * ``tags`` is the second value returned by ``annotate`` after being
      passed through ``map_tags``,
    * ``parse_tree`` is the first element of the ``run_parse`` result.
    """
    raw_tokens = tokenize(text)
    tokens, raw_tags, probs = annotate(raw_tokens)
    tags = map_tags(raw_tags)
    # run_parse returns an indexable result; only its first entry is used.
    parses = run_parse(tokens, tags)
    return tokens, tags, probs, parses[0]
def tokenize(text: str):
    """Split ``text`` into tokens after spacing out punctuation.

    Each of the marks ``. , ; : ? ! "`` that is followed by whitespace is
    rewritten as the mark surrounded by single spaces (any whitespace
    directly before it is absorbed). A second pass applies the same
    rewrite to marks followed by a literal space, which picks up marks in
    a run of consecutive punctuation that the first pass skipped over.
    The padded string is then handed to NLTK's ``word_tokenize``.
    """
    padded = text
    # Order matters: the second pattern operates on the first one's output.
    for pattern in (r'\s*([.,;:?!"])\s', r'\s*([.,;:?!"]) '):
        padded = re.sub(pattern, r' \1 ', padded)
    return word_tokenize(padded)
|