Spaces:
Sleeping
Sleeping
import re | |
from MHGTagger.rnn_annotate import annotate | |
from Tagset_Mappings.tag_mapping import map_tags | |
from parsing.src.parse import run_parse | |
from nltk import word_tokenize | |
def parse_text(text):
    """Run the full analysis pipeline on a raw text string.

    The text is tokenized with ``tokenize``, the tokens are passed to
    ``annotate`` (which hands back tokens, tags and a third value bound
    here as ``probs``), the tags are converted with ``map_tags``, and
    finally ``run_parse`` is called on the tokens and mapped tags.

    Returns:
        A 4-tuple ``(tokens, tags, probs, parse_tree)`` where ``tokens``
        and ``probs`` are what ``annotate`` returned, ``tags`` is the
        output of ``map_tags``, and ``parse_tree`` is the first element
        of the ``run_parse`` result.
    """
    raw_tokens = tokenize(text)
    # annotate returns its own token sequence; that one (not the raw
    # tokenizer output) is what gets parsed and returned.
    tokens, raw_tags, probs = annotate(raw_tokens)
    tags = map_tags(raw_tags)
    # NOTE(review): presumably run_parse yields one result per input
    # sentence and only the first is wanted — confirm against the parser.
    parse_results = run_parse(tokens, tags)
    return tokens, tags, probs, parse_results[0]
def tokenize(text: str):
    """Split *text* into tokens after spacing out punctuation.

    Two regex passes are applied in order. Each one looks for one of the
    characters ``. , ; : ? ! "`` together with any whitespace before it
    and one following whitespace character (any whitespace in the first
    pass, a literal space in the second), and rewrites the match as the
    punctuation mark surrounded by single spaces. The result is then
    handed to NLTK's ``word_tokenize``.

    Returns:
        Whatever ``word_tokenize`` returns for the padded text.
    """
    padded = text
    # Order matters: the second pass runs on the output of the first.
    for pattern in (r'\s*([.,;:?!"])\s', r'\s*([.,;:?!"]) '):
        padded = re.sub(pattern, r' \1 ', padded)
    return word_tokenize(padded)