import functools
import re
import sys

import benepar
from huggingface_hub import hf_hub_download

# Hugging Face Hub coordinates of the parser checkpoint used by `parse`.
_MODEL_REPO_ID = "nielklug/enhg_parser"
_MODEL_FILENAME = 'new-convbert-german-europeana0_dev=83.03.pt'


@functools.lru_cache(maxsize=1)
def _load_parser():
    """Resolve the checkpoint file and build the benepar parser once.

    The result is memoised so that repeated `parse` calls reuse the same
    parser object instead of re-resolving the checkpoint and reloading the
    model for every sentence.
    """
    model_path = hf_hub_download(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)
    return benepar.Parser(model_path)


def parse(words) -> str:
    """Parse a pre-tokenized sentence and return its tree as one line.

    Args:
        words: Iterable of token strings making up one sentence. Round
            parentheses inside tokens are replaced by the placeholders
            ``-LRB-`` / ``-RRB-`` before the tokens are handed to benepar.

    Returns:
        The string form of the parse result with bracket placeholders
        turned back into backslash-escaped brackets, all whitespace runs
        collapsed to single spaces, and the space in front of every
        opening parenthesis removed.
    """
    parser = _load_parser()

    # Literal parentheses would clash with the bracket syntax of the tree
    # string, so they are swapped for placeholders on the way in.
    escaped_words = [
        word.replace('(', '-LRB-').replace(')', '-RRB-') for word in words
    ]
    input_sentence = benepar.InputSentence(words=escaped_words)
    tree = parser.parse(input_sentence)

    # Turn placeholders back into backslash-escaped brackets. The order of
    # the replacements is kept exactly as in the original chain.
    tree = (
        str(tree)
        .replace('-LRB-', '\\(')
        .replace('-RRB-', '\\)')
        .replace('-LSB-', '\\[')
        .replace('-RSB-', '\\]')
        # Escapes the second parenthesis of a literal '($(-' sequence.
        # NOTE(review): presumably targets nodes labelled '$(' — confirm.
        .replace('($(-', '($\\(-')
    )

    # Put the whole parse tree on a single line.
    tree = re.sub(r'\s+', ' ', tree.strip())
    # Drop the space that precedes each opening parenthesis.
    tree = re.sub(r' \(', '(', tree)
    return tree