"""Streamlit demo page for a Middle High German (MHG) parsing system.

The page shows a text area plus two example sentences. Once the user has
entered some text, it is passed to ``parse_text`` and the page displays:

* a table of tokens with their tags and probabilities, and
* the bracketed parse string together with an SVG drawing of the tree.
"""
import re

import nltk
import pandas as pd
import streamlit as st
from nltk import Tree
from nltk.tree.prettyprinter import TreePrettyPrinter

from parse import parse_text

# One or more runs of "." followed by characters up to the next space or ")".
# These runs are removed from the bracketed parse before it is handed to
# NLTK for drawing.
# NOTE(review): presumably these are dot-separated annotations on the node
# labels -- confirm against the output format of parse_text.
_DOT_SUFFIX_RE = re.compile(r'(\.[^ )]+)+')

# Characters that Streamlit's Markdown renderer would otherwise interpret
# (emphasis markers and LaTeX delimiters). Raw strings are used because
# '\_', '\$' and '\*' are invalid escape sequences in ordinary literals.
_MARKDOWN_ESCAPES = str.maketrans({'_': r'\_', '$': r'\$', '*': r'\*'})

INTRO_TEXT = (
    "This is a simple demo of a Middle High German (MHG) parsing system "
    "using delexicalization method.\n\n Enter some MHG text below!"
)

EXAMPLE_TEXT = (
    "Example MHG sentences: "
    "1. Swer an rehte güete wendet sîn gemüete, dem volget sælde und êre, "
    "des gît gewisse lêre künec Artûs der guote, der mit rîters muote "
    "nâch lobe kunde strîten. "
    "2. Uns ist in alten mæren wunders vil geseit von helden lobebæren, "
    "von grôzer arebeit, von freuden, hôchgezîten, von weinen und von "
    "klagen, von küener recken strîten muget ir nu wunder hœren sagen."
)

st.title("MHG parsing system (demo)")
text = st.text_area(INTRO_TEXT)
st.text(EXAMPLE_TEXT)

nltk.download('punkt')

# Skip parsing for empty or whitespace-only input: there is nothing to tag.
if text and text.strip():
    tokens, tags, probs, parse_tree = parse_text(text)

    # Table with one row per token: the token, its tag and the probability.
    tagged_df = pd.DataFrame(
        list(zip(tokens, tags, probs)),
        columns=['Token', 'Tag', 'Prob.'],
    )

    # Convert the bracketed parse into an NLTK Tree and render it as SVG.
    tree = Tree.fromstring(_DOT_SUFFIX_RE.sub('', parse_tree))
    tree_svg = TreePrettyPrinter(tree).svg(
        nodecolor='black', leafcolor='black', funccolor='black'
    )

    tagging_col = st.columns(1)[0]
    tagging_col.header("POS tagging result:")
    tagging_col.table(tagged_df)

    parsing_col = st.columns(1)[0]
    parsing_col.header("Parsing result:")
    # The raw bracket string is written as Markdown, so escape its
    # Markdown/LaTeX metacharacters to show them literally.
    parsing_col.write(parse_tree.translate(_MARKDOWN_ESCAPES))
    # Display the tree drawing in the Streamlit app.
    # NOTE(review): use_column_width may be deprecated in newer Streamlit
    # releases -- check against the installed version before changing it.
    parsing_col.image(tree_svg, use_column_width=True)