mhg-parsing / app.py
nielklug's picture
init
6ed21b9
import streamlit as st
from parse import parse_text
import nltk
from nltk import Tree
import pandas as pd
import re
from nltk.tree.prettyprinter import TreePrettyPrinter
# Streamlit front end for the MHG parsing demo: reads text from the user, runs
# it through `parse_text`, then shows the POS-tagging table, the bracketed
# parse string and a rendered SVG of the parse tree.

# Matches one or more consecutive ".suffix" segments (a dot followed by
# characters other than space or ')'). They are removed from the bracketed
# parse before it is handed to NLTK — presumably these are function labels
# attached to node categories (TODO confirm against `parse_text` output).
# NOTE(review): the pattern is applied to the whole string, so a leaf token
# containing an inner dot (e.g. "a.b") would be truncated as well.
_LABEL_SUFFIX = re.compile(r'(\.[^ )]+)+')

# `st.write` renders strings as Markdown, where '_' and '*' mark emphasis and
# '$' starts a LaTeX expression; backslash-escape them so the bracketed parse
# is displayed literally. Raw strings are used because '\_', '\$' and '\*'
# are invalid escape sequences in ordinary string literals.
_MARKDOWN_ESCAPES = str.maketrans({'_': r'\_', '$': r'\$', '*': r'\*'})

st.title("MHG parsing system (demo)")

# The label of the text area doubles as the introductory blurb of the page.
text = st.text_area("""This is a simple demo of a Middle High German (MHG) parsing system using delexicalization method.\n\n
Enter some MHG text below!""")

st.text("""Example MHG sentences:
1. Swer an rehte güete wendet sîn gemüete, dem volget sælde und êre, des gît gewisse
lêre künec Artûs der guote, der mit rîters muote nâch lobe kunde strîten.
2. Uns ist in alten mæren wunders vil geseit von helden lobebæren, von grôzer arebeit,
von freuden, hôchgezîten, von weinen und von klagen, von küener recken strîten muget
ir nu wunder hœren sagen.""")

# Fetch the NLTK 'punkt' resource (presumably required by `parse_text` for
# tokenization — confirm). Streamlit re-executes this script on every
# interaction, so this call runs on each rerun.
nltk.download('punkt')

# Skip parsing for empty or whitespace-only input: there is nothing to tag,
# and an empty bracket string cannot be turned into a tree.
if text.strip():
    tokens, tags, probs, parse_tree = parse_text(text)

    # Create a table to show the tagged results: one row per token.
    zipped = list(zip(tokens, tags, probs))
    df = pd.DataFrame(zipped, columns=['Token', 'Tag', 'Prob.'])

    # Convert the bracket parse tree into an NLTK Tree (label suffixes
    # stripped first) and render it as an SVG string.
    t = Tree.fromstring(_LABEL_SUFFIX.sub('', parse_tree))
    tree_svg = TreePrettyPrinter(t).svg(
        nodecolor='black', leafcolor='black', funccolor='black'
    )

    col1 = st.columns(1)[0]
    col1.header("POS tagging result:")
    col1.table(df)

    col2 = st.columns(1)[0]
    col2.header("Parsing result:")
    # Show the original (unstripped) bracketed parse, Markdown-escaped.
    col2.write(parse_tree.translate(_MARKDOWN_ESCAPES))

    # Display the graph in the Streamlit app.
    # NOTE(review): newer Streamlit releases deprecate `use_column_width` in
    # favour of `use_container_width`; kept as-is because the deployed
    # Streamlit version is not visible from this file.
    col2.image(tree_svg, use_column_width=True)