File size: 1,585 Bytes
6ed21b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import streamlit as st
from parse import parse_text
import nltk
from nltk import Tree
import pandas as pd
import re
from nltk.tree.prettyprinter import TreePrettyPrinter


# Page header and the free-text input box. `text` receives whatever the user
# typed and is checked further down before any parsing is attempted.
st.title("MHG parsing system (demo)")
text = st.text_area("""This is a simple demo of a Middle High German (MHG) parsing system using delexicalization method.\n\n
                    Enter some MHG text below!""")

# Sample sentences the user can copy into the box above. The second example
# previously read "geseitvon" (two words fused where a verse line break was
# lost); it is written as two words so that pasting it yields the right tokens.
st.text("""Example MHG sentences:
1. Swer an rehte güete wendet sîn gemüete, dem volget sælde und êre, des gît gewisse 
lêre künec Artûs der guote, der mit rîters muote nâch lobe kunde strîten.
2. Uns ist in alten mæren wunders vil geseit von helden lobebæren, von grôzer arebeit,
von freuden, hôchgezîten, von weinen und von klagen, von küener recken strîten muget 
ir nu wunder hœren sagen.""")

# Make sure NLTK's 'punkt' data is available. It is not used directly in this
# file; presumably parse_text relies on it for tokenization (confirm in parse.py).
nltk.download('punkt')


# Only parse when the box holds something other than whitespace; a blank or
# whitespace-only submission has nothing to tag or parse.
if text.strip():
  # parse_text is a project helper. From how its result is used below, it
  # yields three parallel sequences (tokens, tags, per-tag scores) and the
  # parse as a bracketed string.
  tokens, tags, probs, parse_tree = parse_text(text)

  # One table row per token: the token, its tag, and the accompanying score.
  df = pd.DataFrame(list(zip(tokens, tags, probs)), columns=['Token', 'Tag', 'Prob.'])

  # Convert the bracketed parse into an NLTK Tree for drawing. The regex first
  # removes every ".suffix" run (a dot followed by characters up to the next
  # space or closing parenthesis); presumably these are extra label
  # annotations that should not appear in the drawn tree.
  simplified_parse = re.sub(r'(\.[^ )]+)+', '', parse_tree)
  t = Tree.fromstring(simplified_parse)

  tree_svg = TreePrettyPrinter(t).svg(nodecolor='black', leafcolor='black', funccolor='black')

  col1 = st.columns(1)[0]
  col1.header("POS tagging result:")
  col1.table(df)

  col2 = st.columns(1)[0]
  col2.header("Parsing result:")
  # st.write renders strings as Markdown, so '_', '$' and '*' in the raw parse
  # would be read as emphasis/LaTeX markers. Prefix each with a backslash.
  # The backslashes are written explicitly ('\\_') because '\_' is an invalid
  # escape sequence in a normal string literal and triggers a SyntaxWarning on
  # current Python versions.
  markdown_escapes = str.maketrans({'_': '\\_', '$': '\\$', '*': '\\*'})
  col2.write(parse_tree.translate(markdown_escapes))

  # Display the tree drawing (an SVG string) in the Streamlit app.
  # NOTE(review): newer Streamlit releases deprecate use_column_width in favour
  # of use_container_width; switch once the deployed version is confirmed.
  col2.image(tree_svg, use_column_width=True)