Spaces:
Running
Running
File size: 3,245 Bytes
4869be9 fb46644 4869be9 fb46644 0a30cc1 fb46644 40bd5b9 fb46644 4869be9 fb46644 0a30cc1 3b26fbf fb46644 0a30cc1 500e543 fb46644 864e374 5d29147 fb46644 0a30cc1 fb46644 4869be9 fb46644 513550a fb46644 5783140 fba9cb6 647214f fba9cb6 8ea9220 fba9cb6 fb46644 c44e3c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
from typing import Optional
import spacy
from spacy import displacy
import streamlit as st
from spacy_streamlit import visualize_parser, visualize_tokens, visualize_ner
import base64
from PIL import Image
# Literals shown on the page, named up front so the layout calls below read
# as a short sequence of steps.
INTRO_TEXT = "Welcome to our analyzer. Here you can parse the parts of speech (POS) and the syntactic relationships of any ancient Greek sentence. This analysis is done by our language models trained with transformers and the NLP library spaCy. Below, you can choose which model do you want to use (each model may produce a different analysis). Documentation about the linguistic terms used by our models to annotate your sentences can be found here. If you have any questions, please contact us at diogenet@ucsd.edu"
MODEL_NAMES = [
    "grc_proiel_lg",
    "grc_proiel_trf",
    "grc_proiel_sm",
    "grc_perseus_lg",
    "grc_perseus_trf",
    "grc_perseus_sm",
    "grc_ner_trf",
]
SAMPLE_TEXT = "ἐπὶ τοῦτον δὴ τὸν Ἄμασιν Καμβύσης ὁ Κύρου ἐστρατεύετο, ἄγων καί ἄλλους τῶν ἦρχε καὶ Ἑλλήνων Ἴωνάς τε καὶ Αἰολέας."

# Page configuration is issued before any other Streamlit call.
st.set_page_config(layout="wide")
st.image("logo.png", use_column_width=False, width=150)
st.title("Ancient Greek Syntax and Named Entities")
st.markdown(INTRO_TEXT)

# Model picker: the selected name is handed to spacy.load below.
st.header("Select a model:")
spacy_model = st.selectbox("Model", MODEL_NAMES)

# Text input, pre-filled with a sample sentence.
st.header("Enter text:")
text = st.text_area("Greek text", SAMPLE_TEXT)

# Load the chosen pipeline and analyse the entered text; `doc` feeds every
# visualizer further down the script.
nlp = spacy.load(spacy_model)
doc = nlp(text)
def get_html(html: str):
    """Wrap an HTML fragment in a bordered, horizontally scrollable ``<div>``.

    Every newline in *html* is replaced by a single space before the
    fragment is inserted into the wrapper, and the wrapped markup is
    returned as a string.
    """
    container = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    # Splitting on "\n" and re-joining with " " swaps each newline for a space.
    single_line = " ".join(html.split("\n"))
    return container.format(single_line)
def get_svg(svg: str, style: str = "", wrap: bool = True):
    """Turn SVG markup into an ``<img>`` tag backed by a base64 data URI.

    The SVG text is UTF-8 encoded, base64-encoded, and embedded in the
    ``src`` attribute; *style* becomes the tag's inline ``style`` attribute.
    When *wrap* is true the tag is passed through ``get_html`` before being
    returned; otherwise the bare tag is returned.
    """
    payload = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    img_tag = '<img src="data:image/svg+xml;base64,{}" style="{}"/>'.format(
        payload, style
    )
    if not wrap:
        return img_tag
    return get_html(img_tag)
def visualize_parser(
    doc: spacy.tokens.Doc,
    *,
    title: Optional[str] = "Dependency parse & part of speech:",
    key: Optional[str] = None,
) -> None:
    """Render displaCy dependency trees for *doc* on the Streamlit page.

    Three checkboxes are shown ("Split sentences", "Collapse punct",
    "Compact mode"); the last two are forwarded to displaCy as the
    ``collapse_punct`` and ``compact`` options. With "Split sentences"
    ticked, each sentence is rendered as its own tree, preceded by the
    sentence text when there is more than one sentence.

    If *doc* carries no sentence-boundary annotation, the whole document is
    rendered as a single tree even when "Split sentences" is ticked, instead
    of failing while iterating ``doc.sents``.

    Args:
        doc: The analysed spaCy document to draw.
        title: Header shown above the widgets; nothing is shown if falsy.
        key: Prefix used to build the Streamlit widget keys.
    """
    if title:
        st.header(title)

    # Four columns are created; the third one is intentionally left empty
    # so the checkbox positions stay where they were.
    cols = st.columns(4)
    split_sents = cols[0].checkbox(
        "Split sentences", value=True, key=f"{key}_parser_split_sents"
    )
    options = {
        "collapse_punct": cols[1].checkbox(
            "Collapse punct", value=True, key=f"{key}_parser_collapse_punct"
        ),
        "compact": cols[3].checkbox(
            "Compact mode", value=True, key=f"{key}_parser_compact"
        ),
    }

    # spaCy documents that Doc.sents raises ValueError when sentence
    # boundaries are unset (e.g. a pipeline without parser/senter), so only
    # split when the annotation is actually present.
    can_split = split_sents and doc.has_annotation("SENT_START")
    docs = [span.as_doc() for span in doc.sents] if can_split else [doc]
    show_sentence_text = can_split and len(docs) > 1

    for sent in docs:
        html = displacy.render(sent, options=options, style="dep")
        # Collapse blank lines in the generated SVG before embedding it.
        html = html.replace("\n\n", "\n")
        if show_sentence_text:
            st.markdown(f"> {sent.text}")
        st.write(get_svg(html), unsafe_allow_html=True)
# Entity labels highlighted by the NER view and token attributes listed in
# the table view.
NER_LABELS = ["PERSON", "LOC", "NORP", "GOD", "LANGUAGE"]
TOKEN_ATTRS = ["text", "lemma_", "pos_", "dep_", "ent_type_"]

# 1) Dependency trees for the analysed text.
visualize_parser(doc)

# 2) Named-entity highlights, limited to NER_LABELS, without the entity table.
visualize_ner(
    doc,
    labels=NER_LABELS,
    show_table=False,
    title="Persons, locations, groups, gods, and languages",
)

# 3) Per-token table with the attributes in TOKEN_ATTRS.
visualize_tokens(
    doc,
    attrs=TOKEN_ATTRS,
    title="Table view:",
    key="tokens",
)
|