ALDi / app.py
AMR-KELEG's picture
Switch to tab view
604feee
raw
history blame
2.15 kB
# Hint: this cheatsheet is magic! https://cheat-sheet.streamlit.app/
import constants
import numpy as np
import pandas as pd
import streamlit as st
from transformers import BertForSequenceClassification, AutoTokenizer
import random
import altair as alt
from altair import X, Y, Scale
@st.cache_data
def convert_df(df):
    """Serialize a DataFrame to UTF-8 encoded CSV bytes, omitting the index.

    Args:
        df: The pandas DataFrame to serialize.

    Returns:
        bytes: The CSV text (header row followed by the data rows, without
        the index column) encoded as UTF-8.
    """
    # IMPORTANT: Cache the conversion to prevent computation on every rerun
    # `index` is documented as a bool, so pass False explicitly rather than
    # relying on None happening to be falsy.
    return df.to_csv(index=False).encode("utf-8")
def compute_ALDi(inputs):
    """Return a placeholder ALDi score for *inputs*.

    The argument is currently ignored: the result is a random multiple of
    0.01 drawn from the closed range [0, 1].
    """
    percent = random.randint(0, 100)
    return percent / 100
# ---------------------------------------------------------------------------
# Page layout: a title followed by two tabs, one for scoring a single typed
# sentence and one for scoring every sentence of an uploaded text file.
# ---------------------------------------------------------------------------
st.title(constants.TITLE)

tab1, tab2 = st.tabs(["Input a Sentence", "Upload a File"])

with tab1:
    sent = st.text_input("Arabic Sentence:", placeholder="Enter an Arabic sentence.")

    # TODO: Check if this is needed!
    st.button("Submit")

    # Only score (and display) once the user has typed something.
    if sent:
        ALDi_score = compute_ALDi(sent)
        st.write(ALDi_score)

with tab2:
    file = st.file_uploader("Upload a file", type=["txt"])
    if file is not None:
        # Read the upload as plain text, one sentence per line, instead of
        # parsing it as a tab-separated table: CSV parsing breaks on sentences
        # containing tabs or double quotes, coerces tokens such as "NA" or
        # digits to non-string values, and raises on an empty file.
        # "utf-8-sig" also drops a leading byte-order mark if one is present.
        raw_text = file.getvalue().decode("utf-8-sig")
        sentences = [line for line in raw_text.splitlines() if line.strip()]

        if not sentences:
            st.warning("The uploaded file does not contain any sentences.")
        else:
            df = pd.DataFrame({"Sentence": sentences})

            # NOTE(review): the data is repeated 27 times (3 x 3 x 3 copies),
            # which looks like leftover scaffolding for exercising the chart
            # with more rows -- confirm and remove before release.
            df = pd.concat([df] * 27, ignore_index=True)

            # TODO: Run the model
            df["ALDi"] = df["Sentence"].apply(compute_ALDi)

            # A horizontal rule
            st.markdown("""---""")

            # Area chart of the score against the sentence position;
            # reset_index() exposes the row number as an "index" column.
            chart = (
                alt.Chart(df.reset_index())
                .mark_area(color="violet", opacity=0.5)
                .encode(
                    x=X(field="index", title="Sentence Index"),
                    y=Y("ALDi", scale=Scale(domain=[0, 1])),
                )
            )
            st.altair_chart(chart.interactive(), use_container_width=True)

            col1, col2 = st.columns([4, 1])

            with col1:
                # Display the output
                st.table(df)

            with col2:
                # Add a download button
                csv = convert_df(df)
                st.download_button(
                    label=":file_folder: Download predictions as CSV",
                    data=csv,
                    file_name="ALDi_scores.csv",
                    mime="text/csv",
                )