# app.py -- created by devhem in commit cace677 ("Create app.py", verified).
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import fitz # PyMuPDF
import pandas as pd
import io
# Load the model and tokenizer from Hugging Face
model_name = "KevSun/Engessay_grading_ML"


@st.cache_resource
def _load_model_and_tokenizer(name):
    """Return the ``(model, tokenizer)`` pair for the Hugging Face checkpoint *name*.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, both objects would be rebuilt on each
    button click or file upload; ``st.cache_resource`` keeps a single shared
    instance per distinct *name* for the lifetime of the server process.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


# Module-level names used by the prediction code further down the script.
model, tokenizer = _load_model_and_tokenizer(model_name)
# Streamlit app
st.title("Automated Scoring App")
# The essay is supplied as a PDF upload (not typed in), so the prompt says so.
st.write("Upload your English essay as a PDF below to predict scores from multiple dimensions:")

# File uploader restricted to PDF files
uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])

if uploaded_file:
    # getvalue() returns the whole buffer regardless of the current stream
    # position, unlike read(), which returns only what is left after it.
    pdf_bytes = uploaded_file.getvalue()

    # Extract the text of every page, concatenated in page order.
    try:
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            text_content = "".join(page.get_text() for page in doc)
    except RuntimeError:
        # PyMuPDF's file-data errors derive from RuntimeError; show a readable
        # message instead of a traceback and end this script run here, so the
        # code below never sees an undefined ``text_content``.
        st.error("The uploaded file could not be read as a PDF. Please upload a valid PDF file.")
        st.stop()

    # Display the extracted text (read-only) so the user can check it
    st.write("Extracted text from PDF:")
    st.text_area("PDF Content", text_content, height=200, disabled=True)
# Maximum number of tokens passed to the model (standard BERT/RoBERTa max length).
MAX_TOKENS = 512
# Linear rescaling applied to the raw model outputs: scaled = SCALE * raw - OFFSET.
SCORE_SCALE = 2.25
SCORE_OFFSET = 1.25
# One label per model output, in output order.
SCORE_LABELS = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
# File written into the server process's working directory on each prediction.
LOCAL_CSV_PATH = "essay_scores.csv"

if st.button("Predict"):
    if not uploaded_file:
        st.write("Please upload a PDF file to get scores.")
    elif not text_content.strip():
        # E.g. a scanned/image-only PDF: scoring an empty string would yield
        # numbers that look valid but mean nothing.
        st.error("No text could be extracted from the uploaded PDF, so it cannot be scored.")
    else:
        # Tokenize the text extracted above, truncating to the model's limit.
        inputs = tokenizer(
            text_content,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_TOKENS,
        )

        # A sequence that fills the whole window was (almost certainly) cut off.
        # NOTE: a text of exactly MAX_TOKENS tokens also triggers this warning.
        token_count = len(inputs['input_ids'][0])
        if token_count >= MAX_TOKENS:
            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model(**inputs)

        # Drop size-1 dimensions (the single-essay batch axis) and move to NumPy.
        predicted_scores = outputs.logits.squeeze().numpy()

        # Rescale, then snap each score to the nearest 0.5.
        scaled_scores = SCORE_SCALE * predicted_scores - SCORE_OFFSET
        rounded_scores = (np.round(scaled_scores * 2) / 2).tolist()

        # Create results DataFrame
        df = pd.DataFrame({
            'Dimension': SCORE_LABELS,
            'Score': rounded_scores,
        })

        # Display results in app
        st.write("Scores:")
        st.dataframe(df)

        # Save CSV locally (overwrites the file from any previous prediction)
        df.to_csv(LOCAL_CSV_PATH, index=False)
        st.success(f"Results saved locally to {LOCAL_CSV_PATH}")

        # Offer the same table to the user as a CSV download
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download results as CSV",
            data=csv,
            file_name="essay_scores.csv",
            mime="text/csv",
        )