# Spaces status shown when this file was captured: Runtime error
import io

import fitz  # PyMuPDF
import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Identifier of the essay-grading checkpoint passed to `from_pretrained`.
model_name = "KevSun/Engessay_grading_ML"


@st.cache_resource
def _load_model_and_tokenizer(name):
    """Load the sequence-classification model and its tokenizer once.

    Streamlit re-executes this script from the top on every widget
    interaction (file upload, button click). Caching the loaded objects as a
    shared resource avoids re-instantiating the model on each of those reruns.

    Args:
        name: Model identifier understood by ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


# Same module-level names as before, so the rest of the script is unaffected.
model, tokenizer = _load_model_and_tokenizer(model_name)
# Streamlit app: page header.
st.title("Automated Scoring App")
# The essay is supplied as a PDF upload (there is no text box), so the
# instructions say "upload" rather than "enter".
st.write("Upload your English essay as a PDF below to predict scores from multiple dimensions:")

# PDF upload widget; evaluates falsy until the user has selected a file.
uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])
if uploaded_file:
    # getvalue() returns the entire buffer regardless of the current stream
    # position, whereas read() returns only what is left after any earlier
    # read of the same object.
    pdf_bytes = uploaded_file.getvalue()

    # Open the PDF from memory and concatenate the text of every page.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        text_content = "".join(page.get_text() for page in doc)

    # Show the extracted text (read-only) so the user can verify what will
    # be scored.
    st.write("Extracted text from PDF:")
    st.text_area("PDF Content", text_content, height=200, disabled=True)
# Token cap passed to the tokenizer (standard BERT/RoBERTa max length).
MAX_TOKENS = 512

# Score the uploaded essay when the user presses "Predict".
if st.button("Predict"):
    if not uploaded_file:
        st.write("Please upload a PDF file to get scores.")
    elif not text_content.strip():
        # A PDF with no extractable text (e.g. scanned images) would otherwise
        # be scored as an empty string, producing meaningless numbers.
        st.error("No text could be extracted from the PDF, so it cannot be scored.")
    else:
        # Tokenize the text extracted at upload time, truncating to the cap.
        inputs = tokenizer(
            text_content,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_TOKENS,
        )

        # Reaching the cap is used as the signal that truncation happened.
        # NOTE: an essay that is exactly MAX_TOKENS tokens long also triggers
        # this warning.
        token_count = len(inputs['input_ids'][0])
        if token_count >= MAX_TOKENS:
            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)

        # Drop the batch dimension and convert the logits to a NumPy array.
        predicted_scores = outputs.logits.squeeze().numpy()

        # Linear rescaling of the raw outputs, then snap to the nearest 0.5.
        scaled_scores = 2.25 * predicted_scores - 1.25
        rounded_scores = [round(score * 2) / 2 for score in scaled_scores]

        # One row per scoring dimension.
        labels = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
        df = pd.DataFrame({
            'Dimension': labels,
            'Score': rounded_scores,
        })

        # Display results in the app.
        st.write("Scores:")
        st.dataframe(df)

        # Also write a copy into the process's working directory. A failed
        # write (e.g. read-only filesystem) is reported but must not prevent
        # the download button below from being rendered.
        local_path = "essay_scores.csv"
        try:
            df.to_csv(local_path, index=False)
        except OSError as err:
            st.warning(f"Could not save results to {local_path}: {err}")
        else:
            st.success(f"Results saved locally to {local_path}")

        # Offer the same table as a CSV download.
        csv_text = df.to_csv(index=False)
        st.download_button(
            label="Download results as CSV",
            data=csv_text,
            file_name="essay_scores.csv",
            mime="text/csv",
        )