# Spaces: Running
# app.py
import os | |
os.system('python download.py') | |
from transformers import T5Tokenizer, T5ForConditionalGeneration | |
import gradio as gr | |
import nltk | |
from nltk.tokenize import sent_tokenize, word_tokenize | |
from difflib import SequenceMatcher | |
# Fetch the NLTK data this app relies on. `sent_tokenize` needs the Punkt
# sentence models: older NLTK releases load them from 'punkt', while newer
# releases look for 'punkt_tab' instead, so both are requested. Asking for a
# resource the installed NLTK does not know about is reported by the
# downloader rather than raised, so the extra request is harmless there.
# NOTE(review): the tagger and WordNet data are not used by the code visible
# in this file; they are kept in case another part of the project needs them.
for _resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource)

# Load a pre-trained T5 model specifically fine-tuned for grammar correction.
# The checkpoint name is kept in one place so tokenizer and model cannot drift.
_MODEL_NAME = "prithivida/grammar_error_correcter_v1"
tokenizer = T5Tokenizer.from_pretrained(_MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(_MODEL_NAME)
# Function to perform grammar correction | |
def _highlight_revisions(original, corrected):
    """Return *corrected* as an HTML fragment with revised words marked.

    The two sentences are compared word by word (whitespace-split). Words
    that were inserted or replaced are taken from *corrected* and wrapped in
    a red ``<u>`` element; unchanged words are emitted as plain text. Words
    that only exist in *original* (a pure deletion) produce no output.

    All text is HTML-escaped before being placed in the fragment so that
    characters such as ``<`` or ``&`` typed by the user are shown literally
    instead of being interpreted as markup.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Split once instead of re-splitting for every opcode.
    source_words = original.split()
    revised_words = corrected.split()
    matcher = SequenceMatcher(None, source_words, revised_words)

    pieces = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag in ('insert', 'replace'):
            changed = escape(' '.join(revised_words[j1:j2]))
            pieces.append(f"<u style='color:red;'>{changed}</u>")
        elif tag == 'equal':
            pieces.append(escape(' '.join(source_words[i1:i2])))
        # 'delete': the removed words are intentionally not rendered.
    return " ".join(pieces)


def grammar_check(text):
    """Grammar-correct *text* and return HTML highlighting the revisions.

    The text is split into sentences with ``sent_tokenize``; each sentence is
    sent to the module-level T5 ``model`` with the ``gec: `` prefix (input and
    output capped at 512 tokens, beam search with 4 beams). Each corrected
    sentence is then diffed against its original and the per-sentence HTML
    fragments are joined with single spaces.

    Args:
        text: The raw text entered by the user. ``None``, empty and
            whitespace-only input are accepted.

    Returns:
        An HTML string in which inserted or replaced words are underlined in
        red; an empty string when there is nothing to check.
    """
    # Nothing to correct: skip tokenization and the model entirely. This
    # also covers a None value, which sent_tokenize would reject.
    if not text or not text.strip():
        return ""

    sentences = sent_tokenize(text)
    corrected_sentences = []
    for sentence in sentences:
        input_ids = tokenizer.encode(
            f"gec: {sentence}",
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        outputs = model.generate(
            input_ids,
            max_length=512,
            num_beams=4,
            early_stopping=True,
        )
        corrected_sentences.append(
            tokenizer.decode(outputs[0], skip_special_tokens=True)
        )

    return " ".join(
        _highlight_revisions(orig, corr)
        for orig, corr in zip(sentences, corrected_sentences)
    )
# Create Gradio interface with a writing prompt | |
# Text shown under the title: usage hint, the writing prompt, and a sample
# answer containing deliberate tense/agreement mistakes for users to paste in.
_DESCRIPTION = "".join([
    "Enter text to check for grammar mistakes.\n\n",
    "Writing Prompt:\n",
    "In the story, Alex and his friends discovered an ancient treasure in Whispering Hollow and decided to donate the artifacts to the local museum.\n\n",
    "In the past, did you have a similar experience where you found something valuable or interesting? Tell the story. Describe what you found, what you did with it, and how you felt about your decision.\n\n",
    "Remember to use past tense in your writing.\n\n",
    "<b>A student's sample answer:</b>\n",
    "<blockquote>When I was 10, I find an old coin in my backyard. I kept it for a while and shows it to my friends. They was impressed and say it might be valuable. Later, I take it to a local antique shop, and the owner told me it was very old. I decided to give it to the museum in my town. The museum was happy and put it on display. I feel proud of my decision.<br><br><i>Copy and paste to try.</i></blockquote>",
])

# Wire grammar_check into a single text-in / HTML-out Gradio interface.
interface = gr.Interface(
    title="Grammar Checker",
    description=_DESCRIPTION,
    fn=grammar_check,
    inputs="text",
    outputs="html",  # grammar_check returns an HTML fragment
)

# Start serving the interface.
interface.launch()