Spaces:
Runtime error
Runtime error
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
import re | |
# Checkpoint name/path handed to ``from_pretrained`` for both objects below.
model_checkpoint_path = "barbaroo/nllb_200_600M_fo_en"

# Loaded once at import time; the Gradio callback reuses these module-level
# objects for every translation request.
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_path)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path)
# A single whitespace character that follows ".", "?" or "!", unless the
# preceding characters look like an abbreviation: the "x.y." shape (e.g.
# "e.g.") or a capital plus one lowercase letter plus period (e.g. "Mr.").
_SENTENCE_BOUNDARY = re.compile(
    r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s'
)


def split_into_sentences(text):
    """Split *text* into sentences at whitespace after ``.``, ``?`` or ``!``.

    The split is purely regex based: it does not require the next sentence
    to start with a capital letter, and it only protects the two abbreviation
    shapes described on ``_SENTENCE_BOUNDARY``.

    Args:
        text: The string to split.

    Returns:
        A list of sentences with surrounding whitespace removed. Fragments
        that are empty or whitespace-only (produced by trailing whitespace,
        or by empty input) are dropped, so the result may be an empty list.
    """
    fragments = (piece.strip() for piece in _SENTENCE_BOUNDARY.split(text))
    # Drop blank fragments so callers never receive an empty "sentence".
    return [fragment for fragment in fragments if fragment]
def translate(text, model, tokenizer, max_length=80):
    """Translate *text* one sentence at a time and join the results.

    Args:
        text: Source text; it is split with ``split_into_sentences``.
        model: Object whose ``generate`` method receives the encoded ids.
        tokenizer: Object providing ``encode`` and ``decode``.
        max_length: Token limit applied both to the encoded input (longer
            sentences are truncated) and to the generated output.

    Returns:
        The translated sentences joined by single spaces. Returns ``""``
        when *text* contains nothing but whitespace.
    """
    translated_text = []
    for sentence in split_into_sentences(text):
        # Blank fragments (empty input, trailing whitespace) carry nothing to
        # translate; running generation on them wastes a beam search and may
        # add spurious text to the output, so skip them.
        if not sentence.strip():
            continue
        inputs = tokenizer.encode(
            sentence,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
        )
        outputs = model.generate(
            inputs,
            max_length=max_length,
            num_beams=4,
            early_stopping=True,
        )
        # generate() returns a batch; the single input sentence is at index 0.
        translated_text.append(
            tokenizer.decode(outputs[0], skip_special_tokens=True)
        )
    return " ".join(translated_text)
# Gradio interface setup (requires the `gradio` package to be installed).
import gradio as gr | |
# Define the Gradio interface | |
def gradio_translate(text):
    """Gradio callback: translate *text* using the module-level model and tokenizer."""
    translation = translate(text, model, tokenizer)
    return translation
# One text input mapped to one text output through gradio_translate.
iface = gr.Interface(
    fn=gradio_translate,
    inputs="text",
    outputs="text",
    title="Faroese to English Translator",
    description="Translate Faroese text to English using a state-of-the-art model.",
)

# Start serving the interface.
iface.launch()