Spaces:
Runtime error
Runtime error
File size: 4,303 Bytes
acc5ceb c91f330 8b08dff 6a7f812 8b08dff c91f330 e7c4cd7 c939e8c 79787d6 c939e8c e7c4cd7 79787d6 9c1ca18 0e80915 9c1ca18 79787d6 9c1ca18 ffac277 8b08dff ffac277 c7ddbe5 9c1ca18 79787d6 0e80915 ffac277 9c1ca18 ffac277 0e80915 6a7f812 0e80915 9c1ca18 0e80915 ffac277 e7c4cd7 c939e8c c91f330 c939e8c 9c1ca18 79787d6 9c1ca18 79787d6 c939e8c c91f330 9c1ca18 c91f330 c939e8c 79787d6 c939e8c 79787d6 c939e8c e7c4cd7 c91f330 0e80915 9c1ca18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import spaces
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import nltk
from nltk.tokenize import sent_tokenize
import torch
# Initialize and download necessary NLTK resources
# (the Punkt sentence model is required by sent_tokenize below).
nltk.download('punkt')
# Load the models and tokenizers.
# Faroese<->English uses fine-tuned NLLB-200 (600M) checkpoints;
# Ukrainian<->English uses Helsinki-NLP OPUS-MT checkpoints.
model_checkpoint_fo_en = "barbaroo/nllb_200_600M_fo_en"
model_checkpoint_en_fo = "barbaroo/nllb_200_600M_en_fo"
model_checkpoint_uk_en = "Helsinki-NLP/opus-mt-uk-en"
model_checkpoint_en_uk = "Helsinki-NLP/opus-mt-en-uk"
# One (tokenizer, model) pair per translation direction; these globals are
# selected by handle_input() and consumed by translate().
tokenizer_fo_en = AutoTokenizer.from_pretrained(model_checkpoint_fo_en)
model_fo_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_fo_en)
tokenizer_en_fo = AutoTokenizer.from_pretrained(model_checkpoint_en_fo)
model_en_fo = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_fo)
tokenizer_uk_en = AutoTokenizer.from_pretrained(model_checkpoint_uk_en)
model_uk_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_uk_en)
tokenizer_en_uk = AutoTokenizer.from_pretrained(model_checkpoint_en_uk)
model_en_uk = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_uk)
# Check if a GPU is available and move models to GPU if possible.
# `device` is also read by translate() when placing input tensors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print("GPU is available. Initializing models on GPU.")
    model_fo_en.to(device)
    model_en_fo.to(device)
    model_uk_en.to(device)
    model_en_uk.to(device)
else:
    print("GPU is not available. Using CPU.")
def split_into_sentences(text):
    """Break *text* into a list of sentences using NLTK's Punkt tokenizer."""
    sentences = sent_tokenize(text)
    return sentences
@spaces.GPU
def translate(text, model, tokenizer, max_length=80):
    """Translate *text* with a seq2seq model, one sentence at a time.

    Args:
        text: Source-language text; may span multiple sentences.
        model: A loaded ``AutoModelForSeq2SeqLM`` instance.
        tokenizer: The tokenizer matching *model*.
        max_length: Token cap applied to both encoding and generation.

    Returns:
        The translated sentences joined by single spaces ("" for empty input).
    """
    # Ensure the model sits on the active device (GPU when available).
    model.to(device)
    translated = []
    for sentence in split_into_sentences(text):
        # Encode and move the input tensor to the same device as the model.
        inputs = tokenizer.encode(
            sentence, return_tensors="pt", max_length=max_length, truncation=True
        ).to(device)
        print(f"Input tensor device: {inputs.device}")  # Debug statement
        # Beam search with 4 beams, stopping early once beams converge.
        outputs = model.generate(
            inputs, max_length=max_length, num_beams=4, early_stopping=True
        )
        print(f"Output tensor device: {outputs.device}")  # Debug statement
        # Decode on CPU and collect this sentence's translation.
        translated.append(
            tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
        )
    return " ".join(translated)
def handle_input(text, file, direction):
    """Gradio callback: translate typed text or an uploaded UTF-8 file.

    Args:
        text: Text entered in the textbox.
        file: Optional uploaded file content as bytes (gr.File type="binary");
            when present it replaces *text*.
        direction: One of "fo_en", "en_fo", "uk_en", "en_uk", "uk_fo", "fo_uk".

    Returns:
        The translated text, or a user-facing message for empty input or an
        unrecognized direction.
    """
    # An uploaded file takes precedence over the textbox contents.
    if file is not None:
        # Decode the file bytes directly
        text = file.decode("utf-8")

    # Check emptiness up front so pivot directions never run a pointless
    # first-leg translation on empty input.
    if not text:
        return "Please enter text or upload a text file."

    # Single-hop directions: one (model, tokenizer) pair.
    direct = {
        "fo_en": (model_fo_en, tokenizer_fo_en),
        "en_fo": (model_en_fo, tokenizer_en_fo),
        "uk_en": (model_uk_en, tokenizer_uk_en),
        "en_uk": (model_en_uk, tokenizer_en_uk),
    }
    # Pivot directions: translate to English first, then to the target.
    pivot = {
        "uk_fo": ((model_uk_en, tokenizer_uk_en), (model_en_fo, tokenizer_en_fo)),
        "fo_uk": ((model_fo_en, tokenizer_fo_en), (model_en_uk, tokenizer_en_uk)),
    }

    if direction in direct:
        model, tokenizer = direct[direction]
        return translate(text, model, tokenizer)
    if direction in pivot:
        (first_model, first_tok), (second_model, second_tok) = pivot[direction]
        english = translate(text, first_model, first_tok)
        return translate(english, second_model, second_tok)
    # The original left model/tokenizer unbound here (UnboundLocalError);
    # return a clear message instead.
    return "Unknown translation direction."
# Define the Gradio interface: three inputs (free text, optional binary file
# upload, direction dropdown) mapped to a single text output. The dropdown
# choices must match the direction keys handled by handle_input().
iface = gr.Interface(
    fn=handle_input,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type here or upload a text file..."),
        gr.File(label="or Upload Text File", type="binary"),
        gr.Dropdown(label="Translation Direction", choices=["fo_en", "en_fo", "uk_en", "en_uk", "uk_fo", "fo_uk"], value="fo_en")
    ],
    outputs="text",
    title="Multilingual Translator",
    description="Enter text directly or upload a text file (.txt) to translate between Faroese, Ukrainian, and English."
)
# Launch the interface (blocking call; serves the app).
iface.launch()
|