Spaces:
Runtime error
Runtime error
File size: 2,843 Bytes
343c3c7 c91f330 8b08dff 6a7f812 8b08dff c91f330 e7c4cd7 c939e8c e7c4cd7 6a7f812 ffac277 8b08dff ffac277 8b08dff ffac277 6a7f812 ffac277 e7c4cd7 c939e8c c91f330 c939e8c c91f330 c939e8c e7c4cd7 c91f330 c939e8c 343c3c7 be8987f e311694 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
'''
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import nltk
from nltk.tokenize import sent_tokenize
import torch
# Download the NLTK sentence-tokenizer data needed by sent_tokenize.
# "punkt" is the original resource name; newer NLTK releases look for
# "punkt_tab" instead, so both are requested to work across versions.
nltk.download('punkt')
nltk.download('punkt_tab')

# Checkpoint identifiers for the two translation directions
# (fo_en and en_fo, matching the choices offered in the UI).
model_checkpoint_fo_en = "barbaroo/nllb_200_600M_fo_en"
model_checkpoint_en_fo = "barbaroo/nllb_200_600M_en_fo"

# Load one tokenizer/model pair per direction.
tokenizer_fo_en = AutoTokenizer.from_pretrained(model_checkpoint_fo_en)
model_fo_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_fo_en)
tokenizer_en_fo = AutoTokenizer.from_pretrained(model_checkpoint_en_fo)
model_en_fo = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_fo)

# Pick the GPU when one is available; `device` is also used by translate()
# to place the encoded inputs on the same device as the models.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print("GPU is available. Initializing models on GPU.")
    model_fo_en.to(device)
    model_en_fo.to(device)
else:
    print("GPU is not available. Using CPU.")
def split_into_sentences(text):
    """Return the sentences that NLTK's ``sent_tokenize`` finds in ``text``."""
    sentences = sent_tokenize(text)
    return sentences
def translate(text, model, tokenizer, max_length=80):
    """Translate ``text`` sentence by sentence and join the results with spaces.

    Args:
        text: Source text; it is split with ``split_into_sentences``.
        model: Model whose ``generate`` method produces the output token ids.
        tokenizer: Tokenizer used to encode each sentence and to decode the
            generated ids.
        max_length: Token limit applied both to the encoded input (with
            truncation enabled) and to the generated output.

    Returns:
        The translated sentences joined by single spaces.
    """

    def _translate_sentence(sentence):
        # Encode one sentence and move the ids to the module-level device.
        token_ids = tokenizer.encode(
            sentence,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
        ).to(device)
        generated = model.generate(
            token_ids,
            max_length=max_length,
            num_beams=4,
            early_stopping=True,
        )
        # Only the first returned sequence is decoded.
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    pieces = [_translate_sentence(s) for s in split_into_sentences(text)]
    return " ".join(pieces)
def handle_input(text, file, direction):
    """Translate typed text or an uploaded file in the requested direction.

    Args:
        text: Text typed into the textbox. Ignored when ``file`` is given.
        file: Raw bytes of the uploaded file, or ``None`` when nothing was
            uploaded. When present, its decoded content replaces ``text``.
        direction: ``"fo_en"`` selects the fo_en model/tokenizer pair; any
            other value selects the en_fo pair.

    Returns:
        The translated text, or a short user-facing message when there is
        nothing to translate or the upload cannot be decoded.
    """
    if file is not None:
        try:
            # "utf-8-sig" reads plain UTF-8 and also drops a leading
            # byte-order mark, so the mark never reaches the tokenizer.
            text = file.decode("utf-8-sig")
        except UnicodeDecodeError:
            return "Could not decode the uploaded file. Please upload a UTF-8 encoded text file."

    # Check for missing or whitespace-only input before touching the models,
    # so the user gets a prompt instead of an empty translation.
    if not text or not text.strip():
        return "Please enter text or upload a text file."

    if direction == "fo_en":
        model, tokenizer = model_fo_en, tokenizer_fo_en
    else:
        model, tokenizer = model_en_fo, tokenizer_en_fo

    return translate(text, model, tokenizer)
# Input widgets: free-text box, optional file upload, and direction picker.
text_input = gr.Textbox(lines=2, placeholder="Type here or upload a text file...")
file_input = gr.File(label="or Upload Text File", type="binary")
direction_input = gr.Dropdown(
    label="Translation Direction",
    choices=["fo_en", "en_fo"],
    value="fo_en",
)

# Wire the widgets to handle_input; the result is shown as plain text.
iface = gr.Interface(
    fn=handle_input,
    inputs=[text_input, file_input, direction_input],
    outputs="text",
    title="Bidirectional Translator",
    description="Enter text directly or upload a text file (.txt) to translate between Faroese and English.",
)

# Start serving the interface.
iface.launch()
'''
import torch
# Startup diagnostics: report whether PyTorch can see a CUDA device.
print(f"Is CUDA available: {torch.cuda.is_available()}")
# Query the device name only when CUDA is present. On a CPU-only host
# torch.cuda.current_device() raises, which would crash the script here.
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("CUDA device: none (running on CPU)")