import spaces
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import nltk
from nltk.tokenize import sent_tokenize
import torch
# Download the NLTK resources needed for sentence tokenization
# (recent NLTK releases look up "punkt_tab" instead of "punkt")
nltk.download('punkt')
nltk.download('punkt_tab')
# Load the models and tokenizers
model_checkpoint_fo_en = "barbaroo/nllb_200_600M_fo_en"
model_checkpoint_en_fo = "barbaroo/nllb_200_600M_en_fo"
model_checkpoint_uk_en = "Helsinki-NLP/opus-mt-uk-en"
model_checkpoint_en_uk = "Helsinki-NLP/opus-mt-en-uk"
tokenizer_fo_en = AutoTokenizer.from_pretrained(model_checkpoint_fo_en)
model_fo_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_fo_en)
tokenizer_en_fo = AutoTokenizer.from_pretrained(model_checkpoint_en_fo)
model_en_fo = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_fo)
tokenizer_uk_en = AutoTokenizer.from_pretrained(model_checkpoint_uk_en)
model_uk_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_uk_en)
tokenizer_en_uk = AutoTokenizer.from_pretrained(model_checkpoint_en_uk)
model_en_uk = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_uk)
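
# Note: the four direct models above cover fo<->en and uk<->en; the uk<->fo
# directions offered in the UI are handled in handle_input() below by
# pivoting through English (two translation passes).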
# Check if a GPU is available and move models to GPU if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print("GPU is available. Initializing models on GPU.")
    model_fo_en.to(device)
    model_en_fo.to(device)
    model_uk_en.to(device)
    model_en_uk.to(device)
else:
    print("GPU is not available. Using CPU.")
def split_into_sentences(text):
    return sent_tokenize(text)
@spaces.GPU
def translate(text, model, tokenizer, max_length=80):
    # Ensure the model is on the correct device
    model.to(device)

    sentences = split_into_sentences(text)
    translated_text = []
    for sentence in sentences:
        # Tokenize and move the inputs to the same device as the model
        inputs = tokenizer.encode(sentence, return_tensors="pt", max_length=max_length, truncation=True).to(device)
        print(f"Input tensor device: {inputs.device}")  # Debug statement

        # Run inference with beam search (on GPU when available, otherwise CPU)
        outputs = model.generate(inputs, max_length=max_length, num_beams=4, early_stopping=True)
        print(f"Output tensor device: {outputs.device}")  # Debug statement

        # Move the outputs back to the CPU for decoding
        translated_sentence = tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
        translated_text.append(translated_sentence)

    return " ".join(translated_text)
def handle_input(text, file, direction):
    if file is not None:
        # Decode the uploaded file's bytes; the file takes precedence over the textbox
        text = file.decode("utf-8")

    if direction == "fo_en":
        model = model_fo_en
        tokenizer = tokenizer_fo_en
    elif direction == "en_fo":
        model = model_en_fo
        tokenizer = tokenizer_en_fo
    elif direction == "uk_en":
        model = model_uk_en
        tokenizer = tokenizer_uk_en
    elif direction == "en_uk":
        model = model_en_uk
        tokenizer = tokenizer_en_uk
    elif direction == "uk_fo":
        # Ukrainian to Faroese via an English pivot: translate to English first,
        # then fall through to the English -> Faroese model below
        text = translate(text, model_uk_en, tokenizer_uk_en)
        model = model_en_fo
        tokenizer = tokenizer_en_fo
    elif direction == "fo_uk":
        # Faroese to Ukrainian via an English pivot
        text = translate(text, model_fo_en, tokenizer_fo_en)
        model = model_en_uk
        tokenizer = tokenizer_en_uk

    # Translate the text if it's not empty
    if text:
        return translate(text, model, tokenizer)
    else:
        return "Please enter text or upload a text file."
# Define the Gradio interface
iface = gr.Interface(
    fn=handle_input,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type here or upload a text file..."),
        gr.File(label="or Upload Text File", type="binary"),
        gr.Dropdown(
            label="Translation Direction",
            choices=["fo_en", "en_fo", "uk_en", "en_uk", "uk_fo", "fo_uk"],
            value="fo_en",
        ),
    ],
    outputs="text",
    title="Multilingual Translator",
    description="Enter text directly or upload a text file (.txt) to translate between Faroese, Ukrainian, and English.",
)
# Launch the interface
iface.launch()