# Spaces: Sleeping -- page-header residue captured with the source, not program code.
import re

import fitz  # PyMuPDF
from transformers import VitsModel, MBartForConditionalGeneration, AutoTokenizer
import torch
import soundfile as sf
import gradio as gr
# Checkpoint identifiers, named once so each tokenizer/model pair is
# guaranteed to load from the same checkpoint.
TRANSLATION_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"
TTS_CHECKPOINT = "facebook/mms-tts-hin"

# Translation stage: tokenizer (slow implementation requested) and model.
translation_tokenizer = AutoTokenizer.from_pretrained(
    TRANSLATION_CHECKPOINT, use_fast=False
)
translation_model = MBartForConditionalGeneration.from_pretrained(
    TRANSLATION_CHECKPOINT
)

# Text-to-speech stage: tokenizer and VITS model.
tts_tokenizer = AutoTokenizer.from_pretrained(TTS_CHECKPOINT)
tts_model = VitsModel.from_pretrained(TTS_CHECKPOINT)
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page in a PDF.

    Args:
        pdf_file: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. Empty string if no page yields any text.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if extraction fails part-way through.
    with fitz.open(pdf_file) as doc:
        return "".join(page.get_text() for page in doc)
# Number of sentences translated per generate() call; bounds padding waste and
# peak memory for long documents.
_TRANSLATION_BATCH_SIZE = 8


def _split_sentences(text):
    """Collapse runs of whitespace in *text* and split it after '.', '!' or '?'."""
    normalized = " ".join(text.split())
    return [s for s in re.split(r"(?<=[.!?])\s+", normalized) if s]


def _translate_to_hindi(sentences):
    """Translate a list of sentences to Hindi, one batch at a time."""
    hindi_bos = translation_tokenizer.lang_code_to_id["hi_IN"]
    translated = []
    for start in range(0, len(sentences), _TRANSLATION_BATCH_SIZE):
        batch = sentences[start:start + _TRANSLATION_BATCH_SIZE]
        # truncation=True now only clips a single over-long sentence instead
        # of discarding the remainder of the whole document.
        model_inputs = translation_tokenizer(
            batch, return_tensors="pt", padding=True, truncation=True
        )
        with torch.no_grad():
            generated_tokens = translation_model.generate(
                **model_inputs,
                forced_bos_token_id=hindi_bos,
            )
        translated.extend(
            translation_tokenizer.batch_decode(
                generated_tokens, skip_special_tokens=True
            )
        )
    return translated


def _synthesize_speech(texts):
    """Synthesize each text separately and return one concatenated 1-D waveform."""
    segments = []
    with torch.no_grad():
        for text in texts:
            tts_inputs = tts_tokenizer(text, return_tensors="pt")
            # waveform is (batch, samples) with batch == 1 here; drop the
            # batch axis so the segments can be joined end to end.
            segments.append(tts_model(**tts_inputs).waveform.squeeze(0))
    return torch.cat(segments).cpu().numpy()


def process_pdf(pdf_file):
    """Translate the text of a PDF to Hindi and render it as speech.

    The extracted text is split into sentences, translated in batches,
    synthesized sentence by sentence, and written to ``output.wav``.

    Args:
        pdf_file: Location of the PDF, forwarded to ``extract_text_from_pdf``.

    Returns:
        Path of the written WAV file (``"output.wav"``).

    Raises:
        gr.Error: If the PDF yields no text, the translation is empty, or
            speech synthesis fails with a ``RuntimeError``.
    """
    # Extract text from the PDF
    input_text = extract_text_from_pdf(pdf_file)
    sentences = _split_sentences(input_text)
    if not sentences:
        raise gr.Error("No extractable text was found in the PDF.")

    # Translate from English to Hindi
    translated = [t for t in _translate_to_hindi(sentences) if t.strip()]
    if not translated:
        raise gr.Error("Translation produced no text.")

    # Generate the waveform
    try:
        waveform = _synthesize_speech(translated)
    except RuntimeError as e:
        # Raise rather than return the message: the return value is consumed
        # as an audio file path, so a returned error string would be
        # misread as a file name.
        raise gr.Error(f"Runtime Error: {e}") from e

    # Save the waveform to an audio file at the rate the model actually
    # produced, so playback speed and pitch are correct.
    audio_path = "output.wav"
    sf.write(audio_path, waveform, tts_model.config.sampling_rate)
    return audio_path
def gradio_interface(pdf_file):
    """Gradio callback: turn the uploaded PDF into a Hindi audio file.

    Args:
        pdf_file: Value delivered by the file input. May be ``None`` when the
            user submits without uploading, an object exposing the path as
            ``.name``, or (presumably, depending on the Gradio version and
            component settings -- verify) a plain path string.

    Returns:
        Whatever ``process_pdf`` returns for the uploaded file's path.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")
    # Prefer the ``.name`` attribute when present; otherwise treat the value
    # itself as the path.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    return process_pdf(pdf_path)
# Build the Gradio UI: one uploaded file in, generated audio out.
pdf_input = gr.File(file_count="single")
iface = gr.Interface(fn=gradio_interface, inputs=pdf_input, outputs="audio")

# Start the app with debug mode enabled.
iface.launch(debug=True)