# Kartheesh's picture
# Update app.py
# 5803861 verified
import fitz # PyMuPDF
from transformers import VitsModel, MBartForConditionalGeneration, AutoTokenizer
import torch
import soundfile as sf
import gradio as gr
# Checkpoint identifiers for the two pretrained pipelines loaded below.
_TRANSLATION_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"
_TTS_CHECKPOINT = "facebook/mms-tts-hin"

# Translation stage: mBART tokenizer (slow implementation requested via
# use_fast=False) and the matching conditional-generation model.
translation_tokenizer = AutoTokenizer.from_pretrained(
    _TRANSLATION_CHECKPOINT,
    use_fast=False,
)
translation_model = MBartForConditionalGeneration.from_pretrained(
    _TRANSLATION_CHECKPOINT
)

# Speech stage: tokenizer and VITS model loaded from the same TTS checkpoint.
tts_tokenizer = AutoTokenizer.from_pretrained(_TTS_CHECKPOINT)
tts_model = VitsModel.from_pretrained(_TTS_CHECKPOINT)
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_file: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order, with nothing
        inserted between pages.
    """
    # Open through a context manager so the document handle is released even
    # when text extraction fails partway through the file.
    with fitz.open(pdf_file) as doc:
        # A single join replaces repeated string concatenation in a loop.
        return "".join(page.get_text() for page in doc)
def process_pdf(pdf_file):
    """Translate the text of a PDF to Hindi and synthesize it as speech.

    The text is extracted with ``extract_text_from_pdf``, translated with the
    module-level translation model (target language code ``hi_IN``), and fed
    to the module-level TTS model. The resulting waveform is written to a WAV
    file.

    Args:
        pdf_file: Location of the PDF to process.

    Returns:
        Path of the written audio file (``"output.wav"``, relative to the
        current working directory).

    Raises:
        gr.Error: If the PDF contains no extractable text, or if the TTS
            model raises ``RuntimeError`` while generating the waveform.
    """
    input_text = extract_text_from_pdf(pdf_file)
    if not input_text.strip():
        # Nothing to translate or speak; report it clearly instead of running
        # the models on an empty string.
        raise gr.Error("No extractable text was found in the PDF.")

    # NOTE: the whole document is tokenized as one sequence, and
    # truncation=True cuts it to the tokenizer's maximum length, so text
    # beyond that limit is not translated.
    model_inputs = translation_tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Force the decoder to start with the Hindi language token.
    generated_tokens = translation_model.generate(
        **model_inputs,
        forced_bos_token_id=translation_tokenizer.lang_code_to_id["hi_IN"],
    )

    # Decode the generated ids back to text and merge them into one string.
    translation = translation_tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )
    translated_text = " ".join(translation)

    tts_inputs = tts_tokenizer(translated_text, return_tensors="pt")

    try:
        with torch.no_grad():
            tts_output = tts_model(**tts_inputs)
        waveform = tts_output.waveform.squeeze().cpu().numpy()
    except RuntimeError as e:
        # Raise instead of returning the message: callers treat the return
        # value as an audio file path, so an error string would be
        # misinterpreted as a (non-existent) file.
        raise gr.Error(f"Runtime Error: {e}") from e

    # Write with the rate the TTS model actually produces; a hard-coded rate
    # that differs from it changes the playback speed and pitch.
    # NOTE: the fixed file name is shared by every call, so a later call
    # overwrites the output of an earlier one.
    audio_path = "output.wav"
    sf.write(audio_path, waveform, tts_model.config.sampling_rate)
    return audio_path
def gradio_interface(pdf_file):
    """Gradio callback: run ``process_pdf`` on the uploaded file.

    Args:
        pdf_file: Value delivered by the file input. Either an object whose
            ``name`` attribute holds the file path, a plain path string, or
            ``None`` when nothing was uploaded.

    Returns:
        The value returned by ``process_pdf`` for the uploaded file.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_file is None:
        # Without this guard the attribute lookup below fails with an
        # unhelpful AttributeError on None.
        raise gr.Error("Please upload a PDF file.")

    # Accept both an upload wrapper exposing ``.name`` and a bare path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    return process_pdf(pdf_path)
# Assemble the web UI: a single-file upload feeding gradio_interface, with
# the result shown by an audio output component.
_pdf_upload = gr.File(file_count="single")
iface = gr.Interface(fn=gradio_interface, inputs=_pdf_upload, outputs="audio")

# Start the Gradio app with debug mode enabled.
iface.launch(debug=True)