"""Gradio demo: summarize an uploaded PDF page by page and narrate the summary."""

import gradio as gr
from pathlib import Path
from pypdf import PdfReader
from transformers.utils import logging

# Kept ahead of the pipeline import, as in the original ordering, so the
# verbosity level is already lowered when the pipelines are created.
logging.set_verbosity_error()

from transformers import pipeline
import torch

# Use the first CUDA device when one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Summarization pipeline applied to the text of each PDF page.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    torch_dtype=torch.bfloat16,
    device=device,
)

# Text-to-speech pipeline used to narrate the combined summary.
narrator = pipeline("text-to-speech", model="suno/bark-small", device=device)


def upload_file(filepath):
    """Return the path of the uploaded file so it can be shown in ``gr.File``.

    Args:
        filepath: Path of the uploaded file as passed in by the upload button.

    Returns:
        The full path as a string. The directory part is kept on purpose:
        returning only the base name would hand the file component a
        relative name that no longer points at the uploaded file.
    """
    return str(Path(filepath))


def pdf_to_audio(file_output):
    """Summarize each page of a PDF and narrate the concatenated summaries.

    Args:
        file_output: Path (or file-like object) of the PDF, as accepted by
            ``PdfReader``.

    Returns:
        A ``gr.Audio`` built from the narrator's sampling rate and the
        transposed audio array.

    Raises:
        gr.Error: If no file was provided or no text could be extracted.
    """
    if not file_output:
        raise gr.Error("Please upload a PDF file first.")

    pdf_reader = PdfReader(file_output)

    page_summaries = []
    for page in pdf_reader.pages:
        page_text = (page.extract_text() or "").strip()
        if not page_text:
            # Nothing to summarize on pages without extractable text.
            continue
        page_summary = summarizer(
            page_text,
            min_length=10,
            max_length=100,
            # Truncate pages that exceed the model's maximum input length
            # instead of feeding it an over-long sequence.
            truncation=True,
        )
        page_summaries.append(page_summary[0]["summary_text"])

    if not page_summaries:
        raise gr.Error("No extractable text was found in the PDF.")

    pdf_summary = " ".join(page_summaries)
    narrated_text = narrator(pdf_summary)
    return gr.Audio((narrated_text["sampling_rate"], narrated_text["audio"].T))


with gr.Blocks() as demo:
    file_output = gr.File()
    upload_button = gr.UploadButton(
        "Click to upload your PDF file",
        # Only offer PDF files, matching what pdf_to_audio can read.
        file_types=[".pdf"],
        file_count="single",
    )
    # Mirror the uploaded file into the File component that feeds the interface.
    upload_button.upload(upload_file, upload_button, file_output)
    audio = gr.Interface(
        fn=pdf_to_audio,
        inputs=file_output,
        outputs="audio",
    )


if __name__ == "__main__":
    demo.launch(debug=True)