Spaces:
Running
on
T4
Running
on
T4
knowsuchagency
committed on
Commit
•
1a8723f
1
Parent(s):
7a115e0
chore: Refactor PDF file reading in generate_audio function
Browse files
main.py
CHANGED
@@ -12,6 +12,10 @@ from promptic import llm
|
|
12 |
from pydantic import BaseModel, ValidationError
|
13 |
from pypdf import PdfReader
|
14 |
from tenacity import retry, retry_if_exception_type
|
|
|
|
|
|
|
|
|
15 |
|
16 |
sentry_sdk.init(os.getenv("SENTRY_DSN"))
|
17 |
|
@@ -61,7 +65,7 @@ def generate_dialogue(text: str) -> Dialogue:
|
|
61 |
Now that you have brainstormed ideas and created a rough outline, it's time to write the actual podcast dialogue. Aim for a natural, conversational flow between the host and any guest speakers. Incorporate the best ideas from your brainstorming session and make sure to explain any complex topics in an easy-to-understand way.
|
62 |
|
63 |
<podcast_dialogue>
|
64 |
-
Write your engaging, informative podcast dialogue here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience.
|
65 |
</podcast_dialogue>
|
66 |
"""
|
67 |
|
@@ -102,7 +106,6 @@ def generate_audio(file: str, openai_api_key: str = None) -> bytes:
|
|
102 |
futures = []
|
103 |
for line in llm_output.dialogue:
|
104 |
transcript_line = f"{line.speaker}: {line.text}"
|
105 |
-
logger.info(transcript_line)
|
106 |
future = executor.submit(get_mp3, line.text, line.voice, openai_api_key)
|
107 |
futures.append((future, transcript_line))
|
108 |
characters += len(line.text)
|
@@ -114,7 +117,23 @@ def generate_audio(file: str, openai_api_key: str = None) -> bytes:
|
|
114 |
|
115 |
logger.info(f"Generated {characters} characters of audio")
|
116 |
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
|
120 |
description = """
|
@@ -122,8 +141,6 @@ description = """
|
|
122 |
<strong>Convert any PDF into a podcast episode! Experience research papers, websites, and more in a whole new way.</strong>
|
123 |
<br>
|
124 |
<a href="https://github.com/knowsuchagency/pdf-to-podcast">knowsuchagency/pdf-to-podcast</a>
|
125 |
-
<br>
|
126 |
-
<em>Note: audio doesn't work in Safari</em>
|
127 |
</p>
|
128 |
"""
|
129 |
|
|
|
12 |
from pydantic import BaseModel, ValidationError
|
13 |
from pypdf import PdfReader
|
14 |
from tenacity import retry, retry_if_exception_type
|
15 |
+
from tempfile import NamedTemporaryFile
|
16 |
+
import glob
|
17 |
+
import os
|
18 |
+
import time
|
19 |
|
20 |
sentry_sdk.init(os.getenv("SENTRY_DSN"))
|
21 |
|
|
|
65 |
Now that you have brainstormed ideas and created a rough outline, it's time to write the actual podcast dialogue. Aim for a natural, conversational flow between the host and any guest speakers. Incorporate the best ideas from your brainstorming session and make sure to explain any complex topics in an easy-to-understand way.
|
66 |
|
67 |
<podcast_dialogue>
|
68 |
+
Write your engaging, informative podcast dialogue here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience. Don't include variable brackets like `[Host Name]` or `[Guest Name]`. Use made-up names for the hosts and guests to create a more engaging and immersive experience for listeners. Design your output to be read aloud -- it will be directly converted into audio.
|
69 |
</podcast_dialogue>
|
70 |
"""
|
71 |
|
|
|
106 |
futures = []
|
107 |
for line in llm_output.dialogue:
|
108 |
transcript_line = f"{line.speaker}: {line.text}"
|
|
|
109 |
future = executor.submit(get_mp3, line.text, line.voice, openai_api_key)
|
110 |
futures.append((future, transcript_line))
|
111 |
characters += len(line.text)
|
|
|
117 |
|
118 |
logger.info(f"Generated {characters} characters of audio")
|
119 |
|
120 |
+
temporary_directory = "./gradio_cached_examples/tmp/"
|
121 |
+
os.makedirs(temporary_directory, exist_ok=True)
|
122 |
+
|
123 |
+
temporary_file = NamedTemporaryFile(
|
124 |
+
dir=temporary_directory,
|
125 |
+
delete=False,
|
126 |
+
suffix=".mp3",
|
127 |
+
)
|
128 |
+
temporary_file.write(audio)
|
129 |
+
temporary_file.close()
|
130 |
+
|
131 |
+
# Delete any files in the temp directory that end with .mp3 and are over a day old
|
132 |
+
for file in glob.glob(f"{temporary_directory}*.mp3"):
|
133 |
+
if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
|
134 |
+
os.remove(file)
|
135 |
+
|
136 |
+
return temporary_file.name, transcript
|
137 |
|
138 |
|
139 |
description = """
|
|
|
141 |
<strong>Convert any PDF into a podcast episode! Experience research papers, websites, and more in a whole new way.</strong>
|
142 |
<br>
|
143 |
<a href="https://github.com/knowsuchagency/pdf-to-podcast">knowsuchagency/pdf-to-podcast</a>
|
|
|
|
|
144 |
</p>
|
145 |
"""
|
146 |
|