Spaces:
Running
on
T4
Running
on
T4
knowsuchagency
committed on
Commit
•
672cb3f
1
Parent(s):
5c2ba64
pdf's now work
Browse files
- main.py +28 -18
- requirements.txt +2 -1
main.py
CHANGED
@@ -6,20 +6,9 @@ from loguru import logger
|
|
6 |
from openai import OpenAI
|
7 |
from promptic import llm
|
8 |
from pydantic import BaseModel
|
|
|
9 |
|
10 |
|
11 |
-
def get_mp3(text: str, voice: str) -> bytes:
|
12 |
-
client = OpenAI()
|
13 |
-
|
14 |
-
with client.audio.speech.with_streaming_response.create(
|
15 |
-
model="tts-1",
|
16 |
-
voice=voice,
|
17 |
-
input=text,
|
18 |
-
) as response:
|
19 |
-
with io.BytesIO() as file:
|
20 |
-
for chunk in response.iter_bytes():
|
21 |
-
file.write(chunk)
|
22 |
-
return file.getvalue()
|
23 |
|
24 |
|
25 |
class DialogueItem(BaseModel):
|
@@ -32,7 +21,7 @@ class Dialogue(BaseModel):
|
|
32 |
dialogue: List[DialogueItem]
|
33 |
|
34 |
|
35 |
-
@llm(model="gemini/gemini-1.5-
|
36 |
def generate_dialogue(text: str) -> Dialogue:
|
37 |
"""
|
38 |
Your task is to take the input text provided and turn it into an engaging, informative podcast dialogue. The input text may be messy or unstructured, as it could come from a variety of sources like PDFs or web pages.
|
@@ -58,8 +47,25 @@ def generate_dialogue(text: str) -> Dialogue:
|
|
58 |
Write your engaging, informative podcast dialogue based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience.
|
59 |
"""
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
def generate_audio(text: str) -> bytes:
|
|
|
|
|
|
|
|
|
63 |
|
64 |
llm_output = generate_dialogue(text)
|
65 |
logger.info(llm_output)
|
@@ -84,10 +90,14 @@ def generate_audio(text: str) -> bytes:
|
|
84 |
demo = gr.Interface(
|
85 |
fn=generate_audio,
|
86 |
inputs=[
|
87 |
-
gr.Textbox(
|
88 |
-
label="Input Text",
|
89 |
-
placeholder="Enter text here",
|
90 |
-
),
|
|
|
|
|
|
|
|
|
91 |
# gr.Textbox(
|
92 |
# label="Male Voice",
|
93 |
# value="1m3E2x7boso3AU9J3woJ",
|
|
|
6 |
from openai import OpenAI
|
7 |
from promptic import llm
|
8 |
from pydantic import BaseModel
|
9 |
+
from pypdf import PdfReader
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
|
14 |
class DialogueItem(BaseModel):
|
|
|
21 |
dialogue: List[DialogueItem]
|
22 |
|
23 |
|
24 |
+
@llm(model="gemini/gemini-1.5-flash")
|
25 |
def generate_dialogue(text: str) -> Dialogue:
|
26 |
"""
|
27 |
Your task is to take the input text provided and turn it into an engaging, informative podcast dialogue. The input text may be messy or unstructured, as it could come from a variety of sources like PDFs or web pages.
|
|
|
47 |
Write your engaging, informative podcast dialogue based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience.
|
48 |
"""
|
49 |
|
50 |
+
def get_mp3(text: str, voice: str) -> bytes:
|
51 |
+
client = OpenAI()
|
52 |
+
|
53 |
+
with client.audio.speech.with_streaming_response.create(
|
54 |
+
model="tts-1",
|
55 |
+
voice=voice,
|
56 |
+
input=text,
|
57 |
+
) as response:
|
58 |
+
with io.BytesIO() as file:
|
59 |
+
for chunk in response.iter_bytes():
|
60 |
+
file.write(chunk)
|
61 |
+
return file.getvalue()
|
62 |
+
|
63 |
|
64 |
+
def generate_audio(file: bytes) -> bytes:
|
65 |
+
|
66 |
+
# Read the PDF file
|
67 |
+
reader = PdfReader(io.BytesIO(file))
|
68 |
+
text = "\n\n".join([page.extract_text() for page in reader.pages])
|
69 |
|
70 |
llm_output = generate_dialogue(text)
|
71 |
logger.info(llm_output)
|
|
|
90 |
demo = gr.Interface(
|
91 |
fn=generate_audio,
|
92 |
inputs=[
|
93 |
+
gr.File(
|
94 |
+
label="Input PDF",
|
95 |
+
type="binary",
|
96 |
+
)
|
97 |
+
# gr.Textbox(
|
98 |
+
# label="Input Text",
|
99 |
+
# placeholder="Enter text here",
|
100 |
+
# ),
|
101 |
# gr.Textbox(
|
102 |
# label="Male Voice",
|
103 |
# value="1m3E2x7boso3AU9J3woJ",
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
gradio~=4.36
|
2 |
-
promptic==0.7.
|
3 |
pydantic~=2.7
|
4 |
google-generativeai~=0.6
|
5 |
loguru~=0.7
|
|
|
|
1 |
gradio~=4.36
|
2 |
+
promptic==0.7.5
|
3 |
pydantic~=2.7
|
4 |
google-generativeai~=0.6
|
5 |
loguru~=0.7
|
6 |
+
pypdf~=4.1
|