gabrielchua committed
Commit 506f934 • 1 Parent(s): 14ff1d7

update app

Files changed (3):
  1. constants.py +1 -3
  2. requirements.txt +9 -3
  3. utils.py +15 -52
constants.py CHANGED
@@ -23,11 +23,9 @@ ERROR_MESSAGE_TOO_LONG = "The total content is too long. Please ensure the combi
 
 # Fireworks API-related constants
 FIREWORKS_API_KEY = os.getenv("FIREWORKS_API_KEY")
-FIREWORKS_BASE_URL = "https://api.fireworks.ai/inference/v1"
 FIREWORKS_MAX_TOKENS = 16_384
 FIREWORKS_MODEL_ID = "accounts/fireworks/models/llama-v3p1-405b-instruct"
 FIREWORKS_TEMPERATURE = 0.1
-FIREWORKS_JSON_RETRY_ATTEMPTS = 3
 
 # MeloTTS
 MELO_API_NAME = "/synthesize"
@@ -80,7 +78,7 @@ UI_DESCRIPTION = """
 Generate Podcasts from PDFs using open-source AI.
 
 Built with:
-- [Llama 3.1 405B 🦙](https://huggingface.co/meta-llama/Llama-3.1-405B) via [Fireworks AI 🎆](https://fireworks.ai/)
+- [Llama 3.1 405B 🦙](https://huggingface.co/meta-llama/Llama-3.1-405B) via [Fireworks AI 🎆](https://fireworks.ai/) and [Instructor 📝](https://github.com/instructor-ai/instructor)
 - [MeloTTS 🐚](https://huggingface.co/myshell-ai/MeloTTS-English)
 - [Bark 🐶](https://huggingface.co/suno/bark)
 - [Jina Reader 🔍](https://jina.ai/reader/)
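
A note on the two deletions above: FIREWORKS_BASE_URL existed only so a generic OpenAI client could be pointed at Fireworks' OpenAI-compatible endpoint, and FIREWORKS_JSON_RETRY_ATTEMPTS only drove the hand-rolled JSON-validation retry loops removed from utils.py below. A minimal before/after sketch of the client construction, with everything taken from the two sides of this commit's diffs:

import os

FIREWORKS_API_KEY = os.getenv("FIREWORKS_API_KEY")

# Before: a generic OpenAI client aimed at Fireworks' OpenAI-compatible
# endpoint, so the base URL had to live in constants.py.
from openai import OpenAI

old_client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=FIREWORKS_API_KEY,
)

# After: the dedicated fireworks-ai SDK ships with the endpoint built in,
# so no base-URL constant is needed.
from fireworks.client import Fireworks

new_client = Fireworks(api_key=FIREWORKS_API_KEY)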
requirements.txt CHANGED
@@ -13,11 +13,13 @@ click==8.1.7
 contourpy==1.3.0
 cycler==0.12.1
 distro==1.9.0
+docstring_parser==0.16
 einops==0.8.0
 encodec==0.1.1
 fastapi==0.115.0
 ffmpy==0.4.0
 filelock==3.16.1
+fireworks-ai==0.15.6
 fonttools==4.54.1
 frozenlist==1.4.1
 fsspec==2024.9.0
@@ -28,10 +30,13 @@ granian==1.4.0
 h11==0.14.0
 httpcore==1.0.5
 httpx==0.27.2
+httpx-sse==0.4.0
+httpx-ws==0.6.2
 huggingface-hub==0.25.1
 idna==3.10
 importlib_metadata==8.5.0
 importlib_resources==6.4.5
+instructor==1.6.2
 Jinja2==3.1.4
 jiter==0.5.0
 jmespath==1.0.1
@@ -55,8 +60,8 @@ pandas==2.2.3
 pillow==10.4.0
 promptic==0.7.5
 psutil==5.9.8
-pydantic==2.7.0
-pydantic_core==2.18.1
+pydantic==2.9.2
+pydantic_core==2.23.4
 pydub==0.25.1
 Pygments==2.18.0
 pyparsing==3.1.4
@@ -85,7 +90,7 @@ spaces==0.30.2
 starlette==0.38.6
 suno-bark @ git+https://github.com/suno-ai/bark.git@f4f32d4cd480dfec1c245d258174bc9bde3c2148
 sympy==1.13.3
-tenacity==8.3.0
+tenacity==9.0.0
 tiktoken==0.7.0
 tokenizers==0.20.0
 tomlkit==0.12.0
@@ -100,5 +105,6 @@ urllib3==2.2.3
 uvicorn==0.31.0
 uvloop==0.18.0
 websockets==12.0
+wsproto==1.2.0
 yarl==1.13.1
 zipp==3.20.2
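
Most of the new pins appear to track the two new direct dependencies rather than the app itself: instructor==1.6.2 plausibly accounts for docstring_parser, the tenacity 8.x-to-9.x bump, and the newer pydantic/pydantic_core, while fireworks-ai==0.15.6 plausibly accounts for httpx-sse, httpx-ws, and (through httpx-ws) wsproto. A quick sketch for checking that attribution in the installed environment:

# Print each newly added package's declared dependencies; the comments state
# what the output should include if the attribution above is right (an
# assumption, not something stated in the diff).
from importlib.metadata import requires

print(requires("instructor"))    # expect docstring-parser, tenacity, pydantic, ...
print(requires("fireworks-ai"))  # expect httpx, httpx-sse, httpx-ws, ...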
utils.py CHANGED
@@ -6,6 +6,9 @@ Functions:
 - call_llm: Call the LLM with the given prompt and dialogue format.
 - parse_url: Parse the given URL and return the text content.
 - generate_podcast_audio: Generate audio for podcast using TTS or advanced audio models.
+- _use_suno_model: Generate advanced audio using Bark.
+- _use_melotts_api: Generate audio using TTS model.
+- _get_melo_tts_params: Get TTS parameters based on speaker and language.
 """
 
 # Standard library imports
@@ -13,21 +16,19 @@ import time
 from typing import Any, Union
 
 # Third-party imports
+import instructor
 import requests
 from bark import SAMPLE_RATE, generate_audio, preload_models
+from fireworks.client import Fireworks
 from gradio_client import Client
-from openai import OpenAI
-from pydantic import ValidationError
 from scipy.io.wavfile import write as write_wav
 
 # Local imports
 from constants import (
     FIREWORKS_API_KEY,
-    FIREWORKS_BASE_URL,
     FIREWORKS_MODEL_ID,
     FIREWORKS_MAX_TOKENS,
     FIREWORKS_TEMPERATURE,
-    FIREWORKS_JSON_RETRY_ATTEMPTS,
     MELO_API_NAME,
     MELO_TTS_SPACES_ID,
     MELO_RETRY_ATTEMPTS,
@@ -38,8 +39,11 @@ from constants import (
 )
 from schema import ShortDialogue, MediumDialogue
 
-# Initialize clients
-fw_client = OpenAI(base_url=FIREWORKS_BASE_URL, api_key=FIREWORKS_API_KEY)
+# Initialize Fireworks client, with Instructor patch
+fw_client = Fireworks(api_key=FIREWORKS_API_KEY)
+fw_client = instructor.from_fireworks(fw_client)
+
+# Initialize Hugging Face client
 hf_client = Client(MELO_TTS_SPACES_ID)
 
 # Download and load all models for Bark
@@ -53,51 +57,13 @@ def generate_script(
 ) -> Union[ShortDialogue, MediumDialogue]:
     """Get the dialogue from the LLM."""
 
-    # Call the LLM
-    response = call_llm(system_prompt, input_text, output_model)
-    response_json = response.choices[0].message.content
-
-    # Validate the response
-    for attempt in range(FIREWORKS_JSON_RETRY_ATTEMPTS):
-        try:
-            first_draft_dialogue = output_model.model_validate_json(response_json)
-            break
-        except ValidationError as e:
-            if attempt == FIREWORKS_JSON_RETRY_ATTEMPTS - 1:  # Last attempt
-                raise ValueError(
-                    f"Failed to parse dialogue JSON after {FIREWORKS_JSON_RETRY_ATTEMPTS} attempts: {e}"
-                ) from e
-            error_message = (
-                f"Failed to parse dialogue JSON (attempt {attempt + 1}): {e}"
-            )
-            # Re-call the LLM with the error message
-            system_prompt_with_error = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
-            response = call_llm(system_prompt_with_error, input_text, output_model)
-            response_json = response.choices[0].message.content
-            first_draft_dialogue = output_model.model_validate_json(response_json)
+    # Call the LLM for the first time
+    first_draft_dialogue = call_llm(system_prompt, input_text, output_model)
 
     # Call the LLM a second time to improve the dialogue
-    system_prompt_with_dialogue = f"{system_prompt}\n\nHere is the first draft of the dialogue you provided:\n\n{first_draft_dialogue}."
+    system_prompt_with_dialogue = f"{system_prompt}\n\nHere is the first draft of the dialogue you provided:\n\n{first_draft_dialogue.model_dump_json()}."
+    final_dialogue = call_llm(system_prompt_with_dialogue, "Please improve the dialogue. Make it more natural and engaging.", output_model)
 
-    # Validate the response
-    for attempt in range(FIREWORKS_JSON_RETRY_ATTEMPTS):
-        try:
-            response = call_llm(
-                system_prompt_with_dialogue,
-                "Please improve the dialogue. Make it more natural and engaging.",
-                output_model,
-            )
-            final_dialogue = output_model.model_validate_json(
-                response.choices[0].message.content
-            )
-            break
-        except ValidationError as e:
-            if attempt == FIREWORKS_JSON_RETRY_ATTEMPTS - 1:  # Last attempt
-                raise ValueError(
-                    f"Failed to improve dialogue after {FIREWORKS_JSON_RETRY_ATTEMPTS} attempts: {e}"
-                ) from e
-            error_message = f"Failed to improve dialogue (attempt {attempt + 1}): {e}"
-            system_prompt_with_dialogue += f"\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
     return final_dialogue
 
 
@@ -111,10 +77,7 @@ def call_llm(system_prompt: str, text: str, dialogue_format: Any) -> Any:
         model=FIREWORKS_MODEL_ID,
         max_tokens=FIREWORKS_MAX_TOKENS,
         temperature=FIREWORKS_TEMPERATURE,
-        response_format={
-            "type": "json_object",
-            "schema": dialogue_format.model_json_schema(),
-        },
+        response_model=dialogue_format,
     )
     return response
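
The substance of the commit is in this file: instructor.from_fireworks wraps the Fireworks client so that chat.completions.create accepts a response_model, injects the JSON schema, and hands back an already-validated Pydantic instance (with optional built-in retries on validation failure via Instructor's max_retries argument). That is what lets the two hand-rolled ValidationError loops in generate_script, and the FIREWORKS_JSON_RETRY_ATTEMPTS constant behind them, collapse into single calls. A self-contained sketch of the new flow; the Dialogue model here is a hypothetical stand-in for the repo's ShortDialogue/MediumDialogue:

import os

import instructor
from fireworks.client import Fireworks
from pydantic import BaseModel


class Dialogue(BaseModel):
    """Hypothetical stand-in for ShortDialogue / MediumDialogue."""

    title: str
    lines: list[str]


# Patching the client changes the return type of create(): it now yields a
# validated Dialogue instead of a raw chat completion.
client = instructor.from_fireworks(Fireworks(api_key=os.getenv("FIREWORKS_API_KEY")))

draft = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-405b-instruct",
    max_tokens=16_384,
    temperature=0.1,
    response_model=Dialogue,  # replaces the old response_format JSON-schema dict
    messages=[
        {"role": "system", "content": "You write short two-host podcast dialogues."},
        {"role": "user", "content": "Write a dialogue about structured outputs."},
    ],
)

# No manual model_validate_json or ValidationError handling needed.
assert isinstance(draft, Dialogue)
print(draft.model_dump_json())

A related detail in generate_script: the old code interpolated first_draft_dialogue (a Pydantic object) directly into the second prompt, embedding its repr; the new code calls model_dump_json() first, so the revision pass sees clean JSON.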