Spaces:

gabrielchua
/

open-notebooklm

Running on T4

App Files Files Community

open-notebooklm / utils.py

gabrielchua

fix meloTTS

b9d657b 2 months ago

raw

history blame

3.96 kB

	"""
	utils.py

	Functions:
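	- generate_script: Get the dialogue from the LLM, then ask the LLM to improve it.
	- call_llm: Call the LLM with the given prompt and dialogue format.
	- parse_url: Fetch the text content of a URL through the Jina endpoint (JINA_URL).
	- generate_podcast_audio: Get the audio from Bark or from the MeloTTS model on HF Spaces.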
	"""

	import os
	import requests
	import time
	from gradio_client import Client
	from openai import OpenAI
	from pydantic import ValidationError

	from bark import SAMPLE_RATE, generate_audio, preload_models
	from scipy.io.wavfile import write as write_wav

	# Model identifier passed as `model` on every chat-completion request in call_llm.
	MODEL_ID = "accounts/fireworks/models/llama-v3p1-405b-instruct"
	# Prefix that parse_url prepends to the target URL before requesting it.
	JINA_URL = "https://r.jina.ai/"

	# OpenAI SDK client pointed at the Fireworks inference endpoint. The key is
	# read from the FIREWORKS_API_KEY environment variable (os.getenv yields
	# None when it is unset).
	client = OpenAI(
	base_url="https://api.fireworks.ai/inference/v1",
	api_key=os.getenv("FIREWORKS_API_KEY"),
	)

	# gradio_client handle for the "mrfakename/MeloTTS" Space; used by
	# generate_podcast_audio when advanced audio is not requested.
	hf_client = Client("mrfakename/MeloTTS")

	# download and load all models
	# (bark's preload_models; this is a module-level call, so it runs at import time)
	preload_models()


	def generate_script(system_prompt: str, input_text: str, output_model):
	    """Draft a dialogue with the LLM, then have the LLM refine it.

	    A first draft is requested via ``call_llm`` and validated with
	    ``output_model.model_validate_json``. If that validation raises
	    ``ValidationError``, the request is repeated once with the error text
	    appended to the system prompt. The validated draft is then embedded in
	    a second system prompt asking for an improved dialogue; the validated
	    result of that second request is returned.
	    """
	    try:
	        first_reply = call_llm(system_prompt, input_text, output_model)
	        draft = output_model.model_validate_json(
	            first_reply.choices[0].message.content
	        )
	    except ValidationError as exc:
	        # Single retry: tell the model what was wrong with its previous output.
	        error_message = f"Failed to parse dialogue JSON: {exc}"
	        retry_prompt = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
	        retry_reply = call_llm(retry_prompt, input_text, output_model)
	        draft = output_model.model_validate_json(
	            retry_reply.choices[0].message.content
	        )

	    # Refinement pass: show the model its own draft and ask for a better one.
	    refine_prompt = f"{system_prompt}\n\nHere is the first draft of the dialogue you provided:\n\n{draft}."
	    refined_reply = call_llm(refine_prompt, "Please improve the dialogue.", output_model)
	    return output_model.model_validate_json(refined_reply.choices[0].message.content)


	def call_llm(system_prompt: str, text: str, dialogue_format):
	    """Send one chat-completion request and return the raw response.

	    ``system_prompt`` becomes the system message and ``text`` the user
	    message. The JSON schema produced by ``dialogue_format.model_json_schema()``
	    is supplied as the ``schema`` of a ``json_object`` response format.
	    """
	    conversation = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]
	    json_mode = {
	        "type": "json_object",
	        "schema": dialogue_format.model_json_schema(),
	    }
	    return client.chat.completions.create(
	        messages=conversation,
	        model=MODEL_ID,
	        max_tokens=16_384,
	        temperature=0.1,
	        response_format=json_mode,
	    )


	def parse_url(url: str) -> str:
	    """Request ``url`` through the JINA_URL prefix and return the response text.

	    The request uses a 60-second timeout. The HTTP status is not inspected;
	    whatever body comes back is returned as-is.
	    """
	    return requests.get(f"{JINA_URL}{url}", timeout=60).text


	def generate_podcast_audio(text: str, speaker: str, language: str, use_advanced_audio: bool) -> str:
	    """Synthesize speech for one line of dialogue.

	    Args:
	        text: The line to be spoken.
	        speaker: Speaker label. ``"Host (Jane)"`` selects Bark voice 1 (any
	            other value selects voice 3); ``"Guest"`` selects the guest
	            MeloTTS settings (any other value is treated as the host).
	        language: Language code. Inserted verbatim into the Bark history
	            prompt, and passed as ``language`` to MeloTTS.
	        use_advanced_audio: When true, generate the audio with Bark;
	            otherwise call the MeloTTS Space through ``hf_client``.

	    Returns:
	        For Bark, the path of the WAV file written to the current working
	        directory. For MeloTTS, whatever the Space's ``/synthesize``
	        endpoint returns (presumably a local file path; confirm against
	        the Space's API).

	    Raises:
	        Exception: The error from the third failed MeloTTS attempt is
	            re-raised unchanged.
	    """
	    if use_advanced_audio:
	        voice_number = "1" if speaker == "Host (Jane)" else "3"
	        audio_array = generate_audio(
	            text, history_prompt=f"v2/{language}_speaker_{voice_number}"
	        )

	        # write_wav emits WAV data, so the file must carry a .wav extension;
	        # labelling it .mp3 misleads any consumer that trusts the extension.
	        file_path = f"audio_{language}_{speaker}.wav"
	        write_wav(file_path, SAMPLE_RATE, audio_array)
	        return file_path

	    # MeloTTS path: pick the accent and speaking speed per speaker/language.
	    is_guest = speaker == "Guest"
	    if language == "EN":
	        accent = "EN-US" if is_guest else "EN-Default"
	    else:
	        accent = language

	    if is_guest:
	        speed = 0.9
	    elif language == "EN":
	        speed = 1
	    else:
	        # Non-English host lines are spoken slightly faster.
	        speed = 1.1

	    # The remote Space can fail transiently: try up to three times with a
	    # one-second pause, and surface the final error if all attempts fail.
	    max_attempts = 3
	    for attempt in range(max_attempts):
	        try:
	            return hf_client.predict(
	                text=text,
	                language=language,
	                speaker=accent,
	                speed=speed,
	                api_name="/synthesize",
	            )
	        except Exception:
	            if attempt == max_attempts - 1:
	                raise
	            time.sleep(1)