output files issue
Would you please take a look at my code? The generated .wav files are corrupted and I cannot play them with any player. Here is my code:
import os
import torch
from diffusers import AudioLDMPipeline
import scipy.io.wavfile
# Script: load the AudioLDM pipeline (downloading it on first run and keeping
# a local copy under ./models), then render one WAV file per prompt into
# ./outputs.

# Create the necessary directories if they do not exist.
os.makedirs('./models', exist_ok=True)
os.makedirs('./outputs', exist_ok=True)

# Set the repository ID and model directory.
repo_id = "cvssp/audioldm-m-full"
model_dir = './models'

# Check if the model is already downloaded.
model_path = os.path.join(model_dir, repo_id.split('/')[-1])
if not os.path.exists(model_path):
    # Download the model and save a copy in the model directory.
    pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
    pipe.save_pretrained(model_path)
else:
    # Load the previously saved model.
    pipe = AudioLDMPipeline.from_pretrained(model_path, torch_dtype=torch.float16)

# Move the pipeline to the GPU.
pipe = pipe.to("cuda")

# Define the list of prompts.
prompts = ["Techno music with a strong, upbeat tempo and high melodic riffs", "Classical music with a slow tempo and soft melody"]

# Generate audio for each prompt.
for i, prompt in enumerate(prompts):
    audio = pipe(prompt, num_inference_steps=10, audio_length_in_s=20.0).audios[0]
    # The pipeline runs in float16, so the returned samples may be float16 too.
    # A float16 array makes scipy emit a 16-bit IEEE-float WAV, which players
    # cannot decode; cast to float32, a sample format WAV readers understand.
    # Assumes the pipeline's default NumPy output -- TODO confirm if
    # output_type is ever changed.
    audio = audio.astype("float32")
    # Save the audio to a unique file in the outputs directory.
    output_path = os.path.join('./outputs', f'audio_{i}.wav')
    scipy.io.wavfile.write(output_path, rate=16000, data=audio)
See https://github.com/huggingface/diffusers/pull/3189 and https://github.com/huggingface/diffusers/issues/3091. You need to upgrade diffusers to the latest version to fix this:
pip install --upgrade diffusers
It got fixed. I just used NumPy (np) at the end to convert and scale the audio to 16-bit integers:
import numpy as np

# Generate audio for each prompt and save it as 16-bit PCM WAV.
# Relies on `pipe` and `prompts` being defined earlier in the script.
for i, prompt in enumerate(prompts):
    audio = pipe(prompt, num_inference_steps=20, audio_length_in_s=20.0).audios[0]
    # Promote to float32 before scaling: float16 cannot represent values
    # near 32767 exactly, so scaling in half precision degrades the signal.
    audio = np.asarray(audio, dtype=np.float32)
    # Peak-normalise to the int16 range. A silent (all-zero) or empty clip has
    # no usable peak; write zeros instead of dividing by zero and casting NaN.
    peak = float(np.max(np.abs(audio))) if audio.size else 0.0
    if peak > 0.0:
        audio_scaled = np.int16(audio / peak * 32767)
    else:
        audio_scaled = np.zeros(audio.shape, dtype=np.int16)
    # Save the audio to a unique file in the outputs directory.
    output_path = os.path.join('./outputs', f'audio_{i}.wav')
    scipy.io.wavfile.write(output_path, rate=16000, data=audio_scaled)