"""Helpers for driving MusicGen with a melody that is longer than one model window.

The melody is cut into fixed-length segments and one generation is run per
segment, so that clips longer than the model's native window can be produced.
"""
import math

import numpy as np
import torch

# MusicGen is not referenced by the helpers in this module (the model instance
# is passed in by the caller).  The import is kept for code that re-imports the
# name from here, but guarded so the numpy/torch-only helpers stay importable
# when audiocraft itself is not installed.
try:
    from audiocraft.models import MusicGen
except ImportError:
    MusicGen = None


def separate_audio_segments(audio, segment_duration=30):
    """Split an audio clip into consecutive fixed-length segments.

    Args:
        audio: ``(sample_rate, samples)`` pair.  ``samples`` is sliced along
            its first axis, so it may be 1-D or have extra trailing axes.
        segment_duration: Length of each segment in seconds.

    Returns:
        A list of ``(sample_rate, segment)`` tuples in playback order.  The
        last segment is shorter when the clip length is not a multiple of the
        segment length.  The list is empty when ``samples`` is empty.

    Raises:
        ValueError: If ``sample_rate * segment_duration`` is less than one
            sample, which would make segmentation impossible.
    """
    sr, audio_data = audio[0], audio[1]

    # Slice bounds must be integers; a float duration (e.g. 1.5 s) would
    # otherwise produce float indices that numpy rejects.
    segment_samples = int(sr * segment_duration)
    if segment_samples < 1:
        raise ValueError(
            f"segment_duration={segment_duration!r} at sample rate {sr!r} "
            "yields no samples per segment"
        )

    total_samples = len(audio_data)
    total_segments = math.ceil(total_samples / segment_samples)

    segments = []
    for segment_idx in range(total_segments):
        print(f"Audio Input segment {segment_idx + 1} / {total_segments} \r")
        start_sample = segment_idx * segment_samples
        end_sample = start_sample + segment_samples
        segments.append((sr, audio_data[start_sample:end_sample]))

    return segments


def generate_music_segments(text, melody, MODEL, duration: int = 10, segment_duration: int = 30):
    """Generate one audio segment per melody segment, conditioned on ``text``.

    The melody is split with :func:`separate_audio_segments`.  If it yields
    fewer segments than ``duration`` requires, the melody segments are reused
    cyclically from the start until enough are available.

    Args:
        text: Text description passed to the model for every segment.
        melody: ``(sample_rate, samples)`` pair holding the conditioning melody
            as a numpy array.
        MODEL: Object exposing ``device``, ``lm.cfg.dataset.segment_duration``
            and ``generate_with_chroma(...)`` (presumably a MusicGen melody
            model; only these attributes are used here).
        duration: Requested total length in seconds.
        segment_duration: Length in seconds of each melody segment.

    Returns:
        A list with one ``generate_with_chroma`` result per generated segment,
        in order.

    Raises:
        ValueError: If ``melody`` contains no samples, or if the segment
            length is shorter than one sample.
    """
    melody_segments = separate_audio_segments(melody, segment_duration)
    if not melody_segments:
        # Without this guard the padding step below fails with an opaque
        # IndexError on an empty list.
        raise ValueError("melody contains no audio samples")

    # Always generate at least one segment, even for very short durations.
    total_segments = max(math.ceil(duration / segment_duration), 1)
    print(f"total Segments to Generate: {total_segments} for {duration} seconds. Each segment is {segment_duration} seconds")

    # If the melody is shorter than the requested output, repeat its segments
    # from the beginning until there is one per output segment.
    if len(melody_segments) < total_segments:
        available = len(melody_segments)
        melody_segments = [melody_segments[i % available] for i in range(total_segments)]
        print(f"melody_segments: {len(melody_segments)} fixed")

    # Every segment shares the melody's sample rate, and the truncation length
    # depends only on the model configuration, so compute both once.
    sr = melody[0]
    max_samples = int(sr * MODEL.lm.cfg.dataset.segment_duration)

    # Convert each melody segment into a tensor on the model's device.
    melodys = []
    for segment_idx, (_, samples) in enumerate(melody_segments[:total_segments]):
        print(f"segment {segment_idx + 1} of {total_segments} \r")
        # (samples[, channels]) -> ([channels,] samples) -> leading batch axis.
        verse = torch.from_numpy(samples).to(MODEL.device).float().t().unsqueeze(0)
        print(f"shape:{verse.shape} dim:{verse.dim()}")
        if verse.dim() == 2:
            # 1-D (mono) input: add the missing axis so the tensor is 3-D.
            verse = verse[None]
        # Never hand the model more samples than its configured window.
        verse = verse[..., :max_samples]
        melodys.append(verse)

    output_segments = []
    for idx, verse in enumerate(melodys):
        print(f"Generating New Melody Segment {idx + 1}: {text}\r")
        output = MODEL.generate_with_chroma(
            descriptions=[text],
            melody_wavs=verse,
            melody_sample_rate=sr,
            progress=True,
        )
        output_segments.append(output)
        print(f"output_segments: {len(output_segments)}: shape[0]: {output.shape} dim {output.dim()}")

    return output_segments


# NOTE: previous implementation, superseded by generate_music_segments above.
# Kept for reference only; it split the melody tensor directly and did not pad
# short melodies.
#
# def generate_music_segments(text, melody, duration, MODEL, segment_duration=30):
#     sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
#
#     # Create a list to store the melody tensors for each segment
#     melodys = []
#
#     # Calculate the total number of segments
#     total_segments = math.ceil(melody.shape[1] / (sr * segment_duration))
#
#     # Iterate over the segments
#     for segment_idx in range(total_segments):
#         print(f"segment {segment_idx + 1} / {total_segments + 1} \r")
#         start_frame = segment_idx * sr * segment_duration
#         end_frame = (segment_idx + 1) * sr * segment_duration
#
#         # Extract the segment from the melody tensor
#         segment = melody[:, start_frame:end_frame]
#
#         # Append the segment to the melodys list
#         melodys.append(segment)
#
#     output_segments = []
#
#     for segment in melodys:
#         output = MODEL.generate_with_chroma(
#             descriptions=[text],
#             melody_wavs=segment,
#             melody_sample_rate=sr,
#             progress=False
#         )
#
#         # Append the generated output to the list of segments
#         output_segments.append(output[:, :segment_duration])
#
#     return output_segments