valle_v2.1 / utils /cut_by_vad.py
jiaqili3's picture
init
addb7e5
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
""" This code is modified from https://github.com/facebookresearch/libri-light/blob/main/data_preparation/cut_by_vad.py"""
import pathlib
import soundfile as sf
import numpy as np
import json
import multiprocessing
import tqdm
def save(seq, fname, index, extension):
"""save audio sequences to file"""
output = np.hstack(seq)
file_name = fname.parent / (fname.stem + f"_{index:04}{extension}")
fname.parent.mkdir(exist_ok=True, parents=True)
sf.write(file_name, output, samplerate=16000)
def cut_sequence(path, vad, path_out, target_len_sec, out_extension):
"""cut audio sequences based on VAD"""
data, samplerate = sf.read(path)
assert len(data.shape) == 1
assert samplerate == 16000
to_stitch = []
length_accumulated = 0.0
i = 0
# Iterate over VAD segments
for start, end in vad:
start_index = int(start * samplerate)
end_index = int(end * samplerate)
slice = data[start_index:end_index]
# Save slices that exceed the target length or if there's already accumulated audio
if (
length_accumulated + (end - start) > target_len_sec
and length_accumulated > 0
):
save(to_stitch, path_out, i, out_extension)
to_stitch = []
i += 1
length_accumulated = 0
# Add the current slice to the list to be stitched
to_stitch.append(slice)
length_accumulated += end - start
# Save any remaining slices
if to_stitch:
save(to_stitch, path_out, i, out_extension)
def cut_book(task):
"""process each book in the dataset"""
path_book, root_out, target_len_sec, extension = task
speaker = pathlib.Path(path_book.parent.name)
for i, meta_file_path in enumerate(path_book.glob("*.json")):
with open(meta_file_path, "r") as f:
meta = json.loads(f.read())
book_id = meta["book_meta"]["id"]
vad = meta["voice_activity"]
sound_file = meta_file_path.parent / (meta_file_path.stem + ".flac")
path_out = root_out / speaker / book_id / (meta_file_path.stem)
cut_sequence(sound_file, vad, path_out, target_len_sec, extension)
def cut_segments(
input_dir, output_dir, target_len_sec=30, n_process=32, out_extension=".wav"
):
"""Main function to cut segments from audio files"""
pathlib.Path(output_dir).mkdir(exist_ok=True, parents=True)
list_dir = pathlib.Path(input_dir).glob("*/*")
list_dir = [x for x in list_dir if x.is_dir()]
print(f"{len(list_dir)} directories detected")
print(f"Launching {n_process} processes")
# Create tasks for multiprocessing
tasks = [
(path_book, output_dir, target_len_sec, out_extension) for path_book in list_dir
]
# Process tasks in parallel using multiprocessing
with multiprocessing.Pool(processes=n_process) as pool:
for _ in tqdm.tqdm(pool.imap_unordered(cut_book, tasks), total=len(tasks)):
pass
if __name__ == "__main__":
input_dir = "/path/to/input_dir"
output_dir = "/path/to/output_dir"
target_len_sec = 10
n_process = 16
cut_segments(input_dir, output_dir, target_len_sec, n_process)