|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
import glob |
|
import json |
|
import logging |
|
import os |
|
import subprocess |
|
import urllib.request |
|
from shutil import copy, move |
|
from zipfile import ZipFile |
|
|
|
from tqdm import tqdm |
|
|
|
# Command-line interface: the only option is the (mandatory) working directory
# that receives the downloaded archive and the processed output.
# NOTE: arguments are parsed at import time; main() reads the module-level `args`.
parser = argparse.ArgumentParser(
    description="OpenSLR RIR Data download and process",
)
parser.add_argument(
    "--data_root",
    type=str,
    default=None,
    required=True,
)
args = parser.parse_args()
|
|
|
# Download location of each supported archive, keyed by upper-case dataset id.
URLS = {"SLR28": "http://www.openslr.org/resources/28/rirs_noises.zip"}
|
|
|
|
|
def __retrieve_with_progress(source: str, filename: str):
    """
    Downloads source to filename
    Displays a progress bar when the size of the payload is known
    Args:
        source: url of resource
        filename: local filepath the payload is written to
    Returns:
        None
    """
    chunk_size = 64 * 1024
    # Open the URL first so that a failed request does not leave an empty file
    # behind; both handles are closed on exit, even on error.
    with urllib.request.urlopen(source) as response, open(filename, "wb") as f:
        # HTTP responses expose the Content-Length as `length` (None when the
        # server does not announce it); other URL schemes have no such attribute.
        total = getattr(response, "length", None)
        # Fixed-size reads: iterating the response directly would split the
        # binary payload on newline bytes.
        chunks = iter(lambda: response.read(chunk_size), b"")

        if total is None:
            # Unknown size: stream the payload without a progress bar.
            for data in chunks:
                f.write(data)
        else:
            with tqdm(total=total, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
                for data in chunks:
                    f.write(data)
                    pbar.update(len(data))
|
|
|
|
|
def __maybe_download_file(destination: str, source: str):
    """
    Downloads source to destination if it doesn't exist.
    If exists, skips download
    Args:
        destination: local filepath
        source: key into URLS identifying the resource to download
    Returns:
        destination
    Raises:
        KeyError: if source is not a key of URLS
    """
    source = URLS[source]
    if os.path.exists(destination):
        logging.info("Destination {0} exists. Skipping.".format(destination))
        return destination

    logging.info("{0} does not exist. Downloading ...".format(destination))
    # Make sure the target directory exists, otherwise opening the temporary
    # file below fails with FileNotFoundError on a fresh data root.
    parent_dir = os.path.dirname(destination)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Download under a temporary name and rename afterwards, so an interrupted
    # download is never mistaken for a complete archive on the next run.
    tmp_path = destination + ".tmp"
    __retrieve_with_progress(source, filename=tmp_path)
    os.rename(tmp_path, destination)
    logging.info("Downloaded {0}.".format(destination))
    return destination
|
|
|
|
|
def __extract_file(filepath: str, data_dir: str):
    """
    Extracts the zip archive at filepath into data_dir.
    Extraction is best-effort: a failure is logged and otherwise ignored.
    Args:
        filepath: path of the zip archive
        data_dir: folder the archive content is extracted into
    Returns:
        None
    """
    try:
        with ZipFile(filepath, "r") as zipObj:
            zipObj.extractall(data_dir)
    except Exception as err:
        # Stay best-effort, but report what actually went wrong (missing file,
        # corrupt archive, permissions, ...) instead of discarding the cause.
        logging.warning("Not extracting. Maybe already there? ({0}: {1})".format(type(err).__name__, err))
|
|
|
|
|
def __process_data(data_folder: str, dst_folder: str, manifest_file: str):
    """
    Prepares the RIR wav files and builds the manifest's json.
    Real RIRs listed in the archive's rir_list are copied (mono) or split into
    one wav per channel (multi-channel) under dst_folder/real_rirs, the
    simulated_rirs folder is moved into dst_folder, and one json line per wav
    file found under dst_folder is written to the manifest.
    Requires the `sox` and `soxi` command line tools.
    Args:
        data_folder: folder containing the extracted RIRS_NOISES directory
        dst_folder: where wav files will be stored
        manifest_file: where to store manifest (a relative path is resolved against dst_folder)
    Returns:
        None
    Raises:
        subprocess.CalledProcessError: if a sox/soxi invocation fails
    """
    os.makedirs(dst_folder, exist_ok=True)

    real_rir_list = os.path.join(data_folder, "RIRS_NOISES", "real_rirs_isotropic_noises", "rir_list")
    rirfiles = []
    with open(real_rir_list, "r") as list_f:
        for line in list_f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # The wav path relative to data_folder is the fifth space-separated field.
            rirfiles.append(os.path.join(data_folder, line.rstrip().split(" ")[4]))

    real_rir_folder = os.path.join(dst_folder, "real_rirs")
    os.makedirs(real_rir_folder, exist_ok=True)

    for rir_f in rirfiles:
        # Argument lists (no shell) keep paths with spaces or shell
        # metacharacters intact.
        n_chans = int(subprocess.check_output(["soxi", "-c", rir_f]))
        if n_chans == 1:
            copy(rir_f, real_rir_folder)
            continue
        stem = os.path.splitext(os.path.basename(rir_f))[0]
        # sox channels are 1-based; every channel becomes its own mono file.
        for chan in range(1, n_chans + 1):
            chan_file_name = os.path.join(real_rir_folder, stem + "-" + str(chan) + ".wav")
            subprocess.check_output(["sox", rir_f, chan_file_name, "remix", str(chan)])

    if not os.path.exists(os.path.join(dst_folder, "simulated_rirs")):
        move(os.path.join(data_folder, "RIRS_NOISES", "simulated_rirs"), dst_folder)

    # Root the search at dst_folder explicitly so the working directory of the
    # process is left untouched; sorting makes the manifest order reproducible.
    wav_pattern = os.path.join(glob.escape(dst_folder), "**", "*.wav")
    all_rirs = sorted(glob.glob(wav_pattern, recursive=True))

    # os.path.join returns manifest_file unchanged when it is absolute.
    manifest_path = os.path.join(dst_folder, manifest_file)
    with open(manifest_path, "w") as man_f:
        for rir_file in all_rirs:
            duration = subprocess.check_output(["soxi", "-D", rir_file])
            entry = {
                "audio_filepath": rir_file,
                "duration": float(duration),
                "offset": 0,
                "text": "_",
            }
            man_f.write(json.dumps(entry) + "\n")

    print("Done!")
|
|
|
|
|
def main():
    """
    Runs the whole pipeline for the SLR28 dataset: download the archive into
    the data root given on the command line, extract it there, then process it
    into <data_root>/processed with the manifest at <data_root>/processed/rir.json.
    """
    dataset_name = "slr28"
    root_dir = os.path.abspath(args.data_root)
    archive_path = os.path.join(root_dir, dataset_name + ".zip")
    processed_dir = os.path.join(root_dir, "processed")
    manifest_path = os.path.join(root_dir, "processed", "rir.json")

    logging.getLogger().setLevel(logging.INFO)
    logging.info("\n\nWorking on: {0}".format(dataset_name))

    logging.info("Getting {0}".format(dataset_name))
    __maybe_download_file(archive_path, dataset_name.upper())

    logging.info("Extracting {0}".format(dataset_name))
    __extract_file(archive_path, root_dir)

    logging.info("Processing {0}".format(dataset_name))
    __process_data(root_dir, processed_dir, manifest_path)

    logging.info("Done!")
|
|
|
|
|
# Run the pipeline only when the file is executed as a script.
if __name__ == "__main__":
    main()
|
|