Spaces:
Running
Running
File size: 3,170 Bytes
c5ed230 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import logging
import os
from json import loads
import av
from torch import load, FloatTensor
from numpy import float32
import librosa
class HParams:
    """Hyper-parameter container with both attribute and mapping-style access.

    Every keyword argument becomes an instance attribute. Values that are
    dictionaries are converted recursively into nested ``HParams`` objects,
    so ``hp.train.lr`` and ``hp["train"]["lr"]`` are both valid.
    """

    def __init__(self, **kwargs):
        """Store each keyword argument as an attribute.

        Nested dictionaries are expanded with ``**``, so their keys must be
        strings.
        """
        for k, v in kwargs.items():
            # isinstance instead of an exact type comparison, so that dict
            # subclasses (e.g. OrderedDict) are wrapped as well.
            if isinstance(v, dict):
                v = HParams(**v)
            self[k] = v

    def keys(self):
        """Return a view of the stored parameter names."""
        return self.__dict__.keys()

    def items(self):
        """Return a view of the stored ``(name, value)`` pairs."""
        return self.__dict__.items()

    def values(self):
        """Return a view of the stored parameter values."""
        return self.__dict__.values()

    def __len__(self):
        """Return the number of stored parameters."""
        return len(self.__dict__)

    def __getitem__(self, key):
        """Return the attribute named ``key``.

        Lookup goes through ``getattr``, so a missing name raises
        ``AttributeError`` rather than ``KeyError``.
        """
        return getattr(self, key)

    def __setitem__(self, key, value):
        """Set the attribute named ``key`` to ``value``."""
        return setattr(self, key, value)

    def __contains__(self, key):
        """Return True if ``key`` is a stored parameter name."""
        return key in self.__dict__

    def __repr__(self):
        """Return the repr of the underlying attribute dictionary."""
        return self.__dict__.__repr__()
def load_checkpoint(checkpoint_path, model):
    """Load weights from a checkpoint file into ``model`` in place.

    The checkpoint must be a dictionary with an ``'iteration'`` entry and a
    ``'model'`` entry holding the saved state dict. If ``model`` has a
    ``module`` attribute, that inner object is the one whose state is read
    and updated.

    Parameters of the model that are absent from the checkpoint keep their
    current values, and each such name is logged at INFO level.

    Args:
        checkpoint_path: Path handed to ``torch.load``.
        model: Object exposing ``state_dict()`` / ``load_state_dict()``,
            either directly or through a ``module`` attribute.

    Returns:
        None.
    """
    # NOTE: torch.load deserializes with pickle; only load checkpoints that
    # come from a trusted source.
    checkpoint_dict = load(checkpoint_path, map_location='cpu')
    iteration = checkpoint_dict['iteration']
    saved_state_dict = checkpoint_dict['model']

    # Resolve the object that actually owns the parameters once, instead of
    # branching on hasattr both when reading and when writing.
    target = model.module if hasattr(model, 'module') else model

    new_state_dict = {}
    for k, v in target.state_dict().items():
        try:
            new_state_dict[k] = saved_state_dict[k]
        except KeyError:
            # Only a missing key is an expected condition here; any other
            # error should surface instead of being silently swallowed.
            logging.info("%s is not in the checkpoint", k)
            new_state_dict[k] = v

    target.load_state_dict(new_state_dict)
    logging.info("Loaded checkpoint '{}' (iteration {})".format(
        checkpoint_path, iteration))
def get_hparams_from_file(config_path):
    """Read a UTF-8 JSON config file and return its contents as ``HParams``.

    The top-level JSON object is expanded into keyword arguments, so it must
    be an object whose keys are valid keyword names.
    """
    with open(config_path, 'r', encoding='utf-8') as config_file:
        raw_text = config_file.read()
    parsed = loads(raw_text)
    return HParams(**parsed)
def load_audio_to_torch(full_path, target_sampling_rate):
    """Load an audio file with librosa and return it as a ``FloatTensor``.

    The file is requested from ``librosa.load`` as mono audio at
    ``target_sampling_rate``; the samples are cast to float32 before being
    wrapped in the tensor.
    """
    # librosa.load returns (samples, sampling_rate); the rate is not needed
    # because it was requested explicitly.
    waveform, _ = librosa.load(full_path, sr=target_sampling_rate, mono=True)
    as_float32 = waveform.astype(float32)
    return FloatTensor(as_float32)
def wav2ogg(input, output):
    """Re-encode audio from ``input`` into an Ogg container at ``output``.

    Frames decoded from audio stream 0 of the input are encoded with the
    ``libvorbis`` codec and muxed into the output.
    """
    with av.open(input, 'rb') as source, \
            av.open(output, 'wb', format='ogg') as sink:
        encoder = sink.add_stream('libvorbis')

        for frame in source.decode(audio=0):
            for packet in encoder.encode(frame):
                sink.mux(packet)

        # Passing None flushes whatever the encoder is still buffering.
        for packet in encoder.encode(None):
            sink.mux(packet)
def wav2mp3(input, output):
    """Re-encode audio from ``input`` into an MP3 container at ``output``.

    Frames decoded from audio stream 0 of the input are encoded with the
    ``mp3`` codec and muxed into the output.
    """
    with av.open(input, 'rb') as source, \
            av.open(output, 'wb', format='mp3') as sink:
        encoder = sink.add_stream('mp3')

        for frame in source.decode(audio=0):
            for packet in encoder.encode(frame):
                sink.mux(packet)

        # Passing None flushes whatever the encoder is still buffering.
        for packet in encoder.encode(None):
            sink.mux(packet)
def clean_folder(folder_path):
    """Delete every file located directly inside ``folder_path``.

    Entries that are not files (for example sub-directories) are skipped, so
    nothing is removed recursively.
    """
    for entry_name in os.listdir(folder_path):
        candidate = os.path.join(folder_path, entry_name)
        # Only files are deleted; anything else is left in place.
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
def check_is_none(s):
    """Return True when ``s`` carries no usable value, False otherwise.

    A value counts as "none" if it is ``None``, a string made only of
    whitespace, or anything whose ``str()`` form is the empty string.
    """
    if s is None:
        return True
    text = str(s)
    if isinstance(s, str) and text.isspace():
        return True
    return text == ""
|