TensorBoard
File size: 7,089 Bytes
ccee439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from __gin__ import dynamic_registration
import cached_conv as cc
from cached_conv import convs
import rave
from rave import blocks
from rave import core
from rave import dataset
from rave import descript_discriminator
from rave import discriminator
from rave import model
from rave import pqmf
import torch
import torch.nn as nn

# Macros:
# ==============================================================================
ACTIVATION = @blocks.Snake
CAPACITY = 96
DILATIONS = [[1, 3, 9], [1, 3, 9], [1, 3, 9], [1, 3]]
KERNEL_SIZE = 3
LATENT_SIZE = 128
N_BAND = 16
NOISE_AUGMENTATION = 0
PHASE_1_DURATION = 0
RATIOS = [4, 4, 4, 2]
SAMPLING_RATE = 48000

# Parameters for blocks.AdaptiveInstanceNormalization:
# ==============================================================================
# None.

# Parameters for variational/blocks.AdaptiveInstanceNormalization:
# ==============================================================================
# None.

# Parameters for core.AudioDistanceV1:
# ==============================================================================
core.AudioDistanceV1.log_epsilon = 1e-07
core.AudioDistanceV1.multiscale_stft = @core.MultiScaleSTFT

# Parameters for model.BetaWarmupCallback:
# ==============================================================================
model.BetaWarmupCallback.initial_value = 1e-06
model.BetaWarmupCallback.target_value = 0.005
model.BetaWarmupCallback.warmup_len = 20000

# Parameters for pqmf.CachedPQMF:
# ==============================================================================
pqmf.CachedPQMF.attenuation = 100
pqmf.CachedPQMF.n_band = %N_BAND

# Parameters for cc.Conv1d:
# ==============================================================================
cc.Conv1d.bias = False

# Parameters for variational/cc.Conv1d:
# ==============================================================================
variational/cc.Conv1d.bias = False

# Parameters for cc.ConvTranspose1d:
# ==============================================================================
cc.ConvTranspose1d.bias = False

# Parameters for descript_discriminator.DescriptDiscriminator:
# ==============================================================================
descript_discriminator.DescriptDiscriminator.bands = \
    [(0.0, 0.1), (0.1, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)]
descript_discriminator.DescriptDiscriminator.fft_sizes = [2048, 1024, 512]
descript_discriminator.DescriptDiscriminator.periods = [2, 3, 5, 7, 11]
descript_discriminator.DescriptDiscriminator.rates = []
descript_discriminator.DescriptDiscriminator.sample_rate = 44100

# Parameters for variational/blocks.EncoderV2:
# ==============================================================================
variational/blocks.EncoderV2.activation = %ACTIVATION
variational/blocks.EncoderV2.adain = @blocks.AdaptiveInstanceNormalization
variational/blocks.EncoderV2.capacity = %CAPACITY
variational/blocks.EncoderV2.data_size = %N_BAND
variational/blocks.EncoderV2.dilations = %DILATIONS
variational/blocks.EncoderV2.keep_dim = False
variational/blocks.EncoderV2.kernel_size = %KERNEL_SIZE
variational/blocks.EncoderV2.latent_size = %LATENT_SIZE
variational/blocks.EncoderV2.n_out = 2
variational/blocks.EncoderV2.ratios = %RATIOS
variational/blocks.EncoderV2.recurrent_layer = None
variational/blocks.EncoderV2.spectrogram = None

# Parameters for blocks.GeneratorV2:
# ==============================================================================
blocks.GeneratorV2.activation = %ACTIVATION
blocks.GeneratorV2.adain = @blocks.AdaptiveInstanceNormalization
blocks.GeneratorV2.amplitude_modulation = True
blocks.GeneratorV2.capacity = %CAPACITY
blocks.GeneratorV2.causal_convtranspose = False
blocks.GeneratorV2.data_size = %N_BAND
blocks.GeneratorV2.dilations = %DILATIONS
blocks.GeneratorV2.keep_dim = False
blocks.GeneratorV2.kernel_size = %KERNEL_SIZE
blocks.GeneratorV2.latent_size = @core.get_augmented_latent_size()
blocks.GeneratorV2.noise_module = None
blocks.GeneratorV2.ratios = %RATIOS
blocks.GeneratorV2.recurrent_layer = None

# Parameters for core.get_augmented_latent_size:
# ==============================================================================
core.get_augmented_latent_size.latent_size = %LATENT_SIZE
core.get_augmented_latent_size.noise_augmentation = %NOISE_AUGMENTATION

# Parameters for convs.get_padding:
# ==============================================================================
convs.get_padding.dilation = 1
convs.get_padding.mode = 'causal'
convs.get_padding.stride = 1

# Parameters for variational/convs.get_padding:
# ==============================================================================
variational/convs.get_padding.dilation = 1
variational/convs.get_padding.mode = 'causal'
variational/convs.get_padding.stride = 1

# Parameters for core.MultiScaleSTFT:
# ==============================================================================
core.MultiScaleSTFT.magnitude = True
core.MultiScaleSTFT.normalized = False
core.MultiScaleSTFT.num_mels = None
core.MultiScaleSTFT.random_crop = True
core.MultiScaleSTFT.sample_rate = %SAMPLING_RATE
core.MultiScaleSTFT.scales = [2048, 1024, 512, 256, 128]

# Parameters for blocks.normalization:
# ==============================================================================
blocks.normalization.mode = 'weight_norm'

# Parameters for variational/blocks.normalization:
# ==============================================================================
variational/blocks.normalization.mode = 'weight_norm'

# Parameters for model.RAVE:
# ==============================================================================
model.RAVE.audio_distance = @core.AudioDistanceV1
model.RAVE.decoder = @blocks.GeneratorV2
model.RAVE.discriminator = @descript_discriminator.DescriptDiscriminator
model.RAVE.enable_pqmf_decode = True
model.RAVE.enable_pqmf_encode = True
model.RAVE.encoder = @blocks.VariationalEncoder
model.RAVE.feature_matching_fun = @feature_matching/core.mean_difference
model.RAVE.freeze_encoder = False
model.RAVE.gan_loss = @core.hinge_gan
model.RAVE.latent_size = %LATENT_SIZE
model.RAVE.multiband_audio_distance = @core.AudioDistanceV1
model.RAVE.num_skipped_features = 1
model.RAVE.phase_1_duration = %PHASE_1_DURATION
model.RAVE.pqmf = @pqmf.CachedPQMF
model.RAVE.sampling_rate = %SAMPLING_RATE
model.RAVE.update_discriminator_every = 4
model.RAVE.valid_signal_crop = True
model.RAVE.warmup_quantize = None
model.RAVE.weights = {'feature_matching': 20}

# Parameters for blocks.Snake:
# ==============================================================================
# None.

# Parameters for variational/blocks.Snake:
# ==============================================================================
# None.

# Parameters for dataset.split_dataset:
# ==============================================================================
dataset.split_dataset.max_residual = 1000

# Parameters for blocks.VariationalEncoder:
# ==============================================================================
blocks.VariationalEncoder.encoder = @variational/blocks.EncoderV2