from typing import Sequence

from transformers import PretrainedConfig

|
class DMAE1dConfig(PretrainedConfig):
    """Configuration class for the DMAE1d autoencoder model.

    Stores the hyperparameters of the diffusion UNet, the latent encoder,
    the bottleneck, and the STFT front-end.
    """

    model_type = "archinetai/dmae1d-ATC64-v1"

    def __init__(
        self,
        in_channels: int = 2,
        channels: int = 512,
        multipliers: Sequence[int] = [3, 2, 1, 1, 1, 1, 1, 1],
        factors: Sequence[int] = [1, 2, 2, 2, 2, 2, 2],
        num_blocks: Sequence[int] = [1, 1, 1, 2, 2, 2, 2],
        attentions: Sequence[int] = [0, 0, 0, 0, 0, 0, 0],
        encoder_inject_depth: int = 3,
        encoder_channels: int = 32,
        encoder_factors: Sequence[int] = [1, 1, 2, 2, 1, 1],
        encoder_multipliers: Sequence[int] = [32, 16, 8, 8, 4, 2, 1],
        encoder_num_blocks: Sequence[int] = [4, 4, 4, 4, 4, 4],
        bottleneck: str = "tanh",
        stft_use_complex: bool = True,
        stft_num_fft: int = 1023,
        stft_hop_length: int = 256,
        **kwargs
    ):
        # Diffusion UNet hyperparameters: per-resolution channel multipliers,
        # downsampling factors, block counts, and attention flags.
        self.in_channels = in_channels
        self.channels = channels
        self.multipliers = multipliers
        self.factors = factors
        self.num_blocks = num_blocks
        self.attentions = attentions
        # Encoder hyperparameters; the encoder latent is injected into the
        # UNet at depth `encoder_inject_depth`.
        self.encoder_inject_depth = encoder_inject_depth
        self.encoder_channels = encoder_channels
        self.encoder_factors = encoder_factors
        self.encoder_multipliers = encoder_multipliers
        self.encoder_num_blocks = encoder_num_blocks
        # Activation applied at the latent bottleneck.
        self.bottleneck = bottleneck
        # STFT front-end settings used to compute the spectrogram input.
        self.stft_use_complex = stft_use_complex
        self.stft_num_fft = stft_num_fft
        self.stft_hop_length = stft_hop_length
        super().__init__(**kwargs)
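

# Minimal usage sketch: build the config with its defaults and round-trip it
# through `save_pretrained` / `from_pretrained`, the standard
# `PretrainedConfig` serialization API. The "dmae1d-config" output directory
# is an arbitrary placeholder, not a path from the original repo.
if __name__ == "__main__":
    config = DMAE1dConfig()
    print(config.channels, config.bottleneck)  # -> 512 tanh

    config.save_pretrained("dmae1d-config")  # writes dmae1d-config/config.json
    reloaded = DMAE1dConfig.from_pretrained("dmae1d-config")
    assert reloaded.multipliers == config.multipliers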