{
  "embed_dim": 2048,
  "audio_cfg": {
    "audio_length": 1024,
    "clip_samples": 480000,
    "mel_bins": 64,
    "sample_rate": 48000,
    "window_size": 1024,
    "hop_size": 480,
    "fmin": 50,
    "fmax": 14000,
    "class_num": 527,
    "model_type": "HTSAT",
    "model_name": "large"
  },
  "text_cfg": {
    "context_length": 77,
    "vocab_size": 49408,
    "width": 512,
    "heads": 8,
    "layers": 12
  }
}