Upload ParlerTTSForConditionalGeneration

Files changed (3) hide show

config.json CHANGED Viewed

@@ -4,6 +4,7 @@
     "ParlerTTSForConditionalGeneration"
   ],
   "audio_encoder": {
     "_name_or_path": "parler-tts/dac_44khZ_8kbps",
     "add_cross_attention": false,
     "architectures": [
@@ -41,7 +42,7 @@
     "max_length": 20,
     "min_length": 0,
     "model_bitrate": 8,
-    "model_type": "dac",
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,
@@ -75,6 +76,7 @@
     "use_bfloat16": false
   },
   "decoder": {
     "_name_or_path": "/fsx/yoach/tmp/artefacts/parler-tts-mini/decoder",
     "activation_dropout": 0.0,
     "activation_function": "gelu",
@@ -87,6 +89,7 @@
     "begin_suppress_tokens": null,
     "bos_token_id": 1025,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "cross_attention_implementation_strategy": null,
     "decoder_start_token_id": null,
@@ -157,6 +160,7 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
     "vocab_size": 1088
   },
   "decoder_start_token_id": 1025,
@@ -165,6 +169,7 @@
   "pad_token_id": 1024,
   "prompt_cross_attention": false,
   "text_encoder": {
     "_name_or_path": "google/flan-t5-large",
     "add_cross_attention": false,
     "architectures": [
@@ -248,7 +253,7 @@
     "use_cache": true,
     "vocab_size": 32128
   },
-  "torch_dtype": "float16",
-  "transformers_version": "4.44.2",
   "vocab_size": 32128
 }

     "ParlerTTSForConditionalGeneration"
   ],
   "audio_encoder": {
+    "_attn_implementation_autoset": false,
     "_name_or_path": "parler-tts/dac_44khZ_8kbps",
     "add_cross_attention": false,
     "architectures": [
     "max_length": 20,
     "min_length": 0,
     "model_bitrate": 8,
+    "model_type": "dac_on_the_hub",
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,
     "use_bfloat16": false
   },
   "decoder": {
+    "_attn_implementation_autoset": false,
     "_name_or_path": "/fsx/yoach/tmp/artefacts/parler-tts-mini/decoder",
     "activation_dropout": 0.0,
     "activation_function": "gelu",
     "begin_suppress_tokens": null,
     "bos_token_id": 1025,
     "chunk_size_feed_forward": 0,
+    "codebook_weights": null,
     "cross_attention_hidden_size": null,
     "cross_attention_implementation_strategy": null,
     "decoder_start_token_id": null,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "use_fused_lm_heads": false,
     "vocab_size": 1088
   },
   "decoder_start_token_id": 1025,
   "pad_token_id": 1024,
   "prompt_cross_attention": false,
   "text_encoder": {
+    "_attn_implementation_autoset": false,
     "_name_or_path": "google/flan-t5-large",
     "add_cross_attention": false,
     "architectures": [
     "use_cache": true,
     "vocab_size": 32128
   },
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.1",
   "vocab_size": 32128
 }

generation_config.json CHANGED Viewed

@@ -4,5 +4,5 @@
   "decoder_start_token_id": 1025,
   "eos_token_id": 1024,
   "pad_token_id": 1024,
-  "transformers_version": "4.44.2"
 }

   "decoder_start_token_id": 1025,
   "eos_token_id": 1024,
   "pad_token_id": 1024,
+  "transformers_version": "4.46.1"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3ed1626fa8f9f5dc04775f86b5a085c203523f19ea6112d4482434bfddad09a
-size 1755805420

 version https://git-lfs.github.com/spec/v1
+oid sha256:eda87a1d13bda2a2cd0353f1cc69f282f0873ba78233111453398cc128cef76d
+size 3511494464