mrfakename committed on
Commit
3c748fe
1 Parent(s): 9df052c

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -201,28 +201,43 @@ def parse_speechtypes_text(gen_text):
201
  with gr.Blocks() as app_multistyle:
202
  # New section for emotional generation
203
  gr.Markdown(
204
- """
205
  # Multiple Speech-Type Generation
206
 
207
- This section allows you to upload different audio clips for each speech type. 'Regular' emotion is mandatory. You can add additional speech types by clicking the "Add Speech Type" button. Enter your text in the format shown below, and the system will generate speech using the appropriate emotions. If unspecified, the model will use the regular speech type. The current speech type will be used until the next speech type is specified.
208
-
209
- **Example Input:**
210
- {Regular} Hello, I'd like to order a sandwich please.
211
- {Surprised} What do you mean you're out of bread?
212
- {Sad} I really wanted a sandwich though...
213
- {Angry} You know what, darn you and your little shop!
214
- {Whisper} I'll just go back home and cry now.
215
- {Shouting} Why me?!
216
  """
217
  )
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  gr.Markdown(
220
- "Upload different audio clips for each speech type. 'Regular' emotion is mandatory. You can add additional speech types by clicking the 'Add Speech Type' button."
221
  )
222
 
223
  # Regular speech type (mandatory)
224
  with gr.Row():
225
- regular_name = gr.Textbox(value="Regular", label="Speech Type Name", interactive=False)
226
  regular_audio = gr.Audio(label="Regular Reference Audio", type="filepath")
227
  regular_ref_text = gr.Textbox(label="Reference Text (Regular)", lines=2)
228
 
@@ -236,16 +251,19 @@ with gr.Blocks() as app_multistyle:
236
 
237
  for i in range(max_speech_types - 1):
238
  with gr.Row(visible=False) as row:
239
- name_input = gr.Textbox(label="Speech Type Name")
 
 
 
240
  audio_input = gr.Audio(label="Reference Audio", type="filepath")
241
  ref_text_input = gr.Textbox(label="Reference Text", lines=2)
242
- delete_btn = gr.Button("Delete", variant="secondary")
243
  speech_type_rows.append(row)
244
  speech_type_names.append(name_input)
245
  speech_type_audios.append(audio_input)
246
  speech_type_ref_texts.append(ref_text_input)
247
  speech_type_delete_btns.append(delete_btn)
248
 
 
249
  # Button to add speech type
250
  add_speech_type_btn = gr.Button("Add Speech Type")
251
 
@@ -297,7 +315,7 @@ with gr.Blocks() as app_multistyle:
297
 
298
  # Text input for the prompt
299
  gen_text_input_emotional = gr.Textbox(
300
- label="Text to Generate ( Make sure the type names you entered match the Speech Type Name above ! ! ! )",
301
  lines=10,
302
  placeholder="Enter the script with speaker names (or emotion types) at the start of each block, e.g.:\n\n{Regular} Hello, I'd like to order a sandwich please.\n{Surprised} What do you mean you're out of bread?\n{Sad} I really wanted a sandwich though...\n{Angry} You know what, darn you and your little shop!\n{Whisper} I'll just go back home and cry now.\n{Shouting} Why me?!",
303
  )
@@ -603,7 +621,7 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
603
  )
604
  gr.TabbedInterface(
605
  [app_tts, app_multistyle, app_chat, app_credits],
606
- ["TTS", "Multi-Style", "Voice-Chat", "Credits"],
607
  )
608
 
609
 
 
201
  with gr.Blocks() as app_multistyle:
202
  # New section for emotional generation
203
  gr.Markdown(
204
+ """
205
  # Multiple Speech-Type Generation
206
 
207
+ This section allows you to generate multiple speech types or multiple people's voices. Enter your text in the format shown below, and the system will generate speech using the appropriate type. If unspecified, the model will use the regular speech type. The current speech type will be used until the next speech type is specified.
 
 
 
 
 
 
 
 
208
  """
209
  )
210
 
211
+ with gr.Row():
212
+ gr.Markdown(
213
+ """
214
+ **Example Input:**
215
+ {Regular} Hello, I'd like to order a sandwich please.
216
+ {Surprised} What do you mean you're out of bread?
217
+ {Sad} I really wanted a sandwich though...
218
+ {Angry} You know what, darn you and your little shop!
219
+ {Whisper} I'll just go back home and cry now.
220
+ {Shouting} Why me?!
221
+ """
222
+ )
223
+
224
+ gr.Markdown(
225
+ """
226
+ **Example Input 2:**
227
+ {Speaker1_Happy} Hello, I'd like to order a sandwich please.
228
+ {Speaker2_Regular} Sorry, we're out of bread.
229
+ {Speaker1_Sad} I really wanted a sandwich though...
230
+ {Speaker2_Whisper} I'll give you the last one I was hiding.
231
+ """
232
+ )
233
+
234
  gr.Markdown(
235
+ "Upload different audio clips for each speech type. The first speech type is mandatory. You can add additional speech types by clicking the 'Add Speech Type' button."
236
  )
237
 
238
  # Regular speech type (mandatory)
239
  with gr.Row():
240
+ regular_name = gr.Textbox(value="Regular", label="Speech Type Name")
241
  regular_audio = gr.Audio(label="Regular Reference Audio", type="filepath")
242
  regular_ref_text = gr.Textbox(label="Reference Text (Regular)", lines=2)
243
 
 
251
 
252
  for i in range(max_speech_types - 1):
253
  with gr.Row(visible=False) as row:
254
+ with gr.Column():
255
+ name_input = gr.Textbox(label="Speech Type Name")
256
+ delete_btn = gr.Button("Delete", variant="secondary")
257
+
258
  audio_input = gr.Audio(label="Reference Audio", type="filepath")
259
  ref_text_input = gr.Textbox(label="Reference Text", lines=2)
 
260
  speech_type_rows.append(row)
261
  speech_type_names.append(name_input)
262
  speech_type_audios.append(audio_input)
263
  speech_type_ref_texts.append(ref_text_input)
264
  speech_type_delete_btns.append(delete_btn)
265
 
266
+
267
  # Button to add speech type
268
  add_speech_type_btn = gr.Button("Add Speech Type")
269
 
 
315
 
316
  # Text input for the prompt
317
  gen_text_input_emotional = gr.Textbox(
318
+ label="Text to Generate",
319
  lines=10,
320
  placeholder="Enter the script with speaker names (or emotion types) at the start of each block, e.g.:\n\n{Regular} Hello, I'd like to order a sandwich please.\n{Surprised} What do you mean you're out of bread?\n{Sad} I really wanted a sandwich though...\n{Angry} You know what, darn you and your little shop!\n{Whisper} I'll just go back home and cry now.\n{Shouting} Why me?!",
321
  )
 
621
  )
622
  gr.TabbedInterface(
623
  [app_tts, app_multistyle, app_chat, app_credits],
624
+ ["TTS", "Multi-Speech", "Voice-Chat", "Credits"],
625
  )
626
 
627