mrfakename committed on
Commit
df928c5
1 Parent(s): 4a7a0b1

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to this Space there.

Files changed (1) hide show
  1. app.py +25 -3
app.py CHANGED
@@ -240,23 +240,28 @@ with gr.Blocks() as app_multistyle:
240
 
241
  # Regular speech type (mandatory)
242
  with gr.Row():
243
- regular_name = gr.Textbox(value="Regular", label="Speech Type Name")
 
 
244
  regular_audio = gr.Audio(label="Regular Reference Audio", type="filepath")
245
  regular_ref_text = gr.Textbox(label="Reference Text (Regular)", lines=2)
246
 
247
  # Additional speech types (up to 99 more)
248
  max_speech_types = 100
249
  speech_type_rows = []
250
- speech_type_names = []
251
  speech_type_audios = []
252
  speech_type_ref_texts = []
253
  speech_type_delete_btns = []
 
 
254
 
255
  for i in range(max_speech_types - 1):
256
  with gr.Row(visible=False) as row:
257
  with gr.Column():
258
  name_input = gr.Textbox(label="Speech Type Name")
259
  delete_btn = gr.Button("Delete", variant="secondary")
 
260
  audio_input = gr.Audio(label="Reference Audio", type="filepath")
261
  ref_text_input = gr.Textbox(label="Reference Text", lines=2)
262
  speech_type_rows.append(row)
@@ -264,6 +269,7 @@ with gr.Blocks() as app_multistyle:
264
  speech_type_audios.append(audio_input)
265
  speech_type_ref_texts.append(ref_text_input)
266
  speech_type_delete_btns.append(delete_btn)
 
267
 
268
  # Button to add speech type
269
  add_speech_type_btn = gr.Button("Add Speech Type")
@@ -321,6 +327,22 @@ with gr.Blocks() as app_multistyle:
321
  placeholder="Enter the script with speaker names (or emotion types) at the start of each block, e.g.:\n\n{Regular} Hello, I'd like to order a sandwich please.\n{Surprised} What do you mean you're out of bread?\n{Sad} I really wanted a sandwich though...\n{Angry} You know what, darn you and your little shop!\n{Whisper} I'll just go back home and cry now.\n{Shouting} Why me?!",
322
  )
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  # Model choice
325
  model_choice_multistyle = gr.Radio(choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS")
326
 
@@ -347,7 +369,7 @@ with gr.Blocks() as app_multistyle:
347
  speech_type_names_list = args[:num_additional_speech_types]
348
  speech_type_audios_list = args[num_additional_speech_types : 2 * num_additional_speech_types]
349
  speech_type_ref_texts_list = args[2 * num_additional_speech_types : 3 * num_additional_speech_types]
350
- model_choice = args[3 * num_additional_speech_types]
351
  remove_silence = args[3 * num_additional_speech_types + 1]
352
 
353
  # Collect the speech types and their audios into a dict
 
240
 
241
  # Regular speech type (mandatory)
242
  with gr.Row():
243
+ with gr.Column():
244
+ regular_name = gr.Textbox(value="Regular", label="Speech Type Name")
245
+ regular_insert = gr.Button("Insert", variant="secondary")
246
  regular_audio = gr.Audio(label="Regular Reference Audio", type="filepath")
247
  regular_ref_text = gr.Textbox(label="Reference Text (Regular)", lines=2)
248
 
249
  # Additional speech types (up to 99 more)
250
  max_speech_types = 100
251
  speech_type_rows = []
252
+ speech_type_names = [regular_name]
253
  speech_type_audios = []
254
  speech_type_ref_texts = []
255
  speech_type_delete_btns = []
256
+ speech_type_insert_btns = []
257
+ speech_type_insert_btns.append(regular_insert)
258
 
259
  for i in range(max_speech_types - 1):
260
  with gr.Row(visible=False) as row:
261
  with gr.Column():
262
  name_input = gr.Textbox(label="Speech Type Name")
263
  delete_btn = gr.Button("Delete", variant="secondary")
264
+ insert_btn = gr.Button("Insert", variant="secondary")
265
  audio_input = gr.Audio(label="Reference Audio", type="filepath")
266
  ref_text_input = gr.Textbox(label="Reference Text", lines=2)
267
  speech_type_rows.append(row)
 
269
  speech_type_audios.append(audio_input)
270
  speech_type_ref_texts.append(ref_text_input)
271
  speech_type_delete_btns.append(delete_btn)
272
+ speech_type_insert_btns.append(insert_btn)
273
 
274
  # Button to add speech type
275
  add_speech_type_btn = gr.Button("Add Speech Type")
 
327
  placeholder="Enter the script with speaker names (or emotion types) at the start of each block, e.g.:\n\n{Regular} Hello, I'd like to order a sandwich please.\n{Surprised} What do you mean you're out of bread?\n{Sad} I really wanted a sandwich though...\n{Angry} You know what, darn you and your little shop!\n{Whisper} I'll just go back home and cry now.\n{Shouting} Why me?!",
328
  )
329
 
330
+ def make_insert_speech_type_fn(index):
331
+ def insert_speech_type_fn(current_text, speech_type_name):
332
+ current_text = current_text or ""
333
+ speech_type_name = speech_type_name or "None"
334
+ updated_text = current_text + f"{{{speech_type_name}}} "
335
+ return gr.update(value=updated_text)
336
+ return insert_speech_type_fn
337
+
338
+ for i, insert_btn in enumerate(speech_type_insert_btns):
339
+ insert_fn = make_insert_speech_type_fn(i)
340
+ insert_btn.click(
341
+ insert_fn,
342
+ inputs=[gen_text_input_multistyle, speech_type_names[i]],
343
+ outputs=gen_text_input_multistyle,
344
+ )
345
+
346
  # Model choice
347
  model_choice_multistyle = gr.Radio(choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS")
348
 
 
369
  speech_type_names_list = args[:num_additional_speech_types]
370
  speech_type_audios_list = args[num_additional_speech_types : 2 * num_additional_speech_types]
371
  speech_type_ref_texts_list = args[2 * num_additional_speech_types : 3 * num_additional_speech_types]
372
+ model_choice = args[3 * num_additional_speech_types + 1]
373
  remove_silence = args[3 * num_additional_speech_types + 1]
374
 
375
  # Collect the speech types and their audios into a dict