mrfakename committed on
Commit
4fd0fc1
1 Parent(s): faf2525

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show
  1. app.py +28 -86
app.py CHANGED
@@ -141,32 +141,6 @@ def generate_podcast(
141
 
142
  return podcast_path
143
 
144
-
145
- def parse_speechtypes_text(gen_text):
146
- # Pattern to find (Emotion)
147
- pattern = r"\((.*?)\)"
148
-
149
- # Split the text by the pattern
150
- tokens = re.split(pattern, gen_text)
151
-
152
- segments = []
153
-
154
- current_emotion = "Regular"
155
-
156
- for i in range(len(tokens)):
157
- if i % 2 == 0:
158
- # This is text
159
- text = tokens[i].strip()
160
- if text:
161
- segments.append({"emotion": current_emotion, "text": text})
162
- else:
163
- # This is emotion
164
- emotion = tokens[i].strip()
165
- current_emotion = emotion
166
-
167
- return segments
168
-
169
-
170
  with gr.Blocks() as app_credits:
171
  gr.Markdown("""
172
  # Credits
@@ -273,10 +247,9 @@ with gr.Blocks() as app_podcast:
273
  outputs=podcast_output,
274
  )
275
 
276
-
277
- def parse_emotional_text(gen_text):
278
  # Pattern to find (Emotion)
279
- pattern = r"\((.*?)\)"
280
 
281
  # Split the text by the pattern
282
  tokens = re.split(pattern, gen_text)
@@ -298,7 +271,6 @@ def parse_emotional_text(gen_text):
298
 
299
  return segments
300
 
301
-
302
  with gr.Blocks() as app_emotional:
303
  # New section for emotional generation
304
  gr.Markdown(
@@ -309,7 +281,7 @@ with gr.Blocks() as app_emotional:
309
 
310
  **Example Input:**
311
 
312
- (Regular) Hello, I'd like to order a sandwich please. (Surprised) What do you mean you're out of bread? (Sad) I really wanted a sandwich though... (Angry) You know what, darn you and your little shop, you suck! (Whisper) I'll just go back home and cry now. (Shouting) Why me?!
313
  """
314
  )
315
 
@@ -325,17 +297,19 @@ with gr.Blocks() as app_emotional:
325
 
326
  # Additional speech types (up to 99 more)
327
  max_speech_types = 100
 
328
  speech_type_names = []
329
  speech_type_audios = []
330
  speech_type_ref_texts = []
331
  speech_type_delete_btns = []
332
 
333
  for i in range(max_speech_types - 1):
334
- with gr.Row():
335
- name_input = gr.Textbox(label="Speech Type Name", visible=False)
336
- audio_input = gr.Audio(label="Reference Audio", type="filepath", visible=False)
337
- ref_text_input = gr.Textbox(label="Reference Text", lines=2, visible=False)
338
- delete_btn = gr.Button("Delete", variant="secondary", visible=False)
 
339
  speech_type_names.append(name_input)
340
  speech_type_audios.append(audio_input)
341
  speech_type_ref_texts.append(ref_text_input)
@@ -347,82 +321,53 @@ with gr.Blocks() as app_emotional:
347
  # Keep track of current number of speech types
348
  speech_type_count = gr.State(value=0)
349
 
 
350
  # Function to add a speech type
351
  def add_speech_type_fn(speech_type_count):
352
  if speech_type_count < max_speech_types - 1:
353
  speech_type_count += 1
354
- # Prepare updates for the components
355
- name_updates = []
356
- audio_updates = []
357
- ref_text_updates = []
358
- delete_btn_updates = []
359
  for i in range(max_speech_types - 1):
360
  if i < speech_type_count:
361
- name_updates.append(gr.update(visible=True))
362
- audio_updates.append(gr.update(visible=True))
363
- ref_text_updates.append(gr.update(visible=True))
364
- delete_btn_updates.append(gr.update(visible=True))
365
  else:
366
- name_updates.append(gr.update())
367
- audio_updates.append(gr.update())
368
- ref_text_updates.append(gr.update())
369
- delete_btn_updates.append(gr.update())
370
  else:
371
  # Optionally, show a warning
372
- # gr.Warning("Maximum number of speech types reached.")
373
- name_updates = [gr.update() for _ in range(max_speech_types - 1)]
374
- audio_updates = [gr.update() for _ in range(max_speech_types - 1)]
375
- ref_text_updates = [gr.update() for _ in range(max_speech_types - 1)]
376
- delete_btn_updates = [gr.update() for _ in range(max_speech_types - 1)]
377
- return [speech_type_count] + name_updates + audio_updates + ref_text_updates + delete_btn_updates
378
 
379
  add_speech_type_btn.click(
380
  add_speech_type_fn,
381
  inputs=speech_type_count,
382
- outputs=[speech_type_count]
383
- + speech_type_names
384
- + speech_type_audios
385
- + speech_type_ref_texts
386
- + speech_type_delete_btns,
387
  )
388
 
389
  # Function to delete a speech type
390
  def make_delete_speech_type_fn(index):
391
  def delete_speech_type_fn(speech_type_count):
392
  # Prepare updates
393
- name_updates = []
394
- audio_updates = []
395
- ref_text_updates = []
396
- delete_btn_updates = []
397
-
398
  for i in range(max_speech_types - 1):
399
  if i == index:
400
- name_updates.append(gr.update(visible=False, value=""))
401
- audio_updates.append(gr.update(visible=False, value=None))
402
- ref_text_updates.append(gr.update(visible=False, value=""))
403
- delete_btn_updates.append(gr.update(visible=False))
404
  else:
405
- name_updates.append(gr.update())
406
- audio_updates.append(gr.update())
407
- ref_text_updates.append(gr.update())
408
- delete_btn_updates.append(gr.update())
409
 
410
  speech_type_count = max(0, speech_type_count - 1)
411
 
412
- return [speech_type_count] + name_updates + audio_updates + ref_text_updates + delete_btn_updates
413
 
414
  return delete_speech_type_fn
415
 
 
416
  for i, delete_btn in enumerate(speech_type_delete_btns):
417
  delete_fn = make_delete_speech_type_fn(i)
418
  delete_btn.click(
419
  delete_fn,
420
  inputs=speech_type_count,
421
- outputs=[speech_type_count]
422
- + speech_type_names
423
- + speech_type_audios
424
- + speech_type_ref_texts
425
- + speech_type_delete_btns,
426
  )
427
 
428
  # Text input for the prompt
@@ -434,7 +379,7 @@ with gr.Blocks() as app_emotional:
434
  with gr.Accordion("Advanced Settings", open=False):
435
  remove_silence_emotional = gr.Checkbox(
436
  label="Remove Silences",
437
- value=True,
438
  )
439
 
440
  # Generate button
@@ -506,11 +451,7 @@ with gr.Blocks() as app_emotional:
506
  regular_audio,
507
  regular_ref_text,
508
  gen_text_input_emotional,
509
- ]
510
- + speech_type_names
511
- + speech_type_audios
512
- + speech_type_ref_texts
513
- + [
514
  model_choice_emotional,
515
  remove_silence_emotional,
516
  ],
@@ -531,7 +472,7 @@ with gr.Blocks() as app_emotional:
531
  speech_types_available.add(name_input)
532
 
533
  # Parse the gen_text to get the speech types used
534
- segments = parse_emotional_text(gen_text)
535
  speech_types_in_text = set(segment["emotion"] for segment in segments)
536
 
537
  # Check if all speech types in text are available
@@ -549,6 +490,7 @@ with gr.Blocks() as app_emotional:
549
  inputs=[gen_text_input_emotional, regular_name] + speech_type_names,
550
  outputs=generate_emotional_btn,
551
  )
 
552
  with gr.Blocks() as app:
553
  gr.Markdown(
554
  """
 
141
 
142
  return podcast_path
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  with gr.Blocks() as app_credits:
145
  gr.Markdown("""
146
  # Credits
 
247
  outputs=podcast_output,
248
  )
249
 
250
+ def parse_speechtypes_text(gen_text):
 
251
  # Pattern to find (Emotion)
252
+ pattern = r"\{(.*?)\}"
253
 
254
  # Split the text by the pattern
255
  tokens = re.split(pattern, gen_text)
 
271
 
272
  return segments
273
 
 
274
  with gr.Blocks() as app_emotional:
275
  # New section for emotional generation
276
  gr.Markdown(
 
281
 
282
  **Example Input:**
283
 
284
+ {Regular} Hello, I'd like to order a sandwich please. {Surprised} What do you mean you're out of bread? {Sad} I really wanted a sandwich though... {Angry} You know what, darn you and your little shop, you suck! {Whisper} I'll just go back home and cry now. {Shouting} Why me?!
285
  """
286
  )
287
 
 
297
 
298
  # Additional speech types (up to 99 more)
299
  max_speech_types = 100
300
+ speech_type_rows = []
301
  speech_type_names = []
302
  speech_type_audios = []
303
  speech_type_ref_texts = []
304
  speech_type_delete_btns = []
305
 
306
  for i in range(max_speech_types - 1):
307
+ with gr.Row(visible=False) as row:
308
+ name_input = gr.Textbox(label="Speech Type Name")
309
+ audio_input = gr.Audio(label="Reference Audio", type="filepath")
310
+ ref_text_input = gr.Textbox(label="Reference Text", lines=2)
311
+ delete_btn = gr.Button("Delete", variant="secondary")
312
+ speech_type_rows.append(row)
313
  speech_type_names.append(name_input)
314
  speech_type_audios.append(audio_input)
315
  speech_type_ref_texts.append(ref_text_input)
 
321
  # Keep track of current number of speech types
322
  speech_type_count = gr.State(value=0)
323
 
324
+ # Function to add a speech type
325
  # Function to add a speech type
326
  def add_speech_type_fn(speech_type_count):
327
  if speech_type_count < max_speech_types - 1:
328
  speech_type_count += 1
329
+ # Prepare updates for the rows
330
+ row_updates = []
 
 
 
331
  for i in range(max_speech_types - 1):
332
  if i < speech_type_count:
333
+ row_updates.append(gr.update(visible=True))
 
 
 
334
  else:
335
+ row_updates.append(gr.update())
 
 
 
336
  else:
337
  # Optionally, show a warning
338
+ row_updates = [gr.update() for _ in range(max_speech_types - 1)]
339
+ return [speech_type_count] + row_updates
 
 
 
 
340
 
341
  add_speech_type_btn.click(
342
  add_speech_type_fn,
343
  inputs=speech_type_count,
344
+ outputs=[speech_type_count] + speech_type_rows
 
 
 
 
345
  )
346
 
347
  # Function to delete a speech type
348
  def make_delete_speech_type_fn(index):
349
  def delete_speech_type_fn(speech_type_count):
350
  # Prepare updates
351
+ row_updates = []
 
 
 
 
352
  for i in range(max_speech_types - 1):
353
  if i == index:
354
+ row_updates.append(gr.update(visible=False))
 
 
 
355
  else:
356
+ row_updates.append(gr.update())
 
 
 
357
 
358
  speech_type_count = max(0, speech_type_count - 1)
359
 
360
+ return [speech_type_count] + row_updates
361
 
362
  return delete_speech_type_fn
363
 
364
+ # Update delete button clicks
365
  for i, delete_btn in enumerate(speech_type_delete_btns):
366
  delete_fn = make_delete_speech_type_fn(i)
367
  delete_btn.click(
368
  delete_fn,
369
  inputs=speech_type_count,
370
+ outputs=[speech_type_count] + speech_type_rows
 
 
 
 
371
  )
372
 
373
  # Text input for the prompt
 
379
  with gr.Accordion("Advanced Settings", open=False):
380
  remove_silence_emotional = gr.Checkbox(
381
  label="Remove Silences",
382
+ value=False,
383
  )
384
 
385
  # Generate button
 
451
  regular_audio,
452
  regular_ref_text,
453
  gen_text_input_emotional,
454
+ ] + speech_type_names + speech_type_audios + speech_type_ref_texts + [
 
 
 
 
455
  model_choice_emotional,
456
  remove_silence_emotional,
457
  ],
 
472
  speech_types_available.add(name_input)
473
 
474
  # Parse the gen_text to get the speech types used
475
+ segments = parse_speechtypes_text(gen_text)
476
  speech_types_in_text = set(segment["emotion"] for segment in segments)
477
 
478
  # Check if all speech types in text are available
 
490
  inputs=[gen_text_input_emotional, regular_name] + speech_type_names,
491
  outputs=generate_emotional_btn,
492
  )
493
+
494
  with gr.Blocks() as app:
495
  gr.Markdown(
496
  """