kevinwang676 committed on
Commit
c6cb55a
1 Parent(s): 3217a02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -107
app.py CHANGED
@@ -1,12 +1,7 @@
1
- import os, sys
2
- import tempfile
3
- import gradio as gr
4
- from src.gradio_demo import SadTalker
5
- # from src.utils.text2speech import TTSTalker
6
- from huggingface_hub import snapshot_download
7
-
8
  import torch
9
  import librosa
 
10
  from scipy.io.wavfile import write
11
  from transformers import WavLMModel
12
 
@@ -27,9 +22,10 @@ import edge_tts
27
  import tempfile
28
  import anyio
29
 
 
 
 
30
 
31
- def get_source_image(image):
32
- return image
33
 
34
  try:
35
  import webui # in webui
@@ -49,23 +45,8 @@ def ref_video_fn(path_of_ref_video):
49
  return gr.update(value=True)
50
  else:
51
  return gr.update(value=False)
52
-
53
- def download_model():
54
- REPO_ID = 'vinthony/SadTalker-V002rc'
55
- snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
56
-
57
- def sadtalker_demo():
58
-
59
- download_model()
60
 
61
- sad_talker = SadTalker(lazy_load=True)
62
- # tts_talker = TTSTalker()
63
-
64
- download_model()
65
- sad_talker = SadTalker(lazy_load=True)
66
-
67
-
68
- # ChatGLM2 & FreeVC
69
 
70
  '''
71
  def get_wavlm():
@@ -141,7 +122,7 @@ except Exception:
141
  logger.warning("Windows, cant run time.tzset()")
142
 
143
  # model_name = "THUDM/chatglm2-6b"
144
- model_name = "THUDM/chatglm2-6b-int4"
145
 
146
  RETRY_FLAG = False
147
 
@@ -332,15 +313,14 @@ async def text_to_speech_edge(text, language_code):
332
  return tmp_path
333
 
334
 
335
- with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), analytics_enabled=False) as demo:
336
  gr.HTML("<center>"
337
- "<h1>📺💕🎶 - ChatGLM2+声音克隆+视频对话:和喜欢的角色畅所欲言吧!</h1>"
338
  "</center>")
339
- gr.Markdown("## <center>🥳 - ChatGLM2+FreeVC+SadTalker,为您打造沉浸式的视频对话体验,支持中英双语</center>")
340
  gr.Markdown("## <center>🌊 - 更多精彩应用,尽在[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
341
- gr.Markdown("### <center>⭐ - 如果您喜欢这个程序,欢迎给我的[GitHub项目](https://github.com/KevinWang676/ChatGLM2-Voice-Cloning)点赞支持!</center>")
342
-
343
- with gr.Tab("🍻 - ChatGLM2聊天区"):
344
  with gr.Accordion("📒 相关信息", open=False):
345
  _ = f""" ChatGLM2的可选参数信息:
346
  * Low temperature: responses will be more deterministic and focused; High temperature: responses more creative.
@@ -364,7 +344,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
364
  submitBtn = gr.Button("开始和GLM2交流吧", variant="primary")
365
  deleteBtn = gr.Button("删除最新一轮对话", variant="secondary")
366
  retryBtn = gr.Button("重新生成最新一轮对话", variant="secondary")
367
-
368
  with gr.Accordion("🔧 更多设置", open=False):
369
  with gr.Row():
370
  emptyBtn = gr.Button("清空所有聊天记录")
@@ -382,8 +362,8 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
382
  temperature = gr.Slider(
383
  0.01, 1, value=0.95, step=0.01, label="Temperature", interactive=True
384
  )
385
-
386
-
387
  with gr.Row():
388
  test1 = gr.Textbox(label="GLM2的最新回答 (可编辑)", lines = 3)
389
  with gr.Column():
@@ -401,7 +381,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
401
  audio_cloned = gr.Audio(label="为您生成的专属声音克隆音频", type='filepath')
402
 
403
  clone_btn.click(convert, inputs=[model_choice, audio1, audio2], outputs=[audio_cloned])
404
-
405
  history = gr.State([])
406
  past_key_values = gr.State(None)
407
 
@@ -441,7 +421,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
441
  emptyBtn.click(
442
  reset_state, outputs=[chatbot, history, past_key_values, test1], show_progress="full"
443
  )
444
-
445
  retryBtn.click(
446
  retry_last_answer,
447
  inputs=[
@@ -457,7 +437,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
457
  outputs=[chatbot, history, past_key_values, test1],
458
  )
459
  deleteBtn.click(delete_last_turn, [chatbot, history], [chatbot, history])
460
-
461
  with gr.Accordion("📔 提示词示例", open=False):
462
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
463
  examples = gr.Examples(
@@ -513,90 +493,55 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
513
  api_name="tr1",
514
  )
515
  # """
516
- with gr.Tab("📺 - 视频聊天区"):
 
517
  with gr.Row().style(equal_height=False):
518
  with gr.Column(variant='panel'):
519
  with gr.Tabs(elem_id="sadtalker_source_image"):
520
- with gr.TabItem('图片上传'):
521
  with gr.Row():
522
- source_image = gr.Image(label="请上传一张您喜欢角色的图片", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
523
-
524
 
525
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
526
- with gr.TabItem('💡您还可以将视频下载到本地'):
527
-
528
- with gr.Row():
529
- driven_audio = audio_cloned
530
- driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
531
-
532
- with gr.Column():
533
- use_idle_mode = gr.Checkbox(label="Use Idle Animation", visible=False)
534
- length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.", visible=False)
535
- use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
536
-
537
- with gr.Row():
538
- ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref", visible=False).style(width=512)
539
-
540
- with gr.Column():
541
- use_ref_video = gr.Checkbox(label="Use Reference Video", visible=False)
542
- ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Reference Video',info="How to borrow from reference Video?((fully transfer, aka, video driving mode))", visible=False)
543
-
544
- ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
545
-
546
 
547
- with gr.Column(variant='panel'):
548
  with gr.Tabs(elem_id="sadtalker_checkbox"):
549
- with gr.TabItem('视频设置'):
 
550
  with gr.Column(variant='panel'):
551
  # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
552
  # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
553
- with gr.Row():
554
- pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Pose style", value=0, visible=False) #
555
- exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1, visible=False) #
556
- blink_every = gr.Checkbox(label="use eye blink", value=True, visible=False)
557
-
558
- with gr.Row():
559
- size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?", visible=False) #
560
- preprocess_type = gr.Radio(['crop', 'full'], value='crop', label='是否聚焦角色面部', info="crop:视频会聚焦角色面部;full:视频会显示图片全貌")
561
-
562
- with gr.Row():
563
- is_still_mode = gr.Checkbox(label="静态模式 (开启静态模式,角色的面部动作会减少;默认开启)", value=True)
564
- facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='facerender', info="which face render?", visible=False)
565
-
566
- with gr.Row():
567
- batch_size = gr.Slider(label="Batch size (数值越大,生成速度越快;若显卡性能好,可增大数值)", step=1, maximum=32, value=2)
568
- enhancer = gr.Checkbox(label="GFPGAN as Face enhancer", value=True, visible=False)
569
-
570
- submit = gr.Button('开始视频聊天吧', elem_id="sadtalker_generate", variant='primary')
571
-
572
  with gr.Tabs(elem_id="sadtalker_genearted"):
573
- gen_video = gr.Video(label="为您生成的专属视频", format="mp4").style(width=256)
574
-
575
-
576
 
577
  submit.click(
578
- fn=sad_talker.test,
579
- inputs=[source_image,
580
- driven_audio,
581
- preprocess_type,
582
- is_still_mode,
583
- enhancer,
584
- batch_size,
585
- size_of_image,
586
- pose_style,
587
- facerender,
588
- exp_weight,
589
- use_ref_video,
590
- ref_video,
591
- ref_info,
592
- use_idle_mode,
593
- length_of_audio,
594
- blink_every
595
- ],
596
- outputs=[gen_video]
597
- )
598
  gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
599
- gr.Markdown("<center>💡- 如何使用此程序:输入您对ChatGLM的提问后,依次点击“开始和GLM2交流吧”、“生成对应的音频吧”、“开始AI声音克隆吧”、“开始视频聊天吧”三个按键即可;使用声音克隆功能时,请先上传一段您喜欢的音频</center>")
600
  gr.HTML('''
601
  <div class="footer">
602
  <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
 
1
+ import os
 
 
 
 
 
 
2
  import torch
3
  import librosa
4
+ import gradio as gr
5
  from scipy.io.wavfile import write
6
  from transformers import WavLMModel
7
 
 
22
  import tempfile
23
  import anyio
24
 
25
+ import os, sys
26
+ import gradio as gr
27
+ from src.gradio_demo import SadTalker
28
 
 
 
29
 
30
  try:
31
  import webui # in webui
 
45
  return gr.update(value=True)
46
  else:
47
  return gr.update(value=False)
 
 
 
 
 
 
 
 
48
 
49
+ sad_talker = SadTalker("checkpoints", "src/config", lazy_load=True)
 
 
 
 
 
 
 
50
 
51
  '''
52
  def get_wavlm():
 
122
  logger.warning("Windows, cant run time.tzset()")
123
 
124
  # model_name = "THUDM/chatglm2-6b"
125
+ model_name = "THUDM/chatglm2-6b"
126
 
127
  RETRY_FLAG = False
128
 
 
313
  return tmp_path
314
 
315
 
316
+ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) as demo:
317
  gr.HTML("<center>"
318
+ "<h1>🥳💕🎶 - ChatGLM2 + 声音克隆:和你喜欢的角色畅所欲言吧!</h1>"
319
  "</center>")
320
+ gr.Markdown("## <center>💡 - 第二代ChatGLM大语言模型 + FreeVC变声,为您打造独一无二的沉浸式对话体验,支持中英双语</center>")
321
  gr.Markdown("## <center>🌊 - 更多精彩应用,尽在[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
322
+ gr.Markdown("### <center>⭐ - 如果您喜欢这个程序,欢迎给我的[Github项目](https://github.com/KevinWang676/ChatGLM2-Voice-Cloning)点赞支持!</center>")
323
+ with gr.Tab("Chat"):
 
324
  with gr.Accordion("📒 相关信息", open=False):
325
  _ = f""" ChatGLM2的可选参数信息:
326
  * Low temperature: responses will be more deterministic and focused; High temperature: responses more creative.
 
344
  submitBtn = gr.Button("开始和GLM2交流吧", variant="primary")
345
  deleteBtn = gr.Button("删除最新一轮对话", variant="secondary")
346
  retryBtn = gr.Button("重新生成最新一轮对话", variant="secondary")
347
+
348
  with gr.Accordion("🔧 更多设置", open=False):
349
  with gr.Row():
350
  emptyBtn = gr.Button("清空所有聊天记录")
 
362
  temperature = gr.Slider(
363
  0.01, 1, value=0.95, step=0.01, label="Temperature", interactive=True
364
  )
365
+
366
+
367
  with gr.Row():
368
  test1 = gr.Textbox(label="GLM2的最新回答 (可编辑)", lines = 3)
369
  with gr.Column():
 
381
  audio_cloned = gr.Audio(label="为您生成的专属声音克隆音频", type='filepath')
382
 
383
  clone_btn.click(convert, inputs=[model_choice, audio1, audio2], outputs=[audio_cloned])
384
+
385
  history = gr.State([])
386
  past_key_values = gr.State(None)
387
 
 
421
  emptyBtn.click(
422
  reset_state, outputs=[chatbot, history, past_key_values, test1], show_progress="full"
423
  )
424
+
425
  retryBtn.click(
426
  retry_last_answer,
427
  inputs=[
 
437
  outputs=[chatbot, history, past_key_values, test1],
438
  )
439
  deleteBtn.click(delete_last_turn, [chatbot, history], [chatbot, history])
440
+
441
  with gr.Accordion("📔 提示词示例", open=False):
442
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
443
  examples = gr.Examples(
 
493
  api_name="tr1",
494
  )
495
  # """
496
+
497
+ with gr.Tab("Video"):
498
  with gr.Row().style(equal_height=False):
499
  with gr.Column(variant='panel'):
500
  with gr.Tabs(elem_id="sadtalker_source_image"):
501
+ with gr.TabItem('Upload image'):
502
  with gr.Row():
503
+ source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
 
504
 
505
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
506
+ with gr.TabItem('Upload OR TTS'):
507
+ with gr.Column(variant='panel'):
508
+ driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
 
510
+ with gr.Column(variant='panel'):
511
  with gr.Tabs(elem_id="sadtalker_checkbox"):
512
+ with gr.TabItem('Settings'):
513
+ gr.Markdown("need help? please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more detials")
514
  with gr.Column(variant='panel'):
515
  # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
516
  # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
517
+ pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
518
+ size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
519
+ preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
520
+ is_still_mode = gr.Checkbox(label="Still Mode (fewer hand motion, works with preprocess `full`)")
521
+ batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
522
+ enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
523
+ submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
524
+
 
 
 
 
 
 
 
 
 
 
 
525
  with gr.Tabs(elem_id="sadtalker_genearted"):
526
+ gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
 
 
527
 
528
  submit.click(
529
+ fn=sad_talker.test,
530
+ inputs=[source_image,
531
+ driven_audio,
532
+ preprocess_type,
533
+ is_still_mode,
534
+ enhancer,
535
+ batch_size,
536
+ size_of_image,
537
+ pose_style
538
+ ],
539
+ outputs=[gen_video]
540
+ )
541
+
542
+
 
 
 
 
 
 
543
  gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
544
+ gr.Markdown("<center>💡 - 如何使用此程序:输入您对ChatGLM的提问后,依次点击“开始和GLM2交流吧”、“生成对应的音频吧”、“开始AI声音克隆吧”三个按键即可;使用声音克隆功能时,请先上传一段您喜欢的音频</center>")
545
  gr.HTML('''
546
  <div class="footer">
547
  <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘