Spaces:
Configuration error
Configuration error
Yurii Paniv
committed on
Commit
•
eeaef84
1
Parent(s):
95bbd8f
Speed up demo
Browse files
- .gitignore +1 -1
- README.md +3 -1
- app.py +38 -46
- requirements.txt +7 -7
.gitignore
CHANGED
@@ -131,5 +131,5 @@ dmypy.json
|
|
131 |
.DS_Store
|
132 |
|
133 |
#models
|
134 |
-
config.
|
135 |
*.pth
|
|
|
131 |
.DS_Store
|
132 |
|
133 |
#models
|
134 |
+
config.yaml
|
135 |
*.pth
|
README.md
CHANGED
@@ -4,6 +4,8 @@ emoji: 🇺🇦
|
|
4 |
colorFrom: blue
|
5 |
colorTo: yellow
|
6 |
sdk: gradio
|
|
|
|
|
7 |
app_file: app.py
|
8 |
pinned: false
|
9 |
---
|
@@ -17,7 +19,7 @@ Link to speaking demo: [https://huggingface.co/spaces/robinhad/ukrainian-ai](htt
|
|
17 |
Link to text demo: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
|
18 |
# Technologies used:
|
19 |
|
20 |
-
- [Wav2Vec2 XLS-R 300M fine-tuned to Ukrainian language](https://huggingface.co/
|
21 |
- [Ukrainian VITS TTS](https://github.com/robinhad/ukrainian-tts) for text-to-speech generation.
|
22 |
- Conversational pipeline (this repository)
|
23 |
|
|
|
4 |
colorFrom: blue
|
5 |
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
+
sdk_version : 3.16
|
8 |
+
python_version: 3.10
|
9 |
app_file: app.py
|
10 |
pinned: false
|
11 |
---
|
|
|
19 |
Link to text demo: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
|
20 |
# Technologies used:
|
21 |
|
22 |
+
- [Wav2Vec2 XLS-R 300M fine-tuned to Ukrainian language](https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk) for speech recognition.
|
23 |
- [Ukrainian VITS TTS](https://github.com/robinhad/ukrainian-tts) for text-to-speech generation.
|
24 |
- Conversational pipeline (this repository)
|
25 |
|
app.py
CHANGED
@@ -1,83 +1,75 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import Conversation, ConversationalPipeline, pipeline
|
3 |
import tempfile
|
4 |
-
import torch
|
5 |
-
from os.path import exists
|
6 |
-
import requests
|
7 |
-
from TTS.utils.synthesizer import Synthesizer
|
8 |
import gradio as gr
|
|
|
|
|
9 |
|
10 |
-
def download(url, file_name):
|
11 |
-
if not exists(file_name):
|
12 |
-
print(f"Downloading {file_name}")
|
13 |
-
r = requests.get(url, allow_redirects=True)
|
14 |
-
with open(file_name, "wb") as file:
|
15 |
-
file.write(r.content)
|
16 |
-
else:
|
17 |
-
print(f"Found {file_name}. Skipping download...")
|
18 |
|
19 |
-
|
20 |
-
print("downloading uk/mykyta/vits-tts")
|
21 |
-
release_number = "v2.0.0-beta"
|
22 |
-
model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
|
23 |
-
config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
|
24 |
-
|
25 |
-
model_path = "model.pth"
|
26 |
-
config_path = "config.json"
|
27 |
-
|
28 |
-
download(model_link, model_path)
|
29 |
-
download(config_link, config_path)
|
30 |
|
31 |
p = pipeline(
|
32 |
-
"automatic-speech-recognition", "
|
33 |
)
|
34 |
|
|
|
|
|
35 |
conv: ConversationalPipeline = pipeline(
|
36 |
-
"conversational", "robinhad/gpt2-uk-conversational"
|
37 |
)
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
)
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
|
52 |
-
def transcribe(audio, history):
|
53 |
text = p(audio)["text"]
|
54 |
history = history or []
|
|
|
55 |
past_user_inputs = [i[0] for i in history]
|
56 |
generated_responses = [i[1] for i in history]
|
57 |
-
|
|
|
58 |
response = response.generated_responses[-1]
|
59 |
history.append((text, response))
|
60 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
61 |
-
|
62 |
-
wavs = synthesizer.tts(response)
|
63 |
-
synthesizer.save_wav(wavs, fp)
|
64 |
return text, fp.name, history, history
|
65 |
|
66 |
|
67 |
iface = gr.Interface(
|
68 |
fn=transcribe,
|
69 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
outputs=[
|
71 |
gr.outputs.Textbox(label="Recognized text"),
|
72 |
-
gr.outputs.Audio(label="Output"),
|
73 |
gr.outputs.Chatbot(label="Chat"),
|
74 |
"state",
|
75 |
],
|
76 |
description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
|
77 |
Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
|
78 |
""",
|
79 |
-
article=f"""Розпізнавання української: [https://huggingface.co/
|
80 |
-
Синтез української: [https://huggingface.co/spaces/robinhad/ukrainian-tts](https://huggingface.co/spaces/robinhad/ukrainian-tts)
|
81 |
-
<center><img src="{badge}" alt="visitors badge"/></center>""",
|
82 |
)
|
83 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import Conversation, ConversationalPipeline, pipeline, AlbertTokenizerFast
|
3 |
import tempfile
|
|
|
|
|
|
|
|
|
4 |
import gradio as gr
|
5 |
+
from ukrainian_tts.tts import TTS, Voices, Stress
|
6 |
+
from enum import Enum
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
tts = TTS(device="cpu") # can try gpu, mps
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
p = pipeline(
|
12 |
+
"automatic-speech-recognition", "robinhad/wav2vec2-xls-r-300m-uk"
|
13 |
)
|
14 |
|
15 |
+
|
16 |
+
tokenizer = AlbertTokenizerFast.from_pretrained("robinhad/gpt2-uk-conversational")
|
17 |
conv: ConversationalPipeline = pipeline(
|
18 |
+
"conversational", "robinhad/gpt2-uk-conversational", tokenizer=tokenizer
|
19 |
)
|
20 |
|
21 |
+
class VoiceOption(Enum):
|
22 |
+
Olena = "Олена (жіночий) 👩"
|
23 |
+
Mykyta = "Микита (чоловічий) 👨"
|
24 |
+
Lada = "Лада (жіночий) 👩"
|
25 |
+
Dmytro = "Дмитро (чоловічий) 👨"
|
26 |
+
Olga = "Ольга (жіночий) 👩"
|
|
|
27 |
|
28 |
+
|
29 |
+
voice_mapping = {
|
30 |
+
VoiceOption.Olena.value: Voices.Olena.value,
|
31 |
+
VoiceOption.Mykyta.value: Voices.Mykyta.value,
|
32 |
+
VoiceOption.Lada.value: Voices.Lada.value,
|
33 |
+
VoiceOption.Dmytro.value: Voices.Dmytro.value,
|
34 |
+
VoiceOption.Olga.value: Voices.Olga.value,
|
35 |
+
}
|
36 |
|
37 |
|
38 |
+
def transcribe(audio, selected_voice, history):
|
39 |
text = p(audio)["text"]
|
40 |
history = history or []
|
41 |
+
selected_voice = voice_mapping[selected_voice]
|
42 |
past_user_inputs = [i[0] for i in history]
|
43 |
generated_responses = [i[1] for i in history]
|
44 |
+
next_output_length = len(tokenizer.encode("".join(generated_responses + past_user_inputs))) + 60
|
45 |
+
response = conv(Conversation(text, past_user_inputs, generated_responses), max_length=next_output_length, penalty_alpha=0.6, top_k=4)
|
46 |
response = response.generated_responses[-1]
|
47 |
history.append((text, response))
|
48 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
49 |
+
_, output_text = tts.tts(response, selected_voice, Stress.Dictionary.value, fp)
|
|
|
|
|
50 |
return text, fp.name, history, history
|
51 |
|
52 |
|
53 |
iface = gr.Interface(
|
54 |
fn=transcribe,
|
55 |
+
inputs=[
|
56 |
+
gr.inputs.Audio(source="microphone", type="filepath"),
|
57 |
+
gr.components.Radio(
|
58 |
+
label="Голос",
|
59 |
+
choices=[option.value for option in VoiceOption],
|
60 |
+
value=VoiceOption.Olena.value,
|
61 |
+
),
|
62 |
+
"state"],
|
63 |
outputs=[
|
64 |
gr.outputs.Textbox(label="Recognized text"),
|
65 |
+
gr.outputs.Audio(label="Output", type="filepath"),
|
66 |
gr.outputs.Chatbot(label="Chat"),
|
67 |
"state",
|
68 |
],
|
69 |
description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
|
70 |
Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
|
71 |
""",
|
72 |
+
article=f"""Розпізнавання української: [https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk](https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk)
|
73 |
+
Синтез української: [https://huggingface.co/spaces/robinhad/ukrainian-tts](https://huggingface.co/spaces/robinhad/ukrainian-tts)""",
|
|
|
74 |
)
|
75 |
iface.launch()
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
gradio
|
2 |
-
transformers==4.
|
3 |
-
|
4 |
-
|
5 |
-
pyctcdecode
|
6 |
-
https://github.com/kpu/kenlm/archive/master.zip
|
7 |
-
sentencepiece==0.1.96
|
|
|
1 |
+
gradio==3.16
|
2 |
+
transformers==4.26
|
3 |
+
git+https://github.com/robinhad/ukrainian-tts.git@d3459a5e8a78dd95bfd1b43a4a659637a12a61d7
|
4 |
+
# this would be needed if the model had an LM
|
5 |
+
#pyctcdecode
|
6 |
+
#https://github.com/kpu/kenlm/archive/master.zip
|
7 |
+
#sentencepiece==0.1.96
|