Fabrice-TIERCELIN committed on
Commit
78e560c
β€’
1 Parent(s): 32614b8

Generate on the fly

Browse files
Files changed (1) hide show
  1. app.py +17 -52
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  import json
3
  import torch
4
  import time
5
- import wavio
6
 
7
  from tqdm import tqdm
8
  from huggingface_hub import snapshot_download
@@ -84,28 +83,23 @@ tango.model.to(device_type)
84
 
85
  def check(
86
  prompt,
87
- output_format,
88
  output_number,
89
  steps,
90
  guidance
91
  ):
92
  if prompt is None or prompt == "":
93
  raise gr.Error("Please provide a prompt input.")
94
- if not output_format in ["wav", "mp3"]:
95
- raise gr.Error("Please choose an allowed output format (.wav or .mp3).")
96
  if not output_number in [1, 2, 3]:
97
  raise gr.Error("Please ask for 1, 2 or 3 output files.")
98
 
99
- def update_output(output_format, output_number):
100
  return [
101
- gr.update(format = output_format),
102
- gr.update(format = output_format, visible = (2 <= output_number)),
103
- gr.update(format = output_format, visible = (output_number == 3))
104
  ]
105
 
106
  def text2audio(
107
  prompt,
108
- output_format,
109
  output_number,
110
  steps,
111
  guidance
@@ -113,32 +107,9 @@ def text2audio(
113
  start = time.time()
114
  output_wave = tango.generate(prompt, steps, guidance, output_number)
115
 
116
- output_filename_1 = "tmp1.wav"
117
- wavio.write(output_filename_1, output_wave[0], rate = 16000, sampwidth = 2)
118
-
119
- if (output_format == "mp3"):
120
- AudioSegment.from_wav("tmp1.wav").export("tmp1.mp3", format = "mp3")
121
- output_filename_1 = "tmp1.mp3"
122
-
123
- if (2 <= output_number):
124
- output_filename_2 = "tmp2.wav"
125
- wavio.write(output_filename_2, output_wave[1], rate = 16000, sampwidth = 2)
126
-
127
- if (output_format == "mp3"):
128
- AudioSegment.from_wav("tmp2.wav").export("tmp2.mp3", format = "mp3")
129
- output_filename_2 = "tmp2.mp3"
130
- else:
131
- output_filename_2 = None
132
-
133
- if (output_number == 3):
134
- output_filename_3 = "tmp3.wav"
135
- wavio.write(output_filename_3, output_wave[2], rate = 16000, sampwidth = 2)
136
-
137
- if (output_format == "mp3"):
138
- AudioSegment.from_wav("tmp3.wav").export("tmp3.mp3", format = "mp3")
139
- output_filename_3 = "tmp3.mp3"
140
- else:
141
- output_filename_3 = None
142
 
143
  end = time.time()
144
  secondes = int(end - start)
@@ -147,9 +118,9 @@ def text2audio(
147
  hours = minutes // 60
148
  minutes = minutes - (hours * 60)
149
  return [
150
- output_filename_1,
151
- output_filename_2,
152
- output_filename_3,
153
  "Start again to get a different result. The output have been generated in " + ((str(hours) + " h, ") if hours != 0 else "") + ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + str(secondes) + " sec."
154
  ]
155
 
@@ -175,7 +146,6 @@ with gr.Blocks() as interface:
175
  """
176
  )
177
  input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse", lines = 2, autofocus = True)
178
- output_format = gr.Radio(label = "Output format", info = "The file you can dowload", choices = ["mp3", "wav"], value = "wav")
179
  with gr.Accordion("Advanced options", open = False):
180
  output_number = gr.Slider(label = "Number of generations", info = "1, 2 or 3 output files", minimum = 1, maximum = 3, value = 3, step = 1, interactive = True)
181
  denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
@@ -183,27 +153,23 @@ with gr.Blocks() as interface:
183
 
184
  submit = gr.Button("πŸš€ Generate", variant = "primary")
185
 
186
- output_audio_1 = gr.Audio(label = "Generated Audio #1/3", format = "wav", type="filepath", autoplay = True)
187
- output_audio_2 = gr.Audio(label = "Generated Audio #2/3", format = "wav", type="filepath")
188
- output_audio_3 = gr.Audio(label = "Generated Audio #3/3", format = "wav", type="filepath")
189
  information = gr.Label(label = "Information")
190
 
191
  submit.click(fn = check, inputs = [
192
  input_text,
193
- output_format,
194
  output_number,
195
  denoising_steps,
196
  guidance_scale
197
  ], outputs = [], queue = False, show_progress = False).success(fn = update_output, inputs = [
198
- output_format,
199
  output_number
200
  ], outputs = [
201
- output_audio_1,
202
  output_audio_2,
203
  output_audio_3
204
  ], queue = False, show_progress = False).success(fn = text2audio, inputs = [
205
  input_text,
206
- output_format,
207
  output_number,
208
  denoising_steps,
209
  guidance_scale
@@ -218,7 +184,6 @@ with gr.Blocks() as interface:
218
  fn = text2audio,
219
  inputs = [
220
  input_text,
221
- output_format,
222
  output_number,
223
  denoising_steps,
224
  guidance_scale
@@ -230,11 +195,11 @@ with gr.Blocks() as interface:
230
  information
231
  ],
232
  examples = [
233
- ["A hammer is hitting a wooden surface", "mp3", 3, 100, 3],
234
- ["Peaceful and calming ambient music with singing bowl and other instruments.", "wav", 3, 100, 3],
235
- ["A man is speaking in a small room.", "mp3", 2, 100, 3],
236
- ["A female is speaking followed by footstep sound", "mp3", 1, 100, 3],
237
- ["Wooden table tapping sound followed by water pouring sound.", "mp3", 3, 200, 3],
238
  ],
239
  cache_examples = "lazy",
240
  )
 
2
  import json
3
  import torch
4
  import time
 
5
 
6
  from tqdm import tqdm
7
  from huggingface_hub import snapshot_download
 
83
 
84
def check(
    prompt,
    output_number,
    steps,
    guidance
):
    """Validate the UI inputs before generation starts.

    Raises gr.Error (shown as a toast in the Gradio UI) when the prompt is
    empty or the requested number of outputs is out of range. `steps` and
    `guidance` are accepted unchecked here — their sliders already constrain
    them. Returns None on success.
    """
    if prompt is None or prompt == "":
        raise gr.Error("Please provide a prompt input.")
    # Idiomatic membership test (`not in`) instead of `not x in ...`.
    if output_number not in [1, 2, 3]:
        raise gr.Error("Please ask for 1, 2 or 3 output files.")
94
 
95
def update_output(output_number):
    """Toggle visibility of the 2nd and 3rd audio players.

    Returns a list of two gr.update payloads: the second player is shown
    when at least two generations were requested, the third only when
    exactly three were requested.
    """
    show_second = output_number >= 2
    show_third = output_number == 3
    return [
        gr.update(visible = show_second),
        gr.update(visible = show_third)
    ]
100
 
101
  def text2audio(
102
  prompt,
 
103
  output_number,
104
  steps,
105
  guidance
 
107
  start = time.time()
108
  output_wave = tango.generate(prompt, steps, guidance, output_number)
109
 
110
+ output_wave_1 = gr.make_waveform((16000, output_wave[0]))
111
+ output_wave_2 = gr.make_waveform((16000, output_wave[1])) if (2 <= output_number) else None
112
+ output_wave_3 = gr.make_waveform((16000, output_wave[2])) if (output_number == 3) else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  end = time.time()
115
  secondes = int(end - start)
 
118
  hours = minutes // 60
119
  minutes = minutes - (hours * 60)
120
  return [
121
+ output_wave_1,
122
+ output_wave_2,
123
+ output_wave_3,
124
  "Start again to get a different result. The output have been generated in " + ((str(hours) + " h, ") if hours != 0 else "") + ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + str(secondes) + " sec."
125
  ]
126
 
 
146
  """
147
  )
148
  input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse", lines = 2, autofocus = True)
 
149
  with gr.Accordion("Advanced options", open = False):
150
  output_number = gr.Slider(label = "Number of generations", info = "1, 2 or 3 output files", minimum = 1, maximum = 3, value = 3, step = 1, interactive = True)
151
  denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
 
153
 
154
  submit = gr.Button("πŸš€ Generate", variant = "primary")
155
 
156
+ output_audio_1 = gr.Audio(label = "Generated Audio #1/3", format = "wav", type="numpy", autoplay = True)
157
+ output_audio_2 = gr.Audio(label = "Generated Audio #2/3", format = "wav", type="numpy")
158
+ output_audio_3 = gr.Audio(label = "Generated Audio #3/3", format = "wav", type="numpy")
159
  information = gr.Label(label = "Information")
160
 
161
  submit.click(fn = check, inputs = [
162
  input_text,
 
163
  output_number,
164
  denoising_steps,
165
  guidance_scale
166
  ], outputs = [], queue = False, show_progress = False).success(fn = update_output, inputs = [
 
167
  output_number
168
  ], outputs = [
 
169
  output_audio_2,
170
  output_audio_3
171
  ], queue = False, show_progress = False).success(fn = text2audio, inputs = [
172
  input_text,
 
173
  output_number,
174
  denoising_steps,
175
  guidance_scale
 
184
  fn = text2audio,
185
  inputs = [
186
  input_text,
 
187
  output_number,
188
  denoising_steps,
189
  guidance_scale
 
195
  information
196
  ],
197
  examples = [
198
+ ["A hammer is hitting a wooden surface", 3, 100, 3],
199
+ ["Peaceful and calming ambient music with singing bowl and other instruments.", 3, 100, 3],
200
+ ["A man is speaking in a small room.", 2, 100, 3],
201
+ ["A female is speaking followed by footstep sound", 1, 100, 3],
202
+ ["Wooden table tapping sound followed by water pouring sound.", 3, 200, 3],
203
  ],
204
  cache_examples = "lazy",
205
  )