Fabrice-TIERCELIN committed on
Commit
eed24d8
•
1 Parent(s): 5497723

Write outputs

Browse files
Files changed (1) hide show
  1. app.py +25 -12
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import json
3
  import torch
4
  import time
 
5
 
6
  from tqdm import tqdm
7
  from huggingface_hub import snapshot_download
@@ -65,7 +66,7 @@ class Tango:
65
  for k in tqdm(range(0, len(prompts), batch_size)):
66
  batch = prompts[k: k + batch_size]
67
  with torch.no_grad():
68
- latents = self.model.inference(batch, self.scheduler, steps, guidance, samples, disable_progress = disable_progress)
69
  mel = self.vae.decode_first_stage(latents)
70
  wave = self.vae.decode_to_waveform(mel)
71
  outputs += [item for item in wave]
@@ -110,9 +111,21 @@ def text2audio(
110
  ):
111
  start = time.time()
112
  output_wave = tango.generate(prompt, steps, guidance, output_number)
113
- output_wave_1 = gr.make_waveform((16000, output_wave[0]))
114
- output_wave_2 = gr.make_waveform((16000, output_wave[1])) if (2 <= output_number) else None
115
- output_wave_3 = gr.make_waveform((16000, output_wave[2])) if (output_number == 3) else None
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  end = time.time()
118
  secondes = int(end - start)
@@ -121,10 +134,10 @@ def text2audio(
121
  hours = minutes // 60
122
  minutes = minutes - (hours * 60)
123
  return [
124
- output_wave_1,
125
- output_wave_2,
126
- output_wave_3,
127
- "Start again to get a different result. The output have been generated in " + str(hours) + " h, " + str(minutes) + " min, " + str(secondes) + " sec."
128
  ]
129
 
130
  # Gradio interface
@@ -149,7 +162,7 @@ with gr.Blocks() as interface:
149
  """
150
  )
151
  input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse", lines = 2, autofocus = True)
152
- output_format = gr.Radio(label = "Output format", info = "The file you can dowload", choices = ["mp3", "wav"], value = "mp3")
153
  with gr.Accordion("Advanced options", open = False):
154
  output_number = gr.Slider(label = "Number of generations", info = "1, 2 or 3 output files", minimum = 1, maximum = 3, value = 3, step = 1, interactive = True)
155
  denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
@@ -157,9 +170,9 @@ with gr.Blocks() as interface:
157
 
158
  submit = gr.Button("🚀 Generate", variant = "primary")
159
 
160
- output_audio_1 = gr.Audio(label = "Generated Audio #1/3", format = "mp3", autoplay = True)
161
- output_audio_2 = gr.Audio(label = "Generated Audio #2/3", format = "mp3")
162
- output_audio_3 = gr.Audio(label = "Generated Audio #3/3", format = "mp3")
163
  information = gr.Label(label = "Information")
164
 
165
  submit.click(fn = check, inputs = [
 
2
  import json
3
  import torch
4
  import time
5
+ import wavio
6
 
7
  from tqdm import tqdm
8
  from huggingface_hub import snapshot_download
 
66
  for k in tqdm(range(0, len(prompts), batch_size)):
67
  batch = prompts[k: k + batch_size]
68
  with torch.no_grad():
69
+ latents = self.model.inference(batch, self.scheduler, steps, guidance, samples, disable_progress = disable_progress, length = 20)
70
  mel = self.vae.decode_first_stage(latents)
71
  wave = self.vae.decode_to_waveform(mel)
72
  outputs += [item for item in wave]
 
111
  ):
112
  start = time.time()
113
  output_wave = tango.generate(prompt, steps, guidance, output_number)
114
+
115
+ output_filename_1 = "tmp1_.wav"
116
+ wavio.write(output_filename_1, output_wave[0], rate = 16000, sampwidth = 2)
117
+
118
+ if (2 <= output_number):
119
+ output_filename_2 = "tmp2_.wav"
120
+ wavio.write(output_filename_2, output_wave[1], rate = 16000, sampwidth = 2)
121
+ else:
122
+ output_filename_2 = None
123
+
124
+ if (output_number == 3):
125
+ output_filename_3 = "tmp3_.wav"
126
+ wavio.write(output_filename_3, output_wave[2], rate = 16000, sampwidth = 2)
127
+ else:
128
+ output_filename_3 = None
129
 
130
  end = time.time()
131
  secondes = int(end - start)
 
134
  hours = minutes // 60
135
  minutes = minutes - (hours * 60)
136
  return [
137
+ output_filename_1,
138
+ output_filename_2,
139
+ output_filename_3,
140
+ "Start again to get a different result. The output have been generated in " + ((str(hours) + " h, ") if hours != 0 else "") + ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + str(secondes) + " sec."
141
  ]
142
 
143
  # Gradio interface
 
162
  """
163
  )
164
  input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse", lines = 2, autofocus = True)
165
+ output_format = gr.Radio(label = "Output format", info = "The file you can dowload", choices = ["mp3", "wav"], value = "wav")
166
  with gr.Accordion("Advanced options", open = False):
167
  output_number = gr.Slider(label = "Number of generations", info = "1, 2 or 3 output files", minimum = 1, maximum = 3, value = 3, step = 1, interactive = True)
168
  denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
 
170
 
171
  submit = gr.Button("🚀 Generate", variant = "primary")
172
 
173
+ output_audio_1 = gr.Audio(label = "Generated Audio #1/3", format = "wav", type="filepath", autoplay = True)
174
+ output_audio_2 = gr.Audio(label = "Generated Audio #2/3", format = "wav", type="filepath")
175
+ output_audio_3 = gr.Audio(label = "Generated Audio #3/3", format = "wav", type="filepath")
176
  information = gr.Label(label = "Information")
177
 
178
  submit.click(fn = check, inputs = [