Fabrice-TIERCELIN committed on
Commit 8408dd7 • 1 Parent(s): 9e55dff

1, 2 or 3 output files

Files changed (1)
  1. app.py +51 -13
app.py CHANGED
@@ -56,7 +56,7 @@ class Tango:
         latents = self.model.inference([prompt], self.scheduler, steps, guidance, samples, disable_progress = disable_progress)
         mel = self.vae.decode_first_stage(latents)
         wave = self.vae.decode_to_waveform(mel)
-        return wave[0]
+        return wave
 
     def generate_for_batch(self, prompts, steps = 200, guidance = 3, samples = 1, batch_size = 8, disable_progress = True):
         # Generate audio for a list of prompt strings
@@ -81,26 +81,33 @@ tango.model.to(device_type)
 
 def check(
     prompt,
+    output_number,
     steps,
     guidance
 ):
     if prompt is None or prompt == "":
         raise gr.Error("Please provide a prompt input.")
+    if not output_number in [1, 2, 3]:
+        raise gr.Error("Please ask for 1, 2 or 3 output files.")
 
 def text2audio(
     prompt,
+    output_number,
     steps,
     guidance
 ):
-    output_wave = tango.generate(prompt, steps, guidance)
-    return gr.make_waveform((16000, output_wave))
+    output_wave = tango.generate(prompt, steps, guidance, output_number)
+    output_wave_1 = gr.make_waveform((16000, output_wave[0]))
+    output_wave_2 = gr.make_waveform((16000, output_wave[1])) if (2 <= output_number) else None
+    output_wave_3 = gr.make_waveform((16000, output_wave[2])) if (output_number == 3) else None
+    return [output_wave_1, output_wave_2, output_wave_3]
 
 # Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("""
         <p style="text-align: center;">
         <b><big><big><big>Text-to-Audio</big></big></big></b>
-        <br/>Generates an audio file, freely, without account, without watermark, that you can download.
+        <br/>Generates 10 second audio file, freely, without account, without watermark, that you can download.
         </p>
         <br/>
         <br/>
@@ -116,45 +123,76 @@ with gr.Blocks() as interface:
         ⚖️ You can use, modify and share the generated sounds but not for commercial uses.
         """
     )
-    input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse", lines = 2, autofocus = True)
+    input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse, best quality", lines = 2, autofocus = True)
     with gr.Accordion("Advanced options", open = False):
+        output_number = gr.Slider(label = "Number of generations", info = "1, 2 or 3 output files", minimum = 1, maximum = 3, value = 1, step = 1, interactive = True)
         denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
         guidance_scale = gr.Slider(label = "Guidance Scale", info = "lower=audio quality, higher=follow the prompt", minimum = 1, maximum = 10, value = 3, step = 0.1, interactive = True)
 
     submit = gr.Button("Generate 🚀", variant = "primary")
 
-    output_audio = gr.Audio(label = "Generated Audio")
+    output_audio_1 = gr.Audio(label = "Generated Audio #1/3")
+    output_audio_2 = gr.Audio(label = "Generated Audio #2/3")
+    output_audio_3 = gr.Audio(label = "Generated Audio #3/3")
 
     submit.click(fn = check, inputs = [
         input_text,
+        output_number,
         denoising_steps,
         guidance_scale
     ], outputs = [], queue = False, show_progress = False).success(fn = text2audio, inputs = [
         input_text,
+        output_number,
         denoising_steps,
         guidance_scale
     ], outputs = [
-        output_audio
+        output_audio_1,
+        output_audio_2,
+        output_audio_3
    ], scroll_to_output = True)
 
     gr.Examples(
         fn = text2audio,
         inputs = [
             input_text,
+            output_number,
             denoising_steps,
             guidance_scale
         ],
         outputs = [
-            output_audio
+            output_audio_1,
+            output_audio_2,
+            output_audio_3
        ],
        examples = [
-            ["A hammer is hitting a wooden surface", 100, 3],
-            ["Peaceful and calming ambient music with singing bowl and other instruments.", 100, 3],
-            ["A man is speaking in a small room.", 100, 3],
-            ["A female is speaking followed by footstep sound", 100, 3],
-            ["Wooden table tapping sound followed by water pouring sound.", 100, 3],
+            ["A hammer is hitting a wooden surface", 1, 100, 3],
+            ["Peaceful and calming ambient music with singing bowl and other instruments.", 1, 100, 3],
+            ["A man is speaking in a small room.", 1, 100, 3],
+            ["A female is speaking followed by footstep sound", 1, 100, 3],
+            ["Wooden table tapping sound followed by water pouring sound.", 1, 100, 3],
        ],
        cache_examples = "lazy",
    )
+
+    gr.Markdown(
+        """
+        ## How to prompt your sound
+        You can use round brackets to increase the importance of a part:
+        ```
+        Peaceful and (calming) ambient music with singing bowl and other instruments
+        ```
+        You can use several levels of round brackets to even more increase the importance of a part:
+        ```
+        (Peaceful) and ((calming)) ambient music with singing bowl and other instruments
+        ```
+        You can use number instead of several round brackets:
+        ```
+        (Peaceful:1.5) and ((calming)) ambient music with singing bowl and other instruments
+        ```
+        You can do the same thing with square brackets to decrease the importance of a part:
+        ```
+        (Peaceful:1.5) and ((calming)) ambient music with [singing:2] bowl and other instruments
+        """
+    )
 
 interface.queue(10).launch()
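The pattern this commit relies on is worth spelling out: a Gradio event handler must return one value per declared output component, so the app always declares three `gr.Audio` components and fills the slots the user did not request with `None`. The sketch below is a minimal, self-contained illustration of that pattern under stated assumptions: `fake_generate` and the synthetic sine waves stand in for `tango.generate()`, the component names are illustrative rather than taken from app.py, and plain `(sample_rate, array)` tuples are returned instead of `gr.make_waveform` videos for simplicity.

```python
# Minimal sketch of the fixed-slot output pattern used by this commit:
# three gr.Audio components are always declared, and the handler returns
# None for the slots the user did not request. All names are illustrative.
import numpy as np
import gradio as gr

SAMPLE_RATE = 16000

def fake_generate(prompt, output_number, steps, guidance):
    # Stand-in for tango.generate(): one 1-second sine wave per requested output.
    t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
    waves = [
        0.5 * np.sin(2 * np.pi * 220 * (i + 1) * t).astype(np.float32)
        for i in range(int(output_number))
    ]
    # Pad to exactly three return values: unused gr.Audio slots receive None.
    audios = [(SAMPLE_RATE, w) for w in waves]
    return audios + [None] * (3 - len(audios))

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt", value="Snort of a horse")
    output_number = gr.Slider(label="Number of generations", minimum=1, maximum=3, value=1, step=1)
    steps = gr.Slider(label="Steps", minimum=100, maximum=200, value=100, step=1)
    guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, value=3, step=0.1)
    submit = gr.Button("Generate")
    outputs = [gr.Audio(label=f"Generated Audio #{i}/3") for i in (1, 2, 3)]
    submit.click(fn=fake_generate, inputs=[prompt, output_number, steps, guidance], outputs=outputs)

demo.launch()
```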