import datetime
import os
import random

import gradio as gr
from gradio.components import Markdown as m


def get_time():
    """Return the current time as a formatted string for the timestamp box."""
    now = datetime.datetime.now()
    return now.strftime("%m/%d/%Y, %H:%M:%S")


def generate_recording():
    # The "model" is mocked out: return one of the bundled clips at random,
    # resolved relative to this script so it works from any working directory.
    return os.path.join(
        os.path.dirname(__file__), random.choice(["new-sax-1.mp3", "new-sax-1.wav"])
    )


def reconstruct(audio):
    # Also a mock: the input audio is ignored and a bundled clip is returned.
    return os.path.join(
        os.path.dirname(__file__), random.choice(["new-sax-1.mp3", "new-sax-1.wav"])
    )
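

# The four Interfaces below are placeholders for the report's per-instrument
# models: each lambda ignores its slider/audio inputs and just returns a clip
# bundled alongside this script.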
io1 = gr.Interface(
    lambda x, y, z: os.path.join(os.path.dirname(__file__), "sax.wav"),
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io2 = gr.Interface(
    lambda x, y, z: os.path.join(os.path.dirname(__file__), "flute.wav"),
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io3 = gr.Interface(
    lambda x, y, z: os.path.join(os.path.dirname(__file__), "trombone.wav"),
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

io4 = gr.Interface(
    lambda x, y, z: os.path.join(os.path.dirname(__file__), "sax2.wav"),
    [
        gr.Slider(label="pitch"),
        gr.Slider(label="loudness"),
        gr.Audio(label="base audio file (optional)"),
    ],
    gr.Audio(),
)

demo = gr.Blocks(title="Neural Instrument Cloning")

with demo.clear():
    m(
        """
        ## Neural Instrument Cloning from Very Few Samples
        <center><img src="https://media.istockphoto.com/photos/brass-trombone-picture-id490455809?k=20&m=490455809&s=612x612&w=0&h=l9KJvH_25z0QTLggHrcH_MsR4gPLH7uXwDPUAZ_C5zk=" width="400px"></center>"""
    )
    m(
        """
        This Blocks implementation is an adaptation of [a report](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6) written by Nicolas Jonason and Bob L.T. Sturm.

        I've implemented it in Blocks to show off some cool features, such as embedding live ML demos. More on that ahead...

        ### What does this machine learning model do?
        It combines techniques from neural voice cloning with musical instrument synthesis. This makes it possible to produce neural instrument synthesisers from just seconds of target instrument audio.

        ### Audio Examples
        Here are some **real** 16-second saxophone recordings:
        """
    )
    gr.Audio(os.path.join(os.path.dirname(__file__), "sax.wav"), label="Real saxophone recording 1")
    gr.Audio(os.path.join(os.path.dirname(__file__), "sax.wav"), label="Real saxophone recording 2")

    m(
        """
        Here is a **generated** saxophone recording:"""
    )
    a = gr.Audio(os.path.join(os.path.dirname(__file__), "new-sax.wav"))

    btn = gr.Button("Generate a new saxophone recording")
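    # This click handler is an assumed completion: it connects the button to
    # the mock generator above so that clicking refreshes the generated clip.
    btn.click(generate_recording, [], a)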

    m(
        """
        ### Inputs to the model
        The inputs to the model are:
        * pitch
        * loudness
        * base audio file
        """
    )

    m(
        """
        Try the model live!
        """
    )
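
    # Embed the four live Interfaces defined above, one tab per instrument.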
    gr.TabbedInterface(
        [io1, io2, io3, io4], ["Saxophone", "Flute", "Trombone", "Another Saxophone"]
    )

    m(
        """
        ### Using the model for cloning
        You can also use this model in a different way: simply clone an audio file
        and reconstruct it using machine learning. Here's a demo of that below:
        """
    )
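
    # Recording or uploading audio fires the change event, which runs the mock
    # reconstruction and writes the result back into the same component.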
    a2 = gr.Audio()
    a2.change(reconstruct, a2, a2)

    m(
        """
        Thanks for reading this! As you may have realized, all of the "models" in this demo are fake. They are just designed to show you what is possible using Blocks 🤗.

        For details of the model, read the [original report here](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6).

        *Details for nerds*: this report was "launched" on:
        """
    )

    t = gr.Textbox(label="timestamp")
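
    # demo.load runs whenever the page is opened, so the timestamp shows when
    # the reader loaded the demo.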
    demo.load(get_time, [], t)


if __name__ == "__main__":
    demo.launch()