Spaces:
Runtime error
Runtime error
laizeqiang
committed on
Commit
•
c160de1
1
Parent(s):
b6d24d0
update
Browse files- .gitattributes +2 -0
- app.py +31 -14
- assets/bird_image.jpg +0 -0
- assets/car_image.jpg +0 -0
- assets/dog_image.jpg +0 -0
- assets/generated/audio_image_to_image/bird_rain.png +3 -0
- assets/generated/audio_image_to_image/bird_wave.png +3 -0
- assets/generated/audio_text_to_image/bird_a_painting.png +3 -0
- assets/generated/audio_text_to_image/bird_a_photo.png +3 -0
- assets/generated/audio_text_to_image/cat_a_painting.png +3 -0
- assets/generated/audio_text_to_image/cat_a_photo.png +3 -0
- assets/generated/audio_to_image/bird_audio.png +3 -0
- assets/generated/audio_to_image/car_audio.png +3 -0
- assets/generated/audio_to_image/cat.png +3 -0
- assets/generated/audio_to_image/cattle.png +3 -0
- assets/generated/audio_to_image/dog_audio.png +3 -0
- assets/generated/audio_to_image/fire_engine.png +3 -0
- assets/generated/audio_to_image/goat.png +3 -0
- assets/generated/audio_to_image/motorcycle.png +3 -0
- assets/generated/audio_to_image/plane.png +3 -0
- assets/generated/audio_to_image/train.png +3 -0
- assets/generated/bird_audio.png +0 -0
- assets/generated/cattle.png +0 -0
- assets/generated/dog_audio.png +0 -0
- assets/generated/goat.png +0 -0
- assets/generated/image_to_image/car_image.png +3 -0
- assets/image/bird.png +3 -0
- assets/image/bird_image.jpg +3 -0
- assets/image/car_image.jpg +3 -0
- assets/image/dog_image.jpg +3 -0
- assets/wav/cat.wav +3 -0
- assets/wav/fire_engine.wav +3 -0
- assets/wav/motorcycle.wav +3 -0
- assets/wav/plane.wav +3 -0
- assets/wav/train.wav +3 -0
- assets/wav/wave.wav +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
*.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
*.wav filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
import imagebind
|
|
|
3 |
import torch
|
4 |
from diffusers import StableUnCLIPImg2ImgPipeline
|
5 |
-
|
6 |
|
7 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
8 |
pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
|
9 |
-
"stabilityai/stable-diffusion-2-1-unclip",
|
10 |
)
|
11 |
pipe = pipe.to(device)
|
12 |
|
@@ -15,18 +16,34 @@ model.eval()
|
|
15 |
model.to(device)
|
16 |
|
17 |
@torch.no_grad()
|
18 |
-
def anything2img(prompt, audio):
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
return images[0]
|
29 |
|
30 |
|
31 |
-
demo = gr.Interface(fn=anything2img, inputs=["text", "audio"], outputs="image")
|
32 |
-
demo.launch()
|
|
|
|
1 |
import gradio as gr
|
2 |
import imagebind
|
3 |
+
import soundfile as sf
|
4 |
import torch
|
5 |
from diffusers import StableUnCLIPImg2ImgPipeline
|
6 |
+
from PIL import Image
|
7 |
|
8 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
9 |
pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
|
10 |
+
"stabilityai/stable-diffusion-2-1-unclip",
|
11 |
)
|
12 |
pipe = pipe.to(device)
|
13 |
|
|
|
16 |
model.to(device)
|
17 |
|
18 |
@torch.no_grad()
def anything2img(prompt, audio, image):
    """Generate an image from a text prompt and optional audio/image inputs.

    Each supplied input is written to a temporary file, embedded with the
    module-level ImageBind ``model``, and the resulting embedding(s) are
    passed to the module-level unCLIP ``pipe`` as ``image_embeds``.

    Args:
        prompt: Text prompt forwarded unchanged to ``pipe``.
        audio: ``None`` or a pair unpacked as ``(sample_rate, waveform)``.
            Presumably the tuple produced by the Gradio "audio" input --
            TODO confirm against the Gradio version in use.
        image: ``None`` or an array accepted by ``PIL.Image.fromarray``.
            Presumably the numpy array produced by the Gradio "image"
            input -- TODO confirm.

    Returns:
        The first image in the ``.images`` list returned by ``pipe``.
    """
    # Both embeddings must exist before the combination logic below.
    # Previously they were bound only inside their respective branches,
    # so omitting either input raised UnboundLocalError.
    audio_embeddings = None
    image_embeddings = None

    if audio is not None:
        sr, waveform = audio
        # The ImageBind loader takes file paths, so round-trip through disk.
        sf.write('tmp.wav', waveform, sr)
        embeddings = model.forward({
            imagebind.ModalityType.AUDIO: imagebind.load_and_transform_audio_data(['tmp.wav'], device),
        })
        audio_embeddings = embeddings[imagebind.ModalityType.AUDIO]

    if image is not None:
        Image.fromarray(image).save('tmp.png')
        embeddings = model.forward({
            imagebind.ModalityType.VISION: imagebind.load_and_transform_vision_data(['tmp.png'], device),
        })
        image_embeddings = embeddings[imagebind.ModalityType.VISION]

    if audio_embeddings is not None and image_embeddings is not None:
        # Both modalities present: combine by element-wise addition.
        embeddings = audio_embeddings + image_embeddings
    elif image_embeddings is not None:
        embeddings = image_embeddings
    elif audio_embeddings is not None:
        embeddings = audio_embeddings
    else:
        # Neither input given: the pipeline receives image_embeds=None.
        embeddings = None

    images = pipe(prompt=prompt, image_embeds=embeddings).images
    return images[0]
|
45 |
|
46 |
|
47 |
+
demo = gr.Interface(fn=anything2img, inputs=["text", "audio", "image"], outputs="image")
|
48 |
+
# demo.launch(server_name='0.0.0.0', server_port=10051, share=True)
|
49 |
+
demo.launch(server_name='0.0.0.0', server_port=10047, share=True)
|
assets/bird_image.jpg
DELETED
Binary file (115 kB)
|
|
assets/car_image.jpg
DELETED
Binary file (59.3 kB)
|
|
assets/dog_image.jpg
DELETED
Binary file (86.1 kB)
|
|
assets/generated/audio_image_to_image/bird_rain.png
ADDED
Git LFS Details
|
assets/generated/audio_image_to_image/bird_wave.png
ADDED
Git LFS Details
|
assets/generated/audio_text_to_image/bird_a_painting.png
ADDED
Git LFS Details
|
assets/generated/audio_text_to_image/bird_a_photo.png
ADDED
Git LFS Details
|
assets/generated/audio_text_to_image/cat_a_painting.png
ADDED
Git LFS Details
|
assets/generated/audio_text_to_image/cat_a_photo.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/bird_audio.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/car_audio.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/cat.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/cattle.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/dog_audio.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/fire_engine.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/goat.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/motorcycle.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/plane.png
ADDED
Git LFS Details
|
assets/generated/audio_to_image/train.png
ADDED
Git LFS Details
|
assets/generated/bird_audio.png
DELETED
Binary file (760 kB)
|
|
assets/generated/cattle.png
DELETED
Binary file (760 kB)
|
|
assets/generated/dog_audio.png
DELETED
Binary file (819 kB)
|
|
assets/generated/goat.png
DELETED
Binary file (874 kB)
|
|
assets/generated/image_to_image/car_image.png
ADDED
Git LFS Details
|
assets/image/bird.png
ADDED
Git LFS Details
|
assets/image/bird_image.jpg
ADDED
Git LFS Details
|
assets/image/car_image.jpg
ADDED
Git LFS Details
|
assets/image/dog_image.jpg
ADDED
Git LFS Details
|
assets/wav/cat.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa3d926fa712eb851c4e27d81dff0804cafc72e4f1bd716800478b2a4b40a02d
|
3 |
+
size 640044
|
assets/wav/fire_engine.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29165227485a6d457146dbee150610cc92a099af01c4a228172b328bcdcef3f0
|
3 |
+
size 882078
|
assets/wav/motorcycle.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45cd2e6a875984813afe9097be6dae8c911e7616698ae50cdd3cdb1ab6900b17
|
3 |
+
size 457918
|
assets/wav/plane.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e816bca22a06dc50e75d1cab715ca99ab3334a4097e5635c955eaee0c0b2ccf
|
3 |
+
size 1058478
|
assets/wav/train.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54eaa334a44b11f8d8888cdbf7cfc0340b70a87fb57c97c09adb454f26de7f60
|
3 |
+
size 483959
|
assets/wav/wave.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ce431cb79b82390adf8640c0f68536aae71cdf4b48da412516c4fee0428831a
|
3 |
+
size 1411278
|