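# Gradio demo: generate an image from a text prompt plus optional audio and/or
# image inputs. ImageBind encodes the audio/image into its joint embedding
# space, and Stable unCLIP decodes those embeddings into a picture.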
import gradio as gr
import soundfile as sf
import torch
from diffusers import StableUnCLIPImg2ImgPipeline
from imagebind import data
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType
from PIL import Image
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Stable unCLIP can condition directly on image embeddings (`image_embeds`),
# which is what lets ImageBind vectors drive generation.
pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1-unclip",
)
pipe = pipe.to(device)

# ImageBind encoder: maps audio and images into a single joint embedding space.
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)
def anything2img(prompt, audio, image):
    # Initialise both to None: the original code only assigned these inside
    # the `if` blocks, so omitting an input raised a NameError below.
    audio_embeddings = None
    image_embeddings = None
    if audio is not None:
        # Gradio's audio component passes a (sample_rate, waveform) tuple.
        sr, waveform = audio
        sf.write('tmp.wav', waveform, sr)
        with torch.no_grad():
            embeddings = model({
                ModalityType.AUDIO: data.load_and_transform_audio_data(['tmp.wav'], device),
            })
        audio_embeddings = embeddings[ModalityType.AUDIO]
    if image is not None:
        Image.fromarray(image).save('tmp.png')
        with torch.no_grad():
            embeddings = model({
                ModalityType.VISION: data.load_and_transform_vision_data(['tmp.png'], device),
            })
        image_embeddings = embeddings[ModalityType.VISION]
    # Combine whichever modalities were provided; summing keeps both signals
    # in ImageBind's shared embedding space.
    if audio_embeddings is not None and image_embeddings is not None:
        embeddings = audio_embeddings + image_embeddings
    elif image_embeddings is not None:
        embeddings = image_embeddings
    elif audio_embeddings is not None:
        embeddings = audio_embeddings
    else:
        # Text-only request; note the unCLIP pipeline expects an image or
        # image embeddings, so this path may still error.
        embeddings = None
    images = pipe(prompt=prompt, image_embeds=embeddings).images
    return images[0]
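# Direct (non-UI) usage sketch with hypothetical inputs, mirroring what the
# Gradio components pass in: audio as a (sample_rate, numpy_waveform) tuple,
# image as an HxWxC numpy array, either of which may be None:
#   result = anything2img("a dog on the beach", (sr, waveform), None)
#   result.save("out.png")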
demo = gr.Interface(fn=anything2img, inputs=["text", "audio", "image"], outputs="image")
demo.launch(server_name='0.0.0.0', server_port=10047, share=True)