laizeqiang committed
Commit: e4a59ce
1 Parent(s): 8d91ae4
Files changed (2)
  1. anything2image/api.py +2 -2
  2. app.py +74 -22
anything2image/api.py CHANGED
@@ -14,7 +14,7 @@ class Anything2Image:
         imagebind_download_dir="checkpoints"
     ):
         self.pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16
+            "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=None if device == 'cpu' else torch.float16,
         ).to(device)
         self.model = imagebind.imagebind_huge(pretrained=True, download_dir=imagebind_download_dir).eval().to(device)
         self.device = device
@@ -52,7 +52,7 @@ class Anything2Image:
         }, normalize=False)
         embeddings = embeddings[imagebind.ModalityType.TEXT]
 
-        if embeddings is not None:
+        if embeddings is not None and self.device != 'cpu':
            embeddings = embeddings.half()
 
        images = pipe(prompt=prompt, image_embeds=embeddings).images
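The api.py change makes the pipeline CPU-safe: fp16 kernels are largely unavailable on CPU, so `torch_dtype` falls back to the float32 default there, and the matching `.half()` cast on the embeddings is skipped. A minimal sketch of the pattern, using the model id from the diff; the `device` detection line is added here for illustration:

import torch
from diffusers import StableUnCLIPImg2ImgPipeline

# Use fp16 only when a GPU is present; torch_dtype=None keeps the
# default float32 weights, which is what CPU inference needs.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dtype = None if device == 'cpu' else torch.float16

pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
    'stabilityai/stable-diffusion-2-1-unclip', torch_dtype=dtype
).to(device)

# Conditioning tensors must match the pipeline dtype, which is why
# the second hunk guards the embeddings.half() cast the same way.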
app.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import gradio as gr
 from anything2image.api import Anything2Image
 
@@ -5,26 +6,77 @@ from anything2image.api import Anything2Image
 anything2img = Anything2Image(imagebind_download_dir='checkpoints')
 
 with gr.Blocks() as demo:
-    gr.HTML(
-        """
-        <div align='center'> <h1>Anything To Image </h1> </div>
-        <p align="center"> Generate image from anything with ImageBind's unified latent space and stable-diffusion-2-1-unclip. </p>
-        <p align="center"><a href="https://github.com/Zeqiang-Lai/Anything2Image"><b>https://github.com/Zeqiang-Lai/Anything2Image</b></p>
-        """)
-    gr.Interface(fn=anything2img,
-                 inputs=["text",
-                         "audio",
-                         "image",
-                         "text",
-                         ],
-                 outputs="text",
-                 examples=[['', 'assets/wav/dog_audio.wav', None, None],
-                           ['A painting', 'assets/wav/cat.wav', None, None],
-                           ['', 'assets/wav/wave.wav', 'assets/image/bird.png', None],
-                           ['', None, 'assets/image/bird_image.jpg', None],
-                           ['', None, None, 'A sunset over the ocean.'],
-                           ],
-                 cache_examples=True,
-                 )
-    demo.queue(1).launch()
+    gr.HTML(
+        """
+        <div align='center'> <h1>Anything To Image </h1> </div>
+        <p align="center"> Generate image from anything with ImageBind's unified latent space and stable-diffusion-2-1-unclip. </p>
+        <p align="center"><a href="https://github.com/Zeqiang-Lai/Anything2Image"><b>https://github.com/Zeqiang-Lai/Anything2Image</b></p>
+        """
+    )
+    with gr.Tab('Audio to Image'):
+        wav_dir = 'assets/wav'
+        def audio2image(audio): return anything2img(audio=audio)
+        gr.Interface(
+            fn=audio2image,
+            inputs="audio",
+            outputs="image",
+            examples=[os.path.join(wav_dir, name) for name in os.listdir(wav_dir)],
+        )
+    with gr.Tab('Audio+Text to Image'):
+        wav_dir = 'assets/wav'
+        def audiotext2image(prompt, audio): return anything2img(prompt=prompt, audio=audio)
+        gr.Interface(
+            fn=audiotext2image,
+            inputs=["text", "audio"],
+            outputs="image",
+            examples=[
+                ['A painting', 'assets/wav/cat.wav'],
+                ['A photo', 'assets/wav/cat.wav'],
+                ['A painting', 'assets/wav/dog_audio.wav'],
+                ['A photo', 'assets/wav/dog_audio.wav'],
+            ],
+        )
+    with gr.Tab('Audio+Image to Image'):
+        wav_dir = 'assets/wav'
+        def audioimage2image(audio, image): return anything2img(image=image, audio=audio)
+        gr.Interface(
+            fn=audioimage2image,
+            inputs=["audio", "image"],
+            outputs="image",
+            examples=[
+                ['assets/wav/wave.wav', 'assets/image/bird.png'],
+                ['assets/wav/wave.wav', 'assets/image/dog_image.jpg'],
+                ['assets/wav/wave.wav', 'assets/image/room.png'],
+                ['assets/wav/rain.wav', 'assets/image/room.png'],
+            ],
+        )
+    with gr.Tab('Image to Image'):
+        image_dir = 'assets/image'
+        def image2image(image): return anything2img(image=image)
+        gr.Interface(
+            fn=image2image,
+            inputs=["image"],
+            outputs="image",
+            examples=[os.path.join(image_dir, name) for name in os.listdir(image_dir)],
+        )
+    with gr.Tab('Text to Image'):
+        def text2image(text): return anything2img(text=text)
+        gr.Interface(
+            fn=text2image,
+            inputs=["text"],
+            outputs="image",
+            examples=['A sunset over the ocean.',
+                      'A photo of a car',
+                      "A bird's-eye view of a cityscape.",
+                      "A close-up of a flower."],
+        )
+    with gr.Tab('Text+Any to Image'):
+        def textany2image(prompt, image, audio): return anything2img(prompt=prompt, image=image, audio=audio)
+        gr.Interface(
+            fn=textany2image,
+            inputs=["text", "image", "audio"],
+            outputs="image",
+            examples=[['A painting.', 'assets/image/bird.png', 'assets/wav/wave.wav']],
+        )
 
+demo.queue(1).launch()
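The rewritten app.py follows one pattern throughout: each `gr.Tab` hosts a small `gr.Interface` whose function is a thin wrapper that fixes which keyword arguments of the shared `anything2img` callable get used. A self-contained sketch of that layout, with a hypothetical placeholder standing in for the real generator:

import numpy as np
import gradio as gr

# Hypothetical stand-in for anything2img: gr.Interface accepts any
# callable that returns an image (numpy array or PIL.Image).
def placeholder(*args):
    return np.zeros((64, 64, 3), dtype=np.uint8)

with gr.Blocks() as demo:
    with gr.Tab('Audio to Image'):
        gr.Interface(fn=placeholder, inputs='audio', outputs='image')
    with gr.Tab('Text to Image'):
        gr.Interface(fn=placeholder, inputs='text', outputs='image')

demo.queue(1).launch()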