Commit ee54eed by LanHarmony (1 parent: bc147cf)
api key
visual_foundation_models.py CHANGED
@@ -77,7 +77,7 @@ class ImageEditing:
         print("Initializing ImageEditing to %s" % device)
         self.device = device
         self.mask_former = MaskFormer(device=self.device)
-        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting").to(device)
+        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", revision="fp16", torch_dtype=torch.float16).to(device)

     @prompts(name="Remove Something From The Photo",
              description="useful when you want to remove an object or something from the photo "
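For context, the pattern this hunk adopts (the fp16 weights branch plus torch_dtype) can be exercised on its own. A minimal sketch, assuming device is a string like "cuda:0"; the CPU fallback branch is an addition here, since float16 inference is generally unsupported on CPU:

import torch
from diffusers import StableDiffusionInpaintPipeline

def load_inpaint_pipeline(device="cuda"):
    # On GPU, fetch the fp16 weight branch and keep tensors in float16,
    # roughly halving VRAM use; on CPU, fall back to full precision.
    if device.startswith("cuda"):
        return StableDiffusionInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-inpainting",
            revision="fp16", torch_dtype=torch.float16).to(device)
    return StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting").to(device)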
@@ -113,7 +113,7 @@ class InstructPix2Pix:
     def __init__(self, device):
         print("Initializing InstructPix2Pix to %s" % device)
         self.device = device
-        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix",
+        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix", torch_dtype=torch.float16,
                                                                            safety_checker=None).to(device)
         self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(self.pipe.scheduler.config)

@@ -139,7 +139,7 @@ class Text2Image:
     def __init__(self, device):
         print("Initializing Text2Image to %s" % device)
         self.device = device
-        self.pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+        self.pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
         self.text_refine_tokenizer = AutoTokenizer.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
         self.text_refine_model = AutoModelForCausalLM.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
         self.text_refine_gpt2_pipe = pipeline("text-generation", model=self.text_refine_model,
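This class pairs the now-fp16 pipeline with a GPT-2 prompt refiner. A rough sketch of how that refiner behaves in isolation; the tokenizer wiring, max_length (CLIP's 77-token limit), and device index are assumptions, since the call is truncated in this hunk:

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
model = AutoModelForCausalLM.from_pretrained("Gustavosta/MagicPrompt-Stable-Diffusion")
refiner = pipeline("text-generation", model=model, tokenizer=tokenizer,
                   max_length=77, device=0)  # assumed settings

refined = refiner("a cat sitting on a bench")[0]["generated_text"]
# 'refined' gains extra style and lighting descriptors before being passed
# to the fp16 StableDiffusionPipeline loaded above.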
@@ -166,13 +166,13 @@ class ImageCaptioning:
         self.device = device
         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
         self.model = BlipForConditionalGeneration.from_pretrained(
-            "Salesforce/blip-image-captioning-base").to(self.device)
+            "Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to(self.device)

     @prompts(name="Get Photo Description",
              description="useful when you want to know what is inside the photo. receives image_path as input. "
                          "The input to this tool should be a string, representing the image_path. ")
     def inference(self, image_path):
-        inputs = self.processor(Image.open(image_path), return_tensors="pt").to(self.device)
+        inputs = self.processor(Image.open(image_path), return_tensors="pt").to(self.device, torch.float16)
         out = self.model.generate(**inputs)
         captions = self.processor.decode(out[0], skip_special_tokens=True)
         print(f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text: {captions}")
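The second change in this hunk is the crucial companion to the first: BlipProcessor emits float32 pixel_values, and once the weights are float16 the inputs must be cast to match, otherwise the first matmul raises a dtype-mismatch RuntimeError. A standalone illustration (the image path is a placeholder):

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to("cuda")

inputs = processor(Image.open("photo.jpg"), return_tensors="pt")  # placeholder path
print(inputs.pixel_values.dtype)           # torch.float32 straight from the processor
inputs = inputs.to("cuda", torch.float16)  # cast to match the fp16 weights
print(processor.decode(model.generate(**inputs)[0], skip_special_tokens=True))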
@@ -206,9 +206,9 @@ class Image2Canny:
 class CannyText2Image:
     def __init__(self, device):
         print("Initializing CannyText2Image to %s" % device)
-        self.controlnet = ControlNetModel.from_pretrained("fusing/stable-diffusion-v1-5-controlnet-canny")
+        self.controlnet = ControlNetModel.from_pretrained("fusing/stable-diffusion-v1-5-controlnet-canny", torch_dtype=torch.float16)
         self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None)
+            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None, torch_dtype=torch.float16)
         self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
         self.pipe.to(device)
         self.seed = -1
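End to end, the fp16 ControlNet path configured here runs as sketched below. The prompt, input path, Canny thresholds, and fixed seed are placeholders (the class itself tracks self.seed rather than hard-coding one):

import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import (ControlNetModel, StableDiffusionControlNetPipeline,
                       UniPCMultistepScheduler)

controlnet = ControlNetModel.from_pretrained(
    "fusing/stable-diffusion-v1-5-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet,
    safety_checker=None, torch_dtype=torch.float16)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

edges = cv2.Canny(np.array(Image.open("input.png").convert("L")), 100, 200)
canny = Image.fromarray(np.stack([edges] * 3, axis=2))  # 1-channel edges -> RGB
result = pipe("a modern house, best quality", image=canny,
              num_inference_steps=20,
              generator=torch.Generator("cuda").manual_seed(0)).images[0]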
@@ -685,7 +685,7 @@ class VisualQuestionAnswering:
         print("Initializing VisualQuestionAnswering to %s" % device)
         self.device = device
         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(self.device)
+        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base", torch_dtype=torch.float16).to(self.device)

     @prompts(name="Answer Question About The Image",
              description="useful when you need an answer for a question based on an image. "
@@ -694,7 +694,7 @@ class VisualQuestionAnswering:
     def inference(self, inputs):
         image_path, question = inputs.split(",")
         raw_image = Image.open(image_path).convert('RGB')
-        inputs = self.processor(raw_image, question, return_tensors="pt").to(self.device)
+        inputs = self.processor(raw_image, question, return_tensors="pt").to(self.device, torch.float16)
         out = self.model.generate(**inputs)
         answer = self.processor.decode(out[0], skip_special_tokens=True)
         print(f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "
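Taken together, every from_pretrained call this commit touches roughly halves its parameter memory. A quick sanity check for one of the models (BLIP-VQA shown; the same check applies to the others):

import torch
from transformers import BlipForQuestionAnswering

def param_gib(model):
    # total bytes of all parameters, in GiB
    return sum(p.numel() * p.element_size() for p in model.parameters()) / 2**30

fp32 = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
fp16 = BlipForQuestionAnswering.from_pretrained(
    "Salesforce/blip-vqa-base", torch_dtype=torch.float16)
print(f"fp32: {param_gib(fp32):.2f} GiB, fp16: {param_gib(fp16):.2f} GiB")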