Upload 2 files
ComfyUI/custom_nodes/img2txt-comfyui-nodes/src/blip_img2txt.py
CHANGED
@@ -1,3 +1,4 @@
+import os
 from PIL import Image
 from transformers import (
     BlipProcessor,
@@ -9,7 +10,7 @@ from transformers import (
 
 import torch
 import model_management
-
+import folder_paths
 
 class BLIPImg2Txt:
     def __init__(
@@ -21,21 +22,24 @@ class BLIPImg2Txt:
         repetition_penalty: float,
         search_beams: int,
         model_id: str = "Salesforce/blip-image-captioning-large",
+        custom_model_path: str = None,
     ):
         self.conditional_caption = conditional_caption
         self.model_id = model_id
+        self.custom_model_path = custom_model_path
+
+        if self.custom_model_path and os.path.exists(self.custom_model_path):
+            self.model_path = self.custom_model_path
+        else:
+            self.model_path = folder_paths.get_full_path("blip", model_id)
 
-        # Determine do_sample and num_beams
         if temperature > 1.1 or temperature < 0.90:
             do_sample = True
-            num_beams = 1
+            num_beams = 1
         else:
             do_sample = False
-            num_beams = (
-                search_beams if search_beams > 1 else 1
-            )  # Use beam search if num_beams > 1
+            num_beams = search_beams if search_beams > 1 else 1
 
-        # Initialize text config kwargs
         self.text_config_kwargs = {
             "do_sample": do_sample,
             "max_length": max_words,
@@ -51,18 +55,25 @@
         if image.mode != "RGB":
             image = image.convert("RGB")
 
-        processor = BlipProcessor.from_pretrained(self.model_id)
+        if self.model_path and os.path.exists(self.model_path):
+            model_path = self.model_path
+            local_files_only = True
+        else:
+            model_path = self.model_id
+            local_files_only = False
+
+        processor = BlipProcessor.from_pretrained(model_path, local_files_only=local_files_only)
 
-        # Update and apply configurations
-        config_text = BlipTextConfig.from_pretrained(self.model_id)
+        config_text = BlipTextConfig.from_pretrained(model_path, local_files_only=local_files_only)
         config_text.update(self.text_config_kwargs)
-        config_vision = BlipVisionConfig.from_pretrained(self.model_id)
+        config_vision = BlipVisionConfig.from_pretrained(model_path, local_files_only=local_files_only)
         config = BlipConfig.from_text_vision_configs(config_text, config_vision)
 
         model = BlipForConditionalGeneration.from_pretrained(
-            self.model_id,
+            model_path,
             config=config,
             torch_dtype=torch.float16,
+            local_files_only=local_files_only
         ).to(model_management.get_torch_device())
 
         inputs = processor(
@@ -78,4 +89,4 @@
         del model
         torch.cuda.empty_cache()
 
-        return ret
+        return ret
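Note: the local-first loading above can be exercised outside ComfyUI. A minimal standalone sketch of the same pattern, assuming a checkpoint directory at models/blip/blip-image-captioning-large (that path, and running outside the node, are assumptions; the node itself resolves the path via folder_paths):

    import os

    import torch
    from transformers import BlipProcessor, BlipForConditionalGeneration

    MODEL_ID = "Salesforce/blip-image-captioning-large"
    LOCAL_DIR = "models/blip/blip-image-captioning-large"  # hypothetical local copy

    # Prefer the local directory; otherwise fall back to the Hub repo id.
    if os.path.isdir(LOCAL_DIR):
        source, local_files_only = LOCAL_DIR, True   # resolved fully offline
    else:
        source, local_files_only = MODEL_ID, False   # may download into the HF cache

    processor = BlipProcessor.from_pretrained(source, local_files_only=local_files_only)
    model = BlipForConditionalGeneration.from_pretrained(
        source,
        torch_dtype=torch.float16,
        local_files_only=local_files_only,
    )

With local_files_only=True, from_pretrained raises an error instead of reaching the network, which keeps the offline path explicit; it expects config.json and the weight files directly inside the directory it is given (e.g. one produced by save_pretrained).
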
ComfyUI/custom_nodes/img2txt-comfyui-nodes/src/img2txt_node.py
CHANGED
@@ -14,6 +14,8 @@ from .mini_cpm_img2txt import MiniPCMImg2Txt
 
 from typing import Tuple
 
+import os
+import folder_paths
 
 class Img2TxtNode:
     CATEGORY = "img2txt"
@@ -145,6 +147,11 @@
 
         captions = []
         if use_all_models or use_blip_model:
+            blip_model_path = folder_paths.get_folder_paths("blip")[0]
+            print(f"blip_model_path: {blip_model_path}")
+            if not blip_model_path or not os.path.exists(blip_model_path):
+                raise ValueError("BLIP model 'blip-image-captioning-large' not found in ComfyUI models directory. Please ensure it's in the 'models/blip' folder.")
+
             blip = BLIPImg2Txt(
                 conditional_caption=blip_caption_prefix,
                 min_words=min_words,
@@ -152,6 +159,7 @@
                 temperature=temperature,
                 repetition_penalty=repetition_penalty,
                 search_beams=search_beams,
+                custom_model_path=blip_model_path
             )
             captions.append(blip.generate_caption(raw_image))
 
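Note: folder_paths.get_folder_paths("blip") presumes a "blip" entry exists in ComfyUI's folder registry; stock ComfyUI does not register one, and an unknown name raises KeyError rather than returning an empty list. A sketch of the registration that would typically live in the node pack's __init__.py (the location and exact setup are assumptions):

    import os

    import folder_paths

    # Hypothetical registration of a custom "blip" model folder. Without
    # something like this, folder_paths.get_folder_paths("blip") raises KeyError.
    blip_dir = os.path.join(folder_paths.models_dir, "blip")
    os.makedirs(blip_dir, exist_ok=True)
    folder_paths.add_model_folder_path("blip", blip_dir)

    print(folder_paths.get_folder_paths("blip"))  # e.g. ['.../ComfyUI/models/blip']

Two related caveats: folder_paths.get_full_path checks os.path.isfile, so the fallback in BLIPImg2Txt.__init__ returns None when the checkpoint is a directory (as BLIP checkpoints saved with save_pretrained are); and since the node passes the models/blip folder itself as custom_model_path, the checkpoint files (config.json, weights) must sit directly in that folder for from_pretrained to find them.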