Commit dfa75c8 • Parent: e9f4025

caption using an auxiliary space if on spaces (#3)

- caption using an auxiliary space if on spaces (bc82dbcf33a595d9d5f60222daba2e0cfdfe2afa)
- Update requirements.txt (e881eb450784b8e8cc3cdef6b2cf34dce5bf14d5)
- Update requirements_local.txt (03a86ee3a538067a432ed7b47e5af526a1c556c0)
- Update requirements.txt (acf3b82e1699354a0f39644a5b656fc19c4f338f)

Files changed:
- app.py +29 -17
- requirements.txt +0 -3
- requirements_local.txt +0 -2
app.py
CHANGED
@@ -4,18 +4,10 @@ from typing import Union
 from huggingface_hub import whoami
 is_spaces = True if os.environ.get("SPACE_ID") else False
 is_canonical = True if os.environ.get("SPACE_ID") == "autotrain-projects/train-flux-lora-ease" else False
-
-if is_spaces:
-    subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-    import spaces
 
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 import sys
 
-from dotenv import load_dotenv
-
-load_dotenv()
-
 # Add the current working directory to the Python path
 sys.path.insert(0, os.getcwd())
 
@@ -28,9 +20,13 @@ import shutil
 import json
 import yaml
 from slugify import slugify
-from transformers import AutoProcessor, AutoModelForCausalLM
 
+if is_spaces:
+    from gradio_client import Client, handle_file
+    client = Client("multimodalart/Florence-2-l4")
+
 if not is_spaces:
+    from transformers import AutoProcessor, AutoModelForCausalLM
     sys.path.insert(0, "ai-toolkit")
     from toolkit.job import get_job
     gr.OAuthProfile = None
@@ -38,7 +34,6 @@ if not is_spaces:
 
 MAX_IMAGES = 150
 
-
 def load_captioning(uploaded_files, concept_sentence):
     uploaded_images = [file for file in uploaded_files if not file.endswith('.txt')]
     txt_files = [file for file in uploaded_files if file.endswith('.txt')]
@@ -71,7 +66,6 @@ def load_captioning(uploaded_files, concept_sentence):
             print(base_name)
             print(image_value)
             if base_name in txt_files_dict:
-                print("entrou")
                 with open(txt_files_dict[base_name], 'r') as file:
                     corresponding_caption = file.read()
 
@@ -112,13 +106,13 @@ def create_dataset(*inputs):
     return destination_folder
 
 
-def run_captioning(images, concept_sentence, *captions):
+def run_captioning_local(images, concept_sentence, *captions):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16
    model = AutoModelForCausalLM.from_pretrained(
-        "
+        "multimodalart/Florence-2-large-no-flash-attn", torch_dtype=torch_dtype, trust_remote_code=True
    ).to(device)
-    processor = AutoProcessor.from_pretrained("
+    processor = AutoProcessor.from_pretrained("multimodalart/Florence-2-large-no-flash-attn", trust_remote_code=True)
 
    captions = list(captions)
    for i, image_path in enumerate(images):
@@ -147,8 +141,26 @@ def run_captioning(images, concept_sentence, *captions):
    del model
    del processor
 
-
-
+def run_captioning_spaces(images, concept_sentence, *captions):
+    captions = list(captions)
+    for i, image_path in enumerate(images):
+        print(captions[i])
+        if isinstance(image_path, str):  # If image is a file path
+            image = Image.open(image_path).convert("RGB")
+
+        answer = client.predict(
+            image=handle_file(image_path),
+            task_prompt="Detailed Caption",
+            text_input=None,
+            api_name="/process_image"
+        )[0].replace("'", '"')
+        parsed_answer = json.loads(answer)
+        caption_text = parsed_answer["<DETAILED_CAPTION>"].replace("The image shows ", "")
+        if concept_sentence:
+            caption_text = f"{caption_text} [trigger]"
+        captions[i] = caption_text
+
+    yield captions
 
 def recursive_update(d, u):
    for k, v in u.items():
@@ -548,7 +560,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
        outputs=progress_area,
    )
 
-    do_captioning.click(fn=
+    do_captioning.click(fn=run_captioning_spaces if is_spaces else run_captioning_local, inputs=[images, concept_sentence] + caption_list, outputs=caption_list)
    demo.load(fn=swap_visibilty, outputs=main_ui)
 
 if __name__ == "__main__":
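For reference, the remote captioning path added above can be exercised on its own. Below is a minimal sketch distilled from run_captioning_spaces; the caption_remote wrapper is our own name, and it assumes the multimodalart/Florence-2-l4 Space is reachable and exposes the /process_image endpoint exactly as the diff uses it:

import json
from gradio_client import Client, handle_file

client = Client("multimodalart/Florence-2-l4")  # auxiliary captioning Space

def caption_remote(image_path, add_trigger=False):
    # Element 0 of the Space's output is a stringified dict, so single
    # quotes are normalized before json.loads, mirroring the diff.
    answer = client.predict(
        image=handle_file(image_path),
        task_prompt="Detailed Caption",
        text_input=None,
        api_name="/process_image",
    )[0].replace("'", '"')
    caption = json.loads(answer)["<DETAILED_CAPTION>"].replace("The image shows ", "")
    return f"{caption} [trigger]" if add_trigger else caption

# Example: print(caption_remote("photo.jpg", add_trigger=True))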
requirements.txt
CHANGED
@@ -1,7 +1,4 @@
-transformers
-accelerate
 python-slugify
-python-dotenv
 einops
 timm
 autotrain-advanced
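With Spaces captioning now delegated to the auxiliary Space, transformers and accelerate drop out of the Spaces requirements; only the local path (run_captioning_local) still loads Florence-2 in-process. The diff elides that function's per-image loop (old lines 124-147), so here is a hedged sketch of it, assuming the standard Florence-2 model-card API; the task token and generation settings are assumptions chosen to match the <DETAILED_CAPTION> key parsed elsewhere in the file, not code taken from this repo:

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16
model_id = "multimodalart/Florence-2-large-no-flash-attn"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

def caption_local(image_path):
    image = Image.open(image_path).convert("RGB")
    prompt = "<DETAILED_CAPTION>"  # assumed task token; matches the key parsed in run_captioning_spaces
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,  # assumed; typical Florence-2 example setting
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(generated_text, task=prompt, image_size=(image.width, image.height))
    return parsed[prompt].replace("The image shows ", "")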
requirements_local.txt
CHANGED
@@ -1,5 +1,3 @@
 gradio
 python-slugify
-python-dotenv
-flash-attn
 huggingface_hub