multimodalart HF staff committed on
Commit bc82dbc
1 Parent(s): 2406cac

caption using an auxiliary space if on spaces

Files changed (1):
  1. app.py +29 -17
app.py CHANGED
@@ -4,18 +4,10 @@ from typing import Union
 from huggingface_hub import whoami
 is_spaces = True if os.environ.get("SPACE_ID") else False
 is_canonical = True if os.environ.get("SPACE_ID") == "autotrain-projects/train-flux-lora-ease" else False
-
-if is_spaces:
-    subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-    import spaces
 
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 import sys
 
-from dotenv import load_dotenv
-
-load_dotenv()
-
 # Add the current working directory to the Python path
 sys.path.insert(0, os.getcwd())
 
@@ -28,9 +20,13 @@ import shutil
 import json
 import yaml
 from slugify import slugify
-from transformers import AutoProcessor, AutoModelForCausalLM
 
+if is_spaces:
+    from gradio_client import Client, handle_file
+    client = Client("multimodalart/Florence-2-l4")
+
 if not is_spaces:
+    from transformers import AutoProcessor, AutoModelForCausalLM
     sys.path.insert(0, "ai-toolkit")
     from toolkit.job import get_job
     gr.OAuthProfile = None
@@ -38,7 +34,6 @@ if not is_spaces:
 
 MAX_IMAGES = 150
 
-
 def load_captioning(uploaded_files, concept_sentence):
     uploaded_images = [file for file in uploaded_files if not file.endswith('.txt')]
     txt_files = [file for file in uploaded_files if file.endswith('.txt')]
@@ -71,7 +66,6 @@ def load_captioning(uploaded_files, concept_sentence):
         print(base_name)
         print(image_value)
         if base_name in txt_files_dict:
-            print("entrou")
             with open(txt_files_dict[base_name], 'r') as file:
                 corresponding_caption = file.read()
 
@@ -112,13 +106,13 @@ def create_dataset(*inputs):
     return destination_folder
 
 
-def run_captioning(images, concept_sentence, *captions):
+def run_captioning_local(images, concept_sentence, *captions):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16
     model = AutoModelForCausalLM.from_pretrained(
-        "microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True
+        "multimodalart/Florence-2-large-no-flash-attn", torch_dtype=torch_dtype, trust_remote_code=True
     ).to(device)
-    processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained("multimodalart/Florence-2-large-no-flash-attn", trust_remote_code=True)
 
     captions = list(captions)
     for i, image_path in enumerate(images):
@@ -147,8 +141,26 @@ def run_captioning(images, concept_sentence, *captions):
     del model
     del processor
 
-if is_spaces:
-    run_captioning = spaces.GPU()(run_captioning)
+def run_captioning_spaces(images, concept_sentence, *captions):
+    captions = list(captions)
+    for i, image_path in enumerate(images):
+        print(captions[i])
+        if isinstance(image_path, str):  # If image is a file path
+            image = Image.open(image_path).convert("RGB")
+
+        answer = client.predict(
+            image=handle_file(image_path),
+            task_prompt="Detailed Caption",
+            text_input=None,
+            api_name="/process_image"
+        )[0].replace("'", '"')
+        parsed_answer = json.loads(answer)
+        caption_text = parsed_answer["<DETAILED_CAPTION>"].replace("The image shows ", "")
+        if concept_sentence:
+            caption_text = f"{caption_text} [trigger]"
+        captions[i] = caption_text
+
+        yield captions
 
 def recursive_update(d, u):
     for k, v in u.items():
@@ -548,7 +560,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
         outputs=progress_area,
     )
 
-    do_captioning.click(fn=run_captioning, inputs=[images, concept_sentence] + caption_list, outputs=caption_list)
+    do_captioning.click(fn=run_captioning_spaces if is_spaces else run_captioning_local, inputs=[images, concept_sentence] + caption_list, outputs=caption_list)
     demo.load(fn=swap_visibilty, outputs=main_ui)
 
 if __name__ == "__main__":
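
For context on the pattern this commit introduces: when running on Spaces, captioning is delegated to a separate Florence-2 Space over the gradio_client API instead of loading the model in-process. Below is a minimal standalone sketch of that remote call, assuming the multimodalart/Florence-2-l4 Space is running and exposes the /process_image endpoint exactly as used in the diff; the caption_image helper and the example image path are illustrative, not part of the commit.

# Minimal sketch of the remote-captioning call used by run_captioning_spaces.
# Assumes the multimodalart/Florence-2-l4 Space is up and exposes the
# /process_image endpoint with the signature shown in the diff above.
import json
from gradio_client import Client, handle_file

client = Client("multimodalart/Florence-2-l4")

def caption_image(image_path: str) -> str:  # hypothetical helper, for illustration
    # The endpoint returns a Python-repr-style string; as in the commit,
    # single quotes are swapped for double quotes so json.loads can parse it.
    raw = client.predict(
        image=handle_file(image_path),
        task_prompt="Detailed Caption",
        text_input=None,
        api_name="/process_image",
    )[0].replace("'", '"')
    parsed = json.loads(raw)
    return parsed["<DETAILED_CAPTION>"].replace("The image shows ", "")

print(caption_image("example.jpg"))  # placeholder path

One caveat worth noting: the replace("'", '"') plus json.loads round-trip will fail on captions that themselves contain apostrophes or quotes; parsing the raw repr-style payload with ast.literal_eval would be more robust, though the sketch above keeps the commit's approach.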