Spaces:
Sleeping
Sleeping
arjunanand13
committed on
Commit
•
2d145da
1
Parent(s):
ecc5376
Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,6 @@ def analyze_image(image, prompt):
|
|
27 |
{"type": "text", "text": prompt}
|
28 |
]}
|
29 |
]
|
30 |
-
|
31 |
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
32 |
inputs = processor(
|
33 |
image,
|
@@ -35,41 +34,39 @@ def analyze_image(image, prompt):
|
|
35 |
add_special_tokens=False,
|
36 |
return_tensors="pt"
|
37 |
).to(model.device)
|
38 |
-
|
39 |
with torch.no_grad():
|
40 |
output = model.generate(**inputs, max_new_tokens=100)
|
41 |
|
42 |
full_response = processor.decode(output[0])
|
43 |
-
|
44 |
-
|
45 |
-
# return full_response
|
46 |
try:
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
50 |
try:
|
51 |
-
|
52 |
except json.JSONDecodeError as e:
|
53 |
-
|
54 |
-
return {"error": "Invalid JSON in model output", "full_response": full_response}
|
55 |
else:
|
56 |
-
|
57 |
except Exception as e:
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
|
62 |
-
default_prompt = """Analyze this image and determine if it contains a data logger.
|
63 |
-
A data logger is typically a small, black electronic device used to monitor and record data
|
64 |
-
over time, such as voltage, temperature, or current, via external sensors.
|
65 |
|
66 |
-
If a data logger is present in the image, respond with:
|
67 |
-
{"present": true, "reason": "
|
68 |
|
69 |
If no data logger is visible, respond with:
|
70 |
-
{"present": false, "reason": "
|
71 |
|
72 |
-
Ensure your response is in valid JSON format."""
|
73 |
|
74 |
iface = gr.Interface(
|
75 |
fn=analyze_image,
|
@@ -77,12 +74,104 @@ iface = gr.Interface(
|
|
77 |
gr.Image(type="pil", label="Upload Image"),
|
78 |
gr.Textbox(label="Prompt", value=default_prompt, lines=10)
|
79 |
],
|
80 |
-
outputs=
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
83 |
examples=[
|
84 |
-
["bad.
|
85 |
]
|
86 |
)
|
87 |
|
88 |
-
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
{"type": "text", "text": prompt}
|
28 |
]}
|
29 |
]
|
|
|
30 |
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
31 |
inputs = processor(
|
32 |
image,
|
|
|
34 |
add_special_tokens=False,
|
35 |
return_tensors="pt"
|
36 |
).to(model.device)
|
37 |
+
|
38 |
with torch.no_grad():
|
39 |
output = model.generate(**inputs, max_new_tokens=100)
|
40 |
|
41 |
full_response = processor.decode(output[0])
|
42 |
+
|
|
|
|
|
43 |
try:
|
44 |
+
# Find all JSON-like structures in the response
|
45 |
+
json_matches = list(re.finditer(r'\{.*?\}', full_response, re.DOTALL))
|
46 |
+
|
47 |
+
if json_matches:
|
48 |
+
# Take the last match
|
49 |
+
last_json_str = json_matches[-1].group(0)
|
50 |
try:
|
51 |
+
processed_json = json.loads(last_json_str)
|
52 |
except json.JSONDecodeError as e:
|
53 |
+
processed_json = {"error": f"Invalid JSON in model output: {e}", "full_response": full_response}
|
|
|
54 |
else:
|
55 |
+
processed_json = {"error": "No JSON found in model output", "full_response": full_response}
|
56 |
except Exception as e:
|
57 |
+
processed_json = {"error": str(e), "full_response": full_response}
|
58 |
+
|
59 |
+
return full_response, processed_json
|
60 |
|
61 |
+
default_prompt = """Analyze this image and determine if it contains a data logger. A data logger is typically a small, black electronic device used to monitor and record data over time, such as voltage, temperature, or current, via external sensors.
|
|
|
|
|
62 |
|
63 |
+
Carefully examine the image and provide a detailed response. If a data logger is present in the image, respond with:
|
64 |
+
{"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"}
|
65 |
|
66 |
If no data logger is visible, respond with:
|
67 |
+
{"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"}
|
68 |
|
69 |
+
Be specific in your reasoning, mentioning colors, shapes, and other relevant details you observe in the image. Ensure your response is in valid JSON format and provides substantial, meaningful analysis."""
|
70 |
|
71 |
iface = gr.Interface(
|
72 |
fn=analyze_image,
|
|
|
74 |
gr.Image(type="pil", label="Upload Image"),
|
75 |
gr.Textbox(label="Prompt", value=default_prompt, lines=10)
|
76 |
],
|
77 |
+
outputs=[
|
78 |
+
gr.Textbox(label="Full Response", lines=10),
|
79 |
+
gr.JSON(label="Processed JSON")
|
80 |
+
],
|
81 |
+
title="Llama 3.2 Vision",
|
82 |
+
description=" ",
|
83 |
examples=[
|
84 |
+
["./bad.jpg", default_prompt]
|
85 |
]
|
86 |
)
|
87 |
|
88 |
+
iface.launch()
|
89 |
+
|
90 |
+
# import torch
|
91 |
+
# from PIL import Image
|
92 |
+
# from transformers import AutoProcessor, AutoModelForPreTraining
|
93 |
+
# import gradio as gr
|
94 |
+
# import json
|
95 |
+
# import traceback
|
96 |
+
# import os
|
97 |
+
# import re
|
98 |
+
|
99 |
+
# model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
100 |
+
# token = os.getenv("HUGGINGFACE_TOKEN").strip()
|
101 |
+
|
102 |
+
# processor = AutoProcessor.from_pretrained(model_name, token=token)
|
103 |
+
# model = AutoModelForPreTraining.from_pretrained(
|
104 |
+
# model_name,
|
105 |
+
# quantization_config={"load_in_4bit": True},
|
106 |
+
# token=token
|
107 |
+
# )
|
108 |
+
|
109 |
+
# if torch.cuda.is_available():
|
110 |
+
# model = model.to('cuda')
|
111 |
+
|
112 |
+
# def analyze_image(image, prompt):
|
113 |
+
# messages = [
|
114 |
+
# {"role": "user", "content": [
|
115 |
+
# {"type": "image"},
|
116 |
+
# {"type": "text", "text": prompt}
|
117 |
+
# ]}
|
118 |
+
# ]
|
119 |
+
|
120 |
+
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
|
121 |
+
# inputs = processor(
|
122 |
+
# image,
|
123 |
+
# input_text,
|
124 |
+
# add_special_tokens=False,
|
125 |
+
# return_tensors="pt"
|
126 |
+
# ).to(model.device)
|
127 |
+
|
128 |
+
# with torch.no_grad():
|
129 |
+
# output = model.generate(**inputs, max_new_tokens=100)
|
130 |
+
|
131 |
+
# full_response = processor.decode(output[0])
|
132 |
+
# print("Full response:", full_response) # Debug print
|
133 |
+
|
134 |
+
# # return full_response
|
135 |
+
# try:
|
136 |
+
# json_match = re.search(r'\{.*?\}', full_response, re.DOTALL)
|
137 |
+
# if json_match:
|
138 |
+
# json_str = json_match.group(0)
|
139 |
+
# try:
|
140 |
+
# return json.loads(json_str)
|
141 |
+
# except json.JSONDecodeError as e:
|
142 |
+
# print(f"JSON decode error: {e}")
|
143 |
+
# return {"error": "Invalid JSON in model output", "full_response": full_response}
|
144 |
+
# else:
|
145 |
+
# return {"error": "No JSON found in model output", "full_response": full_response}
|
146 |
+
# except Exception as e:
|
147 |
+
# print(f"Error in analyze_image: {e}")
|
148 |
+
# return {"Full Response": str(e), "full_response": full_response}
|
149 |
+
|
150 |
+
|
151 |
+
# default_prompt = """Analyze this image and determine if it contains a data logger.
|
152 |
+
# A data logger is typically a small, black electronic device used to monitor and record data
|
153 |
+
# over time, such as voltage, temperature, or current, via external sensors.
|
154 |
+
|
155 |
+
# If a data logger is present in the image, respond with:
|
156 |
+
# {"present": true, "reason": "Brief explanation of why you believe it's a data logger"}
|
157 |
+
|
158 |
+
# If no data logger is visible, respond with:
|
159 |
+
# {"present": false, "reason": "Brief explanation of why you believe there's no data logger"}
|
160 |
+
|
161 |
+
# Ensure your response is in valid JSON format."""
|
162 |
+
|
163 |
+
# iface = gr.Interface(
|
164 |
+
# fn=analyze_image,
|
165 |
+
# inputs=[
|
166 |
+
# gr.Image(type="pil", label="Upload Image"),
|
167 |
+
# gr.Textbox(label="Prompt", value=default_prompt, lines=10)
|
168 |
+
# ],
|
169 |
+
# outputs=gr.JSON(label="Analysis Result"),
|
170 |
+
# title="Data Logger Detection using Llama 3.2 Vision",
|
171 |
+
# description="Upload an image and customize the prompt to check if it contains a data logger.",
|
172 |
+
# examples=[
|
173 |
+
# ["bad.png", default_prompt]
|
174 |
+
# ]
|
175 |
+
# )
|
176 |
+
|
177 |
+
# iface.launch()
|