Spaces:
Runtime error
Runtime error
ChandraP12330
committed on
Commit
•
02de360
1
Parent(s):
4d94a5f
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,11 @@ model_id = "openai/clip-vit-large-patch14"
|
|
34 |
processor = CLIPProcessor.from_pretrained(model_id)
|
35 |
model = CLIPModel.from_pretrained(model_id)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
37 |
# Title
|
38 |
st.title("Image Caption Surveillance")
|
39 |
|
@@ -47,14 +52,22 @@ image_url = st.text_input("Enter the URL of the image:")
|
|
47 |
if image_url:
|
48 |
try:
|
49 |
st.image(image_url, caption="Uploaded Image")
|
|
|
50 |
image = Image.open(requests.get(image_url, stream=True).raw)
|
51 |
inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
|
52 |
outputs = model(**inputs)
|
53 |
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
|
54 |
probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
|
57 |
st.write("context: ", context)
|
|
|
58 |
except Exception as e:
|
59 |
st.error(f"Error: {e}")
|
60 |
else:
|
|
|
34 |
processor = CLIPProcessor.from_pretrained(model_id)
|
35 |
model = CLIPModel.from_pretrained(model_id)
|
36 |
|
37 |
+
##BLIP
|
38 |
+
from transformers import pipeline
|
39 |
+
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
|
40 |
+
|
41 |
+
|
42 |
# Title
|
43 |
st.title("Image Caption Surveillance")
|
44 |
|
|
|
52 |
if image_url:
|
53 |
try:
|
54 |
st.image(image_url, caption="Uploaded Image")
|
55 |
+
##CLIP
|
56 |
image = Image.open(requests.get(image_url, stream=True).raw)
|
57 |
inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
|
58 |
outputs = model(**inputs)
|
59 |
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
|
60 |
probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
|
61 |
+
raw_context= scene_labels[probs.argmax(-1)]
|
62 |
+
context= 'the image is depicting scene of '+ raw_context
|
63 |
+
|
64 |
+
##BLIP
|
65 |
+
caption = image_to_text(image_url, max_new_tokens=200)
|
66 |
+
initial_caption= caption[0]['generated_text']
|
67 |
|
68 |
+
##Output
|
69 |
st.write("context: ", context)
|
70 |
+
st.write("initial_caption: ", initial_caption)
|
71 |
except Exception as e:
|
72 |
st.error(f"Error: {e}")
|
73 |
else:
|