ChandraP12330 committed on
Commit
02de360
1 Parent(s): 4d94a5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -1
app.py CHANGED
@@ -34,6 +34,11 @@ model_id = "openai/clip-vit-large-patch14"
34
  processor = CLIPProcessor.from_pretrained(model_id)
35
  model = CLIPModel.from_pretrained(model_id)
36
 
 
 
 
 
 
37
  # Title
38
  st.title("Image Caption Surveillance")
39
 
@@ -47,14 +52,22 @@ image_url = st.text_input("Enter the URL of the image:")
47
  if image_url:
48
  try:
49
  st.image(image_url, caption="Uploaded Image")
 
50
  image = Image.open(requests.get(image_url, stream=True).raw)
51
  inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
52
  outputs = model(**inputs)
53
  logits_per_image = outputs.logits_per_image # this is the image-text similarity score
54
  probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
 
 
 
 
 
 
55
 
56
- context= scene_labels[probs.argmax(-1)]
57
  st.write("context: ", context)
 
58
  except Exception as e:
59
  st.error(f"Error: {e}")
60
  else:
 
34
  processor = CLIPProcessor.from_pretrained(model_id)
35
  model = CLIPModel.from_pretrained(model_id)
36
 
37
+ ##BLIP
38
+ from transformers import pipeline
39
+ image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
40
+
41
+
42
  # Title
43
  st.title("Image Caption Surveillance")
44
 
 
52
  if image_url:
53
  try:
54
  st.image(image_url, caption="Uploaded Image")
55
+ ##CLIP
56
  image = Image.open(requests.get(image_url, stream=True).raw)
57
  inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
58
  outputs = model(**inputs)
59
  logits_per_image = outputs.logits_per_image # this is the image-text similarity score
60
  probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
61
+ raw_context= scene_labels[probs.argmax(-1)]
62
+ context= 'the image is depicting scene of '+ raw_context
63
+
64
+ ##BLIP
65
+ caption = image_to_text(image_url, max_new_tokens=200)
66
+ initial_caption= caption[0]['generated_text']
67
 
68
+ ##Output
69
  st.write("context: ", context)
70
+ st.write("initial_caption: ", initial_caption)
71
  except Exception as e:
72
  st.error(f"Error: {e}")
73
  else: