ChandraP12330 committed on
Commit
02de360
1 Parent(s): 4d94a5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -1
app.py CHANGED
@@ -34,6 +34,11 @@ model_id = "openai/clip-vit-large-patch14"
34
  processor = CLIPProcessor.from_pretrained(model_id)
35
  model = CLIPModel.from_pretrained(model_id)
36
 
 
 
 
 
 
37
  # Title
38
  st.title("Image Caption Surveillance")
39
 
@@ -47,14 +52,22 @@ image_url = st.text_input("Enter the URL of the image:")
47
  if image_url:
48
  try:
49
  st.image(image_url, caption="Uploaded Image")
 
50
  image = Image.open(requests.get(image_url, stream=True).raw)
51
  inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
52
  outputs = model(**inputs)
53
  logits_per_image = outputs.logits_per_image # this is the image-text similarity score
54
  probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
 
 
 
 
 
 
55
 
56
- context= scene_labels[probs.argmax(-1)]
57
  st.write("context: ", context)
 
58
  except Exception as e:
59
  st.error(f"Error: {e}")
60
  else:
 
34
  processor = CLIPProcessor.from_pretrained(model_id)
35
  model = CLIPModel.from_pretrained(model_id)
36
 
37
+ ##BLIP
38
+ from transformers import pipeline
39
+ image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
40
+
41
+
42
  # Title
43
  st.title("Image Caption Surveillance")
44
 
 
52
  if image_url:
53
  try:
54
  st.image(image_url, caption="Uploaded Image")
55
+ ##CLIP
56
  image = Image.open(requests.get(image_url, stream=True).raw)
57
  inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
58
  outputs = model(**inputs)
59
  logits_per_image = outputs.logits_per_image # this is the image-text similarity score
60
  probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
61
+ raw_context= scene_labels[probs.argmax(-1)]
62
+ context= 'the image is depicting scene of '+ raw_context
63
+
64
+ ##BLIP
65
+ caption = image_to_text(image_url, max_new_tokens=200)
66
+ initial_caption= caption[0]['generated_text']
67
 
68
+ ##Output
69
  st.write("context: ", context)
70
+ st.write("initial_caption: ", initial_caption)
71
  except Exception as e:
72
  st.error(f"Error: {e}")
73
  else: