ChandraP12330 committed on
Commit
c8b7141
1 Parent(s): cb5683a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -10
app.py CHANGED
@@ -1,24 +1,70 @@
1
  import streamlit as st
2
- from transformers import BlipForConditionalGeneration, BlipProcessor
 
3
 
4
  # Load the BLIP model and processor
5
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
6
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def generate_caption(image):
9
  # Preprocess the image
10
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
11
 
12
  # Generate caption using the BLIP model
13
- output_ids = model.generate(pixel_values, max_length=50, num_beams=4, early_stopping=True)
14
 
15
  # Decode the caption
16
- caption = processor.decode(output_ids[0], skip_special_tokens=True)
17
 
18
  return caption
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def main():
21
- st.title("Image Caption Generator")
22
 
23
  # Upload image
24
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
@@ -27,11 +73,13 @@ def main():
27
  # Display the uploaded image
28
  image = st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
29
 
30
- # Generate caption
31
- if st.button("Generate Caption"):
32
- with st.spinner("Generating caption..."):
33
  caption = generate_caption(uploaded_file.getvalue())
 
34
  st.success(f"Caption: {caption}")
 
35
 
36
  if __name__ == "__main__":
37
  main()
 
1
  import streamlit as st
2
+ from transformers import BlipForConditionalGeneration, BlipProcessor, CLIPProcessor, CLIPModel
3
+ import torch
4
 
# BLIP: image-captioning model + its paired processor.
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")

# CLIP: image-text similarity model + its paired processor, used for
# zero-shot classification against the label texts below.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Candidate labels for zero-shot classification (surveillance-style events).
# NOTE(review): strings are kept verbatim — including the trailing space in
# 'burglary ' — because they are fed to the model as-is.
labels = [
    'Arrest', 'Arson', 'Explosion', 'public fight', 'Normal',
    'Road Accident', 'Robbery', 'Shooting', 'Stealing', 'Vandalism',
    'Suspicious activity', 'Tailgating', 'Unauthorized entry',
    'Protest/Demonstration', 'Drone suspicious activity',
    'Fire/Smoke detection', 'Medical emergency',
    'Suspicious package/object', 'Threatening', 'Attack',
    'Shoplifting', 'burglary ', 'distress', 'assault',
]
 
39
def generate_caption(image):
    """Generate a natural-language caption for *image* with BLIP.

    The image is preprocessed into pixel values, decoded with beam search
    (4 beams, at most 50 tokens), and the special tokens are stripped from
    the result.

    NOTE(review): the caller passes ``uploaded_file.getvalue()`` (raw bytes);
    the BLIP processor normally expects a PIL image / array — confirm this
    input type actually works end to end.
    """
    # Turn the raw image into model-ready pixel values.
    features = blip_processor(images=image, return_tensors="pt")

    # Beam-search decode a caption from the visual features.
    generated_ids = blip_model.generate(
        features.pixel_values, max_length=50, num_beams=4, early_stopping=True
    )

    # Convert the generated token ids back into readable text.
    return blip_processor.decode(generated_ids[0], skip_special_tokens=True)
def classify_image(image):
    """Zero-shot classify *image* against the module-level ``labels``.

    Returns the label string with the highest CLIP image-text similarity.

    Fix: CLIP zero-shot classification requires BOTH the image and the
    candidate label texts. The previous code called the processor with
    ``images=`` only, so ``clip_model(**inputs)`` had no ``input_ids`` and
    the forward pass could not produce per-label logits.
    """
    # Encode the image together with every candidate label text.
    inputs = clip_processor(
        text=labels, images=image, return_tensors="pt", padding=True
    )

    # Score the image against each label; no gradients needed at inference.
    with torch.no_grad():
        outputs = clip_model(**inputs)
        # logits_per_image has shape (1, len(labels)): one similarity score
        # per candidate label. Softmax turns the scores into probabilities.
        probs = outputs.logits_per_image.softmax(dim=-1)

    # Pick the most probable label.
    top_prob, top_idx = torch.max(probs, dim=-1)
    return labels[top_idx.item()]
66
  def main():
67
+ st.title("Image Caption and Classification")
68
 
69
  # Upload image
70
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
 
73
  # Display the uploaded image
74
  image = st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
75
 
76
+ # Generate caption and classify the image
77
+ if st.button("Generate Caption and Classify"):
78
+ with st.spinner("Processing image..."):
79
  caption = generate_caption(uploaded_file.getvalue())
80
+ top_label = classify_image(uploaded_file.getvalue())
81
  st.success(f"Caption: {caption}")
82
+ st.success(f"Top Predicted Label: {top_label}")
83
 
84
  if __name__ == "__main__":
85
  main()