mohAhmad committed on
Commit 4824b47
1 Parent(s): 6977686

Update app.py

Files changed (1)
  1. app.py +13 -13
app.py CHANGED
@@ -1,22 +1,22 @@
  import streamlit as st
- from transformers import VisionEncoderDecoderModel, AutoProcessor
+ from transformers import BlipProcessor, BlipForConditionalGeneration
  from PIL import Image
  import torch
 
- # Set title and description
- st.title("Image to Text Captioning App")
- st.write("This app converts an image into a text description using the ViT-GPT2 model.")
+ # Title and description
+ st.title("Image Captioning App")
+ st.write("This app converts an uploaded image into a text description using the BLIP model.")
 
- # Load processor and model
+ # Load model and processor
  @st.cache_resource
  def load_model():
-     processor = AutoProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
-     model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
      return processor, model
 
  processor, model = load_model()
 
- # Image upload
+ # Upload image
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 
  if uploaded_file is not None:
@@ -24,14 +24,14 @@ if uploaded_file is not None:
      st.image(image, caption="Uploaded Image", use_column_width=True)
 
      # Preprocess the image
-     pixel_values = processor(images=image, return_tensors="pt").pixel_values
+     inputs = processor(image, return_tensors="pt")
 
-     # Perform inference (caption generation)
-     generated_ids = model.generate(pixel_values)
+     # Generate the caption (inference)
+     generated_ids = model.generate(**inputs)
 
      # Decode the generated caption
-     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+     generated_text = processor.decode(generated_ids[0], skip_special_tokens=True)
 
      # Display the generated caption
-     st.write("Generated Caption: ")
+     st.write("Generated Caption:")
      st.success(generated_text)
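
For quick verification outside Streamlit, the updated inference path can be exercised as a small standalone script. This is a minimal sketch using the same BLIP checkpoint and the same processor/generate/decode calls as the new app.py; the image path "example.jpg" is a placeholder, not a file in this repository.

# Minimal standalone sketch of the updated captioning path (same calls as the new app.py).
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("example.jpg").convert("RGB")  # placeholder path; any RGB image works
inputs = processor(image, return_tensors="pt")    # produces pixel_values for BLIP
generated_ids = model.generate(**inputs)          # default (greedy) caption generation
caption = processor.decode(generated_ids[0], skip_special_tokens=True)
print(caption)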