import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch

st.title("Image Captioning App")
st.write("This app converts an uploaded image into a text description using the BLIP model.")

# Cache the processor and model so they are downloaded and loaded only once,
# not on every Streamlit rerun.
@st.cache_resource
def load_model():
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model

processor, model = load_model()

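# Optional (not part of the original app): if a GPU is available, moving the
# model to it speeds up caption generation. A minimal sketch:
#
#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     model = model.to(device)
#
# The preprocessed inputs below would then also need .to(device) before generate().
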
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # BLIP expects 3-channel input, so convert grayscale/RGBA uploads to RGB.
    image = Image.open(uploaded_file).convert("RGB")
    # use_column_width is deprecated in recent Streamlit releases;
    # on older versions, pass use_column_width=True instead.
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Preprocess the image into the pixel-value tensors the model expects.
    inputs = processor(image, return_tensors="pt")

    # Generate caption token IDs; no_grad avoids tracking gradients during inference.
    with torch.no_grad():
        generated_ids = model.generate(**inputs)

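    # Optional: generation can be tuned via standard transformers generate()
    # arguments. The values below are illustrative, not from the original app:
    #
    #     generated_ids = model.generate(**inputs, max_new_tokens=50, num_beams=3)
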
    # Decode the generated token IDs back into a plain-text caption.
    generated_text = processor.decode(generated_ids[0], skip_special_tokens=True)

st.write("Generated Caption:") |
|
st.success(generated_text) |
|
|
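
# To try this app locally (assuming the file is saved as app.py):
#   pip install streamlit transformers torch pillow
#   streamlit run app.py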