import streamlit as st
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor


@st.cache_resource
def load_blip():
    # Load the BLIP model and processor once and cache them across Streamlit reruns,
    # so the weights are not re-downloaded/re-initialized on every interaction
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    return model, processor


model, processor = load_blip()


def generate_caption(image):
    # Preprocess the PIL image into the pixel values expected by BLIP
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Generate caption token ids with beam search
    output_ids = model.generate(pixel_values, max_length=50, num_beams=4, early_stopping=True)

    # Decode the token ids into a plain-text caption
    caption = processor.decode(output_ids[0], skip_special_tokens=True)
    return caption


def main():
    st.title("Image Caption Generator")

    # Upload image
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Display the uploaded image
        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

        # Open the upload as a PIL image so the processor receives image data,
        # not raw bytes
        image = Image.open(uploaded_file).convert("RGB")

        # Generate caption
        if st.button("Generate Caption"):
            with st.spinner("Generating caption..."):
                caption = generate_caption(image)
            st.success(f"Caption: {caption}")


if __name__ == "__main__":
    main()
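
# Usage note (a minimal sketch, assuming this file is saved as app.py and the
# required packages are installed in the current environment):
#
#   pip install streamlit transformers torch pillow
#   streamlit run app.py
#
# The first run downloads the Salesforce/blip-image-captioning-large checkpoint
# from the Hugging Face Hub, so generating the first caption can take a while.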