import streamlit as st
import torch
import torch.nn as nn
import bitsandbytes  # needed for 8-bit (load_in_8bit) model loading
import accelerate    # needed for device placement of 8-bit models
import scipy
from PIL import Image
from transformers import (
    Blip2Processor,
    Blip2ForConditionalGeneration,
    InstructBlipProcessor,
    InstructBlipForConditionalGeneration,
)
from my_model.object_detection import detect_and_draw_objects

def load_caption_model(blip2=False, instructblip=True):
    """Load a BLIP-2 or InstructBLIP model in 8-bit precision, plus its processor."""
    if blip2:
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
        model = Blip2ForConditionalGeneration.from_pretrained(
            "Salesforce/blip2-opt-2.7b", load_in_8bit=True, torch_dtype=torch.float16
        )
    if instructblip:
        processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
        model = InstructBlipForConditionalGeneration.from_pretrained(
            "Salesforce/instructblip-vicuna-7b", load_in_8bit=True, torch_dtype=torch.float16
        )
    # The quantization arguments apply to the model only; the processor takes none.
    # On multi-GPU machines, wrap the model in DataParallel; generate() is then
    # reached through model.module (see answer_question below).
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        model.to('cuda')
    return model, processor
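
# Optional: a minimal caching sketch. Streamlit reruns the whole script on every
# widget interaction, so calling load_caption_model() inside the button handler
# reloads the multi-gigabyte checkpoint each time. Assuming Streamlit >= 1.18
# (which provides st.cache_resource), the load could be memoized like this:
#
#     @st.cache_resource
#     def get_caption_model():
#         return load_caption_model()
#
# get_caption_model is a hypothetical helper, not part of the original app; the
# "Get Answer" handler would call it instead of load_caption_model().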

def answer_question(image, question, model, processor):
    image = Image.open(image)
    inputs = processor(image, question, return_tensors="pt").to("cuda", torch.float16)
    if isinstance(model, torch.nn.DataParallel):
        # Use the 'module' attribute to access the original model
        out = model.module.generate(**inputs, max_length=100, min_length=20)
    else:
        out = model.generate(**inputs, max_length=100, min_length=20)
    answer = processor.decode(out[0], skip_special_tokens=True).strip()
    return answer

st.title("Image Question Answering")

# File uploader for the image
image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

# Text input for the question
question = st.text_input("Enter your question about the image:")
if st.button("Get Answer"):
    if image is not None and question:
        # Display the image
        st.image(image, use_column_width=True)
        # Get and display the answer
        model, processor = load_caption_model()
        answer = answer_question(image, question, model, processor)
        st.write(answer)
    else:
        st.write("Please upload an image and enter a question.")

# Object Detection UI in the sidebar
st.sidebar.title("Object Detection")

# Dropdown to select the model
detect_model = st.sidebar.selectbox("Choose a model for object detection:", ["detic", "yolov5"])

# Slider for the detection threshold, with a model-dependent default
threshold = st.sidebar.slider("Select Detection Threshold", 0.1, 0.9, 0.2 if detect_model == "yolov5" else 0.4)

# Button to trigger object detection
detect_button = st.sidebar.button("Detect Objects")

def perform_object_detection(image, model_name, threshold):
    """
    Perform object detection on the given image using the specified model and threshold.

    Args:
        image (PIL.Image): The image on which to perform object detection.
        model_name (str): The name of the object detection model to use.
        threshold (float): The threshold for object detection.

    Returns:
        PIL.Image, str: The image with drawn bounding boxes and a string of detected objects.
    """
    # Perform object detection and draw bounding boxes
    processed_image, detected_objects = detect_and_draw_objects(image, model_name, threshold)
    return processed_image, detected_objects

# Check if the 'Detect Objects' button was clicked
if detect_button:
    if image is not None:
        try:
            # Open the uploaded image
            image = Image.open(image)

            # Display the original image
            st.image(image, use_column_width=True, caption="Original Image")

            # Perform object detection
            processed_image, detected_objects = perform_object_detection(image, detect_model, threshold)

            if processed_image is not None:
                # Display the image with detected objects
                st.image(processed_image, use_column_width=True, caption="Image with Detected Objects")

                # Display the detected objects as text
                st.write(detected_objects)
            else:
                st.error("Failed to process image for object detection.")
        except Exception as e:
            st.error(f"Error processing image: {e}")
    else:
        st.write("Please upload an image for object detection.")