import gradio as gr
from autodistill_gpt_4v import GPT4V
from autodistill.detection import CaptionOntology
from autodistill_grounding_dino import GroundingDINO
from autodistill.utils import plot
import tempfile
import cv2
from autodistill.core.custom_detection_model import CustomDetectionModel

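# DINO-GPT4V pipeline: Grounding DINO proposes regions for the prompt classes,
# GPT-4V assigns each region a label, and the result is served through a Gradio app.
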
# Hardcoded configuration values
api_key = "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"  # OpenAI API key used by GPT-4V
dino_prompt = "buildings . parks ."  # Grounding DINO prompt (classes separated by " . ")
gpt_prompt = "buildings"  # comma-separated classes for GPT-4V classification

MARKDOWN = """
# DINO-GPT4V
Use Grounding DINO and GPT-4V to label specific objects.
Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments) 
repository to find more OpenAI Vision API experiments or contribute your own."""

def respond(input_image):
    # Gradio provides the image as an RGB numpy array; convert to BGR for OpenCV.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        cv2.imwrite(temp_file.name, input_image)

        # Two-stage model: Grounding DINO detects candidate regions,
        # then GPT-4V classifies each region against the GPT prompt classes.
        DINOGPT = CustomDetectionModel(
            detection_model=GroundingDINO(
                CaptionOntology({dino_prompt: dino_prompt})
            ),
            classification_model=GPT4V(
                CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
                api_key=api_key
            )
        )

        results = DINOGPT.predict(temp_file.name)

        # predict() may return a tuple in some versions; assume the first
        # element holds the detections in that case.
        if isinstance(results, tuple):
            results = results[0]

        # Draw the detections on the saved image and return the annotated array.
        result = plot(
            image=cv2.imread(temp_file.name),
            detections=results,
            classes=gpt_prompt.split(", "),
            raw=True
        )

    return result

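# Build the Gradio interface: Markdown header, input/output image panels, and a submit button.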
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
            submit_button = gr.Button("Submit")

    submit_button.click(
        fn=respond,
        inputs=[input_image],
        outputs=[output_image]
    )

demo.launch()