import gradio as gr
from autodistill_gpt_4v import GPT4V
from autodistill.detection import CaptionOntology
from autodistill_grounding_dino import GroundingDINO
from autodistill.utils import plot
import tempfile
import cv2
from autodistill.core.custom_detection_model import CustomDetectionModel

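# DINO-GPT4V pipeline: Grounding DINO proposes regions for the prompt classes,
# GPT-4V assigns each region a label, and the result is served through a Gradio app.
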
# Hardcoded configuration values
api_key = "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"  # OpenAI API key used by GPT-4V
dino_prompt = "buildings . parks ."  # Grounding DINO prompt (classes separated by " . ")
gpt_prompt = "buildings"  # comma-separated classes for GPT-4V classification

MARKDOWN = """
# DINO-GPT4V
Use Grounding DINO and GPT-4V to label specific objects.
Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments) 
repository to find more OpenAI Vision API experiments or contribute your own."""

def respond(input_image):
    # Gradio provides the image as an RGB numpy array; convert to BGR for OpenCV.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        cv2.imwrite(temp_file.name, input_image)

        # Two-stage model: Grounding DINO detects candidate regions,
        # then GPT-4V classifies each region against the GPT prompt classes.
        DINOGPT = CustomDetectionModel(
            detection_model=GroundingDINO(
                CaptionOntology({dino_prompt: dino_prompt})
            ),
            classification_model=GPT4V(
                CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
                api_key=api_key
            )
        )

        results = DINOGPT.predict(temp_file.name)

        # predict() may return a tuple in some versions; assume the first
        # element holds the detections in that case.
        if isinstance(results, tuple):
            results = results[0]

        # Draw the detections on the saved image and return the annotated array.
        result = plot(
            image=cv2.imread(temp_file.name),
            detections=results,
            classes=gpt_prompt.split(", "),
            raw=True
        )

    return result

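# Build the Gradio interface: Markdown header, input/output image panels, and a submit button.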
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
            submit_button = gr.Button("Submit")

    submit_button.click(
        fn=respond,
        inputs=[input_image],
        outputs=[output_image]
    )

demo.launch()