File size: 765 Bytes
d715cb4
ebf09a0
daa9e93
62e7359
58ef361
 
ebf09a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8080b3e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration
import os
# Hub checkpoint that supplies both the pre/post-processor and the model weights.
_CHECKPOINT = "shadowlilac/visor"

# Loaded once at import time so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
# ignore_mismatched_sizes=True: tolerate checkpoint tensors whose shapes differ
# from the model definition instead of failing the load.
model = BlipForConditionalGeneration.from_pretrained(
    _CHECKPOINT,
    ignore_mismatched_sizes=True,
)

def generate_caption(image) -> str:
    """Generate a text caption for an image using the module-level BLIP model.

    Args:
        image: Value delivered by the Gradio ``"image"`` input. It is handed
            to ``Image.fromarray``, so it is presumably a NumPy array
            (confirm against the Gradio version in use). ``None`` is accepted
            and means "no image".

    Returns:
        The decoded caption with special tokens stripped, or an empty string
        when no image is supplied.
    """
    # The interface runs in live mode, so this callback also fires when the
    # user clears the image; Gradio then passes None, which Image.fromarray
    # cannot handle. Return an empty caption instead of raising.
    if image is None:
        return ""

    raw_image = Image.fromarray(image)
    inputs = processor(raw_image, return_tensors="pt")

    # max_length bounds the length of the generated token sequence.
    out = model.generate(**inputs, max_length=200)
    # generate() returns a batch; only one image was submitted, so take row 0.
    return processor.decode(out[0], skip_special_tokens=True)

# Build the web UI: one image input wired to generate_caption, one text output.
# live=True makes Gradio re-run the function whenever the input changes,
# without waiting for a submit click.
iface = gr.Interface(generate_caption, "image", "text", live=True)

# Start the server, listening on all network interfaces at port 7860.
iface.launch(server_name="0.0.0.0", server_port=7860)