Spaces:

Jangai
/

Describer

Running

File size: 1,524 Bytes

761cd02
 
a2142f7
4b4c90a
761cd02
8e6a6ad
4b4c90a
fc1f6da
 
 
69db782
 
 
 
fc1f6da
 
 
 
 
 
 
 
69db782
 
 
 
fc1f6da
69db782
fc1f6da
4b4c90a
761cd02
69db782
5342f17
69db782
761cd02

import gradio as gr
from transformers import pipeline
from PIL import Image

# Image captioning: image-to-text pipeline backed by the BLIP large checkpoint.
caption_pipeline = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-large",
)

# Facial emotion recognition: image-classification pipeline.
emotion_pipeline = pipeline(
    task="image-classification",
    model="RickyIG/emotion_face_image_classification_v3",
)

# Object detection: DETR with a ResNet-50 backbone.
object_pipeline = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
)

def _format_label_scores(predictions):
    """Render predictions as a comma-separated ``label: score`` list.

    Args:
        predictions: Iterable of dicts, each with a ``label`` entry and a
            numeric ``score`` entry.

    Returns:
        The entries formatted as ``label: score`` (score with two decimals)
        joined by ``", "``; an empty string when there are no predictions.
    """
    return ", ".join(
        f"{pred['label']}: {pred['score']:.2f}" for pred in predictions
    )


def generate_caption_emotion_and_objects(image):
    """Describe an image with a caption, emotion scores, and detected objects.

    Runs the module-level captioning, emotion-classification, and
    object-detection pipelines on the same image and merges their output
    into one multi-line string.

    Args:
        image: The image to analyse (the UI is configured to supply a PIL
            image), or ``None`` when nothing was uploaded.

    Returns:
        A string of the form
        ``"Caption: ...\\nEmotions: ...\\nObjects: ..."``, or a short
        prompt asking for an image when ``image`` is ``None``.
    """
    # Submitting the form without an upload yields None; the pipelines
    # cannot process that, so answer with a hint instead of raising.
    if image is None:
        return "No image provided. Please upload an image."

    # The captioning pipeline returns a list; the text of the first entry
    # is stored under "generated_text".
    caption = caption_pipeline(image)[0]["generated_text"]

    # Both remaining pipelines return lists of dicts with label/score keys.
    emotions = _format_label_scores(emotion_pipeline(image))
    objects = _format_label_scores(object_pipeline(image))

    return f"Caption: {caption}\nEmotions: {emotions}\nObjects: {objects}"

# Build the Gradio UI: a single image upload in, a single text box out.
image_input = gr.components.Image(type="pil", label="Upload an Image")
result_output = gr.components.Textbox(
    label="Generated Caption, Emotions, and Objects Detected"
)

interface = gr.Interface(
    fn=generate_caption_emotion_and_objects,
    inputs=image_input,
    outputs=result_output,
)

# Start serving the app.
interface.launch()