Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torch | |
import torchvision.transforms as T | |
import numpy as np | |
from PIL import Image | |
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# DINOv2
# Select checkpoint: the trailing index picks one entry from the list of
# available hub checkpoints (index 1 -> 'dinov2_vitb14').
dinov2_ckpt = ['dinov2_vits14', 'dinov2_vitb14', 'dinov2_vitl14', 'dinov2_vitg14'][1]
dinov2 = torch.hub.load('facebookresearch/dinov2', dinov2_ckpt)
dinov2.to(device)
# The model is used for inference only, so switch it to evaluation mode;
# this keeps any train-time-only layers (dropout, stochastic depth, ...)
# from affecting the embeddings.
dinov2.eval()
print()

# Preprocessing applied to every input image: resize to 224x224, convert to
# a float tensor, then normalize each of the three channels with the given
# mean/std (the commonly used ImageNet statistics).
transform_image = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
])
def predict(image):
    """
    Compute the DINOv2 embedding of an image.

    Args:
        image: A PIL Image object. Any mode is accepted; the image is
            converted to RGB before preprocessing.

    Returns:
        A dict with a single key, "embedding", holding the model output for
        the image converted to plain Python lists/floats so that it can be
        serialized as JSON.

    Raises:
        gr.Error: If no image was supplied (``image`` is None).
    """
    if image is None:
        # The handler may be invoked without an image (e.g. an empty
        # submission); report that clearly instead of failing deep inside
        # the preprocessing pipeline.
        raise gr.Error("Please provide an image.")

    # Normalize is configured with 3-channel statistics, so the image must
    # have exactly three channels *before* it enters the transform.
    # Converting here makes grayscale, palette and RGBA inputs work.
    rgb_image = image.convert("RGB")

    # Preprocess, add a leading batch dimension of size 1, move to the
    # model's device.
    batch = transform_image(rgb_image).unsqueeze(0).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = dinov2(batch)

    # Take the single item of the batch and turn it into JSON-friendly data.
    embedding = output[0].cpu().numpy().tolist()
    return {"embedding": embedding}
# Build the Gradio UI: one PIL image goes in, one JSON payload comes out.
image_input = gr.Image(type="pil")
json_output = gr.JSON()

interface = gr.Interface(
    fn=predict,
    inputs=[image_input],
    outputs=[json_output],
    title="DINOv2 Embedding",
    # Show the name of the selected checkpoint under the title.
    description=dinov2_ckpt,
)

# Start the Gradio server that serves the interface.
interface.launch()