import gradio as gr
import torch
import torchvision.transforms as T
import numpy as np
from PIL import Image
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DINOv2
# Select checkpoint
dinov2_ckpt = ['dinov2_vits14', 'dinov2_vitb14', 'dinov2_vitl14', 'dinov2_vitg14'][1]
dinov2 = torch.hub.load('facebookresearch/dinov2', dinov2_ckpt)
dinov2.to(device)
dinov2.eval()  # switch to inference mode
transform_image = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])
def predict(image):
    """
    Compute the DINOv2 embedding of an image.
    Args:
        image: A PIL Image object.
    Returns:
        A dict containing the embedding as a list of floats.
    """
    # Convert to RGB (handles RGBA/grayscale inputs) and build a batched tensor.
    transformed_img = transform_image(image.convert("RGB")).unsqueeze(0).to(device)
    # Get the embedding of the image.
    with torch.no_grad():
        embedding = dinov2(transformed_img)
    print(embedding.shape)
    embedding = embedding[0].cpu().numpy().tolist()
    return {
        "embedding": embedding
    }
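# Illustrative sketch (not used by the interface below): embeddings returned by
# predict() are typically compared with cosine similarity to judge whether two
# images show the same subject. The helper name and its use are assumptions for
# demonstration, not part of the served app.
def cosine_similarity(emb_a, emb_b):
    # Convert the embedding lists back to float arrays and compute
    # dot(a, b) / (|a| * |b|).
    a = np.asarray(emb_a, dtype=np.float32)
    b = np.asarray(emb_b, dtype=np.float32)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))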
# Create a Gradio interface.
interface = gr.Interface(
    fn=predict,
    inputs=[gr.Image(type='pil')],
    outputs=[gr.JSON()],
    title="DINOv2 Embedding",
    description=dinov2_ckpt
)
# Start the Gradio server.
interface.launch()
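# Example usage (hypothetical, run in a separate Python session or before
# launch): predict() can be exercised directly on a local image file. The
# filename "example.jpg" is an assumption; for dinov2_vitb14 the embedding
# has 768 dimensions.
#
#   img = Image.open("example.jpg")
#   result = predict(img)
#   print(len(result["embedding"]))  # 768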