image-retriever / app.py
not-lain's picture
🌘w🌖
fe9fde2
import gradio as gr
import spaces
import torch
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
from datasets import load_dataset
# Single source of truth for the checkpoint, so the processor and the model
# are always loaded from the same weights.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

# Load the training split and attach a FAISS index to its "embeddings" column;
# `search` queries that index by name.
dataset = load_dataset("not-lain/embedded-pokemon", split="train").add_faiss_index(
    "embeddings"
)

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The processor turns raw images into model inputs; the model embeds them.
processor = AutoProcessor.from_pretrained(CLIP_CHECKPOINT)
model = AutoModelForZeroShotImageClassification.from_pretrained(
    CLIP_CHECKPOINT,
    device_map=device,
)
@spaces.GPU
def search(query, k: int = 4) -> list[list]:
    """Embed a query image and return its nearest neighbours from the dataset.

    Args:
        query: The image to look up, in any form the processor accepts as
            ``images=`` (presumably the array produced by Gradio's "image"
            input -- confirm against the Gradio version in use). ``None``
            (nothing uploaded) yields an empty result instead of an error.
        k: Maximum number of results to retrieve from the index.

    Returns:
        A list of ``[image, caption]`` pairs, in the order the index returned
        them. It can be shorter than ``k`` when the index yields fewer
        examples, and is empty when ``query`` is ``None``.
    """
    # Nothing to embed: hand the gallery an empty result rather than letting
    # the processor fail on a missing image.
    if query is None:
        return []

    # Preprocess the image into the tensor the model expects and move it to
    # the same device as the model.
    pixel_values = processor(images=query, return_tensors="pt")["pixel_values"]
    pixel_values = pixel_values.to(device)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        img_emb = model.get_image_features(pixel_values)[0]  # single image in the batch
    img_emb = img_emb.cpu().numpy()  # the dataset index is queried with numpy arrays

    # Compare the query embedding against the indexed "embeddings" column and
    # keep at most the top k matches; the scores are not used.
    _scores, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        img_emb,
        k=k,
    )

    # Pair images with captions by zipping the two columns, so a result set
    # with fewer than k rows cannot raise IndexError.
    return [
        [image, caption]
        for image, caption in zip(
            retrieved_examples["image"], retrieved_examples["text"]
        )
    ]
# Web UI: one image goes in, `search` runs on it, and the returned
# [image, caption] pairs are shown in a gallery.
demo = gr.Interface(
    fn=search,
    inputs="image",
    outputs=["gallery"],
    examples=["./charmander.jpg"],  # sample query offered in the UI
)

# Start the app with debug mode enabled.
demo.launch(debug=True)