import json
import gradio as gr
import torch
from transformers import EfficientFormerImageProcessor, EfficientFormerForImageClassificationWithTeacher
# Load the image preprocessor and the pretrained classifier once, at import
# time, so every request served by the demo reuses the same objects.
model_name = "snap-research/efficientformer-l7-300"
processor = EfficientFormerImageProcessor.from_pretrained(model_name)
model = EfficientFormerForImageClassificationWithTeacher.from_pretrained(model_name)

# Load the ImageNet class-index -> label mapping.
# The encoding is pinned because JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open("assets/imagenet_1000_idx2labels.json", encoding="utf-8") as f:
    idx_to_label = json.load(f)
def classify_image(img, top_k):
    """Classify an image and return its most probable ImageNet labels.

    Args:
        img: Input image, in any form accepted by ``processor``.
        top_k: Number of predictions to return. The value is coerced to
            ``int`` (a UI slider may deliver a float) and clamped to the
            range ``[1, number of classes]``.

    Returns:
        dict: Maps each selected label to its softmax probability rounded
        to 4 decimals, ordered from most to least probable.
    """
    # Preprocess the input image into model-ready tensors.
    inputs = processor(images=img, return_tensors="pt")

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)

    # Class probabilities for the first entry of the batch.
    scores = torch.nn.functional.softmax(outputs.logits, dim=1)[0]

    # A float count would break selection, and a zero/negative or oversized
    # count would yield an empty or wrong result, so normalise it first.
    k = max(1, min(int(top_k), scores.shape[0]))

    # topk returns the k largest values in descending order together with
    # their class indices, avoiding a full sort over all classes.
    top_scores, top_indices = torch.topk(scores, k)

    return {
        idx_to_label[str(idx)]: round(score, 4)
        for idx, score in zip(top_indices.tolist(), top_scores.tolist())
    }
# Text displayed in the demo UI alongside the title.
description = """
Gradio demo for EfficientFormer,
introduced in EfficientFormer: Vision Transformers at MobileNet Speed.
\n\nEfficientFormer is a mobile-friendly image classification model that achieves MobileNet inference speed with impressive performance gains.
To use it, simply upload an image and print the top predictions.
"""

# Wire the classifier into a web UI: an image input plus a slider choosing
# how many predictions to show, rendered as a label/confidence widget.
demo = gr.Interface(
    classify_image,
    inputs=[
        gr.Image(),
        # Integer steps starting at 1: the value is used as a prediction
        # count, so 0 or fractional values are meaningless.
        gr.Slider(1, 1000, value=5, step=1),
    ],
    # Top-level component, consistent with gr.Image / gr.Slider above; the
    # legacy gr.outputs namespace is deprecated.
    outputs=gr.Label(),
    description=description,
    # The checkpoint loaded by this script is the L7 variant.
    title="Image Classification with EfficientFormer-L7",
    examples=[
        ["assets/halloween-gaf8ad7ebc_1920.jpeg", 5],
        ["assets/IMG_4484.jpeg", 5],
        ["assets/IMG_4737.jpeg", 5],
        ["assets/IMG_4740.jpeg", 5],
    ],
)
demo.launch()