File size: 3,300 Bytes
dc3a5ff
606cf89
 
 
 
 
 
ed9c2a5
606cf89
 
 
 
ed9c2a5
606cf89
 
ed9c2a5
606cf89
 
ed9c2a5
606cf89
ed9c2a5
606cf89
 
 
 
 
ed9c2a5
606cf89
 
 
 
 
 
 
 
 
 
 
 
 
ed9c2a5
606cf89
ed9c2a5
606cf89
ed9c2a5
606cf89
ed9c2a5
606cf89
ed9c2a5
606cf89
 
 
ed9c2a5
606cf89
ed9c2a5
606cf89
 
 
ed9c2a5
606cf89
ed9c2a5
606cf89
ed9c2a5
606cf89
 
ed9c2a5
606cf89
 
ed9c2a5
606cf89
 
ed9c2a5
 
606cf89
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import cv2
from icevision.all import *
import icedata
import PIL, requests
import torch
from torchvision import transforms
import gradio as gr

# Download the dataset
url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
dest_dir = "fridge"
data_dir = icedata.load_data(url, dest_dir)

# Create the parser
parser = parsers.VOCBBoxParser(annotations_dir=data_dir / "odFridgeObjects/annotations", images_dir=data_dir / "odFridgeObjects/images")

# Parse annotations to create records
train_records, valid_records = parser.parse()

class_map = parser.class_map

extra_args = {}
model_type = models.torchvision.retinanet
backbone = model_type.backbones.resnet50_fpn
# Instantiate the model
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) 

# Transforms
# size is set to 384 because EfficientDet requires its inputs to be divisible by 128
image_size = 384
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])
# Datasets
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)
# Data Loaders
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)

learn = learn.load('model')

def show_preds(input_image, display_label, display_bbox, detection_threshold):

    if detection_threshold==0: detection_threshold=0.5

    img = PIL.Image.fromarray(input_image, 'RGB')

    pred_dict  = model_type.end2end_detect(img, valid_tfms, model, class_map=class_map, detection_threshold=detection_threshold,
                                           display_label=display_label, display_bbox=display_bbox, return_img=True, 
                                           font_size=16, label_color="#FF59D6")

    return pred_dict['img']

# display_chkbox = gr.inputs.CheckboxGroup(["Label", "BBox"], label="Display", default=True)
display_chkbox_label = gr.inputs.Checkbox(label="Label", default=True)
display_chkbox_box = gr.inputs.Checkbox(label="Box", default=True)

detection_threshold_slider = gr.inputs.Slider(minimum=0, maximum=1, step=0.1, default=0.5, label="Detection Threshold")

outputs = gr.outputs.Image(type="pil")

# Option 1: Get an image from local drive
gr_interface = gr.Interface(fn=show_preds, inputs=["image", display_chkbox_label, display_chkbox_box,  detection_threshold_slider], outputs=outputs, title='IceApp - Fridge Object')

# #  Option 2: Grab an image from a webcam
# gr_interface = gr.Interface(fn=show_preds, inputs=["webcam", display_chkbox_label, display_chkbox_box,  detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=False)

# #  Option 3: Continuous image stream from the webcam
# gr_interface = gr.Interface(fn=show_preds, inputs=["webcam", display_chkbox_label, display_chkbox_box,  detection_threshold_slider], outputs=outputs, title='IceApp - COCO', live=True)


gr_interface.launch(inline=False, share=True, debug=True)