import argparse
import os
import os.path as osp
from copy import deepcopy
from functools import partial
from io import BytesIO

import cv2
import gradio as gr
import numpy as np
import onnx
import onnxsim
import supervision as sv
import torch
from PIL import Image
from torchvision.ops import nms

from mmengine.config import Config, ConfigDict, DictAction
from mmengine.dataset import Compose
from mmengine.runner import Runner
from mmengine.runner.amp import autocast
from mmdet.datasets import CocoDataset
from mmyolo.registry import RUNNERS

from yolo_world.easydeploy.model import DeployModel, MMYOLOBackend

BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator()
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)
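# Note: recent supervision releases deprecate BoundingBoxAnnotator in favor
# of BoxAnnotator; if these constructors raise, check the installed
# supervision version.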
|
|
def parse_args():
    parser = argparse.ArgumentParser(description='YOLO-World Demo')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--work-dir',
        help='the directory to save the file containing evaluation metrics')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config; key-value pairs '
        'in xxx=yyy format will be merged into the config file. If the '
        'value to be overwritten is a list, it should be like key="[a,b]" '
        'or key=a,b. It also allows nested list/tuple values, e.g. '
        'key="[(a,b),(c,d)]". Note that the quotation marks are necessary '
        'and that no white space is allowed.')
    return parser.parse_args()
|
|
def run_image(runner,
              image,
              text,
              max_num_boxes,
              score_thr,
              nms_thr,
              image_path='./work_dirs/demo.png'):
    """Run open-vocabulary detection on a single PIL image."""
    os.makedirs('./work_dirs', exist_ok=True)
    image.save(image_path)
    # One sub-list of prompts per class, plus a trailing padding prompt.
    texts = [[t.strip()] for t in text.split(',')] + [[' ']]
    data_info = dict(img_id=0, img_path=image_path, texts=texts)
    data_info = runner.pipeline(data_info)
    data_batch = dict(inputs=data_info['inputs'].unsqueeze(0),
                      data_samples=[data_info['data_samples']])

    with autocast(enabled=False), torch.no_grad():
        output = runner.model.test_step(data_batch)[0]
    pred_instances = output.pred_instances

    # Class-agnostic NMS followed by score filtering.
    keep = nms(pred_instances.bboxes,
               pred_instances.scores,
               iou_threshold=nms_thr)
    pred_instances = pred_instances[keep]
    pred_instances = pred_instances[pred_instances.scores.float() > score_thr]

    # Keep at most `max_num_boxes` highest-scoring detections.
    if len(pred_instances.scores) > max_num_boxes:
        indices = pred_instances.scores.float().topk(max_num_boxes)[1]
        pred_instances = pred_instances[indices]

    pred_instances = pred_instances.cpu().numpy()
    detections = sv.Detections(xyxy=pred_instances['bboxes'],
                               class_id=pred_instances['labels'],
                               confidence=pred_instances['scores'])
    labels = [
        f"{texts[class_id][0]} {confidence:0.2f}"
        for class_id, confidence in zip(detections.class_id,
                                        detections.confidence)
    ]

    # The annotators draw in BGR; convert back to RGB for PIL.
    image = np.array(image)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = BOUNDING_BOX_ANNOTATOR.annotate(image, detections)
    image = LABEL_ANNOTATOR.annotate(image, detections, labels=labels)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return Image.fromarray(image)
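# Minimal usage sketch for run_image outside the Gradio UI (assumes a
# `runner` prepared as in the __main__ block below; 'dog.jpg' is a
# hypothetical input path):
#
#   from PIL import Image
#   annotated = run_image(runner, Image.open('dog.jpg'), 'person, dog',
#                         max_num_boxes=100, score_thr=0.05, nms_thr=0.5)
#   annotated.save('./work_dirs/annotated.png')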
|
|
def export_model(runner,
                 checkpoint,
                 text,
                 max_num_boxes,
                 score_thr,
                 nms_thr):
    backend = MMYOLOBackend.ONNXRUNTIME
    postprocess_cfg = ConfigDict(
        pre_top_k=10 * max_num_boxes,
        keep_top_k=max_num_boxes,
        iou_threshold=nms_thr,
        score_threshold=score_thr)

    # Bake the text prompts into the model weights so the exported ONNX
    # graph takes only an image as input. The prompt structure matches
    # run_image: one sub-list per class, plus a trailing padding prompt.
    base_model = deepcopy(runner.model)
    texts = [[t.strip()] for t in text.split(',')] + [[' ']]
    base_model.reparameterize(texts)
    deploy_model = DeployModel(baseModel=base_model,
                               backend=backend,
                               postprocess_cfg=postprocess_cfg)
    deploy_model.eval()

    device = next(iter(base_model.parameters())).device
    fake_input = torch.ones([1, 3, 640, 640], device=device)
    # Dry run before export.
    deploy_model(fake_input)

    os.makedirs('work_dirs', exist_ok=True)
    save_onnx_path = os.path.join('work_dirs', 'yolow-l.onnx')

    with BytesIO() as f:
        output_names = ['num_dets', 'boxes', 'scores', 'labels']
        torch.onnx.export(deploy_model,
                          fake_input,
                          f,
                          input_names=['images'],
                          output_names=output_names,
                          opset_version=12)
        f.seek(0)
        onnx_model = onnx.load(f)
        onnx.checker.check_model(onnx_model)
    # Simplify the graph before saving.
    onnx_model, check = onnxsim.simplify(onnx_model)
    onnx.save(onnx_model, save_onnx_path)

    del base_model
    del deploy_model
    del onnx_model
    return gr.update(visible=True), save_onnx_path
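# Minimal sketch of inference with the exported file via onnxruntime (an
# assumption: onnxruntime is installed separately; the dummy input stands in
# for a preprocessed 1x3x640x640 RGB image):
#
#   import onnxruntime as ort
#   session = ort.InferenceSession('work_dirs/yolow-l.onnx')
#   num_dets, boxes, scores, labels = session.run(
#       None, {'images': np.ones((1, 3, 640, 640), np.float32)})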
|
|
def demo(runner, args, cfg):
    with gr.Blocks(title="YOLO-World") as demo:
        with gr.Row():
            gr.Markdown('<h1><center>YOLO-World: Real-Time Open-Vocabulary '
                        'Object Detector</center></h1>')
        with gr.Row():
            with gr.Column(scale=0.3):
                with gr.Row():
                    image = gr.Image(type='pil', label='input image')
                    input_text = gr.Textbox(
                        lines=7,
                        label='Enter the classes to be detected, '
                        'separated by comma',
                        value=', '.join(CocoDataset.METAINFO['classes']),
                        elem_id='textbox')
                with gr.Row():
                    submit = gr.Button('Submit')
                    clear = gr.Button('Clear')
                with gr.Row():
                    export = gr.Button('Deploy and Export ONNX Model')
                    # Hidden until an export completes; export_model
                    # reveals it via gr.update(visible=True).
                    out_download = gr.File(label='Download link',
                                           visible=False,
                                           height=30,
                                           interactive=False)
                max_num_boxes = gr.Slider(minimum=1,
                                          maximum=300,
                                          value=100,
                                          step=1,
                                          interactive=True,
                                          label='Maximum Number of Boxes')
                score_thr = gr.Slider(minimum=0,
                                      maximum=1,
                                      value=0.05,
                                      step=0.001,
                                      interactive=True,
                                      label='Score Threshold')
                nms_thr = gr.Slider(minimum=0,
                                    maximum=1,
                                    value=0.5,
                                    step=0.001,
                                    interactive=True,
                                    label='NMS Threshold')
            with gr.Column(scale=0.7):
                output_image = gr.Image(type='pil', label='output image')

        submit.click(partial(run_image, runner),
                     [image, input_text, max_num_boxes, score_thr, nms_thr],
                     [output_image])
        clear.click(lambda: [None, '', None], None,
                    [image, input_text, output_image])
        # export_model returns (visibility update, file path); both target
        # the download widget.
        export.click(partial(export_model, runner, args.checkpoint),
                     [input_text, max_num_boxes, score_thr, nms_thr],
                     [out_download, out_download])
    # Bind to all interfaces so the demo is reachable from other machines.
    demo.launch(server_name='0.0.0.0')
|
|
if __name__ == '__main__':
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # Resolve the work directory: CLI flag, then config value, then a
    # default derived from the config file name.
    os.makedirs('./work_dirs', exist_ok=True)
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    cfg.load_from = args.checkpoint

    if 'runner_type' not in cfg:
        runner = Runner.from_cfg(cfg)
    else:
        runner = RUNNERS.build(cfg)

    runner.call_hook('before_run')
    runner.load_or_resume()
    # Reuse the test-time pipeline for single-image preprocessing
    # in run_image.
    pipeline = cfg.test_dataloader.dataset.pipeline
    runner.pipeline = Compose(pipeline)
    runner.model.eval()
    demo(runner, args, cfg)
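# Example invocation, assuming this file is saved as demo.py (the config and
# checkpoint paths are placeholders for your own files):
#   python demo.py path/to/config.py path/to/checkpoint.pth \
#       [--work-dir DIR] [--cfg-options KEY=VALUE ...]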