YOLOW

Sleeping

YOLOW / yolo_world / easydeploy / nms / trt_nms.py

stevengrove

initial commit

186701e 9 months ago

8.05 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import torch
	from torch import Tensor

	# Right-multiplication matrix (used as ``boxes @ _XYWH2XYXY``) that turns
	# centre-format boxes [x, y, w, h] into corner-format boxes
	# [x - w/2, y - h/2, x + w/2, y + h/2]. Row i holds the contribution of
	# input component i to each of the four output components.
	_XYWH2XYXY = torch.tensor(
	    [
	        [1.0, 0.0, 1.0, 0.0],  # x
	        [0.0, 1.0, 0.0, 1.0],  # y
	        [-0.5, 0.0, 0.5, 0.0],  # w
	        [0.0, -0.5, 0.0, 0.5],  # h
	    ],
	    dtype=torch.float32,
	)


	class TRTEfficientNMSop(torch.autograd.Function):
	    """Autograd function standing in for the ``TRT::EfficientNMS_TRT`` op.

	    ``forward`` does not run any suppression: it only fabricates random
	    tensors with the four output shapes. ``symbolic`` is what emits the
	    real graph node (presumably consumed by the ONNX exporter; confirm
	    against the export pipeline).
	    """

	    @staticmethod
	    def forward(
	        ctx,
	        boxes: Tensor,
	        scores: Tensor,
	        background_class: int = -1,
	        box_coding: int = 0,
	        iou_threshold: float = 0.45,
	        max_output_boxes: int = 100,
	        plugin_version: str = '1',
	        score_activation: int = 0,
	        score_threshold: float = 0.25,
	    ):
	        """Return random placeholder outputs shaped like the op results.

	        Only ``scores.shape`` and ``max_output_boxes`` affect the result;
	        every other argument is accepted but not read here.

	        Args:
	            ctx: Autograd context (unused).
	            boxes (Tensor): Candidate boxes (unused in this method).
	            scores (Tensor): 3-D tensor; its first and last dimensions
	                are taken as batch size and number of classes.
	            max_output_boxes (int): Size of the per-image output slots.

	        Returns:
	            tuple[Tensor, Tensor, Tensor, Tensor]:
	                ``num_det`` int32 of shape [N, 1],
	                ``det_boxes`` of shape [N, max_output_boxes, 4],
	                ``det_scores`` of shape [N, max_output_boxes],
	                ``det_classes`` int32 of shape [N, max_output_boxes].
	        """
	        n_batch, _, n_classes = scores.shape
	        slots = (n_batch, max_output_boxes)

	        # The four draws are kept in this order so that results under a
	        # fixed RNG seed are unchanged.
	        num_det = torch.randint(
	            0, max_output_boxes, (n_batch, 1), dtype=torch.int32)
	        det_boxes = torch.randn(n_batch, max_output_boxes, 4)
	        det_scores = torch.randn(n_batch, max_output_boxes)
	        det_classes = torch.randint(0, n_classes, slots, dtype=torch.int32)
	        return num_det, det_boxes, det_scores, det_classes

	    @staticmethod
	    def symbolic(g,
	                 boxes: Tensor,
	                 scores: Tensor,
	                 background_class: int = -1,
	                 box_coding: int = 0,
	                 iou_threshold: float = 0.45,
	                 max_output_boxes: int = 100,
	                 plugin_version: str = '1',
	                 score_activation: int = 0,
	                 score_threshold: float = 0.25):
	        """Emit a ``TRT::EfficientNMS_TRT`` node with four outputs.

	        All non-tensor arguments are forwarded as node attributes; the
	        ``_i`` / ``_f`` / ``_s`` suffixes on the keyword names follow the
	        ``g.op`` convention for int / float / string attributes.

	        Returns:
	            tuple: ``(num_det, det_boxes, det_scores, det_classes)`` graph
	            values, in the order produced by the node.
	        """
	        plugin_attrs = dict(
	            background_class_i=background_class,
	            box_coding_i=box_coding,
	            iou_threshold_f=iou_threshold,
	            max_output_boxes_i=max_output_boxes,
	            plugin_version_s=plugin_version,
	            score_activation_i=score_activation,
	            score_threshold_f=score_threshold,
	        )
	        node_outputs = g.op(
	            'TRT::EfficientNMS_TRT', boxes, scores, **plugin_attrs, outputs=4)
	        num_det, det_boxes, det_scores, det_classes = node_outputs
	        return num_det, det_boxes, det_scores, det_classes


	class TRTbatchedNMSop(torch.autograd.Function):
	    """Autograd function standing in for ``TRT::BatchedNMSDynamic_TRT``.

	    ``forward`` only fabricates random tensors with the output shapes;
	    ``symbolic`` emits the actual graph node.
	    """

	    @staticmethod
	    def forward(
	        ctx,
	        boxes: Tensor,
	        scores: Tensor,
	        plugin_version: str = '1',
	        shareLocation: int = 1,
	        backgroundLabelId: int = -1,
	        numClasses: int = 80,
	        topK: int = 1000,
	        keepTopK: int = 100,
	        scoreThreshold: float = 0.25,
	        iouThreshold: float = 0.45,
	        isNormalized: int = 0,
	        clipBoxes: int = 0,
	        scoreBits: int = 16,
	        caffeSemantics: int = 1,
	    ):
	        """Return random placeholder outputs shaped like the op results.

	        Only ``scores.shape`` and ``keepTopK`` affect the result. The
	        class count is read from the last dimension of ``scores``; the
	        ``numClasses`` argument is not consulted here.

	        Returns:
	            tuple[Tensor, Tensor, Tensor, Tensor]:
	                ``num_det`` int32 of shape [N, 1],
	                ``det_boxes`` of shape [N, keepTopK, 4],
	                ``det_scores`` of shape [N, keepTopK],
	                ``det_classes`` float tensor of shape [N, keepTopK].
	        """
	        n_batch, _, class_count = scores.shape
	        slots = (n_batch, keepTopK)

	        # Draw order is significant for reproducibility under a fixed seed.
	        num_det = torch.randint(0, keepTopK, (n_batch, 1), dtype=torch.int32)
	        det_boxes = torch.randn(n_batch, keepTopK, 4)
	        det_scores = torch.randn(n_batch, keepTopK)
	        # Class ids are drawn as integers and then cast to float.
	        class_ids = torch.randint(0, class_count, slots)
	        det_classes = class_ids.float()
	        return num_det, det_boxes, det_scores, det_classes

	    @staticmethod
	    def symbolic(
	        g,
	        boxes: Tensor,
	        scores: Tensor,
	        plugin_version: str = '1',
	        shareLocation: int = 1,
	        backgroundLabelId: int = -1,
	        numClasses: int = 80,
	        topK: int = 1000,
	        keepTopK: int = 100,
	        scoreThreshold: float = 0.25,
	        iouThreshold: float = 0.45,
	        isNormalized: int = 0,
	        clipBoxes: int = 0,
	        scoreBits: int = 16,
	        caffeSemantics: int = 1,
	    ):
	        """Emit a ``TRT::BatchedNMSDynamic_TRT`` node with four outputs.

	        Every non-tensor argument becomes a node attribute; the
	        ``_i`` / ``_f`` / ``_s`` keyword suffixes follow the ``g.op``
	        convention for int / float / string attributes.

	        Returns:
	            tuple: ``(num_det, det_boxes, det_scores, det_classes)`` graph
	            values, in the order produced by the node.
	        """
	        plugin_attrs = dict(
	            shareLocation_i=shareLocation,
	            plugin_version_s=plugin_version,
	            backgroundLabelId_i=backgroundLabelId,
	            numClasses_i=numClasses,
	            topK_i=topK,
	            keepTopK_i=keepTopK,
	            scoreThreshold_f=scoreThreshold,
	            iouThreshold_f=iouThreshold,
	            isNormalized_i=isNormalized,
	            clipBoxes_i=clipBoxes,
	            scoreBits_i=scoreBits,
	            caffeSemantics_i=caffeSemantics,
	        )
	        node_outputs = g.op(
	            'TRT::BatchedNMSDynamic_TRT',
	            boxes,
	            scores,
	            **plugin_attrs,
	            outputs=4)
	        num_det, det_boxes, det_scores, det_classes = node_outputs
	        return num_det, det_boxes, det_scores, det_classes


	def _efficient_nms(
	    boxes: Tensor,
	    scores: Tensor,
	    max_output_boxes_per_class: int = 1000,
	    iou_threshold: float = 0.5,
	    score_threshold: float = 0.05,
	    pre_top_k: int = -1,
	    keep_top_k: int = 100,
	    box_coding: int = 0,
	):
	    """Invoke `TRTEfficientNMSop` with this module's NMS calling convention.

	    Args:
	        boxes (Tensor): The bounding boxes of shape [N, num_boxes, 4].
	        scores (Tensor): The detection scores of shape
	            [N, num_boxes, num_classes].
	        max_output_boxes_per_class (int): Accepted for signature
	            compatibility; not used by this function. Defaults to 1000.
	        iou_threshold (float): IOU threshold of nms. Defaults to 0.5.
	        score_threshold (float): Score threshold of nms.
	            Defaults to 0.05.
	        pre_top_k (int): Accepted for signature compatibility; not used
	            by this function. Defaults to -1.
	        keep_top_k (int): Number of boxes kept after nms; forwarded as
	            the op's ``max_output_boxes``. Defaults to 100.
	        box_coding (int): Bounding box format for nms.
	            0 means [x1, y1, x2, y2], 1 means [x, y, w, h].
	            Defaults to 0.

	    Returns:
	        tuple[Tensor, Tensor, Tensor, Tensor]:
	            (num_det, det_boxes, det_scores, det_classes),
	            `num_det` of shape [N, 1]
	            `det_boxes` of shape [N, keep_top_k, 4]
	            `det_scores` of shape [N, keep_top_k]
	            `det_classes` of shape [N, keep_top_k]
	    """
	    # Fixed op settings that callers of this wrapper cannot override.
	    background_class = -1
	    plugin_version = '1'
	    score_activation = 0

	    nms_outputs = TRTEfficientNMSop.apply(
	        boxes,
	        scores,
	        background_class,
	        box_coding,
	        iou_threshold,
	        keep_top_k,
	        plugin_version,
	        score_activation,
	        score_threshold,
	    )
	    num_det, det_boxes, det_scores, det_classes = nms_outputs
	    return num_det, det_boxes, det_scores, det_classes


	def _batched_nms(
	    boxes: Tensor,
	    scores: Tensor,
	    max_output_boxes_per_class: int = 1000,
	    iou_threshold: float = 0.5,
	    score_threshold: float = 0.05,
	    pre_top_k: int = -1,
	    keep_top_k: int = 100,
	    box_coding: int = 0,
	):
	    """Invoke `TRTbatchedNMSop` with this module's NMS calling convention.

	    Args:
	        boxes (Tensor): The bounding boxes of shape [N, num_boxes, 4].
	            A tensor that is not already 4-D gets a new axis inserted at
	            position 2 before being handed to the op.
	        scores (Tensor): The detection scores of shape
	            [N, num_boxes, num_classes]; must be 3-D.
	        max_output_boxes_per_class (int): Accepted for signature
	            compatibility; not used by this function. Defaults to 1000.
	        iou_threshold (float): IOU threshold of nms. Defaults to 0.5.
	        score_threshold (float): Score threshold of nms.
	            Defaults to 0.05.
	        pre_top_k (int): Number of top K boxes to keep before nms. The
	            value passed to the op is ``min(pre_top_k, 4096)``.
	            Defaults to -1.
	        keep_top_k (int): Number of top K boxes to keep after nms.
	            Defaults to 100.
	        box_coding (int): Bounding box format of ``boxes``.
	            0 means [x1, y1, x2, y2]. 1 means centre-format
	            [x, y, w, h], which is converted to corners here.
	            Defaults to 0.

	    Returns:
	        tuple[Tensor, Tensor, Tensor, Tensor]:
	            (num_det, det_boxes, det_scores, det_classes),
	            `num_det` of shape [N, 1]
	            `det_boxes` of shape [N, keep_top_k, 4]
	            `det_scores` of shape [N, keep_top_k]
	            `det_classes` of shape [N, keep_top_k], cast to int32
	    """
	    if box_coding == 1:
	        # Convert [x, y, w, h] to corner coordinates on the boxes' device.
	        to_corners = _XYWH2XYXY.to(boxes.device)
	        boxes = boxes @ to_corners
	    if boxes.dim() != 4:
	        boxes = boxes.unsqueeze(2)
	    _, _, class_count = scores.shape

	    plugin_args = (
	        '1',  # plugin_version
	        1,  # shareLocation
	        -1,  # backgroundLabelId
	        int(class_count),  # numClasses
	        min(pre_top_k, 4096),  # topK
	        keep_top_k,  # keepTopK
	        score_threshold,  # scoreThreshold
	        iou_threshold,  # iouThreshold
	        0,  # isNormalized
	        0,  # clipBoxes
	        16,  # scoreBits
	        1,  # caffeSemantics
	    )
	    num_det, det_boxes, det_scores, det_classes = TRTbatchedNMSop.apply(
	        boxes, scores, *plugin_args)

	    # The op yields class ids as floats; hand integers back to the caller.
	    return num_det, det_boxes, det_scores, det_classes.int()


	def efficient_nms(*args, **kwargs):
	    """Public wrapper that forwards all arguments to `_efficient_nms`.

	    Positional and keyword arguments are passed through unchanged, so
	    callers may use e.g. ``efficient_nms(boxes, scores, iou_threshold=0.6)``.

	    Returns:
	        Whatever `_efficient_nms` returns for the given arguments.
	    """
	    return _efficient_nms(*args, **kwargs)


	def batched_nms(*args, **kwargs):
	    """Public wrapper that forwards all arguments to `_batched_nms`.

	    Positional and keyword arguments are passed through unchanged, so
	    callers may use e.g. ``batched_nms(boxes, scores, keep_top_k=50)``.

	    Returns:
	        Whatever `_batched_nms` returns for the given arguments.
	    """
	    return _batched_nms(*args, **kwargs)