danieladejumo committed on
Commit • 1ba06ec
1 Parent(s): 062916a
Files changed:
- .gitignore +3 -0
- Jupyternote Cheatsheet.ipynb +1 -0
- PyTorch_Object_Detection.ipynb +0 -0
- PyTorch_Object_Tracking.ipynb +1 -0
- __pycache__/models.cpython-37.pyc +0 -0
- __pycache__/sort.cpython-37.pyc +0 -0
- darknet-coco-object_detection.ipynb +0 -0
- models.py +350 -0
- object_tracker.py +110 -0
- sort.py +305 -0
- sort.py.old +317 -0
- utils/__pycache__/__init__.cpython-36.pyc +0 -0
- utils/__pycache__/datasets.cpython-36.pyc +0 -0
- utils/__pycache__/parse_config.cpython-36.pyc +0 -0
- utils/__pycache__/parse_config.cpython-37.pyc +0 -0
- utils/__pycache__/utils.cpython-36.pyc +0 -0
- utils/__pycache__/utils.cpython-37.pyc +0 -0
- utils/datasets.py +121 -0
- utils/parse_config.py +36 -0
- utils/utils.py +258 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+images/*
+videos/*
+config/*
Jupyternote Cheatsheet.ipynb
ADDED
@@ -0,0 +1 @@
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Jupyternote Cheatsheet.ipynb","provenance":[],"mount_file_id":"1rMSETYdooFC6fVgT0PaOovnBrB4ZWoys","authorship_tag":"ABX9TyN4O59ZYPVT0rGiUB3bfznT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Models"],"metadata":{"id":"ODx9TIOB4tCe"}},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BelRHeLw4qyQ","executionInfo":{"status":"ok","timestamp":1654537166220,"user_tz":-60,"elapsed":22,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"60695f20-3957-4958-aabd-c2ecff870977"},"outputs":[{"output_type":"stream","name":"stdout","text":["Writing models.py\n"]}],"source":["%%writefile models.py\n","from __future__ import division\n","\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","from torch.autograd import Variable\n","import numpy as np\n","\n","from PIL import Image\n","\n","from utils.parse_config import *\n","from utils.utils import build_targets\n","from collections import defaultdict\n","\n","##import matplotlib.pyplot as plt\n","##import matplotlib.patches as patches\n","\n","\n","def create_modules(module_defs):\n"," \"\"\"\n"," Constructs module list of layer blocks from module configuration in module_defs\n"," \"\"\"\n"," hyperparams = module_defs.pop(0)\n"," output_filters = [int(hyperparams[\"channels\"])]\n"," module_list = nn.ModuleList()\n"," for i, module_def in enumerate(module_defs):\n"," modules = nn.Sequential()\n","\n"," if module_def[\"type\"] == \"convolutional\":\n"," bn = int(module_def[\"batch_normalize\"])\n"," filters = int(module_def[\"filters\"])\n"," kernel_size = int(module_def[\"size\"])\n"," pad = (kernel_size - 1) // 2 if int(module_def[\"pad\"]) else 0\n"," modules.add_module(\n"," \"conv_%d\" % i,\n"," nn.Conv2d(\n"," in_channels=output_filters[-1],\n"," out_channels=filters,\n"," kernel_size=kernel_size,\n"," stride=int(module_def[\"stride\"]),\n"," padding=pad,\n"," bias=not bn,\n"," ),\n"," )\n"," if bn:\n"," modules.add_module(\"batch_norm_%d\" % i, nn.BatchNorm2d(filters))\n"," if module_def[\"activation\"] == \"leaky\":\n"," modules.add_module(\"leaky_%d\" % i, nn.LeakyReLU(0.1))\n","\n"," elif module_def[\"type\"] == \"maxpool\":\n"," kernel_size = int(module_def[\"size\"])\n"," stride = int(module_def[\"stride\"])\n"," if kernel_size == 2 and stride == 1:\n"," padding = nn.ZeroPad2d((0, 1, 0, 1))\n"," modules.add_module(\"_debug_padding_%d\" % i, padding)\n"," maxpool = nn.MaxPool2d(\n"," kernel_size=int(module_def[\"size\"]),\n"," stride=int(module_def[\"stride\"]),\n"," padding=int((kernel_size - 1) // 2),\n"," )\n"," modules.add_module(\"maxpool_%d\" % i, maxpool)\n","\n"," elif module_def[\"type\"] == \"upsample\":\n"," upsample = nn.Upsample(scale_factor=int(module_def[\"stride\"]), mode=\"nearest\")\n"," modules.add_module(\"upsample_%d\" % i, upsample)\n","\n"," elif module_def[\"type\"] == \"route\":\n"," layers = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," filters = sum([output_filters[layer_i] for layer_i in layers])\n"," modules.add_module(\"route_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"shortcut\":\n"," filters = output_filters[int(module_def[\"from\"])]\n"," modules.add_module(\"shortcut_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"yolo\":\n"," anchor_idxs = [int(x) for x in 
module_def[\"mask\"].split(\",\")]\n"," # Extract anchors\n"," anchors = [int(x) for x in module_def[\"anchors\"].split(\",\")]\n"," anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]\n"," anchors = [anchors[i] for i in anchor_idxs]\n"," num_classes = int(module_def[\"classes\"])\n"," img_height = int(hyperparams[\"height\"])\n"," # Define detection layer\n"," yolo_layer = YOLOLayer(anchors, num_classes, img_height)\n"," modules.add_module(\"yolo_%d\" % i, yolo_layer)\n"," # Register module list and number of output filters\n"," module_list.append(modules)\n"," output_filters.append(filters)\n","\n"," return hyperparams, module_list\n","\n","\n","class EmptyLayer(nn.Module):\n"," \"\"\"Placeholder for 'route' and 'shortcut' layers\"\"\"\n","\n"," def __init__(self):\n"," super(EmptyLayer, self).__init__()\n","\n","\n","class YOLOLayer(nn.Module):\n"," \"\"\"Detection layer\"\"\"\n","\n"," def __init__(self, anchors, num_classes, img_dim):\n"," super(YOLOLayer, self).__init__()\n"," self.anchors = anchors\n"," self.num_anchors = len(anchors)\n"," self.num_classes = num_classes\n"," self.bbox_attrs = 5 + num_classes\n"," self.image_dim = img_dim\n"," self.ignore_thres = 0.5\n"," self.lambda_coord = 1\n","\n"," self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss\n"," self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss\n"," self.ce_loss = nn.CrossEntropyLoss() # Class loss\n","\n"," def forward(self, x, targets=None):\n"," nA = self.num_anchors\n"," nB = x.size(0)\n"," nG = x.size(2)\n"," stride = self.image_dim / nG\n","\n"," # Tensors for cuda support\n"," FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor\n"," LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor\n"," ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor\n","\n"," prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()\n","\n"," # Get outputs\n"," x = torch.sigmoid(prediction[..., 0]) # Center x\n"," y = torch.sigmoid(prediction[..., 1]) # Center y\n"," w = prediction[..., 2] # Width\n"," h = prediction[..., 3] # Height\n"," pred_conf = torch.sigmoid(prediction[..., 4]) # Conf\n"," pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.\n","\n"," # Calculate offsets for each grid\n"," grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)\n"," grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)\n"," scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])\n"," anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))\n"," anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))\n","\n"," # Add offset and scale with anchors\n"," pred_boxes = FloatTensor(prediction[..., :4].shape)\n"," pred_boxes[..., 0] = x.data + grid_x\n"," pred_boxes[..., 1] = y.data + grid_y\n"," pred_boxes[..., 2] = torch.exp(w.data) * anchor_w\n"," pred_boxes[..., 3] = torch.exp(h.data) * anchor_h\n","\n"," # Training\n"," if targets is not None:\n","\n"," if x.is_cuda:\n"," self.mse_loss = self.mse_loss.cuda()\n"," self.bce_loss = self.bce_loss.cuda()\n"," self.ce_loss = self.ce_loss.cuda()\n","\n"," nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(\n"," pred_boxes=pred_boxes.cpu().data,\n"," pred_conf=pred_conf.cpu().data,\n"," pred_cls=pred_cls.cpu().data,\n"," target=targets.cpu().data,\n"," anchors=scaled_anchors.cpu().data,\n"," num_anchors=nA,\n"," num_classes=self.num_classes,\n"," grid_size=nG,\n"," 
ignore_thres=self.ignore_thres,\n"," img_dim=self.image_dim,\n"," )\n","\n"," nProposals = int((pred_conf > 0.5).sum().item())\n"," recall = float(nCorrect / nGT) if nGT else 1\n"," precision = float(nCorrect / nProposals)\n","\n"," # Handle masks\n"," mask = Variable(mask.type(ByteTensor))\n"," conf_mask = Variable(conf_mask.type(ByteTensor))\n","\n"," # Handle target variables\n"," tx = Variable(tx.type(FloatTensor), requires_grad=False)\n"," ty = Variable(ty.type(FloatTensor), requires_grad=False)\n"," tw = Variable(tw.type(FloatTensor), requires_grad=False)\n"," th = Variable(th.type(FloatTensor), requires_grad=False)\n"," tconf = Variable(tconf.type(FloatTensor), requires_grad=False)\n"," tcls = Variable(tcls.type(LongTensor), requires_grad=False)\n","\n"," # Get conf mask where gt and where there is no gt\n"," conf_mask_true = mask\n"," conf_mask_false = conf_mask - mask\n","\n"," # Mask outputs to ignore non-existing objects\n"," loss_x = self.mse_loss(x[mask], tx[mask])\n"," loss_y = self.mse_loss(y[mask], ty[mask])\n"," loss_w = self.mse_loss(w[mask], tw[mask])\n"," loss_h = self.mse_loss(h[mask], th[mask])\n"," loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(\n"," pred_conf[conf_mask_true], tconf[conf_mask_true]\n"," )\n"," loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))\n"," loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls\n","\n"," return (\n"," loss,\n"," loss_x.item(),\n"," loss_y.item(),\n"," loss_w.item(),\n"," loss_h.item(),\n"," loss_conf.item(),\n"," loss_cls.item(),\n"," recall,\n"," precision,\n"," )\n","\n"," else:\n"," # If not in training phase return predictions\n"," output = torch.cat(\n"," (\n"," pred_boxes.view(nB, -1, 4) * stride,\n"," pred_conf.view(nB, -1, 1),\n"," pred_cls.view(nB, -1, self.num_classes),\n"," ),\n"," -1,\n"," )\n"," return output\n","\n","\n","class Darknet(nn.Module):\n"," \"\"\"YOLOv3 object detection model\"\"\"\n","\n"," def __init__(self, config_path, img_size=416):\n"," super(Darknet, self).__init__()\n"," self.module_defs = parse_model_config(config_path)\n"," self.hyperparams, self.module_list = create_modules(self.module_defs)\n"," self.img_size = img_size\n"," self.seen = 0\n"," self.header_info = np.array([0, 0, 0, self.seen, 0])\n"," self.loss_names = [\"x\", \"y\", \"w\", \"h\", \"conf\", \"cls\", \"recall\", \"precision\"]\n","\n"," def forward(self, x, targets=None):\n"," is_training = targets is not None\n"," output = []\n"," self.losses = defaultdict(float)\n"," layer_outputs = []\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] in [\"convolutional\", \"upsample\", \"maxpool\"]:\n"," x = module(x)\n"," elif module_def[\"type\"] == \"route\":\n"," layer_i = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," x = torch.cat([layer_outputs[i] for i in layer_i], 1)\n"," elif module_def[\"type\"] == \"shortcut\":\n"," layer_i = int(module_def[\"from\"])\n"," x = layer_outputs[-1] + layer_outputs[layer_i]\n"," elif module_def[\"type\"] == \"yolo\":\n"," # Train phase: get loss\n"," if is_training:\n"," x, *losses = module[0](x, targets)\n"," for name, loss in zip(self.loss_names, losses):\n"," self.losses[name] += loss\n"," # Test phase: Get detections\n"," else:\n"," x = module(x)\n"," output.append(x)\n"," layer_outputs.append(x)\n","\n"," self.losses[\"recall\"] /= 3\n"," self.losses[\"precision\"] /= 3\n"," return sum(output) if is_training else torch.cat(output, 
1)\n","\n"," def load_weights(self, weights_path):\n"," \"\"\"Parses and loads the weights stored in 'weights_path'\"\"\"\n","\n"," # Open the weights file\n"," fp = open(weights_path, \"rb\")\n"," header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values\n","\n"," # Needed to write header when saving weights\n"," self.header_info = header\n","\n"," self.seen = header[3]\n"," weights = np.fromfile(fp, dtype=np.float32) # The rest are weights\n"," fp.close()\n","\n"," ptr = 0\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," if module_def[\"batch_normalize\"]:\n"," # Load BN bias, weights, running mean and running variance\n"," bn_layer = module[1]\n"," num_b = bn_layer.bias.numel() # Number of biases\n"," # Bias\n"," bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)\n"," bn_layer.bias.data.copy_(bn_b)\n"," ptr += num_b\n"," # Weight\n"," bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)\n"," bn_layer.weight.data.copy_(bn_w)\n"," ptr += num_b\n"," # Running Mean\n"," bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)\n"," bn_layer.running_mean.data.copy_(bn_rm)\n"," ptr += num_b\n"," # Running Var\n"," bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)\n"," bn_layer.running_var.data.copy_(bn_rv)\n"," ptr += num_b\n"," else:\n"," # Load conv. bias\n"," num_b = conv_layer.bias.numel()\n"," conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)\n"," conv_layer.bias.data.copy_(conv_b)\n"," ptr += num_b\n"," # Load conv. weights\n"," num_w = conv_layer.weight.numel()\n"," conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)\n"," conv_layer.weight.data.copy_(conv_w)\n"," ptr += num_w\n","\n"," \"\"\"\n"," @:param path - path of the new weights file\n"," @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)\n"," \"\"\"\n","\n"," def save_weights(self, path, cutoff=-1):\n","\n"," fp = open(path, \"wb\")\n"," self.header_info[3] = self.seen\n"," self.header_info.tofile(fp)\n","\n"," # Iterate through layers\n"," for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," # If batch norm, load bn first\n"," if module_def[\"batch_normalize\"]:\n"," bn_layer = module[1]\n"," bn_layer.bias.data.cpu().numpy().tofile(fp)\n"," bn_layer.weight.data.cpu().numpy().tofile(fp)\n"," bn_layer.running_mean.data.cpu().numpy().tofile(fp)\n"," bn_layer.running_var.data.cpu().numpy().tofile(fp)\n"," # Load conv bias\n"," else:\n"," conv_layer.bias.data.cpu().numpy().tofile(fp)\n"," # Load conv weights\n"," conv_layer.weight.data.cpu().numpy().tofile(fp)\n","\n"," fp.close()"]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ar8FuY3z43Fk","executionInfo":{"status":"ok","timestamp":1654537174809,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ce227d02-75a3-477d-becf-e1c2702c7001"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["models.py 
sample_data\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hRxa6vyoGbla","executionInfo":{"status":"ok","timestamp":1654537258168,"user_tz":-60,"elapsed":26,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ccaaf1dc-6769-4093-8769-c8aa3b809bdf"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%writefile Readme.md\n","Are you for real!!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cVKDwgGtGv7g","executionInfo":{"status":"ok","timestamp":1654537404197,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"41cdc392-059d-42be-b267-2a7f66d0a1f6"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["Overwriting Readme.md\n"]}]},{"cell_type":"code","source":["%cd Computer Vision"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"780vJiykHTmT","executionInfo":{"status":"ok","timestamp":1654537643123,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"159eb128-2a7a-41b3-b84c-7d517ff92454"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WeA417NzHe0W","executionInfo":{"status":"ok","timestamp":1654537646111,"user_tz":-60,"elapsed":408,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"036a3c8e-b106-46a8-b5de-b7adf66938ab"},"execution_count":15,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["%%writefile test.and\n","\n","Really I can now write to my drive!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hrSVQd-fHzai","executionInfo":{"status":"ok","timestamp":1654537570112,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"c58a5849-aaba-4fe3-c596-681a5e7df731"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["Writing test.and\n"]}]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jRtg6b1IH8KV","executionInfo":{"status":"ok","timestamp":1654537654214,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"dd49447d-6924-4176-f5a9-ca184b671be8"},"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["cnn-resnet-CIFAR10 darknet-COCO-object_detection feedforward-cnn-MNIST\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","ls -la\n","python --version"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iUpVW1oZIQnl","executionInfo":{"status":"ok","timestamp":1654537857269,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ff54c93a-9f2c-4453-d82f-c6c1683f61b8"},"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":["total 12\n","drwx------ 2 root root 4096 May 17 21:02 cnn-resnet-CIFAR10\n","drwx------ 2 root root 4096 Jun 6 16:38 darknet-COCO-object_detection\n","drwx------ 2 root root 4096 May 17 21:01 feedforward-cnn-MNIST\n","Python 
3.7.13\n"]}]},{"cell_type":"code","source":["%cd ../"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NJ7riTtCI2-V","executionInfo":{"status":"ok","timestamp":1654537984381,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"713f2de8-ae10-46b9-d5e9-bbfa779de2c8"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","cd \"drive/MyDrive/Python/Machine Learning\"\n","ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZAOqxQzPJc1k","executionInfo":{"status":"ok","timestamp":1654538084191,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"7b82c13f-3e14-47b5-bc12-25bdf0dee540"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["Articles\n","Computer Vision\n","Datasets\n","Deep-Learning-with-PyTorch-Jovian\n","Deep RL\n","FastAI Course\n","Generative Models\n","HuggingFace-Deep-RL\n","PyTorch\n","ZeroToGANS_Revision\n"]}]},{"cell_type":"code","source":["%run models.py"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":235},"id":"HvI6SRX8JsS7","executionInfo":{"status":"ok","timestamp":1654538109961,"user_tz":-60,"elapsed":2355,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"08a28f6a-76c2-4eaa-fa36-36d5a8e145ea"},"execution_count":27,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/content/models.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mPIL\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_config\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbuild_targets\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mcollections\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdefaultdict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'utils'"]}]},{"cell_type":"code","source":["%edit"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JLbktoGWJvft","executionInfo":{"status":"ok","timestamp":1654538391516,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"cce69d99-b879-4600-a9ba-9afb5a58b76a"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["IPython will make a temporary file named: /tmp/ipython_edit_nffqr1eo/ipython_edit_msvbxat4.py\n"]}]},{"cell_type":"code","source":["%load models.py"],"metadata":{"id":"PI_bYsujKQfx","executionInfo":{"status":"ok","timestamp":1654538646656,"user_tz":-60,"elapsed":443,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}}},"execution_count":31,"outputs":[]},{"cell_type":"code","source":["%%writefile\n","%run\n","%cd\n","%cat\n","%load [-r, -s]\n","%edit\n","%time, %%time\n","%timeit, %%timeit\n","%%html\n","%env, ...\n","%%file, alias for writefile\n","%%bash\n","%matplotlib [inline, ...]\n","and more\n","%paste, %cpaste\n","%pinfo\n","%who\n","%lsmagic\n","%pwd"],"metadata":{"id":"GdCgR_KCL7MK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%quickref\n","%%js\n","%%python[2, 3]\n","%%latex\n","%%shell\n","%%svg"],"metadata":{"id":"B4QAAv64NHRW","executionInfo":{"status":"ok","timestamp":1654539235689,"user_tz":-60,"elapsed":445,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":38,"outputs":[]},{"cell_type":"code","source":["%system, %%system\n","%sx, %%sx"],"metadata":{"id":"psD0AZ7YNJBZ"},"execution_count":null,"outputs":[]}]}
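The cheatsheet notebook above mostly exercises IPython magics (%%writefile, %run, %cd, %%bash, and so on). As a minimal sketch of how the first two work together — assuming an IPython/Colab kernel, with the file name purely illustrative, not part of this commit:

%%writefile example.py
# Written to disk by the %%writefile cell magic; a later cell can then
# execute it with `%run example.py`. File name and contents are illustrative.
print("written by %%writefile, executed by %run")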
PyTorch_Object_Detection.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
PyTorch_Object_Tracking.ipynb
ADDED
@@ -0,0 +1 @@
{"cells":[{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1VkPIQMBmJMO","executionInfo":{"status":"ok","timestamp":1654700494173,"user_tz":-60,"elapsed":3080,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"1e3cd91c-ca69-486b-b182-d2f31583b645"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"code","source":["%cd ./drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"izkEuuuPmTZf","executionInfo":{"status":"ok","timestamp":1654700494174,"user_tz":-60,"elapsed":11,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"daf99ba8-1ed2-4935-e2b9-3481fef9584a"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection\n"]}]},{"cell_type":"code","source":["!pip install filterpy --quiet"],"metadata":{"id":"qXFwvyxqmXDr","executionInfo":{"status":"ok","timestamp":1654700498924,"user_tz":-60,"elapsed":4757,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["!pip install lap --quiet"],"metadata":{"id":"zqK3-Fn2oRsc","executionInfo":{"status":"ok","timestamp":1654700503070,"user_tz":-60,"elapsed":4165,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","execution_count":5,"metadata":{"id":"kHwKuAkPlviV","executionInfo":{"status":"ok","timestamp":1654700504310,"user_tz":-60,"elapsed":1248,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["from models import *\n","from utils import *\n","\n","import os, sys, time, datetime, random\n","import torch\n","from torch.utils.data import DataLoader\n","from torchvision import datasets, transforms\n","from torch.autograd import Variable\n","\n","import matplotlib.pyplot as plt\n","import matplotlib.patches as patches\n","from PIL import Image"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"N5uZwVlClvie","executionInfo":{"status":"ok","timestamp":1654700508098,"user_tz":-60,"elapsed":3795,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"3a3e75b1-3379-4e79-f418-0b8a48ffb62f"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.\n"," warnings.warn(warning.format(ret))\n"]}],"source":["config_path='config/yolov3.cfg'\n","weights_path='config/yolov3.weights'\n","class_path='config/coco.names'\n","img_size=416\n","conf_thres=0.8\n","nms_thres=0.4\n","\n","# Load model and weights\n","model = Darknet(config_path, img_size=img_size)\n","model.load_weights(weights_path)\n","model.cuda()\n","model.eval()\n","classes = utils.load_classes(class_path)\n","Tensor = 
torch.cuda.FloatTensor"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"n4NNQSOYlvij","executionInfo":{"status":"ok","timestamp":1654700508099,"user_tz":-60,"elapsed":9,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["def detect_image(img):\n"," # scale and pad image\n"," ratio = min(img_size/img.size[0], img_size/img.size[1])\n"," imw = round(img.size[0] * ratio)\n"," imh = round(img.size[1] * ratio)\n"," img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),\n"," transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),\n"," (128,128,128)),\n"," transforms.ToTensor(),\n"," ])\n"," # convert image to Tensor\n"," image_tensor = img_transforms(img).float()\n"," image_tensor = image_tensor.unsqueeze_(0)\n"," input_img = Variable(image_tensor.type(Tensor))\n"," # run inference on the model and get detections\n"," with torch.no_grad():\n"," detections = model(input_img)\n"," detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)\n"," return detections[0]"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VUHsE2-lvik","executionInfo":{"status":"ok","timestamp":1654700521379,"user_tz":-60,"elapsed":13287,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"6144a350-24e9-4a7c-95c5-96bb66b824e0"},"outputs":[{"output_type":"stream","name":"stdout","text":["Populating the interactive namespace from numpy and matplotlib\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['random']\n","`%matplotlib` prevents importing * from pylab and numpy\n"," \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"]},{"output_type":"stream","name":"stdout","text":["Video size 1280 720\n"]},{"output_type":"stream","name":"stderr","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/sort.py:38: NumbaWarning: \n","Compilation is falling back to object mode WITH looplifting enabled because Function \"iou\" failed type inference due to: non-precise type pyobject\n","During: typing of argument at /content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/sort.py (43)\n","\n","File \"sort.py\", line 43:\n","def iou(bb_test,bb_gt):\n"," <source elided>\n"," \"\"\"\n"," xx1 = np.maximum(bb_test[0], bb_gt[0])\n"," ^\n","\n"," @jit\n","/usr/local/lib/python3.7/dist-packages/numba/core/object_mode_passes.py:178: NumbaWarning: Function \"iou\" was compiled in object mode without forceobj=True.\n","\n","File \"sort.py\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n","/usr/local/lib/python3.7/dist-packages/numba/core/object_mode_passes.py:188: NumbaDeprecationWarning: \n","Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n","\n","For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit\n","\n","File \"sort.py\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n"]}],"source":["videopath = './videos/HorseRacing.mp4'\n","\n","%pylab inline \n","import cv2\n","from IPython.display import clear_output\n","\n","cmap = 
plt.get_cmap('tab20b')\n","colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]\n","\n","# initialize Sort object and video capture\n","from sort import *\n","vid = cv2.VideoCapture(videopath)\n","mot_tracker = Sort()\n","\n","fourcc = cv2.VideoWriter_fourcc(*'XVID')\n","ret,frame=vid.read()\n","vw = frame.shape[1]\n","vh = frame.shape[0]\n","print (\"Video size\", vw,vh)\n","outvideo = cv2.VideoWriter(videopath.replace(\".mp4\", \"-det.mp4\"),fourcc,20.0,(vw,vh))\n","\n","# while(True):\n","for ii in range(40):\n"," ret, frame = vid.read()\n"," if not ret:\n"," print(\"Done Procesing Video\")\n"," break\n"," frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n"," pilimg = Image.fromarray(frame)\n"," detections = detect_image(pilimg)\n","\n"," img = np.array(pilimg)\n"," pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))\n"," pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))\n"," unpad_h = img_size - pad_y\n"," unpad_w = img_size - pad_x\n"," if detections is not None:\n"," tracked_objects = mot_tracker.update(detections.cpu())\n","\n"," unique_labels = detections[:, -1].cpu().unique()\n"," n_cls_preds = len(unique_labels)\n"," for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:\n"," box_h = int(((y2 - y1) / unpad_h) * img.shape[0])\n"," box_w = int(((x2 - x1) / unpad_w) * img.shape[1])\n"," y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])\n"," x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])\n","\n"," color = colors[int(obj_id) % len(colors)]\n"," color = [i * 255 for i in color]\n"," cls = classes[int(cls_pred)]\n"," cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)\n"," cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+60, y1), color, -1)\n"," cv2.putText(frame, cls + \"-\" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)\n","\n"," outvideo.write(frame)\n","\n","outvideo.release()"]},{"cell_type":"code","source":["from pathlib import Path\n","from IPython import display as ipythondisplay\n","import base64\n","\n","def show_videos(video_path='', prefix=''):\n"," html = []\n"," for mp4 in Path(video_path).glob(f\"{prefix}*.mp4\"):\n"," video_b64 = base64.b64encode(mp4.read_bytes())\n"," html.append('''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(mp4, video_b64.decode('ascii')))\n"," break\n"," ipythondisplay.display(ipythondisplay.HTML(data=\"<br>\".join(html)))"],"metadata":{"id":"Xx6d_F3VstfA","executionInfo":{"status":"ok","timestamp":1654700521380,"user_tz":-60,"elapsed":19,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["video_b64 = base64.b64encode(Path(videopath.replace(\".mp4\", \"-det.mp4\")).read_bytes())\n","html = '''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(Path(videopath), video_b64.decode('ascii'))\n","ipythondisplay.display(ipythondisplay.HTML(data=html)) "],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":421,"output_embedded_package_id":"1KE6a6Jf_qBrnIGEjOY8GYXagvaaGt84D"},"id":"K3VrKNb3yUbH","executionInfo":{"status":"ok","timestamp":1654700524974,"user_tz":-60,"elapsed":3611,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}},"outputId":"92ea1435-9e17-4167-c094-dd1e380b200f"},"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in https://colab.research.google.com to view."},"metadata":{}}]}],"metadata":{"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"colab":{"name":"PyTorch_Object_Tracking.ipynb","provenance":[],"collapsed_sections":[]},"accelerator":"GPU"},"nbformat":4,"nbformat_minor":0}
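The tracking loop in the notebook above hands YOLO detections to the SORT tracker from sort.py (added later in this commit). A minimal sketch of that hand-off, assuming sort.py and filterpy are importable and that update() accepts the same seven-column detection rows the notebook passes it; the box values are made up:

import numpy as np
from sort import Sort  # sort.py from this commit

mot_tracker = Sort()
# One detection per row, in the layout the YOLO output uses before tracking:
# x1, y1, x2, y2, objectness, class confidence, class index (all illustrative).
frame_dets = np.array([[100.0, 120.0, 220.0, 260.0, 0.9, 0.9, 17.0]])
tracked = mot_tracker.update(frame_dets)
# Each tracked row is unpacked the same way the notebook does:
# x1, y1, x2, y2, track id, class index.
for x1, y1, x2, y2, obj_id, cls_pred in tracked:
    print(int(obj_id), int(cls_pred), x1, y1, x2, y2)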
__pycache__/models.cpython-37.pyc
ADDED
Binary file (9.65 kB).
__pycache__/sort.cpython-37.pyc
ADDED
Binary file (10.2 kB).
darknet-coco-object_detection.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
models.py
ADDED
@@ -0,0 +1,350 @@
1 |
+
from __future__ import division
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torch.autograd import Variable
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
from PIL import Image
|
10 |
+
|
11 |
+
from utils.parse_config import *
|
12 |
+
from utils.utils import build_targets
|
13 |
+
from collections import defaultdict
|
14 |
+
|
15 |
+
##import matplotlib.pyplot as plt
|
16 |
+
##import matplotlib.patches as patches
|
17 |
+
|
18 |
+
|
19 |
+
def create_modules(module_defs):
|
20 |
+
"""
|
21 |
+
Constructs module list of layer blocks from module configuration in module_defs
|
22 |
+
"""
|
23 |
+
hyperparams = module_defs.pop(0)
|
24 |
+
output_filters = [int(hyperparams["channels"])]
|
25 |
+
module_list = nn.ModuleList()
|
26 |
+
for i, module_def in enumerate(module_defs):
|
27 |
+
modules = nn.Sequential()
|
28 |
+
|
29 |
+
if module_def["type"] == "convolutional":
|
30 |
+
bn = int(module_def["batch_normalize"])
|
31 |
+
filters = int(module_def["filters"])
|
32 |
+
kernel_size = int(module_def["size"])
|
33 |
+
pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
|
34 |
+
modules.add_module(
|
35 |
+
"conv_%d" % i,
|
36 |
+
nn.Conv2d(
|
37 |
+
in_channels=output_filters[-1],
|
38 |
+
out_channels=filters,
|
39 |
+
kernel_size=kernel_size,
|
40 |
+
stride=int(module_def["stride"]),
|
41 |
+
padding=pad,
|
42 |
+
bias=not bn,
|
43 |
+
),
|
44 |
+
)
|
45 |
+
if bn:
|
46 |
+
modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
|
47 |
+
if module_def["activation"] == "leaky":
|
48 |
+
modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))
|
49 |
+
|
50 |
+
elif module_def["type"] == "maxpool":
|
51 |
+
kernel_size = int(module_def["size"])
|
52 |
+
stride = int(module_def["stride"])
|
53 |
+
if kernel_size == 2 and stride == 1:
|
54 |
+
padding = nn.ZeroPad2d((0, 1, 0, 1))
|
55 |
+
modules.add_module("_debug_padding_%d" % i, padding)
|
56 |
+
maxpool = nn.MaxPool2d(
|
57 |
+
kernel_size=int(module_def["size"]),
|
58 |
+
stride=int(module_def["stride"]),
|
59 |
+
padding=int((kernel_size - 1) // 2),
|
60 |
+
)
|
61 |
+
modules.add_module("maxpool_%d" % i, maxpool)
|
62 |
+
|
63 |
+
elif module_def["type"] == "upsample":
|
64 |
+
upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
|
65 |
+
modules.add_module("upsample_%d" % i, upsample)
|
66 |
+
|
67 |
+
elif module_def["type"] == "route":
|
68 |
+
layers = [int(x) for x in module_def["layers"].split(",")]
|
69 |
+
filters = sum([output_filters[layer_i] for layer_i in layers])
|
70 |
+
modules.add_module("route_%d" % i, EmptyLayer())
|
71 |
+
|
72 |
+
elif module_def["type"] == "shortcut":
|
73 |
+
filters = output_filters[int(module_def["from"])]
|
74 |
+
modules.add_module("shortcut_%d" % i, EmptyLayer())
|
75 |
+
|
76 |
+
elif module_def["type"] == "yolo":
|
77 |
+
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
|
78 |
+
# Extract anchors
|
79 |
+
anchors = [int(x) for x in module_def["anchors"].split(",")]
|
80 |
+
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
|
81 |
+
anchors = [anchors[i] for i in anchor_idxs]
|
82 |
+
num_classes = int(module_def["classes"])
|
83 |
+
img_height = int(hyperparams["height"])
|
84 |
+
# Define detection layer
|
85 |
+
yolo_layer = YOLOLayer(anchors, num_classes, img_height)
|
86 |
+
modules.add_module("yolo_%d" % i, yolo_layer)
|
87 |
+
# Register module list and number of output filters
|
88 |
+
module_list.append(modules)
|
89 |
+
output_filters.append(filters)
|
90 |
+
|
91 |
+
return hyperparams, module_list
|
92 |
+
|
93 |
+
|
94 |
+
class EmptyLayer(nn.Module):
|
95 |
+
"""Placeholder for 'route' and 'shortcut' layers"""
|
96 |
+
|
97 |
+
def __init__(self):
|
98 |
+
super(EmptyLayer, self).__init__()
|
99 |
+
|
100 |
+
|
101 |
+
class YOLOLayer(nn.Module):
|
102 |
+
"""Detection layer"""
|
103 |
+
|
104 |
+
def __init__(self, anchors, num_classes, img_dim):
|
105 |
+
super(YOLOLayer, self).__init__()
|
106 |
+
self.anchors = anchors
|
107 |
+
self.num_anchors = len(anchors)
|
108 |
+
self.num_classes = num_classes
|
109 |
+
self.bbox_attrs = 5 + num_classes
|
110 |
+
self.image_dim = img_dim
|
111 |
+
self.ignore_thres = 0.5
|
112 |
+
self.lambda_coord = 1
|
113 |
+
|
114 |
+
self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss
|
115 |
+
self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss
|
116 |
+
self.ce_loss = nn.CrossEntropyLoss() # Class loss
|
117 |
+
|
118 |
+
def forward(self, x, targets=None):
|
119 |
+
nA = self.num_anchors
|
120 |
+
nB = x.size(0)
|
121 |
+
nG = x.size(2)
|
122 |
+
stride = self.image_dim / nG
|
123 |
+
|
124 |
+
# Tensors for cuda support
|
125 |
+
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
|
126 |
+
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
|
127 |
+
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
|
128 |
+
|
129 |
+
prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()
|
130 |
+
|
131 |
+
# Get outputs
|
132 |
+
x = torch.sigmoid(prediction[..., 0]) # Center x
|
133 |
+
y = torch.sigmoid(prediction[..., 1]) # Center y
|
134 |
+
w = prediction[..., 2] # Width
|
135 |
+
h = prediction[..., 3] # Height
|
136 |
+
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
|
137 |
+
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
|
138 |
+
|
139 |
+
# Calculate offsets for each grid
|
140 |
+
grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
|
141 |
+
grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
|
142 |
+
scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
|
143 |
+
anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
|
144 |
+
anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
|
145 |
+
|
146 |
+
# Add offset and scale with anchors
|
147 |
+
pred_boxes = FloatTensor(prediction[..., :4].shape)
|
148 |
+
pred_boxes[..., 0] = x.data + grid_x
|
149 |
+
pred_boxes[..., 1] = y.data + grid_y
|
150 |
+
pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
|
151 |
+
pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
|
152 |
+
|
153 |
+
# Training
|
154 |
+
if targets is not None:
|
155 |
+
|
156 |
+
if x.is_cuda:
|
157 |
+
self.mse_loss = self.mse_loss.cuda()
|
158 |
+
self.bce_loss = self.bce_loss.cuda()
|
159 |
+
self.ce_loss = self.ce_loss.cuda()
|
160 |
+
|
161 |
+
nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
|
162 |
+
pred_boxes=pred_boxes.cpu().data,
|
163 |
+
pred_conf=pred_conf.cpu().data,
|
164 |
+
pred_cls=pred_cls.cpu().data,
|
165 |
+
target=targets.cpu().data,
|
166 |
+
anchors=scaled_anchors.cpu().data,
|
167 |
+
num_anchors=nA,
|
168 |
+
num_classes=self.num_classes,
|
169 |
+
grid_size=nG,
|
170 |
+
ignore_thres=self.ignore_thres,
|
171 |
+
img_dim=self.image_dim,
|
172 |
+
)
|
173 |
+
|
174 |
+
nProposals = int((pred_conf > 0.5).sum().item())
|
175 |
+
recall = float(nCorrect / nGT) if nGT else 1
|
176 |
+
precision = float(nCorrect / nProposals)
|
177 |
+
|
178 |
+
# Handle masks
|
179 |
+
mask = Variable(mask.type(ByteTensor))
|
180 |
+
conf_mask = Variable(conf_mask.type(ByteTensor))
|
181 |
+
|
182 |
+
# Handle target variables
|
183 |
+
tx = Variable(tx.type(FloatTensor), requires_grad=False)
|
184 |
+
ty = Variable(ty.type(FloatTensor), requires_grad=False)
|
185 |
+
tw = Variable(tw.type(FloatTensor), requires_grad=False)
|
186 |
+
th = Variable(th.type(FloatTensor), requires_grad=False)
|
187 |
+
tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
|
188 |
+
tcls = Variable(tcls.type(LongTensor), requires_grad=False)
|
189 |
+
|
190 |
+
# Get conf mask where gt and where there is no gt
|
191 |
+
conf_mask_true = mask
|
192 |
+
conf_mask_false = conf_mask - mask
|
193 |
+
|
194 |
+
# Mask outputs to ignore non-existing objects
|
195 |
+
loss_x = self.mse_loss(x[mask], tx[mask])
|
196 |
+
loss_y = self.mse_loss(y[mask], ty[mask])
|
197 |
+
loss_w = self.mse_loss(w[mask], tw[mask])
|
198 |
+
loss_h = self.mse_loss(h[mask], th[mask])
|
199 |
+
loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
|
200 |
+
pred_conf[conf_mask_true], tconf[conf_mask_true]
|
201 |
+
)
|
202 |
+
loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
|
203 |
+
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
|
204 |
+
|
205 |
+
return (
|
206 |
+
loss,
|
207 |
+
loss_x.item(),
|
208 |
+
loss_y.item(),
|
209 |
+
loss_w.item(),
|
210 |
+
loss_h.item(),
|
211 |
+
loss_conf.item(),
|
212 |
+
loss_cls.item(),
|
213 |
+
recall,
|
214 |
+
precision,
|
215 |
+
)
|
216 |
+
|
217 |
+
else:
|
218 |
+
# If not in training phase return predictions
|
219 |
+
output = torch.cat(
|
220 |
+
(
|
221 |
+
pred_boxes.view(nB, -1, 4) * stride,
|
222 |
+
pred_conf.view(nB, -1, 1),
|
223 |
+
pred_cls.view(nB, -1, self.num_classes),
|
224 |
+
),
|
225 |
+
-1,
|
226 |
+
)
|
227 |
+
return output
|
228 |
+
|
229 |
+
|
230 |
+
class Darknet(nn.Module):
|
231 |
+
"""YOLOv3 object detection model"""
|
232 |
+
|
233 |
+
def __init__(self, config_path, img_size=416):
|
234 |
+
super(Darknet, self).__init__()
|
235 |
+
self.module_defs = parse_model_config(config_path)
|
236 |
+
self.hyperparams, self.module_list = create_modules(self.module_defs)
|
237 |
+
self.img_size = img_size
|
238 |
+
self.seen = 0
|
239 |
+
self.header_info = np.array([0, 0, 0, self.seen, 0])
|
240 |
+
self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
|
241 |
+
|
242 |
+
def forward(self, x, targets=None):
|
243 |
+
is_training = targets is not None
|
244 |
+
output = []
|
245 |
+
self.losses = defaultdict(float)
|
246 |
+
layer_outputs = []
|
247 |
+
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
|
248 |
+
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
|
249 |
+
x = module(x)
|
250 |
+
elif module_def["type"] == "route":
|
251 |
+
layer_i = [int(x) for x in module_def["layers"].split(",")]
|
252 |
+
x = torch.cat([layer_outputs[i] for i in layer_i], 1)
|
253 |
+
elif module_def["type"] == "shortcut":
|
254 |
+
layer_i = int(module_def["from"])
|
255 |
+
x = layer_outputs[-1] + layer_outputs[layer_i]
|
256 |
+
elif module_def["type"] == "yolo":
|
257 |
+
# Train phase: get loss
|
258 |
+
if is_training:
|
259 |
+
x, *losses = module[0](x, targets)
|
260 |
+
for name, loss in zip(self.loss_names, losses):
|
261 |
+
self.losses[name] += loss
|
262 |
+
# Test phase: Get detections
|
263 |
+
else:
|
264 |
+
x = module(x)
|
265 |
+
output.append(x)
|
266 |
+
layer_outputs.append(x)
|
267 |
+
|
268 |
+
self.losses["recall"] /= 3
|
269 |
+
self.losses["precision"] /= 3
|
270 |
+
return sum(output) if is_training else torch.cat(output, 1)
|
271 |
+
|
272 |
+
def load_weights(self, weights_path):
|
273 |
+
"""Parses and loads the weights stored in 'weights_path'"""
|
274 |
+
|
275 |
+
# Open the weights file
|
276 |
+
fp = open(weights_path, "rb")
|
277 |
+
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
|
278 |
+
|
279 |
+
# Needed to write header when saving weights
|
280 |
+
self.header_info = header
|
281 |
+
|
282 |
+
self.seen = header[3]
|
283 |
+
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
|
284 |
+
fp.close()
|
285 |
+
|
286 |
+
ptr = 0
|
287 |
+
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
|
288 |
+
if module_def["type"] == "convolutional":
|
289 |
+
conv_layer = module[0]
|
290 |
+
if module_def["batch_normalize"]:
|
291 |
+
# Load BN bias, weights, running mean and running variance
|
292 |
+
bn_layer = module[1]
|
293 |
+
num_b = bn_layer.bias.numel() # Number of biases
|
294 |
+
# Bias
|
295 |
+
bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
|
296 |
+
bn_layer.bias.data.copy_(bn_b)
|
297 |
+
ptr += num_b
|
298 |
+
# Weight
|
299 |
+
bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
|
300 |
+
bn_layer.weight.data.copy_(bn_w)
|
301 |
+
ptr += num_b
|
302 |
+
# Running Mean
|
303 |
+
bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
|
304 |
+
bn_layer.running_mean.data.copy_(bn_rm)
|
305 |
+
ptr += num_b
|
306 |
+
# Running Var
|
307 |
+
bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
|
308 |
+
bn_layer.running_var.data.copy_(bn_rv)
|
309 |
+
ptr += num_b
|
310 |
+
else:
|
311 |
+
# Load conv. bias
|
312 |
+
num_b = conv_layer.bias.numel()
|
313 |
+
conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
|
314 |
+
conv_layer.bias.data.copy_(conv_b)
|
315 |
+
ptr += num_b
|
316 |
+
# Load conv. weights
|
317 |
+
num_w = conv_layer.weight.numel()
|
318 |
+
conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
|
319 |
+
conv_layer.weight.data.copy_(conv_w)
|
320 |
+
ptr += num_w
|
321 |
+
|
322 |
+
"""
|
323 |
+
@:param path - path of the new weights file
|
324 |
+
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
|
325 |
+
"""
|
326 |
+
|
327 |
+
def save_weights(self, path, cutoff=-1):
|
328 |
+
|
329 |
+
fp = open(path, "wb")
|
330 |
+
self.header_info[3] = self.seen
|
331 |
+
self.header_info.tofile(fp)
|
332 |
+
|
333 |
+
# Iterate through layers
|
334 |
+
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
|
335 |
+
if module_def["type"] == "convolutional":
|
336 |
+
conv_layer = module[0]
|
337 |
+
# If batch norm, load bn first
|
338 |
+
if module_def["batch_normalize"]:
|
339 |
+
bn_layer = module[1]
|
340 |
+
bn_layer.bias.data.cpu().numpy().tofile(fp)
|
341 |
+
bn_layer.weight.data.cpu().numpy().tofile(fp)
|
342 |
+
bn_layer.running_mean.data.cpu().numpy().tofile(fp)
|
343 |
+
bn_layer.running_var.data.cpu().numpy().tofile(fp)
|
344 |
+
# Load conv bias
|
345 |
+
else:
|
346 |
+
conv_layer.bias.data.cpu().numpy().tofile(fp)
|
347 |
+
# Load conv weights
|
348 |
+
conv_layer.weight.data.cpu().numpy().tofile(fp)
|
349 |
+
|
350 |
+
fp.close()
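For reference, a minimal inference sketch for the Darknet class added above, assuming the YOLOv3 config and pretrained weights the notebooks point at (config/yolov3.cfg, config/yolov3.weights — excluded by the new .gitignore, so fetched separately) are present on disk; the dummy input is illustrative:

import torch
from models import Darknet

model = Darknet("config/yolov3.cfg", img_size=416)
model.load_weights("config/yolov3.weights")
model.eval()

# A single black 416x416 RGB image stands in for a real frame. With no
# targets, the forward pass returns the concatenated predictions of the
# three YOLO heads: (batch, num_boxes, 5 + num_classes).
dummy = torch.zeros(1, 3, 416, 416)
with torch.no_grad():
    detections = model(dummy)
print(detections.shape)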
object_tracker.py
ADDED
@@ -0,0 +1,110 @@
1 |
+
from models import *
|
2 |
+
from utils import *
|
3 |
+
|
4 |
+
import os, sys, time, datetime, random
|
5 |
+
import torch
|
6 |
+
from torch.utils.data import DataLoader
|
7 |
+
from torchvision import datasets, transforms
|
8 |
+
from torch.autograd import Variable
|
9 |
+
|
10 |
+
from PIL import Image
|
11 |
+
|
12 |
+
# load weights and set defaults
|
13 |
+
config_path='config/yolov3.cfg'
|
14 |
+
weights_path='config/yolov3.weights'
|
15 |
+
class_path='config/coco.names'
|
16 |
+
img_size=416
|
17 |
+
conf_thres=0.8
|
18 |
+
nms_thres=0.4
|
19 |
+
|
20 |
+
# load model and put into eval mode
|
21 |
+
model = Darknet(config_path, img_size=img_size)
|
22 |
+
model.load_weights(weights_path)
|
23 |
+
model.cuda()
|
24 |
+
model.eval()
|
25 |
+
|
26 |
+
classes = utils.load_classes(class_path)
|
27 |
+
Tensor = torch.cuda.FloatTensor
|
28 |
+
|
29 |
+
def detect_image(img):
|
30 |
+
# scale and pad image
|
31 |
+
ratio = min(img_size/img.size[0], img_size/img.size[1])
|
32 |
+
imw = round(img.size[0] * ratio)
|
33 |
+
imh = round(img.size[1] * ratio)
|
34 |
+
img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),
|
35 |
+
transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),
|
36 |
+
(128,128,128)),
|
37 |
+
transforms.ToTensor(),
|
38 |
+
])
|
39 |
+
# convert image to Tensor
|
40 |
+
image_tensor = img_transforms(img).float()
|
41 |
+
image_tensor = image_tensor.unsqueeze_(0)
|
42 |
+
input_img = Variable(image_tensor.type(Tensor))
|
43 |
+
# run inference on the model and get detections
|
44 |
+
with torch.no_grad():
|
45 |
+
detections = model(input_img)
|
46 |
+
detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)
|
47 |
+
return detections[0]
|
48 |
+
|
49 |
+
videopath = './videos/HorseRacing.mp4'
|
50 |
+
|
51 |
+
import cv2
|
52 |
+
from sort import *
|
53 |
+
colors=[(255,0,0),(0,255,0),(0,0,255),(255,0,255),(128,0,0),(0,128,0),(0,0,128),(128,0,128),(128,128,0),(0,128,128)]
|
54 |
+
|
55 |
+
vid = cv2.VideoCapture(videopath)
|
56 |
+
mot_tracker = Sort()
|
57 |
+
|
58 |
+
cv2.namedWindow('Stream',cv2.WINDOW_NORMAL)
|
59 |
+
cv2.resizeWindow('Stream', (800,600))
|
60 |
+
|
61 |
+
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
62 |
+
ret,frame=vid.read()
|
63 |
+
vw = frame.shape[1]
|
64 |
+
vh = frame.shape[0]
|
65 |
+
print ("Video size", vw,vh)
|
66 |
+
outvideo = cv2.VideoWriter(videopath.replace(".mp4", "-det.mp4"),fourcc,20.0,(vw,vh))
|
67 |
+
|
68 |
+
frames = 0
|
69 |
+
starttime = time.time()
|
70 |
+
while(True):
|
71 |
+
ret, frame = vid.read()
|
72 |
+
if not ret:
|
73 |
+
break
|
74 |
+
frames += 1
|
75 |
+
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
76 |
+
pilimg = Image.fromarray(frame)
|
77 |
+
detections = detect_image(pilimg)
|
78 |
+
|
79 |
+
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
80 |
+
img = np.array(pilimg)
|
81 |
+
pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
|
82 |
+
pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
|
83 |
+
unpad_h = img_size - pad_y
|
84 |
+
unpad_w = img_size - pad_x
|
85 |
+
if detections is not None:
|
86 |
+
tracked_objects = mot_tracker.update(detections.cpu())
|
87 |
+
|
88 |
+
unique_labels = detections[:, -1].cpu().unique()
|
89 |
+
n_cls_preds = len(unique_labels)
|
90 |
+
for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:
|
91 |
+
box_h = int(((y2 - y1) / unpad_h) * img.shape[0])
|
92 |
+
box_w = int(((x2 - x1) / unpad_w) * img.shape[1])
|
93 |
+
y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])
|
94 |
+
x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])
|
95 |
+
color = colors[int(obj_id) % len(colors)]
|
96 |
+
cls = classes[int(cls_pred)]
|
97 |
+
cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)
|
98 |
+
cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+80, y1), color, -1)
|
99 |
+
cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)
|
100 |
+
|
101 |
+
cv2.imshow('Stream', frame)
|
102 |
+
outvideo.write(frame)
|
103 |
+
ch = 0xFF & cv2.waitKey(1)
|
104 |
+
if ch == 27:
|
105 |
+
break
|
106 |
+
|
107 |
+
totaltime = time.time()-starttime
|
108 |
+
print(frames, "frames", totaltime/frames, "s/frame")
|
109 |
+
cv2.destroyAllWindows()
|
110 |
+
outvideo.release()
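The coordinate handling in object_tracker.py is worth spelling out: detections are produced in the 416x416 letterboxed frame, so the script subtracts half the padding and rescales back to the original resolution before drawing. A standalone sketch of that arithmetic with made-up numbers (a 1280x720 frame and one box corner):

img_size = 416
img_h, img_w = 720, 1280  # original frame size (illustrative)

# Padding added by detect_image() to make the resized frame square.
pad_x = max(img_h - img_w, 0) * (img_size / max(img_h, img_w))
pad_y = max(img_w - img_h, 0) * (img_size / max(img_h, img_w))
unpad_h = img_size - pad_y
unpad_w = img_size - pad_x

# A box corner in letterboxed coordinates, mapped back to frame coordinates.
x1, y1 = 100.0, 150.0
orig_x1 = int(((x1 - pad_x // 2) / unpad_w) * img_w)
orig_y1 = int(((y1 - pad_y // 2) / unpad_h) * img_h)
print(orig_x1, orig_y1)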
sort.py
ADDED
@@ -0,0 +1,305 @@
1 |
+
"""
|
2 |
+
SORT: A Simple, Online and Realtime Tracker
|
3 |
+
Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
|
4 |
+
|
5 |
+
This program is free software: you can redistribute it and/or modify
|
6 |
+
it under the terms of the GNU General Public License as published by
|
7 |
+
the Free Software Foundation, either version 3 of the License, or
|
8 |
+
(at your option) any later version.
|
9 |
+
|
10 |
+
This program is distributed in the hope that it will be useful,
|
11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13 |
+
GNU General Public License for more details.
|
14 |
+
|
15 |
+
You should have received a copy of the GNU General Public License
|
16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17 |
+
"""
|
18 |
+
from __future__ import print_function
|
19 |
+
|
20 |
+
from numba import jit
|
21 |
+
import os.path
|
22 |
+
import numpy as np
|
23 |
+
##import matplotlib.pyplot as plt
|
24 |
+
##import matplotlib.patches as patches
|
25 |
+
from skimage import io
|
26 |
+
# from sklearn.utils.linear_assignment_ import linear_assignment
|
27 |
+
import glob
|
28 |
+
import time
|
29 |
+
import argparse
|
30 |
+
from filterpy.kalman import KalmanFilter
|
31 |
+
|
32 |
+
from scipy.optimize import linear_sum_assignment
|
33 |
+
def linear_assignment(x):
|
34 |
+
indices = linear_sum_assignment(x)
|
35 |
+
indices = np.asarray(indices)
|
36 |
+
return np.transpose(indices)
|
37 |
+
|
38 |
+
@jit
|
39 |
+
def iou(bb_test,bb_gt):
|
40 |
+
"""
|
41 |
+
Computes IOU between two bboxes in the form [x1,y1,x2,y2]
|
42 |
+
"""
|
43 |
+
xx1 = np.maximum(bb_test[0], bb_gt[0])
|
44 |
+
yy1 = np.maximum(bb_test[1], bb_gt[1])
|
45 |
+
xx2 = np.minimum(bb_test[2], bb_gt[2])
|
46 |
+
yy2 = np.minimum(bb_test[3], bb_gt[3])
|
47 |
+
w = np.maximum(0., xx2 - xx1)
|
48 |
+
h = np.maximum(0., yy2 - yy1)
|
49 |
+
wh = w * h
|
50 |
+
o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
|
51 |
+
+ (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
|
52 |
+
return(o)
|
53 |
+
|
54 |
+
def convert_bbox_to_z(bbox):
|
55 |
+
"""
|
56 |
+
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
|
57 |
+
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
|
58 |
+
the aspect ratio
|
59 |
+
"""
|
60 |
+
w = bbox[2]-bbox[0]
|
61 |
+
h = bbox[3]-bbox[1]
|
62 |
+
x = bbox[0]+w/2.
|
63 |
+
y = bbox[1]+h/2.
|
64 |
+
s = w*h #scale is just area
|
65 |
+
r = w/float(h)
|
66 |
+
return np.array([x,y,s,r]).reshape((4,1))
|
67 |
+
|
68 |
+
def convert_x_to_bbox(x,score=None):
|
69 |
+
"""
|
70 |
+
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
|
71 |
+
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
|
72 |
+
"""
|
73 |
+
w = np.sqrt(x[2]*x[3])
|
74 |
+
h = x[2]/w
|
75 |
+
if(score is None):
|
76 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
|
77 |
+
else:
|
78 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
|
79 |
+
|
80 |
+
|
81 |
+
class KalmanBoxTracker(object):
|
82 |
+
"""
|
83 |
+
This class represents the internal state of individual tracked objects observed as bbox.
|
84 |
+
"""
|
85 |
+
count = 0
|
86 |
+
def __init__(self,bbox):
|
87 |
+
"""
|
88 |
+
Initialises a tracker using initial bounding box.
|
89 |
+
"""
|
90 |
+
#define constant velocity model
|
91 |
+
self.kf = KalmanFilter(dim_x=7, dim_z=4)
|
92 |
+
self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
|
93 |
+
self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
|
94 |
+
|
95 |
+
self.kf.R[2:,2:] *= 10.
|
96 |
+
self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
|
97 |
+
self.kf.P *= 10.
|
98 |
+
self.kf.Q[-1,-1] *= 0.01
|
99 |
+
self.kf.Q[4:,4:] *= 0.01
|
100 |
+
|
101 |
+
self.kf.x[:4] = convert_bbox_to_z(bbox)
|
102 |
+
self.time_since_update = 0
|
103 |
+
self.id = KalmanBoxTracker.count
|
104 |
+
KalmanBoxTracker.count += 1
|
105 |
+
self.history = []
|
106 |
+
self.hits = 0
|
107 |
+
self.hit_streak = 0
|
108 |
+
self.age = 0
|
109 |
+
self.objclass = bbox[6]
|
110 |
+
|
111 |
+
def update(self,bbox):
|
112 |
+
"""
|
113 |
+
Updates the state vector with observed bbox.
|
114 |
+
"""
|
115 |
+
self.time_since_update = 0
|
116 |
+
self.history = []
|
117 |
+
self.hits += 1
|
118 |
+
self.hit_streak += 1
|
119 |
+
self.kf.update(convert_bbox_to_z(bbox))
|
120 |
+
|
121 |
+
def predict(self):
|
122 |
+
"""
|
123 |
+
Advances the state vector and returns the predicted bounding box estimate.
|
124 |
+
"""
|
125 |
+
if((self.kf.x[6]+self.kf.x[2])<=0):
|
126 |
+
self.kf.x[6] *= 0.0
|
127 |
+
self.kf.predict()
|
128 |
+
self.age += 1
|
129 |
+
if(self.time_since_update>0):
|
130 |
+
self.hit_streak = 0
|
131 |
+
self.time_since_update += 1
|
132 |
+
self.history.append(convert_x_to_bbox(self.kf.x))
|
133 |
+
return self.history[-1]
|
134 |
+
|
135 |
+
def get_state(self):
|
136 |
+
"""
|
137 |
+
Returns the current bounding box estimate.
|
138 |
+
"""
|
139 |
+
return convert_x_to_bbox(self.kf.x)
|
140 |
+
|
141 |
+
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
|
142 |
+
"""
|
143 |
+
Assigns detections to tracked object (both represented as bounding boxes)
|
144 |
+
|
145 |
+
Returns 3 lists of matches, unmatched_detections and unmatched_trackers
|
146 |
+
"""
|
147 |
+
if(len(trackers)==0):
|
148 |
+
return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
|
149 |
+
iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
|
150 |
+
|
151 |
+
for d,det in enumerate(detections):
|
152 |
+
for t,trk in enumerate(trackers):
|
153 |
+
iou_matrix[d,t] = iou(det,trk)
|
154 |
+
matched_indices = linear_assignment(-iou_matrix)
|
155 |
+
|
156 |
+
unmatched_detections = []
|
157 |
+
for d,det in enumerate(detections):
|
158 |
+
if(d not in matched_indices[:,0]):
|
159 |
+
unmatched_detections.append(d)
|
160 |
+
unmatched_trackers = []
|
161 |
+
for t,trk in enumerate(trackers):
|
162 |
+
if(t not in matched_indices[:,1]):
|
163 |
+
unmatched_trackers.append(t)
|
164 |
+
|
165 |
+
#filter out matched with low IOU
|
166 |
+
matches = []
|
167 |
+
for m in matched_indices:
|
168 |
+
if(iou_matrix[m[0],m[1]]<iou_threshold):
|
169 |
+
unmatched_detections.append(m[0])
|
170 |
+
unmatched_trackers.append(m[1])
|
171 |
+
else:
|
172 |
+
matches.append(m.reshape(1,2))
|
173 |
+
if(len(matches)==0):
|
174 |
+
matches = np.empty((0,2),dtype=int)
|
175 |
+
else:
|
176 |
+
matches = np.concatenate(matches,axis=0)
|
177 |
+
|
178 |
+
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
+
class Sort(object):
|
183 |
+
def __init__(self,max_age=1,min_hits=3):
|
184 |
+
"""
|
185 |
+
Sets key parameters for SORT
|
186 |
+
"""
|
187 |
+
self.max_age = max_age
|
188 |
+
self.min_hits = min_hits
|
189 |
+
self.trackers = []
|
190 |
+
self.frame_count = 0
|
191 |
+
|
192 |
+
def update(self,dets):
|
193 |
+
"""
|
194 |
+
Params:
|
195 |
+
dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
|
196 |
+
Requires: this method must be called once for each frame even with empty detections.
|
197 |
+
Returns a similar array, where the last column is the object ID.
|
198 |
+
|
199 |
+
NOTE: The number of objects returned may differ from the number of detections provided.
|
200 |
+
"""
|
201 |
+
self.frame_count += 1
|
202 |
+
#get predicted locations from existing trackers.
|
203 |
+
trks = np.zeros((len(self.trackers),5))
|
204 |
+
to_del = []
|
205 |
+
ret = []
|
206 |
+
for t,trk in enumerate(trks):
|
207 |
+
pos = self.trackers[t].predict()[0]
|
208 |
+
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
|
209 |
+
if(np.any(np.isnan(pos))):
|
210 |
+
to_del.append(t)
|
211 |
+
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
|
212 |
+
for t in reversed(to_del):
|
213 |
+
self.trackers.pop(t)
|
214 |
+
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
|
215 |
+
|
216 |
+
#update matched trackers with assigned detections
|
217 |
+
for t,trk in enumerate(self.trackers):
|
218 |
+
if(t not in unmatched_trks):
|
219 |
+
d = matched[np.where(matched[:,1]==t)[0],0]
|
220 |
+
trk.update(dets[d,:][0])
|
221 |
+
|
222 |
+
#create and initialise new trackers for unmatched detections
|
223 |
+
for i in unmatched_dets:
|
224 |
+
trk = KalmanBoxTracker(dets[i,:])
|
225 |
+
self.trackers.append(trk)
|
226 |
+
i = len(self.trackers)
|
227 |
+
for trk in reversed(self.trackers):
|
228 |
+
d = trk.get_state()[0]
|
229 |
+
if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
|
230 |
+
ret.append(np.concatenate((d,[trk.id+1], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
|
231 |
+
i -= 1
|
232 |
+
#remove dead tracklet
|
233 |
+
if(trk.time_since_update > self.max_age):
|
234 |
+
self.trackers.pop(i)
|
235 |
+
if(len(ret)>0):
|
236 |
+
return np.concatenate(ret)
|
237 |
+
return np.empty((0,5))
|
238 |
+
|
239 |
+
def parse_args():
|
240 |
+
"""Parse input arguments."""
|
241 |
+
parser = argparse.ArgumentParser(description='SORT demo')
|
242 |
+
parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
|
243 |
+
args = parser.parse_args()
|
244 |
+
return args
|
245 |
+
|
246 |
+
if __name__ == '__main__':
|
247 |
+
# all train
|
248 |
+
sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
|
249 |
+
args = parse_args()
|
250 |
+
display = args.display
|
251 |
+
phase = 'train'
|
252 |
+
total_time = 0.0
|
253 |
+
total_frames = 0
|
254 |
+
colours = np.random.rand(32,3) #used only for display
|
255 |
+
if(display):
|
256 |
+
if not os.path.exists('mot_benchmark'):
|
257 |
+
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
|
258 |
+
exit()
|
259 |
+
plt.ion()
|
260 |
+
fig = plt.figure()
|
261 |
+
|
262 |
+
if not os.path.exists('output'):
|
263 |
+
os.makedirs('output')
|
264 |
+
|
265 |
+
for seq in sequences:
|
266 |
+
mot_tracker = Sort() #create instance of the SORT tracker
|
267 |
+
seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
|
268 |
+
with open('output/%s.txt'%(seq),'w') as out_file:
|
269 |
+
print("Processing %s."%(seq))
|
270 |
+
for frame in range(int(seq_dets[:,0].max())):
|
271 |
+
frame += 1 #detection and frame numbers begin at 1
|
272 |
+
dets = seq_dets[seq_dets[:,0]==frame,2:7]
|
273 |
+
dets[:,2:4] += dets[:,0:2] #convert from [x1,y1,w,h] to [x1,y1,x2,y2]
|
274 |
+
total_frames += 1
|
275 |
+
|
276 |
+
if(display):
|
277 |
+
ax1 = fig.add_subplot(111, aspect='equal')
|
278 |
+
fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
|
279 |
+
im =io.imread(fn)
|
280 |
+
ax1.imshow(im)
|
281 |
+
plt.title(seq+' Tracked Targets')
|
282 |
+
|
283 |
+
start_time = time.time()
|
284 |
+
trackers = mot_tracker.update(dets)
|
285 |
+
cycle_time = time.time() - start_time
|
286 |
+
total_time += cycle_time
|
287 |
+
|
288 |
+
for d in trackers:
|
289 |
+
print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
|
290 |
+
if(display):
|
291 |
+
d = d.astype(np.int32)
|
292 |
+
ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
|
293 |
+
ax1.set_adjustable('box-forced')
|
294 |
+
|
295 |
+
if(display):
|
296 |
+
fig.canvas.flush_events()
|
297 |
+
plt.draw()
|
298 |
+
ax1.cla()
|
299 |
+
|
300 |
+
print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
|
301 |
+
if(display):
|
302 |
+
print("Note: to get real runtime results run without the option: --display")
|
303 |
+
|
304 |
+
|
305 |
+
|
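A minimal usage sketch of the Sort class added above, assuming sort.py and its dependencies (filterpy, numba, scikit-image) are importable from the working directory; the 7-column detection layout is the one produced by the detector's NMS step, since KalmanBoxTracker reads the class id from column 6:

import numpy as np
from sort import Sort

mot_tracker = Sort(max_age=10, min_hits=3)

# Hypothetical detections for one frame, rows of
# [x1, y1, x2, y2, obj_conf, class_score, class_pred]
dets = np.array([
    [100., 120., 200., 300., 0.9, 0.8, 0.],
    [400., 150., 480., 320., 0.7, 0.6, 2.],
])

# update() must be called once per frame, even when there are no detections;
# it returns rows of [x1, y1, x2, y2, track_id, class_pred] (possibly empty).
tracks = mot_tracker.update(dets)
tracks = mot_tracker.update(np.empty((0, 7)))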
sort.py.old
ADDED
@@ -0,0 +1,317 @@
1 |
+
"""
|
2 |
+
SORT: A Simple, Online and Realtime Tracker
|
3 |
+
Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
|
4 |
+
|
5 |
+
This program is free software: you can redistribute it and/or modify
|
6 |
+
it under the terms of the GNU General Public License as published by
|
7 |
+
the Free Software Foundation, either version 3 of the License, or
|
8 |
+
(at your option) any later version.
|
9 |
+
|
10 |
+
This program is distributed in the hope that it will be useful,
|
11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13 |
+
GNU General Public License for more details.
|
14 |
+
|
15 |
+
You should have received a copy of the GNU General Public License
|
16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17 |
+
"""
|
18 |
+
from __future__ import print_function
|
19 |
+
|
20 |
+
from numba import jit
|
21 |
+
import os.path
|
22 |
+
import numpy as np
|
23 |
+
##import matplotlib.pyplot as plt
|
24 |
+
##import matplotlib.patches as patches
|
25 |
+
from skimage import io
|
26 |
+
# from scipy.optimize import linear_sum_assignment as linear_assignment
|
27 |
+
import glob
|
28 |
+
import time
|
29 |
+
import argparse
|
30 |
+
from filterpy.kalman import KalmanFilter
|
31 |
+
|
32 |
+
# -------- Fixes the Warning ----------------------
|
33 |
+
# def linear_assignment(cost_matrix):
|
34 |
+
# try:
|
35 |
+
# import lap
|
36 |
+
# _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
|
37 |
+
# return np.array([[y[i], i] for i in x if i >= 0])
|
38 |
+
# except ImportError:
|
39 |
+
# from scipy.optimize import linear_sum_assignment
|
40 |
+
# x, y = linear_sum_assignment(cost_matrix)
|
41 |
+
# return np.array(list(zip(x, y)))
|
42 |
+
|
43 |
+
# --------------- Fixes the Error
|
44 |
+
from scipy.optimize import linear_sum_assignment
|
45 |
+
def linear_assignment(x):
|
46 |
+
indices = linear_sum_assignment(x)
|
47 |
+
indices = np.asarray(indices)
|
48 |
+
return np.transpose(indices)
|
49 |
+
|
50 |
+
@jit
|
51 |
+
def iou(bb_test,bb_gt):
|
52 |
+
"""
|
53 |
+
Computes IOU between two bboxes in the form [x1,y1,x2,y2]
|
54 |
+
"""
|
55 |
+
xx1 = np.maximum(bb_test[0], bb_gt[0])
|
56 |
+
yy1 = np.maximum(bb_test[1], bb_gt[1])
|
57 |
+
xx2 = np.minimum(bb_test[2], bb_gt[2])
|
58 |
+
yy2 = np.minimum(bb_test[3], bb_gt[3])
|
59 |
+
w = np.maximum(0., xx2 - xx1)
|
60 |
+
h = np.maximum(0., yy2 - yy1)
|
61 |
+
wh = w * h
|
62 |
+
o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
|
63 |
+
+ (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
|
64 |
+
return(o)
|
65 |
+
|
66 |
+
def convert_bbox_to_z(bbox):
|
67 |
+
"""
|
68 |
+
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
|
69 |
+
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
|
70 |
+
the aspect ratio
|
71 |
+
"""
|
72 |
+
w = bbox[2]-bbox[0]
|
73 |
+
h = bbox[3]-bbox[1]
|
74 |
+
x = bbox[0]+w/2.
|
75 |
+
y = bbox[1]+h/2.
|
76 |
+
s = w*h #scale is just area
|
77 |
+
r = w/float(h)
|
78 |
+
return np.array([x,y,s,r]).reshape((4,1))
|
79 |
+
|
80 |
+
def convert_x_to_bbox(x,score=None):
|
81 |
+
"""
|
82 |
+
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
|
83 |
+
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
|
84 |
+
"""
|
85 |
+
w = np.sqrt(x[2]*x[3])
|
86 |
+
h = x[2]/w
|
87 |
+
if(score is None):
|
88 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
|
89 |
+
else:
|
90 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
|
91 |
+
|
92 |
+
|
93 |
+
class KalmanBoxTracker(object):
|
94 |
+
"""
|
95 |
+
This class represents the internal state of individual tracked objects observed as bbox.
|
96 |
+
"""
|
97 |
+
count = 0
|
98 |
+
def __init__(self,bbox):
|
99 |
+
"""
|
100 |
+
Initialises a tracker using initial bounding box.
|
101 |
+
"""
|
102 |
+
#define constant velocity model
|
103 |
+
self.kf = KalmanFilter(dim_x=7, dim_z=4)
|
104 |
+
self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
|
105 |
+
self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
|
106 |
+
|
107 |
+
self.kf.R[2:,2:] *= 10.
|
108 |
+
self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
|
109 |
+
self.kf.P *= 10.
|
110 |
+
self.kf.Q[-1,-1] *= 0.01
|
111 |
+
self.kf.Q[4:,4:] *= 0.01
|
112 |
+
|
113 |
+
self.kf.x[:4] = convert_bbox_to_z(bbox)
|
114 |
+
self.time_since_update = 0
|
115 |
+
self.id = KalmanBoxTracker.count
|
116 |
+
KalmanBoxTracker.count += 1
|
117 |
+
self.history = []
|
118 |
+
self.hits = 0
|
119 |
+
self.hit_streak = 0
|
120 |
+
self.age = 0
|
121 |
+
self.objclass = bbox[6]
|
122 |
+
|
123 |
+
def update(self,bbox):
|
124 |
+
"""
|
125 |
+
Updates the state vector with observed bbox.
|
126 |
+
"""
|
127 |
+
self.time_since_update = 0
|
128 |
+
self.history = []
|
129 |
+
self.hits += 1
|
130 |
+
self.hit_streak += 1
|
131 |
+
self.kf.update(convert_bbox_to_z(bbox))
|
132 |
+
|
133 |
+
def predict(self):
|
134 |
+
"""
|
135 |
+
Advances the state vector and returns the predicted bounding box estimate.
|
136 |
+
"""
|
137 |
+
if((self.kf.x[6]+self.kf.x[2])<=0):
|
138 |
+
self.kf.x[6] *= 0.0
|
139 |
+
self.kf.predict()
|
140 |
+
self.age += 1
|
141 |
+
if(self.time_since_update>0):
|
142 |
+
self.hit_streak = 0
|
143 |
+
self.time_since_update += 1
|
144 |
+
self.history.append(convert_x_to_bbox(self.kf.x))
|
145 |
+
return self.history[-1]
|
146 |
+
|
147 |
+
def get_state(self):
|
148 |
+
"""
|
149 |
+
Returns the current bounding box estimate.
|
150 |
+
"""
|
151 |
+
return convert_x_to_bbox(self.kf.x)
|
152 |
+
|
153 |
+
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
|
154 |
+
"""
|
155 |
+
Assigns detections to tracked object (both represented as bounding boxes)
|
156 |
+
|
157 |
+
Returns 3 lists of matches, unmatched_detections and unmatched_trackers
|
158 |
+
"""
|
159 |
+
if(len(trackers)==0):
|
160 |
+
return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
|
161 |
+
iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
|
162 |
+
|
163 |
+
for d,det in enumerate(detections):
|
164 |
+
for t,trk in enumerate(trackers):
|
165 |
+
iou_matrix[d,t] = iou(det,trk)
|
166 |
+
matched_indices = linear_assignment(-iou_matrix)
|
167 |
+
|
168 |
+
unmatched_detections = []
|
169 |
+
for d,det in enumerate(detections):
|
170 |
+
if(d not in matched_indices[:,0]):
|
171 |
+
unmatched_detections.append(d)
|
172 |
+
unmatched_trackers = []
|
173 |
+
for t,trk in enumerate(trackers):
|
174 |
+
if(t not in matched_indices[:,1]):
|
175 |
+
unmatched_trackers.append(t)
|
176 |
+
|
177 |
+
#filter out matched with low IOU
|
178 |
+
matches = []
|
179 |
+
for m in matched_indices:
|
180 |
+
if(iou_matrix[m[0],m[1]]<iou_threshold):
|
181 |
+
unmatched_detections.append(m[0])
|
182 |
+
unmatched_trackers.append(m[1])
|
183 |
+
else:
|
184 |
+
matches.append(m.reshape(1,2))
|
185 |
+
if(len(matches)==0):
|
186 |
+
matches = np.empty((0,2),dtype=int)
|
187 |
+
else:
|
188 |
+
matches = np.concatenate(matches,axis=0)
|
189 |
+
|
190 |
+
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
|
191 |
+
|
192 |
+
|
193 |
+
|
194 |
+
class Sort(object):
|
195 |
+
def __init__(self,max_age=1,min_hits=3):
|
196 |
+
"""
|
197 |
+
Sets key parameters for SORT
|
198 |
+
"""
|
199 |
+
self.max_age = max_age
|
200 |
+
self.min_hits = min_hits
|
201 |
+
self.trackers = []
|
202 |
+
self.frame_count = 0
|
203 |
+
|
204 |
+
def update(self,dets):
|
205 |
+
"""
|
206 |
+
Params:
|
207 |
+
dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
|
208 |
+
Requires: this method must be called once for each frame even with empty detections.
|
209 |
+
Returns a similar array, where the last column is the object ID.
|
210 |
+
|
211 |
+
NOTE: The number of objects returned may differ from the number of detections provided.
|
212 |
+
"""
|
213 |
+
self.frame_count += 1
|
214 |
+
#get predicted locations from existing trackers.
|
215 |
+
trks = np.zeros((len(self.trackers),5))
|
216 |
+
to_del = []
|
217 |
+
ret = []
|
218 |
+
for t,trk in enumerate(trks):
|
219 |
+
pos = self.trackers[t].predict()[0]
|
220 |
+
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
|
221 |
+
if(np.any(np.isnan(pos))):
|
222 |
+
to_del.append(t)
|
223 |
+
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
|
224 |
+
for t in reversed(to_del):
|
225 |
+
self.trackers.pop(t)
|
226 |
+
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
|
227 |
+
|
228 |
+
#update matched trackers with assigned detections
|
229 |
+
for t,trk in enumerate(self.trackers):
|
230 |
+
if(t not in unmatched_trks):
|
231 |
+
d = matched[np.where(matched[:,1]==t)[0],0]
|
232 |
+
trk.update(dets[d,:][0])
|
233 |
+
|
234 |
+
#create and initialise new trackers for unmatched detections
|
235 |
+
for i in unmatched_dets:
|
236 |
+
trk = KalmanBoxTracker(dets[i,:])
|
237 |
+
self.trackers.append(trk)
|
238 |
+
i = len(self.trackers)
|
239 |
+
for trk in reversed(self.trackers):
|
240 |
+
d = trk.get_state()[0]
|
241 |
+
if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
|
242 |
+
ret.append(np.concatenate((d,[trk.id+1], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
|
243 |
+
i -= 1
|
244 |
+
#remove dead tracklet
|
245 |
+
if(trk.time_since_update > self.max_age):
|
246 |
+
self.trackers.pop(i)
|
247 |
+
if(len(ret)>0):
|
248 |
+
return np.concatenate(ret)
|
249 |
+
return np.empty((0,5))
|
250 |
+
|
251 |
+
def parse_args():
|
252 |
+
"""Parse input arguments."""
|
253 |
+
parser = argparse.ArgumentParser(description='SORT demo')
|
254 |
+
parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
|
255 |
+
args = parser.parse_args()
|
256 |
+
return args
|
257 |
+
|
258 |
+
if __name__ == '__main__':
|
259 |
+
# all train
|
260 |
+
sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
|
261 |
+
args = parse_args()
|
262 |
+
display = args.display
|
263 |
+
phase = 'train'
|
264 |
+
total_time = 0.0
|
265 |
+
total_frames = 0
|
266 |
+
colours = np.random.rand(32,3) #used only for display
|
267 |
+
if(display):
|
268 |
+
if not os.path.exists('mot_benchmark'):
|
269 |
+
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
|
270 |
+
exit()
|
271 |
+
plt.ion()
|
272 |
+
fig = plt.figure()
|
273 |
+
|
274 |
+
if not os.path.exists('output'):
|
275 |
+
os.makedirs('output')
|
276 |
+
|
277 |
+
for seq in sequences:
|
278 |
+
mot_tracker = Sort() #create instance of the SORT tracker
|
279 |
+
seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
|
280 |
+
with open('output/%s.txt'%(seq),'w') as out_file:
|
281 |
+
print("Processing %s."%(seq))
|
282 |
+
for frame in range(int(seq_dets[:,0].max())):
|
283 |
+
frame += 1 #detection and frame numbers begin at 1
|
284 |
+
dets = seq_dets[seq_dets[:,0]==frame,2:7]
|
285 |
+
dets[:,2:4] += dets[:,0:2] #convert from [x1,y1,w,h] to [x1,y1,x2,y2]
|
286 |
+
total_frames += 1
|
287 |
+
|
288 |
+
if(display):
|
289 |
+
ax1 = fig.add_subplot(111, aspect='equal')
|
290 |
+
fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
|
291 |
+
im =io.imread(fn)
|
292 |
+
ax1.imshow(im)
|
293 |
+
plt.title(seq+' Tracked Targets')
|
294 |
+
|
295 |
+
start_time = time.time()
|
296 |
+
trackers = mot_tracker.update(dets)
|
297 |
+
cycle_time = time.time() - start_time
|
298 |
+
total_time += cycle_time
|
299 |
+
|
300 |
+
for d in trackers:
|
301 |
+
print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
|
302 |
+
if(display):
|
303 |
+
d = d.astype(np.int32)
|
304 |
+
ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
|
305 |
+
ax1.set_adjustable('box-forced')
|
306 |
+
|
307 |
+
if(display):
|
308 |
+
fig.canvas.flush_events()
|
309 |
+
plt.draw()
|
310 |
+
ax1.cla()
|
311 |
+
|
312 |
+
print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
|
313 |
+
if(display):
|
314 |
+
print("Note: to get real runtime results run without the option: --display")
|
315 |
+
|
316 |
+
|
317 |
+
|
utils/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (125 Bytes).
utils/__pycache__/datasets.cpython-36.pyc
ADDED
Binary file (3.65 kB).
utils/__pycache__/parse_config.cpython-36.pyc
ADDED
Binary file (1.38 kB).
utils/__pycache__/parse_config.cpython-37.pyc
ADDED
Binary file (1.43 kB).
utils/__pycache__/utils.cpython-36.pyc
ADDED
Binary file (7.05 kB).
utils/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (7.1 kB).
utils/datasets.py
ADDED
@@ -0,0 +1,121 @@
1 |
+
import glob
|
2 |
+
import random
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
import torch
|
7 |
+
|
8 |
+
from torch.utils.data import Dataset
|
9 |
+
from PIL import Image
|
10 |
+
import torchvision.transforms as transforms
|
11 |
+
|
12 |
+
##import matplotlib.pyplot as plt
|
13 |
+
##import matplotlib.patches as patches
|
14 |
+
|
15 |
+
from skimage.transform import resize
|
16 |
+
|
17 |
+
import sys
|
18 |
+
|
19 |
+
class ImageFolder(Dataset):
|
20 |
+
def __init__(self, folder_path, img_size=416):
|
21 |
+
self.files = sorted(glob.glob('%s/*.*' % folder_path))
|
22 |
+
self.img_shape = (img_size, img_size)
|
23 |
+
|
24 |
+
def __getitem__(self, index):
|
25 |
+
img_path = self.files[index % len(self.files)]
|
26 |
+
# Extract image
|
27 |
+
img = np.array(Image.open(img_path))
|
28 |
+
h, w, _ = img.shape
|
29 |
+
dim_diff = np.abs(h - w)
|
30 |
+
# Upper (left) and lower (right) padding
|
31 |
+
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
|
32 |
+
# Determine padding
|
33 |
+
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
|
34 |
+
# Add padding
|
35 |
+
input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
|
36 |
+
# Resize and normalize
|
37 |
+
input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
|
38 |
+
# Channels-first
|
39 |
+
input_img = np.transpose(input_img, (2, 0, 1))
|
40 |
+
# As pytorch tensor
|
41 |
+
input_img = torch.from_numpy(input_img).float()
|
42 |
+
|
43 |
+
return img_path, input_img
|
44 |
+
|
45 |
+
def __len__(self):
|
46 |
+
return len(self.files)
|
47 |
+
|
48 |
+
|
49 |
+
class ListDataset(Dataset):
|
50 |
+
def __init__(self, list_path, img_size=416):
|
51 |
+
with open(list_path, 'r') as file:
|
52 |
+
self.img_files = file.readlines()
|
53 |
+
self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
|
54 |
+
self.img_shape = (img_size, img_size)
|
55 |
+
self.max_objects = 50
|
56 |
+
|
57 |
+
def __getitem__(self, index):
|
58 |
+
|
59 |
+
#---------
|
60 |
+
# Image
|
61 |
+
#---------
|
62 |
+
|
63 |
+
img_path = self.img_files[index % len(self.img_files)].rstrip()
|
64 |
+
img = np.array(Image.open(img_path))
|
65 |
+
|
66 |
+
# Handles images with less than three channels
|
67 |
+
while len(img.shape) != 3:
|
68 |
+
index += 1
|
69 |
+
img_path = self.img_files[index % len(self.img_files)].rstrip()
|
70 |
+
img = np.array(Image.open(img_path))
|
71 |
+
|
72 |
+
h, w, _ = img.shape
|
73 |
+
dim_diff = np.abs(h - w)
|
74 |
+
# Upper (left) and lower (right) padding
|
75 |
+
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
|
76 |
+
# Determine padding
|
77 |
+
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
|
78 |
+
# Add padding
|
79 |
+
input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
|
80 |
+
padded_h, padded_w, _ = input_img.shape
|
81 |
+
# Resize and normalize
|
82 |
+
input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
|
83 |
+
# Channels-first
|
84 |
+
input_img = np.transpose(input_img, (2, 0, 1))
|
85 |
+
# As pytorch tensor
|
86 |
+
input_img = torch.from_numpy(input_img).float()
|
87 |
+
|
88 |
+
#---------
|
89 |
+
# Label
|
90 |
+
#---------
|
91 |
+
|
92 |
+
label_path = self.label_files[index % len(self.img_files)].rstrip()
|
93 |
+
|
94 |
+
labels = None
|
95 |
+
if os.path.exists(label_path):
|
96 |
+
labels = np.loadtxt(label_path).reshape(-1, 5)
|
97 |
+
# Extract coordinates for unpadded + unscaled image
|
98 |
+
x1 = w * (labels[:, 1] - labels[:, 3]/2)
|
99 |
+
y1 = h * (labels[:, 2] - labels[:, 4]/2)
|
100 |
+
x2 = w * (labels[:, 1] + labels[:, 3]/2)
|
101 |
+
y2 = h * (labels[:, 2] + labels[:, 4]/2)
|
102 |
+
# Adjust for added padding
|
103 |
+
x1 += pad[1][0]
|
104 |
+
y1 += pad[0][0]
|
105 |
+
x2 += pad[1][0]
|
106 |
+
y2 += pad[0][0]
|
107 |
+
# Calculate ratios from coordinates
|
108 |
+
labels[:, 1] = ((x1 + x2) / 2) / padded_w
|
109 |
+
labels[:, 2] = ((y1 + y2) / 2) / padded_h
|
110 |
+
labels[:, 3] *= w / padded_w
|
111 |
+
labels[:, 4] *= h / padded_h
|
112 |
+
# Fill matrix
|
113 |
+
filled_labels = np.zeros((self.max_objects, 5))
|
114 |
+
if labels is not None:
|
115 |
+
filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
|
116 |
+
filled_labels = torch.from_numpy(filled_labels)
|
117 |
+
|
118 |
+
return img_path, input_img, filled_labels
|
119 |
+
|
120 |
+
def __len__(self):
|
121 |
+
return len(self.img_files)
|
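A short sketch of how the ImageFolder dataset above is typically consumed (the folder path is illustrative; each image is padded to a square, resized to img_size and returned channels-first):

from torch.utils.data import DataLoader
from utils.datasets import ImageFolder

# 'images' is a hypothetical directory of RGB frames
dataloader = DataLoader(ImageFolder('images', img_size=416),
                        batch_size=4, shuffle=False)

for img_paths, input_imgs in dataloader:
    # input_imgs: float tensor of shape (batch, 3, 416, 416)
    print(img_paths[0], input_imgs.shape)
    break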
utils/parse_config.py
ADDED
@@ -0,0 +1,36 @@
1 |
+
|
2 |
+
|
3 |
+
def parse_model_config(path):
|
4 |
+
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
|
5 |
+
file = open(path, 'r')
|
6 |
+
lines = file.read().split('\n')
|
7 |
+
lines = [x for x in lines if x and not x.startswith('#')]
|
8 |
+
lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
|
9 |
+
module_defs = []
|
10 |
+
for line in lines:
|
11 |
+
if line.startswith('['): # This marks the start of a new block
|
12 |
+
module_defs.append({})
|
13 |
+
module_defs[-1]['type'] = line[1:-1].rstrip()
|
14 |
+
if module_defs[-1]['type'] == 'convolutional':
|
15 |
+
module_defs[-1]['batch_normalize'] = 0
|
16 |
+
else:
|
17 |
+
key, value = line.split("=")
|
18 |
+
value = value.strip()
|
19 |
+
module_defs[-1][key.rstrip()] = value.strip()
|
20 |
+
|
21 |
+
return module_defs
|
22 |
+
|
23 |
+
def parse_data_config(path):
|
24 |
+
"""Parses the data configuration file"""
|
25 |
+
options = dict()
|
26 |
+
options['gpus'] = '0,1,2,3'
|
27 |
+
options['num_workers'] = '10'
|
28 |
+
with open(path, 'r') as fp:
|
29 |
+
lines = fp.readlines()
|
30 |
+
for line in lines:
|
31 |
+
line = line.strip()
|
32 |
+
if line == '' or line.startswith('#'):
|
33 |
+
continue
|
34 |
+
key, value = line.split('=')
|
35 |
+
options[key.strip()] = value.strip()
|
36 |
+
return options
|
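To illustrate what parse_model_config returns, a sketch with a two-block config (the path is an assumption; the real yolov3.cfg lives in the git-ignored config/ directory):

from utils.parse_config import parse_model_config

# Given a file containing, e.g.:
#   [net]
#   channels=3
#   [convolutional]
#   batch_normalize=1
#   filters=32
#   size=3
#   stride=1
#   pad=1
#   activation=leaky
# the parser returns one dict per block, with all values kept as strings:
#   [{'type': 'net', 'channels': '3'},
#    {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', ...}]
module_defs = parse_model_config('config/yolov3.cfg')
hyperparams = module_defs.pop(0)   # the leading [net] block holds the hyperparameters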
utils/utils.py
ADDED
@@ -0,0 +1,258 @@
1 |
+
from __future__ import division
|
2 |
+
import math
|
3 |
+
import time
|
4 |
+
import torch
|
5 |
+
import torch.nn as nn
|
6 |
+
import torch.nn.functional as F
|
7 |
+
from torch.autograd import Variable
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
#import matplotlib.pyplot as plt
|
11 |
+
#import matplotlib.patches as patches
|
12 |
+
|
13 |
+
|
14 |
+
def load_classes(path):
|
15 |
+
"""
|
16 |
+
Loads class labels at 'path'
|
17 |
+
"""
|
18 |
+
fp = open(path, "r")
|
19 |
+
names = fp.read().split("\n")[:-1]
|
20 |
+
return names
|
21 |
+
|
22 |
+
|
23 |
+
def weights_init_normal(m):
|
24 |
+
classname = m.__class__.__name__
|
25 |
+
if classname.find("Conv") != -1:
|
26 |
+
torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
|
27 |
+
elif classname.find("BatchNorm2d") != -1:
|
28 |
+
torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
|
29 |
+
torch.nn.init.constant_(m.bias.data, 0.0)
|
30 |
+
|
31 |
+
|
32 |
+
def compute_ap(recall, precision):
|
33 |
+
""" Compute the average precision, given the recall and precision curves.
|
34 |
+
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
|
35 |
+
|
36 |
+
# Arguments
|
37 |
+
recall: The recall curve (list).
|
38 |
+
precision: The precision curve (list).
|
39 |
+
# Returns
|
40 |
+
The average precision as computed in py-faster-rcnn.
|
41 |
+
"""
|
42 |
+
# correct AP calculation
|
43 |
+
# first append sentinel values at the end
|
44 |
+
mrec = np.concatenate(([0.0], recall, [1.0]))
|
45 |
+
mpre = np.concatenate(([0.0], precision, [0.0]))
|
46 |
+
|
47 |
+
# compute the precision envelope
|
48 |
+
for i in range(mpre.size - 1, 0, -1):
|
49 |
+
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
|
50 |
+
|
51 |
+
# to calculate area under PR curve, look for points
|
52 |
+
# where X axis (recall) changes value
|
53 |
+
i = np.where(mrec[1:] != mrec[:-1])[0]
|
54 |
+
|
55 |
+
# and sum (\Delta recall) * prec
|
56 |
+
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
|
57 |
+
return ap
|
58 |
+
|
59 |
+
|
60 |
+
def bbox_iou(box1, box2, x1y1x2y2=True):
|
61 |
+
"""
|
62 |
+
Returns the IoU of two bounding boxes
|
63 |
+
"""
|
64 |
+
if not x1y1x2y2:
|
65 |
+
# Transform from center and width to exact coordinates
|
66 |
+
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
|
67 |
+
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
|
68 |
+
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
|
69 |
+
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
|
70 |
+
else:
|
71 |
+
# Get the coordinates of bounding boxes
|
72 |
+
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
|
73 |
+
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
|
74 |
+
|
75 |
+
# get the coordinates of the intersection rectangle
|
76 |
+
inter_rect_x1 = torch.max(b1_x1, b2_x1)
|
77 |
+
inter_rect_y1 = torch.max(b1_y1, b2_y1)
|
78 |
+
inter_rect_x2 = torch.min(b1_x2, b2_x2)
|
79 |
+
inter_rect_y2 = torch.min(b1_y2, b2_y2)
|
80 |
+
# Intersection area
|
81 |
+
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
|
82 |
+
inter_rect_y2 - inter_rect_y1 + 1, min=0
|
83 |
+
)
|
84 |
+
# Union Area
|
85 |
+
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
|
86 |
+
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
|
87 |
+
|
88 |
+
iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
|
89 |
+
|
90 |
+
return iou
|
91 |
+
|
92 |
+
|
93 |
+
def bbox_iou_numpy(box1, box2):
|
94 |
+
"""Computes IoU between bounding boxes.
|
95 |
+
Parameters
|
96 |
+
----------
|
97 |
+
box1 : ndarray
|
98 |
+
(N, 4) shaped array with bboxes
|
99 |
+
box2 : ndarray
|
100 |
+
(M, 4) shaped array with bboxes
|
101 |
+
Returns
|
102 |
+
-------
|
103 |
+
: ndarray
|
104 |
+
(N, M) shaped array with IoUs
|
105 |
+
"""
|
106 |
+
area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
|
107 |
+
|
108 |
+
iw = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2]) - np.maximum(
|
109 |
+
np.expand_dims(box1[:, 0], 1), box2[:, 0]
|
110 |
+
)
|
111 |
+
ih = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3]) - np.maximum(
|
112 |
+
np.expand_dims(box1[:, 1], 1), box2[:, 1]
|
113 |
+
)
|
114 |
+
|
115 |
+
iw = np.maximum(iw, 0)
|
116 |
+
ih = np.maximum(ih, 0)
|
117 |
+
|
118 |
+
ua = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1) + area - iw * ih
|
119 |
+
|
120 |
+
ua = np.maximum(ua, np.finfo(float).eps)
|
121 |
+
|
122 |
+
intersection = iw * ih
|
123 |
+
|
124 |
+
return intersection / ua
|
125 |
+
|
126 |
+
|
127 |
+
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
|
128 |
+
"""
|
129 |
+
Removes detections with lower object confidence score than 'conf_thres' and performs
|
130 |
+
Non-Maximum Suppression to further filter detections.
|
131 |
+
Returns detections with shape:
|
132 |
+
(x1, y1, x2, y2, object_conf, class_score, class_pred)
|
133 |
+
"""
|
134 |
+
|
135 |
+
# From (center x, center y, width, height) to (x1, y1, x2, y2)
|
136 |
+
box_corner = prediction.new(prediction.shape)
|
137 |
+
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
|
138 |
+
box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
|
139 |
+
box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
|
140 |
+
box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
|
141 |
+
prediction[:, :, :4] = box_corner[:, :, :4]
|
142 |
+
|
143 |
+
output = [None for _ in range(len(prediction))]
|
144 |
+
for image_i, image_pred in enumerate(prediction):
|
145 |
+
# Filter out confidence scores below threshold
|
146 |
+
conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
|
147 |
+
image_pred = image_pred[conf_mask]
|
148 |
+
# If none are remaining => process next image
|
149 |
+
if not image_pred.size(0):
|
150 |
+
continue
|
151 |
+
# Get score and class with highest confidence
|
152 |
+
class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
|
153 |
+
# Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
|
154 |
+
detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
|
155 |
+
# Iterate through all predicted classes
|
156 |
+
unique_labels = detections[:, -1].cpu().unique()
|
157 |
+
if prediction.is_cuda:
|
158 |
+
unique_labels = unique_labels.cuda()
|
159 |
+
for c in unique_labels:
|
160 |
+
# Get the detections with the particular class
|
161 |
+
detections_class = detections[detections[:, -1] == c]
|
162 |
+
# Sort the detections by maximum objectness confidence
|
163 |
+
_, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
|
164 |
+
detections_class = detections_class[conf_sort_index]
|
165 |
+
# Perform non-maximum suppression
|
166 |
+
max_detections = []
|
167 |
+
while detections_class.size(0):
|
168 |
+
# Get detection with highest confidence and save as max detection
|
169 |
+
max_detections.append(detections_class[0].unsqueeze(0))
|
170 |
+
# Stop if we're at the last detection
|
171 |
+
if len(detections_class) == 1:
|
172 |
+
break
|
173 |
+
# Get the IOUs for all boxes with lower confidence
|
174 |
+
ious = bbox_iou(max_detections[-1], detections_class[1:])
|
175 |
+
# Remove detections with IoU >= NMS threshold
|
176 |
+
detections_class = detections_class[1:][ious < nms_thres]
|
177 |
+
|
178 |
+
max_detections = torch.cat(max_detections).data
|
179 |
+
# Add max detections to outputs
|
180 |
+
output[image_i] = (
|
181 |
+
max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
|
182 |
+
)
|
183 |
+
|
184 |
+
return output
|
185 |
+
|
186 |
+
|
187 |
+
def build_targets(
|
188 |
+
pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim
|
189 |
+
):
|
190 |
+
nB = target.size(0)
|
191 |
+
nA = num_anchors
|
192 |
+
nC = num_classes
|
193 |
+
nG = grid_size
|
194 |
+
mask = torch.zeros(nB, nA, nG, nG)
|
195 |
+
conf_mask = torch.ones(nB, nA, nG, nG)
|
196 |
+
tx = torch.zeros(nB, nA, nG, nG)
|
197 |
+
ty = torch.zeros(nB, nA, nG, nG)
|
198 |
+
tw = torch.zeros(nB, nA, nG, nG)
|
199 |
+
th = torch.zeros(nB, nA, nG, nG)
|
200 |
+
tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
|
201 |
+
tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)
|
202 |
+
|
203 |
+
nGT = 0
|
204 |
+
nCorrect = 0
|
205 |
+
for b in range(nB):
|
206 |
+
for t in range(target.shape[1]):
|
207 |
+
if target[b, t].sum() == 0:
|
208 |
+
continue
|
209 |
+
nGT += 1
|
210 |
+
# Convert to position relative to box
|
211 |
+
gx = target[b, t, 1] * nG
|
212 |
+
gy = target[b, t, 2] * nG
|
213 |
+
gw = target[b, t, 3] * nG
|
214 |
+
gh = target[b, t, 4] * nG
|
215 |
+
# Get grid box indices
|
216 |
+
gi = int(gx)
|
217 |
+
gj = int(gy)
|
218 |
+
# Get shape of gt box
|
219 |
+
gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
|
220 |
+
# Get shape of anchor box
|
221 |
+
anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
|
222 |
+
# Calculate iou between gt and anchor shapes
|
223 |
+
anch_ious = bbox_iou(gt_box, anchor_shapes)
|
224 |
+
# Where the overlap is larger than threshold set mask to zero (ignore)
|
225 |
+
conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
|
226 |
+
# Find the best matching anchor box
|
227 |
+
best_n = np.argmax(anch_ious)
|
228 |
+
# Get ground truth box
|
229 |
+
gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
|
230 |
+
# Get the best prediction
|
231 |
+
pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
|
232 |
+
# Masks
|
233 |
+
mask[b, best_n, gj, gi] = 1
|
234 |
+
conf_mask[b, best_n, gj, gi] = 1
|
235 |
+
# Coordinates
|
236 |
+
tx[b, best_n, gj, gi] = gx - gi
|
237 |
+
ty[b, best_n, gj, gi] = gy - gj
|
238 |
+
# Width and height
|
239 |
+
tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
|
240 |
+
th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
|
241 |
+
# One-hot encoding of label
|
242 |
+
target_label = int(target[b, t, 0])
|
243 |
+
tcls[b, best_n, gj, gi, target_label] = 1
|
244 |
+
tconf[b, best_n, gj, gi] = 1
|
245 |
+
|
246 |
+
# Calculate iou between ground truth and best matching prediction
|
247 |
+
iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
|
248 |
+
pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
|
249 |
+
score = pred_conf[b, best_n, gj, gi]
|
250 |
+
if iou > 0.5 and pred_label == target_label and score > 0.5:
|
251 |
+
nCorrect += 1
|
252 |
+
|
253 |
+
return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
|
254 |
+
|
255 |
+
|
256 |
+
def to_categorical(y, num_classes):
|
257 |
+
""" 1-hot encodes a tensor """
|
258 |
+
return torch.from_numpy(np.eye(num_classes, dtype="uint8")[y])
|
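A small sketch of non_max_suppression on random data, only to show the expected input and output shapes (the scores are synthetic, not from a trained model):

import torch
from utils.utils import non_max_suppression

num_classes = 80
# Raw detector output: (batch, boxes, 5 + num_classes), boxes as (cx, cy, w, h)
boxes = torch.rand(1, 100, 4) * 416
scores = torch.rand(1, 100, 1 + num_classes)   # objectness + per-class scores
prediction = torch.cat([boxes, scores], dim=-1)

detections = non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4)
# One entry per image: None, or rows of (x1, y1, x2, y2, obj_conf, class_score, class_pred)
if detections[0] is not None:
    print(detections[0].shape)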