danieladejumo committed on
Commit • 1ba06ec
1 Parent(s): 062916a
Files changed:
- .gitignore +3 -0
- Jupyternote Cheatsheet.ipynb +1 -0
- PyTorch_Object_Detection.ipynb +0 -0
- PyTorch_Object_Tracking.ipynb +1 -0
- __pycache__/models.cpython-37.pyc +0 -0
- __pycache__/sort.cpython-37.pyc +0 -0
- darknet-coco-object_detection.ipynb +0 -0
- models.py +350 -0
- object_tracker.py +110 -0
- sort.py +305 -0
- sort.py.old +317 -0
- utils/__pycache__/__init__.cpython-36.pyc +0 -0
- utils/__pycache__/datasets.cpython-36.pyc +0 -0
- utils/__pycache__/parse_config.cpython-36.pyc +0 -0
- utils/__pycache__/parse_config.cpython-37.pyc +0 -0
- utils/__pycache__/utils.cpython-36.pyc +0 -0
- utils/__pycache__/utils.cpython-37.pyc +0 -0
- utils/datasets.py +121 -0
- utils/parse_config.py +36 -0
- utils/utils.py +258 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+images/*
+videos/*
+config/*
Jupyternote Cheatsheet.ipynb
ADDED
@@ -0,0 +1 @@
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Jupyternote Cheatsheet.ipynb","provenance":[],"mount_file_id":"1rMSETYdooFC6fVgT0PaOovnBrB4ZWoys","authorship_tag":"ABX9TyN4O59ZYPVT0rGiUB3bfznT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Models"],"metadata":{"id":"ODx9TIOB4tCe"}},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BelRHeLw4qyQ","executionInfo":{"status":"ok","timestamp":1654537166220,"user_tz":-60,"elapsed":22,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"60695f20-3957-4958-aabd-c2ecff870977"},"outputs":[{"output_type":"stream","name":"stdout","text":["Writing models.py\n"]}],"source":["%%writefile models.py\n","from __future__ import division\n","\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","from torch.autograd import Variable\n","import numpy as np\n","\n","from PIL import Image\n","\n","from utils.parse_config import *\n","from utils.utils import build_targets\n","from collections import defaultdict\n","\n","##import matplotlib.pyplot as plt\n","##import matplotlib.patches as patches\n","\n","\n","def create_modules(module_defs):\n"," \"\"\"\n"," Constructs module list of layer blocks from module configuration in module_defs\n"," \"\"\"\n"," hyperparams = module_defs.pop(0)\n"," output_filters = [int(hyperparams[\"channels\"])]\n"," module_list = nn.ModuleList()\n"," for i, module_def in enumerate(module_defs):\n"," modules = nn.Sequential()\n","\n"," if module_def[\"type\"] == \"convolutional\":\n"," bn = int(module_def[\"batch_normalize\"])\n"," filters = int(module_def[\"filters\"])\n"," kernel_size = int(module_def[\"size\"])\n"," pad = (kernel_size - 1) // 2 if int(module_def[\"pad\"]) else 0\n"," modules.add_module(\n"," \"conv_%d\" % i,\n"," nn.Conv2d(\n"," in_channels=output_filters[-1],\n"," out_channels=filters,\n"," kernel_size=kernel_size,\n"," stride=int(module_def[\"stride\"]),\n"," padding=pad,\n"," bias=not bn,\n"," ),\n"," )\n"," if bn:\n"," modules.add_module(\"batch_norm_%d\" % i, nn.BatchNorm2d(filters))\n"," if module_def[\"activation\"] == \"leaky\":\n"," modules.add_module(\"leaky_%d\" % i, nn.LeakyReLU(0.1))\n","\n"," elif module_def[\"type\"] == \"maxpool\":\n"," kernel_size = int(module_def[\"size\"])\n"," stride = int(module_def[\"stride\"])\n"," if kernel_size == 2 and stride == 1:\n"," padding = nn.ZeroPad2d((0, 1, 0, 1))\n"," modules.add_module(\"_debug_padding_%d\" % i, padding)\n"," maxpool = nn.MaxPool2d(\n"," kernel_size=int(module_def[\"size\"]),\n"," stride=int(module_def[\"stride\"]),\n"," padding=int((kernel_size - 1) // 2),\n"," )\n"," modules.add_module(\"maxpool_%d\" % i, maxpool)\n","\n"," elif module_def[\"type\"] == \"upsample\":\n"," upsample = nn.Upsample(scale_factor=int(module_def[\"stride\"]), mode=\"nearest\")\n"," modules.add_module(\"upsample_%d\" % i, upsample)\n","\n"," elif module_def[\"type\"] == \"route\":\n"," layers = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," filters = sum([output_filters[layer_i] for layer_i in layers])\n"," modules.add_module(\"route_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"shortcut\":\n"," filters = output_filters[int(module_def[\"from\"])]\n"," modules.add_module(\"shortcut_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"yolo\":\n"," anchor_idxs = [int(x) for x in 
module_def[\"mask\"].split(\",\")]\n"," # Extract anchors\n"," anchors = [int(x) for x in module_def[\"anchors\"].split(\",\")]\n"," anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]\n"," anchors = [anchors[i] for i in anchor_idxs]\n"," num_classes = int(module_def[\"classes\"])\n"," img_height = int(hyperparams[\"height\"])\n"," # Define detection layer\n"," yolo_layer = YOLOLayer(anchors, num_classes, img_height)\n"," modules.add_module(\"yolo_%d\" % i, yolo_layer)\n"," # Register module list and number of output filters\n"," module_list.append(modules)\n"," output_filters.append(filters)\n","\n"," return hyperparams, module_list\n","\n","\n","class EmptyLayer(nn.Module):\n"," \"\"\"Placeholder for 'route' and 'shortcut' layers\"\"\"\n","\n"," def __init__(self):\n"," super(EmptyLayer, self).__init__()\n","\n","\n","class YOLOLayer(nn.Module):\n"," \"\"\"Detection layer\"\"\"\n","\n"," def __init__(self, anchors, num_classes, img_dim):\n"," super(YOLOLayer, self).__init__()\n"," self.anchors = anchors\n"," self.num_anchors = len(anchors)\n"," self.num_classes = num_classes\n"," self.bbox_attrs = 5 + num_classes\n"," self.image_dim = img_dim\n"," self.ignore_thres = 0.5\n"," self.lambda_coord = 1\n","\n"," self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss\n"," self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss\n"," self.ce_loss = nn.CrossEntropyLoss() # Class loss\n","\n"," def forward(self, x, targets=None):\n"," nA = self.num_anchors\n"," nB = x.size(0)\n"," nG = x.size(2)\n"," stride = self.image_dim / nG\n","\n"," # Tensors for cuda support\n"," FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor\n"," LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor\n"," ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor\n","\n"," prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()\n","\n"," # Get outputs\n"," x = torch.sigmoid(prediction[..., 0]) # Center x\n"," y = torch.sigmoid(prediction[..., 1]) # Center y\n"," w = prediction[..., 2] # Width\n"," h = prediction[..., 3] # Height\n"," pred_conf = torch.sigmoid(prediction[..., 4]) # Conf\n"," pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.\n","\n"," # Calculate offsets for each grid\n"," grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)\n"," grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)\n"," scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])\n"," anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))\n"," anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))\n","\n"," # Add offset and scale with anchors\n"," pred_boxes = FloatTensor(prediction[..., :4].shape)\n"," pred_boxes[..., 0] = x.data + grid_x\n"," pred_boxes[..., 1] = y.data + grid_y\n"," pred_boxes[..., 2] = torch.exp(w.data) * anchor_w\n"," pred_boxes[..., 3] = torch.exp(h.data) * anchor_h\n","\n"," # Training\n"," if targets is not None:\n","\n"," if x.is_cuda:\n"," self.mse_loss = self.mse_loss.cuda()\n"," self.bce_loss = self.bce_loss.cuda()\n"," self.ce_loss = self.ce_loss.cuda()\n","\n"," nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(\n"," pred_boxes=pred_boxes.cpu().data,\n"," pred_conf=pred_conf.cpu().data,\n"," pred_cls=pred_cls.cpu().data,\n"," target=targets.cpu().data,\n"," anchors=scaled_anchors.cpu().data,\n"," num_anchors=nA,\n"," num_classes=self.num_classes,\n"," grid_size=nG,\n"," 
ignore_thres=self.ignore_thres,\n"," img_dim=self.image_dim,\n"," )\n","\n"," nProposals = int((pred_conf > 0.5).sum().item())\n"," recall = float(nCorrect / nGT) if nGT else 1\n"," precision = float(nCorrect / nProposals)\n","\n"," # Handle masks\n"," mask = Variable(mask.type(ByteTensor))\n"," conf_mask = Variable(conf_mask.type(ByteTensor))\n","\n"," # Handle target variables\n"," tx = Variable(tx.type(FloatTensor), requires_grad=False)\n"," ty = Variable(ty.type(FloatTensor), requires_grad=False)\n"," tw = Variable(tw.type(FloatTensor), requires_grad=False)\n"," th = Variable(th.type(FloatTensor), requires_grad=False)\n"," tconf = Variable(tconf.type(FloatTensor), requires_grad=False)\n"," tcls = Variable(tcls.type(LongTensor), requires_grad=False)\n","\n"," # Get conf mask where gt and where there is no gt\n"," conf_mask_true = mask\n"," conf_mask_false = conf_mask - mask\n","\n"," # Mask outputs to ignore non-existing objects\n"," loss_x = self.mse_loss(x[mask], tx[mask])\n"," loss_y = self.mse_loss(y[mask], ty[mask])\n"," loss_w = self.mse_loss(w[mask], tw[mask])\n"," loss_h = self.mse_loss(h[mask], th[mask])\n"," loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(\n"," pred_conf[conf_mask_true], tconf[conf_mask_true]\n"," )\n"," loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))\n"," loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls\n","\n"," return (\n"," loss,\n"," loss_x.item(),\n"," loss_y.item(),\n"," loss_w.item(),\n"," loss_h.item(),\n"," loss_conf.item(),\n"," loss_cls.item(),\n"," recall,\n"," precision,\n"," )\n","\n"," else:\n"," # If not in training phase return predictions\n"," output = torch.cat(\n"," (\n"," pred_boxes.view(nB, -1, 4) * stride,\n"," pred_conf.view(nB, -1, 1),\n"," pred_cls.view(nB, -1, self.num_classes),\n"," ),\n"," -1,\n"," )\n"," return output\n","\n","\n","class Darknet(nn.Module):\n"," \"\"\"YOLOv3 object detection model\"\"\"\n","\n"," def __init__(self, config_path, img_size=416):\n"," super(Darknet, self).__init__()\n"," self.module_defs = parse_model_config(config_path)\n"," self.hyperparams, self.module_list = create_modules(self.module_defs)\n"," self.img_size = img_size\n"," self.seen = 0\n"," self.header_info = np.array([0, 0, 0, self.seen, 0])\n"," self.loss_names = [\"x\", \"y\", \"w\", \"h\", \"conf\", \"cls\", \"recall\", \"precision\"]\n","\n"," def forward(self, x, targets=None):\n"," is_training = targets is not None\n"," output = []\n"," self.losses = defaultdict(float)\n"," layer_outputs = []\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] in [\"convolutional\", \"upsample\", \"maxpool\"]:\n"," x = module(x)\n"," elif module_def[\"type\"] == \"route\":\n"," layer_i = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," x = torch.cat([layer_outputs[i] for i in layer_i], 1)\n"," elif module_def[\"type\"] == \"shortcut\":\n"," layer_i = int(module_def[\"from\"])\n"," x = layer_outputs[-1] + layer_outputs[layer_i]\n"," elif module_def[\"type\"] == \"yolo\":\n"," # Train phase: get loss\n"," if is_training:\n"," x, *losses = module[0](x, targets)\n"," for name, loss in zip(self.loss_names, losses):\n"," self.losses[name] += loss\n"," # Test phase: Get detections\n"," else:\n"," x = module(x)\n"," output.append(x)\n"," layer_outputs.append(x)\n","\n"," self.losses[\"recall\"] /= 3\n"," self.losses[\"precision\"] /= 3\n"," return sum(output) if is_training else torch.cat(output, 
1)\n","\n"," def load_weights(self, weights_path):\n"," \"\"\"Parses and loads the weights stored in 'weights_path'\"\"\"\n","\n"," # Open the weights file\n"," fp = open(weights_path, \"rb\")\n"," header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values\n","\n"," # Needed to write header when saving weights\n"," self.header_info = header\n","\n"," self.seen = header[3]\n"," weights = np.fromfile(fp, dtype=np.float32) # The rest are weights\n"," fp.close()\n","\n"," ptr = 0\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," if module_def[\"batch_normalize\"]:\n"," # Load BN bias, weights, running mean and running variance\n"," bn_layer = module[1]\n"," num_b = bn_layer.bias.numel() # Number of biases\n"," # Bias\n"," bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)\n"," bn_layer.bias.data.copy_(bn_b)\n"," ptr += num_b\n"," # Weight\n"," bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)\n"," bn_layer.weight.data.copy_(bn_w)\n"," ptr += num_b\n"," # Running Mean\n"," bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)\n"," bn_layer.running_mean.data.copy_(bn_rm)\n"," ptr += num_b\n"," # Running Var\n"," bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)\n"," bn_layer.running_var.data.copy_(bn_rv)\n"," ptr += num_b\n"," else:\n"," # Load conv. bias\n"," num_b = conv_layer.bias.numel()\n"," conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)\n"," conv_layer.bias.data.copy_(conv_b)\n"," ptr += num_b\n"," # Load conv. weights\n"," num_w = conv_layer.weight.numel()\n"," conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)\n"," conv_layer.weight.data.copy_(conv_w)\n"," ptr += num_w\n","\n"," \"\"\"\n"," @:param path - path of the new weights file\n"," @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)\n"," \"\"\"\n","\n"," def save_weights(self, path, cutoff=-1):\n","\n"," fp = open(path, \"wb\")\n"," self.header_info[3] = self.seen\n"," self.header_info.tofile(fp)\n","\n"," # Iterate through layers\n"," for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," # If batch norm, load bn first\n"," if module_def[\"batch_normalize\"]:\n"," bn_layer = module[1]\n"," bn_layer.bias.data.cpu().numpy().tofile(fp)\n"," bn_layer.weight.data.cpu().numpy().tofile(fp)\n"," bn_layer.running_mean.data.cpu().numpy().tofile(fp)\n"," bn_layer.running_var.data.cpu().numpy().tofile(fp)\n"," # Load conv bias\n"," else:\n"," conv_layer.bias.data.cpu().numpy().tofile(fp)\n"," # Load conv weights\n"," conv_layer.weight.data.cpu().numpy().tofile(fp)\n","\n"," fp.close()"]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ar8FuY3z43Fk","executionInfo":{"status":"ok","timestamp":1654537174809,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ce227d02-75a3-477d-becf-e1c2702c7001"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["models.py 
sample_data\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hRxa6vyoGbla","executionInfo":{"status":"ok","timestamp":1654537258168,"user_tz":-60,"elapsed":26,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ccaaf1dc-6769-4093-8769-c8aa3b809bdf"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%writefile Readme.md\n","Are you for real!!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cVKDwgGtGv7g","executionInfo":{"status":"ok","timestamp":1654537404197,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"41cdc392-059d-42be-b267-2a7f66d0a1f6"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["Overwriting Readme.md\n"]}]},{"cell_type":"code","source":["%cd Computer Vision"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"780vJiykHTmT","executionInfo":{"status":"ok","timestamp":1654537643123,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"159eb128-2a7a-41b3-b84c-7d517ff92454"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WeA417NzHe0W","executionInfo":{"status":"ok","timestamp":1654537646111,"user_tz":-60,"elapsed":408,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"036a3c8e-b106-46a8-b5de-b7adf66938ab"},"execution_count":15,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["%%writefile test.and\n","\n","Really I can now write to my drive!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hrSVQd-fHzai","executionInfo":{"status":"ok","timestamp":1654537570112,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"c58a5849-aaba-4fe3-c596-681a5e7df731"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["Writing test.and\n"]}]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jRtg6b1IH8KV","executionInfo":{"status":"ok","timestamp":1654537654214,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"dd49447d-6924-4176-f5a9-ca184b671be8"},"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["cnn-resnet-CIFAR10 darknet-COCO-object_detection feedforward-cnn-MNIST\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","ls -la\n","python --version"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iUpVW1oZIQnl","executionInfo":{"status":"ok","timestamp":1654537857269,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ff54c93a-9f2c-4453-d82f-c6c1683f61b8"},"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":["total 12\n","drwx------ 2 root root 4096 May 17 21:02 cnn-resnet-CIFAR10\n","drwx------ 2 root root 4096 Jun 6 16:38 darknet-COCO-object_detection\n","drwx------ 2 root root 4096 May 17 21:01 feedforward-cnn-MNIST\n","Python 
3.7.13\n"]}]},{"cell_type":"code","source":["%cd ../"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NJ7riTtCI2-V","executionInfo":{"status":"ok","timestamp":1654537984381,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"713f2de8-ae10-46b9-d5e9-bbfa779de2c8"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","cd \"drive/MyDrive/Python/Machine Learning\"\n","ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZAOqxQzPJc1k","executionInfo":{"status":"ok","timestamp":1654538084191,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"7b82c13f-3e14-47b5-bc12-25bdf0dee540"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["Articles\n","Computer Vision\n","Datasets\n","Deep-Learning-with-PyTorch-Jovian\n","Deep RL\n","FastAI Course\n","Generative Models\n","HuggingFace-Deep-RL\n","PyTorch\n","ZeroToGANS_Revision\n"]}]},{"cell_type":"code","source":["%run models.py"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":235},"id":"HvI6SRX8JsS7","executionInfo":{"status":"ok","timestamp":1654538109961,"user_tz":-60,"elapsed":2355,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"08a28f6a-76c2-4eaa-fa36-36d5a8e145ea"},"execution_count":27,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/content/models.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mPIL\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_config\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbuild_targets\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mcollections\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdefaultdict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'utils'"]}]},{"cell_type":"code","source":["%edit"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JLbktoGWJvft","executionInfo":{"status":"ok","timestamp":1654538391516,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"cce69d99-b879-4600-a9ba-9afb5a58b76a"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["IPython will make a temporary file named: /tmp/ipython_edit_nffqr1eo/ipython_edit_msvbxat4.py\n"]}]},{"cell_type":"code","source":["%load models.py"],"metadata":{"id":"PI_bYsujKQfx","executionInfo":{"status":"ok","timestamp":1654538646656,"user_tz":-60,"elapsed":443,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}}},"execution_count":31,"outputs":[]},{"cell_type":"code","source":["%%writefile\n","%run\n","%cd\n","%cat\n","%load [-r, -s]\n","%edit\n","%time, %%time\n","%timeit, %%timeit\n","%%html\n","%env, ...\n","%%file, alias for writefile\n","%%bash\n","%matplotlib [inline, ...]\n","and more\n","%paste, %cpaste\n","%pinfo\n","%who\n","%lsmagic\n","%pwd"],"metadata":{"id":"GdCgR_KCL7MK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%quickref\n","%%js\n","%%python[2, 3]\n","%%latex\n","%%shell\n","%%svg"],"metadata":{"id":"B4QAAv64NHRW","executionInfo":{"status":"ok","timestamp":1654539235689,"user_tz":-60,"elapsed":445,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":38,"outputs":[]},{"cell_type":"code","source":["%system, %%system\n","%sx, %%sx"],"metadata":{"id":"psD0AZ7YNJBZ"},"execution_count":null,"outputs":[]}]}
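The cheatsheet notebook above mostly exercises IPython magics (%%writefile, %run, %cd, %%bash, and so on). As a minimal sketch of how the first two work together — assuming an IPython/Colab kernel, with the file name purely illustrative, not part of this commit:

%%writefile example.py
# Written to disk by the %%writefile cell magic; a later cell can then
# execute it with `%run example.py`. File name and contents are illustrative.
print("written by %%writefile, executed by %run")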
PyTorch_Object_Detection.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
PyTorch_Object_Tracking.ipynb
ADDED
@@ -0,0 +1 @@
{"cells":[{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1VkPIQMBmJMO","executionInfo":{"status":"ok","timestamp":1654700494173,"user_tz":-60,"elapsed":3080,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"1e3cd91c-ca69-486b-b182-d2f31583b645"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"code","source":["%cd ./drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"izkEuuuPmTZf","executionInfo":{"status":"ok","timestamp":1654700494174,"user_tz":-60,"elapsed":11,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"daf99ba8-1ed2-4935-e2b9-3481fef9584a"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection\n"]}]},{"cell_type":"code","source":["!pip install filterpy --quiet"],"metadata":{"id":"qXFwvyxqmXDr","executionInfo":{"status":"ok","timestamp":1654700498924,"user_tz":-60,"elapsed":4757,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["!pip install lap --quiet"],"metadata":{"id":"zqK3-Fn2oRsc","executionInfo":{"status":"ok","timestamp":1654700503070,"user_tz":-60,"elapsed":4165,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","execution_count":5,"metadata":{"id":"kHwKuAkPlviV","executionInfo":{"status":"ok","timestamp":1654700504310,"user_tz":-60,"elapsed":1248,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["from models import *\n","from utils import *\n","\n","import os, sys, time, datetime, random\n","import torch\n","from torch.utils.data import DataLoader\n","from torchvision import datasets, transforms\n","from torch.autograd import Variable\n","\n","import matplotlib.pyplot as plt\n","import matplotlib.patches as patches\n","from PIL import Image"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"N5uZwVlClvie","executionInfo":{"status":"ok","timestamp":1654700508098,"user_tz":-60,"elapsed":3795,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"3a3e75b1-3379-4e79-f418-0b8a48ffb62f"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.\n"," warnings.warn(warning.format(ret))\n"]}],"source":["config_path='config/yolov3.cfg'\n","weights_path='config/yolov3.weights'\n","class_path='config/coco.names'\n","img_size=416\n","conf_thres=0.8\n","nms_thres=0.4\n","\n","# Load model and weights\n","model = Darknet(config_path, img_size=img_size)\n","model.load_weights(weights_path)\n","model.cuda()\n","model.eval()\n","classes = utils.load_classes(class_path)\n","Tensor = 
torch.cuda.FloatTensor"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"n4NNQSOYlvij","executionInfo":{"status":"ok","timestamp":1654700508099,"user_tz":-60,"elapsed":9,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["def detect_image(img):\n"," # scale and pad image\n"," ratio = min(img_size/img.size[0], img_size/img.size[1])\n"," imw = round(img.size[0] * ratio)\n"," imh = round(img.size[1] * ratio)\n"," img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),\n"," transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),\n"," (128,128,128)),\n"," transforms.ToTensor(),\n"," ])\n"," # convert image to Tensor\n"," image_tensor = img_transforms(img).float()\n"," image_tensor = image_tensor.unsqueeze_(0)\n"," input_img = Variable(image_tensor.type(Tensor))\n"," # run inference on the model and get detections\n"," with torch.no_grad():\n"," detections = model(input_img)\n"," detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)\n"," return detections[0]"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VUHsE2-lvik","executionInfo":{"status":"ok","timestamp":1654700521379,"user_tz":-60,"elapsed":13287,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"6144a350-24e9-4a7c-95c5-96bb66b824e0"},"outputs":[{"output_type":"stream","name":"stdout","text":["Populating the interactive namespace from numpy and matplotlib\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['random']\n","`%matplotlib` prevents importing * from pylab and numpy\n"," \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"]},{"output_type":"stream","name":"stdout","text":["Video size 1280 720\n"]},{"output_type":"stream","name":"stderr","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/sort.py:38: NumbaWarning: \n","Compilation is falling back to object mode WITH looplifting enabled because Function \"iou\" failed type inference due to: non-precise type pyobject\n","During: typing of argument at /content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/sort.py (43)\n","\n","File \"sort.py\", line 43:\n","def iou(bb_test,bb_gt):\n"," <source elided>\n"," \"\"\"\n"," xx1 = np.maximum(bb_test[0], bb_gt[0])\n"," ^\n","\n"," @jit\n","/usr/local/lib/python3.7/dist-packages/numba/core/object_mode_passes.py:178: NumbaWarning: Function \"iou\" was compiled in object mode without forceobj=True.\n","\n","File \"sort.py\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n","/usr/local/lib/python3.7/dist-packages/numba/core/object_mode_passes.py:188: NumbaDeprecationWarning: \n","Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n","\n","For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit\n","\n","File \"sort.py\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n"]}],"source":["videopath = './videos/HorseRacing.mp4'\n","\n","%pylab inline \n","import cv2\n","from IPython.display import clear_output\n","\n","cmap = 
plt.get_cmap('tab20b')\n","colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]\n","\n","# initialize Sort object and video capture\n","from sort import *\n","vid = cv2.VideoCapture(videopath)\n","mot_tracker = Sort()\n","\n","fourcc = cv2.VideoWriter_fourcc(*'XVID')\n","ret,frame=vid.read()\n","vw = frame.shape[1]\n","vh = frame.shape[0]\n","print (\"Video size\", vw,vh)\n","outvideo = cv2.VideoWriter(videopath.replace(\".mp4\", \"-det.mp4\"),fourcc,20.0,(vw,vh))\n","\n","# while(True):\n","for ii in range(40):\n"," ret, frame = vid.read()\n"," if not ret:\n"," print(\"Done Procesing Video\")\n"," break\n"," frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n"," pilimg = Image.fromarray(frame)\n"," detections = detect_image(pilimg)\n","\n"," img = np.array(pilimg)\n"," pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))\n"," pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))\n"," unpad_h = img_size - pad_y\n"," unpad_w = img_size - pad_x\n"," if detections is not None:\n"," tracked_objects = mot_tracker.update(detections.cpu())\n","\n"," unique_labels = detections[:, -1].cpu().unique()\n"," n_cls_preds = len(unique_labels)\n"," for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:\n"," box_h = int(((y2 - y1) / unpad_h) * img.shape[0])\n"," box_w = int(((x2 - x1) / unpad_w) * img.shape[1])\n"," y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])\n"," x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])\n","\n"," color = colors[int(obj_id) % len(colors)]\n"," color = [i * 255 for i in color]\n"," cls = classes[int(cls_pred)]\n"," cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)\n"," cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+60, y1), color, -1)\n"," cv2.putText(frame, cls + \"-\" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)\n","\n"," outvideo.write(frame)\n","\n","outvideo.release()"]},{"cell_type":"code","source":["from pathlib import Path\n","from IPython import display as ipythondisplay\n","import base64\n","\n","def show_videos(video_path='', prefix=''):\n"," html = []\n"," for mp4 in Path(video_path).glob(f\"{prefix}*.mp4\"):\n"," video_b64 = base64.b64encode(mp4.read_bytes())\n"," html.append('''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(mp4, video_b64.decode('ascii')))\n"," break\n"," ipythondisplay.display(ipythondisplay.HTML(data=\"<br>\".join(html)))"],"metadata":{"id":"Xx6d_F3VstfA","executionInfo":{"status":"ok","timestamp":1654700521380,"user_tz":-60,"elapsed":19,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["video_b64 = base64.b64encode(Path(videopath.replace(\".mp4\", \"-det.mp4\")).read_bytes())\n","html = '''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(Path(videopath), video_b64.decode('ascii'))\n","ipythondisplay.display(ipythondisplay.HTML(data=html)) "],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":421,"output_embedded_package_id":"1KE6a6Jf_qBrnIGEjOY8GYXagvaaGt84D"},"id":"K3VrKNb3yUbH","executionInfo":{"status":"ok","timestamp":1654700524974,"user_tz":-60,"elapsed":3611,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}},"outputId":"92ea1435-9e17-4167-c094-dd1e380b200f"},"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in https://colab.research.google.com to view."},"metadata":{}}]}],"metadata":{"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"colab":{"name":"PyTorch_Object_Tracking.ipynb","provenance":[],"collapsed_sections":[]},"accelerator":"GPU"},"nbformat":4,"nbformat_minor":0}
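The tracking loop in the notebook above hands YOLO detections to the SORT tracker from sort.py (added later in this commit). A minimal sketch of that hand-off, assuming sort.py and filterpy are importable and that update() accepts the same seven-column detection rows the notebook passes it; the box values are made up:

import numpy as np
from sort import Sort  # sort.py from this commit

mot_tracker = Sort()
# One detection per row, in the layout the YOLO output uses before tracking:
# x1, y1, x2, y2, objectness, class confidence, class index (all illustrative).
frame_dets = np.array([[100.0, 120.0, 220.0, 260.0, 0.9, 0.9, 17.0]])
tracked = mot_tracker.update(frame_dets)
# Each tracked row is unpacked the same way the notebook does:
# x1, y1, x2, y2, track id, class index.
for x1, y1, x2, y2, obj_id, cls_pred in tracked:
    print(int(obj_id), int(cls_pred), x1, y1, x2, y2)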
__pycache__/models.cpython-37.pyc
ADDED
Binary file (9.65 kB).
__pycache__/sort.cpython-37.pyc
ADDED
Binary file (10.2 kB).
darknet-coco-object_detection.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
models.py
ADDED
@@ -0,0 +1,350 @@
1 |
+
from __future__ import division
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torch.autograd import Variable
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
from PIL import Image
|
10 |
+
|
11 |
+
from utils.parse_config import *
|
12 |
+
from utils.utils import build_targets
|
13 |
+
from collections import defaultdict
|
14 |
+
|
15 |
+
##import matplotlib.pyplot as plt
|
16 |
+
##import matplotlib.patches as patches
|
17 |
+
|
18 |
+
|
19 |
+
def create_modules(module_defs):
|
20 |
+
"""
|
21 |
+
Constructs module list of layer blocks from module configuration in module_defs
|
22 |
+
"""
|
23 |
+
hyperparams = module_defs.pop(0)
|
24 |
+
output_filters = [int(hyperparams["channels"])]
|
25 |
+
module_list = nn.ModuleList()
|
26 |
+
for i, module_def in enumerate(module_defs):
|
27 |
+
modules = nn.Sequential()
|
28 |
+
|
29 |
+
if module_def["type"] == "convolutional":
|
30 |
+
bn = int(module_def["batch_normalize"])
|
31 |
+
filters = int(module_def["filters"])
|
32 |
+
kernel_size = int(module_def["size"])
|
33 |
+
pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
|
34 |
+
modules.add_module(
|
35 |
+
"conv_%d" % i,
|
36 |
+
nn.Conv2d(
|
37 |
+
in_channels=output_filters[-1],
|
38 |
+
out_channels=filters,
|
39 |
+
kernel_size=kernel_size,
|
40 |
+
stride=int(module_def["stride"]),
|
41 |
+
padding=pad,
|
42 |
+
bias=not bn,
|
43 |
+
),
|
44 |
+
)
|
45 |
+
if bn:
|
46 |
+
modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
|
47 |
+
if module_def["activation"] == "leaky":
|
48 |
+
modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))
|
49 |
+
|
50 |
+
elif module_def["type"] == "maxpool":
|
51 |
+
kernel_size = int(module_def["size"])
|
52 |
+
stride = int(module_def["stride"])
|
53 |
+
if kernel_size == 2 and stride == 1:
|
54 |
+
padding = nn.ZeroPad2d((0, 1, 0, 1))
|
55 |
+
modules.add_module("_debug_padding_%d" % i, padding)
|
56 |
+
maxpool = nn.MaxPool2d(
|
57 |
+
kernel_size=int(module_def["size"]),
|
58 |
+
stride=int(module_def["stride"]),
|
59 |
+
padding=int((kernel_size - 1) // 2),
|
60 |
+
)
|
61 |
+
modules.add_module("maxpool_%d" % i, maxpool)
|
62 |
+
|
63 |
+
elif module_def["type"] == "upsample":
|
64 |
+
upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
|
65 |
+
modules.add_module("upsample_%d" % i, upsample)
|
66 |
+
|
67 |
+
elif module_def["type"] == "route":
|
68 |
+
layers = [int(x) for x in module_def["layers"].split(",")]
|
69 |
+
filters = sum([output_filters[layer_i] for layer_i in layers])
|
70 |
+
modules.add_module("route_%d" % i, EmptyLayer())
|
71 |
+
|
72 |
+
elif module_def["type"] == "shortcut":
|
73 |
+
filters = output_filters[int(module_def["from"])]
|
74 |
+
modules.add_module("shortcut_%d" % i, EmptyLayer())
|
75 |
+
|
76 |
+
elif module_def["type"] == "yolo":
|
77 |
+
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
|
78 |
+
# Extract anchors
|
79 |
+
anchors = [int(x) for x in module_def["anchors"].split(",")]
|
80 |
+
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
|
81 |
+
anchors = [anchors[i] for i in anchor_idxs]
|
82 |
+
num_classes = int(module_def["classes"])
|
83 |
+
img_height = int(hyperparams["height"])
|
84 |
+
# Define detection layer
|
85 |
+
yolo_layer = YOLOLayer(anchors, num_classes, img_height)
|
86 |
+
modules.add_module("yolo_%d" % i, yolo_layer)
|
87 |
+
# Register module list and number of output filters
|
88 |
+
module_list.append(modules)
|
89 |
+
output_filters.append(filters)
|
90 |
+
|
91 |
+
return hyperparams, module_list
|
92 |
+
|
93 |
+
|
94 |
+
class EmptyLayer(nn.Module):
|
95 |
+
"""Placeholder for 'route' and 'shortcut' layers"""
|
96 |
+
|
97 |
+
def __init__(self):
|
98 |
+
super(EmptyLayer, self).__init__()
|
99 |
+
|
100 |
+
|
101 |
+
class YOLOLayer(nn.Module):
|
102 |
+
"""Detection layer"""
|
103 |
+
|
104 |
+
def __init__(self, anchors, num_classes, img_dim):
|
105 |
+
super(YOLOLayer, self).__init__()
|
106 |
+
self.anchors = anchors
|
107 |
+
self.num_anchors = len(anchors)
|
108 |
+
self.num_classes = num_classes
|
109 |
+
self.bbox_attrs = 5 + num_classes
|
110 |
+
self.image_dim = img_dim
|
111 |
+
self.ignore_thres = 0.5
|
112 |
+
self.lambda_coord = 1
|
113 |
+
|
114 |
+
self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss
|
115 |
+
self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss
|
116 |
+
self.ce_loss = nn.CrossEntropyLoss() # Class loss
|
117 |
+
|
118 |
+
def forward(self, x, targets=None):
|
119 |
+
nA = self.num_anchors
|
120 |
+
nB = x.size(0)
|
121 |
+
nG = x.size(2)
|
122 |
+
stride = self.image_dim / nG
|
123 |
+
|
124 |
+
# Tensors for cuda support
|
125 |
+
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
|
126 |
+
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
|
127 |
+
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
|
128 |
+
|
129 |
+
prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()
|
130 |
+
|
131 |
+
# Get outputs
|
132 |
+
x = torch.sigmoid(prediction[..., 0]) # Center x
|
133 |
+
y = torch.sigmoid(prediction[..., 1]) # Center y
|
134 |
+
w = prediction[..., 2] # Width
|
135 |
+
h = prediction[..., 3] # Height
|
136 |
+
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
|
137 |
+
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
|
138 |
+
|
139 |
+
# Calculate offsets for each grid
|
140 |
+
grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
|
141 |
+
grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
|
142 |
+
scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
|
143 |
+
anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
|
144 |
+
anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
|
145 |
+
|
146 |
+
# Add offset and scale with anchors
|
147 |
+
pred_boxes = FloatTensor(prediction[..., :4].shape)
|
148 |
+
pred_boxes[..., 0] = x.data + grid_x
|
149 |
+
pred_boxes[..., 1] = y.data + grid_y
|
150 |
+
pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
|
151 |
+
pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
|
152 |
+
|
153 |
+
# Training
|
154 |
+
if targets is not None:
|
155 |
+
|
156 |
+
if x.is_cuda:
|
157 |
+
self.mse_loss = self.mse_loss.cuda()
|
158 |
+
self.bce_loss = self.bce_loss.cuda()
|
159 |
+
self.ce_loss = self.ce_loss.cuda()
|
160 |
+
|
161 |
+
nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
|
162 |
+
pred_boxes=pred_boxes.cpu().data,
|
163 |
+
pred_conf=pred_conf.cpu().data,
|
164 |
+
pred_cls=pred_cls.cpu().data,
|
165 |
+
target=targets.cpu().data,
|
166 |
+
anchors=scaled_anchors.cpu().data,
|
167 |
+
num_anchors=nA,
|
168 |
+
num_classes=self.num_classes,
|
169 |
+
grid_size=nG,
|
170 |
+
ignore_thres=self.ignore_thres,
|
171 |
+
img_dim=self.image_dim,
|
172 |
+
)
|
173 |
+
|
174 |
+
nProposals = int((pred_conf > 0.5).sum().item())
|
175 |
+
recall = float(nCorrect / nGT) if nGT else 1
|
176 |
+
precision = float(nCorrect / nProposals)
|
177 |
+
|
178 |
+
# Handle masks
|
179 |
+
mask = Variable(mask.type(ByteTensor))
|
180 |
+
conf_mask = Variable(conf_mask.type(ByteTensor))
|
181 |
+
|
182 |
+
# Handle target variables
|
183 |
+
tx = Variable(tx.type(FloatTensor), requires_grad=False)
|
184 |
+
ty = Variable(ty.type(FloatTensor), requires_grad=False)
|
185 |
+
tw = Variable(tw.type(FloatTensor), requires_grad=False)
|
186 |
+
th = Variable(th.type(FloatTensor), requires_grad=False)
|
187 |
+
tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
|
188 |
+
tcls = Variable(tcls.type(LongTensor), requires_grad=False)
|
189 |
+
|
190 |
+
# Get conf mask where gt and where there is no gt
|
191 |
+
conf_mask_true = mask
|
192 |
+
conf_mask_false = conf_mask - mask
|
193 |
+
|
194 |
+
# Mask outputs to ignore non-existing objects
|
195 |
+
loss_x = self.mse_loss(x[mask], tx[mask])
|
196 |
+
loss_y = self.mse_loss(y[mask], ty[mask])
|
197 |
+
loss_w = self.mse_loss(w[mask], tw[mask])
|
198 |
+
loss_h = self.mse_loss(h[mask], th[mask])
|
199 |
+
loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
|
200 |
+
pred_conf[conf_mask_true], tconf[conf_mask_true]
|
201 |
+
)
|
202 |
+
loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
|
203 |
+
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
|
204 |
+
|
205 |
+
return (
|
206 |
+
loss,
|
207 |
+
loss_x.item(),
|
208 |
+
loss_y.item(),
|
209 |
+
loss_w.item(),
|
210 |
+
loss_h.item(),
|
211 |
+
loss_conf.item(),
|
212 |
+
loss_cls.item(),
|
213 |
+
recall,
|
214 |
+
precision,
|
215 |
+
)
|
216 |
+
|
217 |
+
else:
|
218 |
+
# If not in training phase return predictions
|
219 |
+
output = torch.cat(
|
220 |
+
(
|
221 |
+
pred_boxes.view(nB, -1, 4) * stride,
|
222 |
+
pred_conf.view(nB, -1, 1),
|
223 |
+
pred_cls.view(nB, -1, self.num_classes),
|
224 |
+
),
|
225 |
+
-1,
|
226 |
+
)
|
227 |
+
return output
|
228 |
+
|
229 |
+
|
230 |
+
class Darknet(nn.Module):
|
231 |
+
"""YOLOv3 object detection model"""
|
232 |
+
|
233 |
+
def __init__(self, config_path, img_size=416):
|
234 |
+
super(Darknet, self).__init__()
|
235 |
+
self.module_defs = parse_model_config(config_path)
|
236 |
+
self.hyperparams, self.module_list = create_modules(self.module_defs)
|
237 |
+
self.img_size = img_size
|
238 |
+
self.seen = 0
|
239 |
+
self.header_info = np.array([0, 0, 0, self.seen, 0])
|
240 |
+
self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
|
241 |
+
|
242 |
+
def forward(self, x, targets=None):
|
243 |
+
is_training = targets is not None
|
244 |
+
output = []
|
245 |
+
self.losses = defaultdict(float)
|
246 |
+
layer_outputs = []
|
247 |
+
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
|
248 |
+
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
|
249 |
+
x = module(x)
|
250 |
+
elif module_def["type"] == "route":
|
251 |
+
layer_i = [int(x) for x in module_def["layers"].split(",")]
|
252 |
+
x = torch.cat([layer_outputs[i] for i in layer_i], 1)
|
253 |
+
elif module_def["type"] == "shortcut":
|
254 |
+
layer_i = int(module_def["from"])
|
255 |
+
x = layer_outputs[-1] + layer_outputs[layer_i]
|
256 |
+
elif module_def["type"] == "yolo":
|
257 |
+
# Train phase: get loss
|
258 |
+
if is_training:
|
259 |
+
x, *losses = module[0](x, targets)
|
260 |
+
for name, loss in zip(self.loss_names, losses):
|
261 |
+
self.losses[name] += loss
|
262 |
+
# Test phase: Get detections
|
263 |
+
else:
|
264 |
+
x = module(x)
|
265 |
+
output.append(x)
|
266 |
+
layer_outputs.append(x)
|
267 |
+
|
268 |
+
self.losses["recall"] /= 3
|
269 |
+
self.losses["precision"] /= 3
|
270 |
+
return sum(output) if is_training else torch.cat(output, 1)
|
271 |
+
|
272 |
+
def load_weights(self, weights_path):
|
273 |
+
"""Parses and loads the weights stored in 'weights_path'"""
|
274 |
+
|
275 |
+
# Open the weights file
|
276 |
+
fp = open(weights_path, "rb")
|
277 |
+
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
|
278 |
+
|
279 |
+
# Needed to write header when saving weights
|
280 |
+
self.header_info = header
|
281 |
+
|
282 |
+
self.seen = header[3]
|
283 |
+
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
|
284 |
+
fp.close()
|
285 |
+
|
286 |
+
ptr = 0
|
287 |
+
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
|
288 |
+
if module_def["type"] == "convolutional":
|
289 |
+
conv_layer = module[0]
|
290 |
+
if module_def["batch_normalize"]:
|
291 |
+
# Load BN bias, weights, running mean and running variance
|
292 |
+
bn_layer = module[1]
|
293 |
+
num_b = bn_layer.bias.numel() # Number of biases
|
294 |
+
# Bias
|
295 |
+
bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
|
296 |
+
bn_layer.bias.data.copy_(bn_b)
|
297 |
+
ptr += num_b
|
298 |
+
# Weight
|
299 |
+
bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
|
300 |
+
bn_layer.weight.data.copy_(bn_w)
|
301 |
+
ptr += num_b
|
302 |
+
# Running Mean
|
303 |
+
bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
|
304 |
+
bn_layer.running_mean.data.copy_(bn_rm)
|
305 |
+
ptr += num_b
|
306 |
+
# Running Var
|
307 |
+
bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
|
308 |
+
bn_layer.running_var.data.copy_(bn_rv)
|
309 |
+
ptr += num_b
|
310 |
+
else:
|
311 |
+
# Load conv. bias
|
312 |
+
num_b = conv_layer.bias.numel()
|
313 |
+
conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
|
314 |
+
conv_layer.bias.data.copy_(conv_b)
|
315 |
+
ptr += num_b
|
316 |
+
# Load conv. weights
|
317 |
+
num_w = conv_layer.weight.numel()
|
318 |
+
conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
|
319 |
+
conv_layer.weight.data.copy_(conv_w)
|
320 |
+
ptr += num_w
|
321 |
+
|
322 |
+
"""
|
323 |
+
@:param path - path of the new weights file
|
324 |
+
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
|
325 |
+
"""
|
326 |
+
|
327 |
+
def save_weights(self, path, cutoff=-1):
|
328 |
+
|
329 |
+
fp = open(path, "wb")
|
330 |
+
self.header_info[3] = self.seen
|
331 |
+
self.header_info.tofile(fp)
|
332 |
+
|
333 |
+
# Iterate through layers
|
334 |
+
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
|
335 |
+
if module_def["type"] == "convolutional":
|
336 |
+
conv_layer = module[0]
|
337 |
+
# If batch norm, load bn first
|
338 |
+
if module_def["batch_normalize"]:
|
339 |
+
bn_layer = module[1]
|
340 |
+
bn_layer.bias.data.cpu().numpy().tofile(fp)
|
341 |
+
bn_layer.weight.data.cpu().numpy().tofile(fp)
|
342 |
+
bn_layer.running_mean.data.cpu().numpy().tofile(fp)
|
343 |
+
bn_layer.running_var.data.cpu().numpy().tofile(fp)
|
344 |
+
# Load conv bias
|
345 |
+
else:
|
346 |
+
conv_layer.bias.data.cpu().numpy().tofile(fp)
|
347 |
+
# Load conv weights
|
348 |
+
conv_layer.weight.data.cpu().numpy().tofile(fp)
|
349 |
+
|
350 |
+
fp.close()
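For reference, a minimal inference sketch for the Darknet class added above, assuming the YOLOv3 config and pretrained weights the notebooks point at (config/yolov3.cfg, config/yolov3.weights — excluded by the new .gitignore, so fetched separately) are present on disk; the dummy input is illustrative:

import torch
from models import Darknet

model = Darknet("config/yolov3.cfg", img_size=416)
model.load_weights("config/yolov3.weights")
model.eval()

# A single black 416x416 RGB image stands in for a real frame. With no
# targets, the forward pass returns the concatenated predictions of the
# three YOLO heads: (batch, num_boxes, 5 + num_classes).
dummy = torch.zeros(1, 3, 416, 416)
with torch.no_grad():
    detections = model(dummy)
print(detections.shape)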
object_tracker.py
ADDED
@@ -0,0 +1,110 @@
1 |
+
from models import *
|
2 |
+
from utils import *
|
3 |
+
|
4 |
+
import os, sys, time, datetime, random
|
5 |
+
import torch
|
6 |
+
from torch.utils.data import DataLoader
|
7 |
+
from torchvision import datasets, transforms
|
8 |
+
from torch.autograd import Variable
|
9 |
+
|
10 |
+
from PIL import Image
|
11 |
+
|
12 |
+
# load weights and set defaults
|
13 |
+
config_path='config/yolov3.cfg'
|
14 |
+
weights_path='config/yolov3.weights'
|
15 |
+
class_path='config/coco.names'
|
16 |
+
img_size=416
|
17 |
+
conf_thres=0.8
|
18 |
+
nms_thres=0.4
|
19 |
+
|
20 |
+
# load model and put into eval mode
|
21 |
+
model = Darknet(config_path, img_size=img_size)
|
22 |
+
model.load_weights(weights_path)
|
23 |
+
model.cuda()
|
24 |
+
model.eval()
|
25 |
+
|
26 |
+
classes = utils.load_classes(class_path)
|
27 |
+
Tensor = torch.cuda.FloatTensor
|
28 |
+
|
29 |
+
def detect_image(img):
|
30 |
+
# scale and pad image
|
31 |
+
ratio = min(img_size/img.size[0], img_size/img.size[1])
|
32 |
+
imw = round(img.size[0] * ratio)
|
33 |
+
imh = round(img.size[1] * ratio)
|
34 |
+
img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),
|
35 |
+
transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),
|
36 |
+
(128,128,128)),
|
37 |
+
transforms.ToTensor(),
|
38 |
+
])
|
39 |
+
# convert image to Tensor
|
40 |
+
image_tensor = img_transforms(img).float()
|
41 |
+
image_tensor = image_tensor.unsqueeze_(0)
|
42 |
+
input_img = Variable(image_tensor.type(Tensor))
|
43 |
+
# run inference on the model and get detections
|
44 |
+
with torch.no_grad():
|
45 |
+
detections = model(input_img)
|
46 |
+
detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)
|
47 |
+
return detections[0]
|
48 |
+
|
49 |
+
videopath = './videos/HorseRacing.mp4'
|
50 |
+
|
51 |
+
import cv2
|
52 |
+
from sort import *
|
53 |
+
colors=[(255,0,0),(0,255,0),(0,0,255),(255,0,255),(128,0,0),(0,128,0),(0,0,128),(128,0,128),(128,128,0),(0,128,128)]
|
54 |
+
|
55 |
+
vid = cv2.VideoCapture(videopath)
|
56 |
+
mot_tracker = Sort()
|
57 |
+
|
58 |
+
cv2.namedWindow('Stream',cv2.WINDOW_NORMAL)
|
59 |
+
cv2.resizeWindow('Stream', (800,600))
|
60 |
+
|
61 |
+
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
62 |
+
ret,frame=vid.read()
|
63 |
+
vw = frame.shape[1]
|
64 |
+
vh = frame.shape[0]
|
65 |
+
print ("Video size", vw,vh)
|
66 |
+
outvideo = cv2.VideoWriter(videopath.replace(".mp4", "-det.mp4"),fourcc,20.0,(vw,vh))
|
67 |
+
|
68 |
+
frames = 0
|
69 |
+
starttime = time.time()
|
70 |
+
while(True):
|
71 |
+
ret, frame = vid.read()
|
72 |
+
if not ret:
|
73 |
+
break
|
74 |
+
frames += 1
|
75 |
+
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
76 |
+
pilimg = Image.fromarray(frame)
|
77 |
+
detections = detect_image(pilimg)
|
78 |
+
|
79 |
+
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
80 |
+
img = np.array(pilimg)
|
81 |
+
pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
|
82 |
+
pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
|
83 |
+
unpad_h = img_size - pad_y
|
84 |
+
unpad_w = img_size - pad_x
|
85 |
+
if detections is not None:
|
86 |
+
tracked_objects = mot_tracker.update(detections.cpu())
|
87 |
+
|
88 |
+
unique_labels = detections[:, -1].cpu().unique()
|
89 |
+
n_cls_preds = len(unique_labels)
|
90 |
+
for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:
|
91 |
+
box_h = int(((y2 - y1) / unpad_h) * img.shape[0])
|
92 |
+
box_w = int(((x2 - x1) / unpad_w) * img.shape[1])
|
93 |
+
y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])
|
94 |
+
x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])
|
95 |
+
color = colors[int(obj_id) % len(colors)]
|
96 |
+
cls = classes[int(cls_pred)]
|
97 |
+
cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)
|
98 |
+
cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+80, y1), color, -1)
|
99 |
+
cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)
|
100 |
+
|
101 |
+
cv2.imshow('Stream', frame)
|
102 |
+
outvideo.write(frame)
|
103 |
+
ch = 0xFF & cv2.waitKey(1)
|
104 |
+
if ch == 27:
|
105 |
+
break
|
106 |
+
|
107 |
+
totaltime = time.time()-starttime
|
108 |
+
print(frames, "frames", totaltime/frames, "s/frame")
|
109 |
+
cv2.destroyAllWindows()
|
110 |
+
outvideo.release()
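The coordinate handling in object_tracker.py is worth spelling out: detections are produced in the 416x416 letterboxed frame, so the script subtracts half the padding and rescales back to the original resolution before drawing. A standalone sketch of that arithmetic with made-up numbers (a 1280x720 frame and one box corner):

img_size = 416
img_h, img_w = 720, 1280  # original frame size (illustrative)

# Padding added by detect_image() to make the resized frame square.
pad_x = max(img_h - img_w, 0) * (img_size / max(img_h, img_w))
pad_y = max(img_w - img_h, 0) * (img_size / max(img_h, img_w))
unpad_h = img_size - pad_y
unpad_w = img_size - pad_x

# A box corner in letterboxed coordinates, mapped back to frame coordinates.
x1, y1 = 100.0, 150.0
orig_x1 = int(((x1 - pad_x // 2) / unpad_w) * img_w)
orig_y1 = int(((y1 - pad_y // 2) / unpad_h) * img_h)
print(orig_x1, orig_y1)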
sort.py
ADDED
@@ -0,0 +1,305 @@
1 |
+
"""
|
2 |
+
SORT: A Simple, Online and Realtime Tracker
|
3 |
+
Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
|
4 |
+
|
5 |
+
This program is free software: you can redistribute it and/or modify
|
6 |
+
it under the terms of the GNU General Public License as published by
|
7 |
+
the Free Software Foundation, either version 3 of the License, or
|
8 |
+
(at your option) any later version.
|
9 |
+
|
10 |
+
This program is distributed in the hope that it will be useful,
|
11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13 |
+
GNU General Public License for more details.
|
14 |
+
|
15 |
+
You should have received a copy of the GNU General Public License
|
16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17 |
+
"""
|
18 |
+
from __future__ import print_function
|
19 |
+
|
20 |
+
from numba import jit
|
21 |
+
import os.path
|
22 |
+
import numpy as np
|
23 |
+
##import matplotlib.pyplot as plt
|
24 |
+
##import matplotlib.patches as patches
|
25 |
+
from skimage import io
|
26 |
+
# from sklearn.utils.linear_assignment_ import linear_assignment
|
27 |
+
import glob
|
28 |
+
import time
|
29 |
+
import argparse
|
30 |
+
from filterpy.kalman import KalmanFilter
|
31 |
+
|
32 |
+
from scipy.optimize import linear_sum_assignment
|
33 |
+
def linear_assignment(x):
|
34 |
+
indices = linear_sum_assignment(x)
|
35 |
+
indices = np.asarray(indices)
|
36 |
+
return np.transpose(indices)
|
37 |
+
|
38 |
+
@jit
|
39 |
+
def iou(bb_test,bb_gt):
|
40 |
+
"""
|
41 |
+
Computes IOU between two bboxes in the form [x1,y1,x2,y2]
|
42 |
+
"""
|
43 |
+
xx1 = np.maximum(bb_test[0], bb_gt[0])
|
44 |
+
yy1 = np.maximum(bb_test[1], bb_gt[1])
|
45 |
+
xx2 = np.minimum(bb_test[2], bb_gt[2])
|
46 |
+
yy2 = np.minimum(bb_test[3], bb_gt[3])
|
47 |
+
w = np.maximum(0., xx2 - xx1)
|
48 |
+
h = np.maximum(0., yy2 - yy1)
|
49 |
+
wh = w * h
|
50 |
+
o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
|
51 |
+
+ (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
|
52 |
+
return(o)
|
53 |
+
|
54 |
+
def convert_bbox_to_z(bbox):
|
55 |
+
"""
|
56 |
+
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
|
57 |
+
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
|
58 |
+
the aspect ratio
|
59 |
+
"""
|
60 |
+
w = bbox[2]-bbox[0]
|
61 |
+
h = bbox[3]-bbox[1]
|
62 |
+
x = bbox[0]+w/2.
|
63 |
+
y = bbox[1]+h/2.
|
64 |
+
s = w*h #scale is just area
|
65 |
+
r = w/float(h)
|
66 |
+
return np.array([x,y,s,r]).reshape((4,1))
|
67 |
+
|
68 |
+
def convert_x_to_bbox(x,score=None):
|
69 |
+
"""
|
70 |
+
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
|
71 |
+
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
|
72 |
+
"""
|
73 |
+
w = np.sqrt(x[2]*x[3])
|
74 |
+
h = x[2]/w
|
75 |
+
if(score is None):
|
76 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
|
77 |
+
else:
|
78 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
|
79 |
+
|
80 |
+
|
81 |
+
class KalmanBoxTracker(object):
|
82 |
+
"""
|
83 |
+
This class represents the internal state of individual tracked objects observed as bbox.
|
84 |
+
"""
|
85 |
+
count = 0
|
86 |
+
def __init__(self,bbox):
|
87 |
+
"""
|
88 |
+
Initialises a tracker using initial bounding box.
|
89 |
+
"""
|
90 |
+
#define constant velocity model
|
91 |
+
self.kf = KalmanFilter(dim_x=7, dim_z=4)
|
92 |
+
self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
|
93 |
+
self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
|
94 |
+
|
95 |
+
self.kf.R[2:,2:] *= 10.
|
96 |
+
self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
|
97 |
+
self.kf.P *= 10.
|
98 |
+
self.kf.Q[-1,-1] *= 0.01
|
99 |
+
self.kf.Q[4:,4:] *= 0.01
|
100 |
+
|
101 |
+
self.kf.x[:4] = convert_bbox_to_z(bbox)
|
102 |
+
self.time_since_update = 0
|
103 |
+
self.id = KalmanBoxTracker.count
|
104 |
+
KalmanBoxTracker.count += 1
|
105 |
+
self.history = []
|
106 |
+
self.hits = 0
|
107 |
+
self.hit_streak = 0
|
108 |
+
self.age = 0
|
109 |
+
self.objclass = bbox[6]
|
110 |
+
|
111 |
+
def update(self,bbox):
|
112 |
+
"""
|
113 |
+
Updates the state vector with observed bbox.
|
114 |
+
"""
|
115 |
+
self.time_since_update = 0
|
116 |
+
self.history = []
|
117 |
+
self.hits += 1
|
118 |
+
self.hit_streak += 1
|
119 |
+
self.kf.update(convert_bbox_to_z(bbox))
|
120 |
+
|
121 |
+
def predict(self):
|
122 |
+
"""
|
123 |
+
Advances the state vector and returns the predicted bounding box estimate.
|
124 |
+
"""
|
125 |
+
if((self.kf.x[6]+self.kf.x[2])<=0):
|
126 |
+
self.kf.x[6] *= 0.0
|
127 |
+
self.kf.predict()
|
128 |
+
self.age += 1
|
129 |
+
if(self.time_since_update>0):
|
130 |
+
self.hit_streak = 0
|
131 |
+
self.time_since_update += 1
|
132 |
+
self.history.append(convert_x_to_bbox(self.kf.x))
|
133 |
+
return self.history[-1]
|
134 |
+
|
135 |
+
def get_state(self):
|
136 |
+
"""
|
137 |
+
Returns the current bounding box estimate.
|
138 |
+
"""
|
139 |
+
return convert_x_to_bbox(self.kf.x)
|
140 |
+
|
141 |
+
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
|
142 |
+
"""
|
143 |
+
Assigns detections to tracked object (both represented as bounding boxes)
|
144 |
+
|
145 |
+
Returns 3 lists of matches, unmatched_detections and unmatched_trackers
|
146 |
+
"""
|
147 |
+
if(len(trackers)==0):
|
148 |
+
return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
|
149 |
+
iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
|
150 |
+
|
151 |
+
for d,det in enumerate(detections):
|
152 |
+
for t,trk in enumerate(trackers):
|
153 |
+
iou_matrix[d,t] = iou(det,trk)
|
154 |
+
matched_indices = linear_assignment(-iou_matrix)
|
155 |
+
|
156 |
+
unmatched_detections = []
|
157 |
+
for d,det in enumerate(detections):
|
158 |
+
if(d not in matched_indices[:,0]):
|
159 |
+
unmatched_detections.append(d)
|
160 |
+
unmatched_trackers = []
|
161 |
+
for t,trk in enumerate(trackers):
|
162 |
+
if(t not in matched_indices[:,1]):
|
163 |
+
unmatched_trackers.append(t)
|
164 |
+
|
165 |
+
#filter out matched with low IOU
|
166 |
+
matches = []
|
167 |
+
for m in matched_indices:
|
168 |
+
if(iou_matrix[m[0],m[1]]<iou_threshold):
|
169 |
+
unmatched_detections.append(m[0])
|
170 |
+
unmatched_trackers.append(m[1])
|
171 |
+
else:
|
172 |
+
matches.append(m.reshape(1,2))
|
173 |
+
if(len(matches)==0):
|
174 |
+
matches = np.empty((0,2),dtype=int)
|
175 |
+
else:
|
176 |
+
matches = np.concatenate(matches,axis=0)
|
177 |
+
|
178 |
+
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
+
class Sort(object):
|
183 |
+
def __init__(self,max_age=1,min_hits=3):
|
184 |
+
"""
|
185 |
+
Sets key parameters for SORT
|
186 |
+
"""
|
187 |
+
self.max_age = max_age
|
188 |
+
self.min_hits = min_hits
|
189 |
+
self.trackers = []
|
190 |
+
self.frame_count = 0
|
191 |
+
|
192 |
+
def update(self,dets):
|
193 |
+
"""
|
194 |
+
Params:
|
195 |
+
dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
|
196 |
+
Requires: this method must be called once for each frame even with empty detections.
|
197 |
+
Returns a similar array, where the last column is the object ID.
|
198 |
+
|
199 |
+
NOTE: The number of objects returned may differ from the number of detections provided.
|
200 |
+
"""
|
201 |
+
self.frame_count += 1
|
202 |
+
#get predicted locations from existing trackers.
|
203 |
+
trks = np.zeros((len(self.trackers),5))
|
204 |
+
to_del = []
|
205 |
+
ret = []
|
206 |
+
for t,trk in enumerate(trks):
|
207 |
+
pos = self.trackers[t].predict()[0]
|
208 |
+
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
|
209 |
+
if(np.any(np.isnan(pos))):
|
210 |
+
to_del.append(t)
|
211 |
+
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
|
212 |
+
for t in reversed(to_del):
|
213 |
+
self.trackers.pop(t)
|
214 |
+
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
|
215 |
+
|
216 |
+
#update matched trackers with assigned detections
|
217 |
+
for t,trk in enumerate(self.trackers):
|
218 |
+
if(t not in unmatched_trks):
|
219 |
+
d = matched[np.where(matched[:,1]==t)[0],0]
|
220 |
+
trk.update(dets[d,:][0])
|
221 |
+
|
222 |
+
#create and initialise new trackers for unmatched detections
|
223 |
+
for i in unmatched_dets:
|
224 |
+
trk = KalmanBoxTracker(dets[i,:])
|
225 |
+
self.trackers.append(trk)
|
226 |
+
i = len(self.trackers)
|
227 |
+
for trk in reversed(self.trackers):
|
228 |
+
d = trk.get_state()[0]
|
229 |
+
if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
|
230 |
+
ret.append(np.concatenate((d,[trk.id+1], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
|
231 |
+
i -= 1
|
232 |
+
#remove dead tracklet
|
233 |
+
if(trk.time_since_update > self.max_age):
|
234 |
+
self.trackers.pop(i)
|
235 |
+
if(len(ret)>0):
|
236 |
+
return np.concatenate(ret)
|
237 |
+
return np.empty((0,5))
|
238 |
+
|
239 |
+
def parse_args():
|
240 |
+
"""Parse input arguments."""
|
241 |
+
parser = argparse.ArgumentParser(description='SORT demo')
|
242 |
+
parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
|
243 |
+
args = parser.parse_args()
|
244 |
+
return args
|
245 |
+
|
246 |
+
if __name__ == '__main__':
|
247 |
+
# all train
|
248 |
+
sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
|
249 |
+
args = parse_args()
|
250 |
+
display = args.display
|
251 |
+
phase = 'train'
|
252 |
+
total_time = 0.0
|
253 |
+
total_frames = 0
|
254 |
+
colours = np.random.rand(32,3) #used only for display
|
255 |
+
if(display):
|
256 |
+
if not os.path.exists('mot_benchmark'):
|
257 |
+
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
|
258 |
+
exit()
|
259 |
+
plt.ion()
|
260 |
+
fig = plt.figure()
|
261 |
+
|
262 |
+
if not os.path.exists('output'):
|
263 |
+
os.makedirs('output')
|
264 |
+
|
265 |
+
for seq in sequences:
|
266 |
+
mot_tracker = Sort() #create instance of the SORT tracker
|
267 |
+
seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
|
268 |
+
with open('output/%s.txt'%(seq),'w') as out_file:
|
269 |
+
print("Processing %s."%(seq))
|
270 |
+
for frame in range(int(seq_dets[:,0].max())):
|
271 |
+
frame += 1 #detection and frame numbers begin at 1
|
272 |
+
dets = seq_dets[seq_dets[:,0]==frame,2:7]
|
273 |
+
dets[:,2:4] += dets[:,0:2] #convert from [x1,y1,w,h] to [x1,y1,x2,y2]
|
274 |
+
total_frames += 1
|
275 |
+
|
276 |
+
if(display):
|
277 |
+
ax1 = fig.add_subplot(111, aspect='equal')
|
278 |
+
fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
|
279 |
+
im =io.imread(fn)
|
280 |
+
ax1.imshow(im)
|
281 |
+
plt.title(seq+' Tracked Targets')
|
282 |
+
|
283 |
+
start_time = time.time()
|
284 |
+
trackers = mot_tracker.update(dets)
|
285 |
+
cycle_time = time.time() - start_time
|
286 |
+
total_time += cycle_time
|
287 |
+
|
288 |
+
for d in trackers:
|
289 |
+
print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
|
290 |
+
if(display):
|
291 |
+
d = d.astype(np.int32)
|
292 |
+
ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
|
293 |
+
ax1.set_adjustable('box-forced')
|
294 |
+
|
295 |
+
if(display):
|
296 |
+
fig.canvas.flush_events()
|
297 |
+
plt.draw()
|
298 |
+
ax1.cla()
|
299 |
+
|
300 |
+
print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
|
301 |
+
if(display):
|
302 |
+
print("Note: to get real runtime results run without the option: --display")
|
303 |
+
|
304 |
+
|
305 |
+
|
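A minimal usage sketch of the Sort class added above, assuming sort.py and its dependencies (filterpy, numba, scikit-image) are importable from the working directory; the 7-column detection layout is the one produced by the detector's NMS step, since KalmanBoxTracker reads the class id from column 6:

import numpy as np
from sort import Sort

mot_tracker = Sort(max_age=10, min_hits=3)

# Hypothetical detections for one frame, rows of
# [x1, y1, x2, y2, obj_conf, class_score, class_pred]
dets = np.array([
    [100., 120., 200., 300., 0.9, 0.8, 0.],
    [400., 150., 480., 320., 0.7, 0.6, 2.],
])

# update() must be called once per frame, even when there are no detections;
# it returns rows of [x1, y1, x2, y2, track_id, class_pred] (possibly empty).
tracks = mot_tracker.update(dets)
tracks = mot_tracker.update(np.empty((0, 7)))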
sort.py.old
ADDED
@@ -0,0 +1,317 @@
1 |
+
"""
|
2 |
+
SORT: A Simple, Online and Realtime Tracker
|
3 |
+
Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
|
4 |
+
|
5 |
+
This program is free software: you can redistribute it and/or modify
|
6 |
+
it under the terms of the GNU General Public License as published by
|
7 |
+
the Free Software Foundation, either version 3 of the License, or
|
8 |
+
(at your option) any later version.
|
9 |
+
|
10 |
+
This program is distributed in the hope that it will be useful,
|
11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13 |
+
GNU General Public License for more details.
|
14 |
+
|
15 |
+
You should have received a copy of the GNU General Public License
|
16 |
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17 |
+
"""
|
18 |
+
from __future__ import print_function
|
19 |
+
|
20 |
+
from numba import jit
|
21 |
+
import os.path
|
22 |
+
import numpy as np
|
23 |
+
##import matplotlib.pyplot as plt
|
24 |
+
##import matplotlib.patches as patches
|
25 |
+
from skimage import io
|
26 |
+
# from scipy.optimize import linear_sum_assignment as linear_assignment
|
27 |
+
import glob
|
28 |
+
import time
|
29 |
+
import argparse
|
30 |
+
from filterpy.kalman import KalmanFilter
|
31 |
+
|
32 |
+
# -------- Fixes the Warning ----------------------
|
33 |
+
# def linear_assignment(cost_matrix):
|
34 |
+
# try:
|
35 |
+
# import lap
|
36 |
+
# _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
|
37 |
+
# return np.array([[y[i], i] for i in x if i >= 0])
|
38 |
+
# except ImportError:
|
39 |
+
# from scipy.optimize import linear_sum_assignment
|
40 |
+
# x, y = linear_sum_assignment(cost_matrix)
|
41 |
+
# return np.array(list(zip(x, y)))
|
42 |
+
|
43 |
+
# --------------- Fixes the Error
|
44 |
+
from scipy.optimize import linear_sum_assignment
|
45 |
+
def linear_assignment(x):
|
46 |
+
indices = linear_sum_assignment(x)
|
47 |
+
indices = np.asarray(indices)
|
48 |
+
return np.transpose(indices)
|
49 |
+
|
50 |
+
@jit
|
51 |
+
def iou(bb_test,bb_gt):
|
52 |
+
"""
|
53 |
+
Computes IOU between two bboxes in the form [x1,y1,x2,y2]
|
54 |
+
"""
|
55 |
+
xx1 = np.maximum(bb_test[0], bb_gt[0])
|
56 |
+
yy1 = np.maximum(bb_test[1], bb_gt[1])
|
57 |
+
xx2 = np.minimum(bb_test[2], bb_gt[2])
|
58 |
+
yy2 = np.minimum(bb_test[3], bb_gt[3])
|
59 |
+
w = np.maximum(0., xx2 - xx1)
|
60 |
+
h = np.maximum(0., yy2 - yy1)
|
61 |
+
wh = w * h
|
62 |
+
o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
|
63 |
+
+ (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
|
64 |
+
return(o)
|
65 |
+
|
66 |
+
def convert_bbox_to_z(bbox):
|
67 |
+
"""
|
68 |
+
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
|
69 |
+
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
|
70 |
+
the aspect ratio
|
71 |
+
"""
|
72 |
+
w = bbox[2]-bbox[0]
|
73 |
+
h = bbox[3]-bbox[1]
|
74 |
+
x = bbox[0]+w/2.
|
75 |
+
y = bbox[1]+h/2.
|
76 |
+
s = w*h #scale is just area
|
77 |
+
r = w/float(h)
|
78 |
+
return np.array([x,y,s,r]).reshape((4,1))
|
79 |
+
|
80 |
+
def convert_x_to_bbox(x,score=None):
|
81 |
+
"""
|
82 |
+
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
|
83 |
+
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
|
84 |
+
"""
|
85 |
+
w = np.sqrt(x[2]*x[3])
|
86 |
+
h = x[2]/w
|
87 |
+
if(score is None):
|
88 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
|
89 |
+
else:
|
90 |
+
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
|
91 |
+
|
92 |
+
|
93 |
+
class KalmanBoxTracker(object):
|
94 |
+
"""
|
95 |
+
This class represents the internal state of individual tracked objects observed as bbox.
|
96 |
+
"""
|
97 |
+
count = 0
|
98 |
+
def __init__(self,bbox):
|
99 |
+
"""
|
100 |
+
Initialises a tracker using initial bounding box.
|
101 |
+
"""
|
102 |
+
#define constant velocity model
|
103 |
+
self.kf = KalmanFilter(dim_x=7, dim_z=4)
|
104 |
+
self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
|
105 |
+
self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
|
106 |
+
|
107 |
+
self.kf.R[2:,2:] *= 10.
|
108 |
+
self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
|
109 |
+
self.kf.P *= 10.
|
110 |
+
self.kf.Q[-1,-1] *= 0.01
|
111 |
+
self.kf.Q[4:,4:] *= 0.01
|
112 |
+
|
113 |
+
self.kf.x[:4] = convert_bbox_to_z(bbox)
|
114 |
+
self.time_since_update = 0
|
115 |
+
self.id = KalmanBoxTracker.count
|
116 |
+
KalmanBoxTracker.count += 1
|
117 |
+
self.history = []
|
118 |
+
self.hits = 0
|
119 |
+
self.hit_streak = 0
|
120 |
+
self.age = 0
|
121 |
+
self.objclass = bbox[6]
|
122 |
+
|
123 |
+
def update(self,bbox):
|
124 |
+
"""
|
125 |
+
Updates the state vector with observed bbox.
|
126 |
+
"""
|
127 |
+
self.time_since_update = 0
|
128 |
+
self.history = []
|
129 |
+
self.hits += 1
|
130 |
+
self.hit_streak += 1
|
131 |
+
self.kf.update(convert_bbox_to_z(bbox))
|
132 |
+
|
133 |
+
def predict(self):
|
134 |
+
"""
|
135 |
+
Advances the state vector and returns the predicted bounding box estimate.
|
136 |
+
"""
|
137 |
+
if((self.kf.x[6]+self.kf.x[2])<=0):
|
138 |
+
self.kf.x[6] *= 0.0
|
139 |
+
self.kf.predict()
|
140 |
+
self.age += 1
|
141 |
+
if(self.time_since_update>0):
|
142 |
+
self.hit_streak = 0
|
143 |
+
self.time_since_update += 1
|
144 |
+
self.history.append(convert_x_to_bbox(self.kf.x))
|
145 |
+
return self.history[-1]
|
146 |
+
|
147 |
+
def get_state(self):
|
148 |
+
"""
|
149 |
+
Returns the current bounding box estimate.
|
150 |
+
"""
|
151 |
+
return convert_x_to_bbox(self.kf.x)
|
152 |
+
|
153 |
+
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
|
154 |
+
"""
|
155 |
+
Assigns detections to tracked object (both represented as bounding boxes)
|
156 |
+
|
157 |
+
Returns 3 lists of matches, unmatched_detections and unmatched_trackers
|
158 |
+
"""
|
159 |
+
if(len(trackers)==0):
|
160 |
+
return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
|
161 |
+
iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
|
162 |
+
|
163 |
+
for d,det in enumerate(detections):
|
164 |
+
for t,trk in enumerate(trackers):
|
165 |
+
iou_matrix[d,t] = iou(det,trk)
|
166 |
+
matched_indices = linear_assignment(-iou_matrix)
|
167 |
+
|
168 |
+
unmatched_detections = []
|
169 |
+
for d,det in enumerate(detections):
|
170 |
+
if(d not in matched_indices[:,0]):
|
171 |
+
unmatched_detections.append(d)
|
172 |
+
unmatched_trackers = []
|
173 |
+
for t,trk in enumerate(trackers):
|
174 |
+
if(t not in matched_indices[:,1]):
|
175 |
+
unmatched_trackers.append(t)
|
176 |
+
|
177 |
+
#filter out matched with low IOU
|
178 |
+
matches = []
|
179 |
+
for m in matched_indices:
|
180 |
+
if(iou_matrix[m[0],m[1]]<iou_threshold):
|
181 |
+
unmatched_detections.append(m[0])
|
182 |
+
unmatched_trackers.append(m[1])
|
183 |
+
else:
|
184 |
+
matches.append(m.reshape(1,2))
|
185 |
+
if(len(matches)==0):
|
186 |
+
matches = np.empty((0,2),dtype=int)
|
187 |
+
else:
|
188 |
+
matches = np.concatenate(matches,axis=0)
|
189 |
+
|
190 |
+
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
|
191 |
+
|
192 |
+
|
193 |
+
|
194 |
+
class Sort(object):
|
195 |
+
def __init__(self,max_age=1,min_hits=3):
|
196 |
+
"""
|
197 |
+
Sets key parameters for SORT
|
198 |
+
"""
|
199 |
+
self.max_age = max_age
|
200 |
+
self.min_hits = min_hits
|
201 |
+
self.trackers = []
|
202 |
+
self.frame_count = 0
|
203 |
+
|
204 |
+
def update(self,dets):
|
205 |
+
"""
|
206 |
+
Params:
|
207 |
+
dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
|
208 |
+
Requires: this method must be called once for each frame even with empty detections.
|
209 |
+
Returns a similar array, where the last column is the object ID.
|
210 |
+
|
211 |
+
NOTE: The number of objects returned may differ from the number of detections provided.
|
212 |
+
"""
|
213 |
+
self.frame_count += 1
|
214 |
+
#get predicted locations from existing trackers.
|
215 |
+
trks = np.zeros((len(self.trackers),5))
|
216 |
+
to_del = []
|
217 |
+
ret = []
|
218 |
+
for t,trk in enumerate(trks):
|
219 |
+
pos = self.trackers[t].predict()[0]
|
220 |
+
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
|
221 |
+
if(np.any(np.isnan(pos))):
|
222 |
+
to_del.append(t)
|
223 |
+
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
|
224 |
+
for t in reversed(to_del):
|
225 |
+
self.trackers.pop(t)
|
226 |
+
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
|
227 |
+
|
228 |
+
#update matched trackers with assigned detections
|
229 |
+
for t,trk in enumerate(self.trackers):
|
230 |
+
if(t not in unmatched_trks):
|
231 |
+
d = matched[np.where(matched[:,1]==t)[0],0]
|
232 |
+
trk.update(dets[d,:][0])
|
233 |
+
|
234 |
+
#create and initialise new trackers for unmatched detections
|
235 |
+
for i in unmatched_dets:
|
236 |
+
trk = KalmanBoxTracker(dets[i,:])
|
237 |
+
self.trackers.append(trk)
|
238 |
+
i = len(self.trackers)
|
239 |
+
for trk in reversed(self.trackers):
|
240 |
+
d = trk.get_state()[0]
|
241 |
+
if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
|
242 |
+
ret.append(np.concatenate((d,[trk.id+1], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
|
243 |
+
i -= 1
|
244 |
+
#remove dead tracklet
|
245 |
+
if(trk.time_since_update > self.max_age):
|
246 |
+
self.trackers.pop(i)
|
247 |
+
if(len(ret)>0):
|
248 |
+
return np.concatenate(ret)
|
249 |
+
return np.empty((0,5))
|
250 |
+
|
251 |
+
def parse_args():
|
252 |
+
"""Parse input arguments."""
|
253 |
+
parser = argparse.ArgumentParser(description='SORT demo')
|
254 |
+
parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
|
255 |
+
args = parser.parse_args()
|
256 |
+
return args
|
257 |
+
|
258 |
+
if __name__ == '__main__':
|
259 |
+
# all train
|
260 |
+
sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
|
261 |
+
args = parse_args()
|
262 |
+
display = args.display
|
263 |
+
phase = 'train'
|
264 |
+
total_time = 0.0
|
265 |
+
total_frames = 0
|
266 |
+
colours = np.random.rand(32,3) #used only for display
|
267 |
+
if(display):
|
268 |
+
if not os.path.exists('mot_benchmark'):
|
269 |
+
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
|
270 |
+
exit()
|
271 |
+
plt.ion()
|
272 |
+
fig = plt.figure()
|
273 |
+
|
274 |
+
if not os.path.exists('output'):
|
275 |
+
os.makedirs('output')
|
276 |
+
|
277 |
+
for seq in sequences:
|
278 |
+
mot_tracker = Sort() #create instance of the SORT tracker
|
279 |
+
seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
|
280 |
+
with open('output/%s.txt'%(seq),'w') as out_file:
|
281 |
+
print("Processing %s."%(seq))
|
282 |
+
for frame in range(int(seq_dets[:,0].max())):
|
283 |
+
frame += 1 #detection and frame numbers begin at 1
|
284 |
+
dets = seq_dets[seq_dets[:,0]==frame,2:7]
|
285 |
+
dets[:,2:4] += dets[:,0:2] #convert from [x1,y1,w,h] to [x1,y1,x2,y2]
|
286 |
+
total_frames += 1
|
287 |
+
|
288 |
+
if(display):
|
289 |
+
ax1 = fig.add_subplot(111, aspect='equal')
|
290 |
+
fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
|
291 |
+
im =io.imread(fn)
|
292 |
+
ax1.imshow(im)
|
293 |
+
plt.title(seq+' Tracked Targets')
|
294 |
+
|
295 |
+
start_time = time.time()
|
296 |
+
trackers = mot_tracker.update(dets)
|
297 |
+
cycle_time = time.time() - start_time
|
298 |
+
total_time += cycle_time
|
299 |
+
|
300 |
+
for d in trackers:
|
301 |
+
print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
|
302 |
+
if(display):
|
303 |
+
d = d.astype(np.int32)
|
304 |
+
ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
|
305 |
+
ax1.set_adjustable('box-forced')
|
306 |
+
|
307 |
+
if(display):
|
308 |
+
fig.canvas.flush_events()
|
309 |
+
plt.draw()
|
310 |
+
ax1.cla()
|
311 |
+
|
312 |
+
print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
|
313 |
+
if(display):
|
314 |
+
print("Note: to get real runtime results run without the option: --display")
|
315 |
+
|
316 |
+
|
317 |
+
|
utils/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (125 Bytes).
utils/__pycache__/datasets.cpython-36.pyc
ADDED
Binary file (3.65 kB).
utils/__pycache__/parse_config.cpython-36.pyc
ADDED
Binary file (1.38 kB).
utils/__pycache__/parse_config.cpython-37.pyc
ADDED
Binary file (1.43 kB).
utils/__pycache__/utils.cpython-36.pyc
ADDED
Binary file (7.05 kB).
utils/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (7.1 kB).
utils/datasets.py
ADDED
@@ -0,0 +1,121 @@
1 |
+
import glob
|
2 |
+
import random
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
import torch
|
7 |
+
|
8 |
+
from torch.utils.data import Dataset
|
9 |
+
from PIL import Image
|
10 |
+
import torchvision.transforms as transforms
|
11 |
+
|
12 |
+
##import matplotlib.pyplot as plt
|
13 |
+
##import matplotlib.patches as patches
|
14 |
+
|
15 |
+
from skimage.transform import resize
|
16 |
+
|
17 |
+
import sys
|
18 |
+
|
19 |
+
class ImageFolder(Dataset):
|
20 |
+
def __init__(self, folder_path, img_size=416):
|
21 |
+
self.files = sorted(glob.glob('%s/*.*' % folder_path))
|
22 |
+
self.img_shape = (img_size, img_size)
|
23 |
+
|
24 |
+
def __getitem__(self, index):
|
25 |
+
img_path = self.files[index % len(self.files)]
|
26 |
+
# Extract image
|
27 |
+
img = np.array(Image.open(img_path))
|
28 |
+
h, w, _ = img.shape
|
29 |
+
dim_diff = np.abs(h - w)
|
30 |
+
# Upper (left) and lower (right) padding
|
31 |
+
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
|
32 |
+
# Determine padding
|
33 |
+
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
|
34 |
+
# Add padding
|
35 |
+
input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
|
36 |
+
# Resize and normalize
|
37 |
+
input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
|
38 |
+
# Channels-first
|
39 |
+
input_img = np.transpose(input_img, (2, 0, 1))
|
40 |
+
# As pytorch tensor
|
41 |
+
input_img = torch.from_numpy(input_img).float()
|
42 |
+
|
43 |
+
return img_path, input_img
|
44 |
+
|
45 |
+
def __len__(self):
|
46 |
+
return len(self.files)
|
47 |
+
|
48 |
+
|
49 |
+
class ListDataset(Dataset):
|
50 |
+
def __init__(self, list_path, img_size=416):
|
51 |
+
with open(list_path, 'r') as file:
|
52 |
+
self.img_files = file.readlines()
|
53 |
+
self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
|
54 |
+
self.img_shape = (img_size, img_size)
|
55 |
+
self.max_objects = 50
|
56 |
+
|
57 |
+
def __getitem__(self, index):
|
58 |
+
|
59 |
+
#---------
|
60 |
+
# Image
|
61 |
+
#---------
|
62 |
+
|
63 |
+
img_path = self.img_files[index % len(self.img_files)].rstrip()
|
64 |
+
img = np.array(Image.open(img_path))
|
65 |
+
|
66 |
+
# Handles images with less than three channels
|
67 |
+
while len(img.shape) != 3:
|
68 |
+
index += 1
|
69 |
+
img_path = self.img_files[index % len(self.img_files)].rstrip()
|
70 |
+
img = np.array(Image.open(img_path))
|
71 |
+
|
72 |
+
h, w, _ = img.shape
|
73 |
+
dim_diff = np.abs(h - w)
|
74 |
+
# Upper (left) and lower (right) padding
|
75 |
+
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
|
76 |
+
# Determine padding
|
77 |
+
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
|
78 |
+
# Add padding
|
79 |
+
input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
|
80 |
+
padded_h, padded_w, _ = input_img.shape
|
81 |
+
# Resize and normalize
|
82 |
+
input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
|
83 |
+
# Channels-first
|
84 |
+
input_img = np.transpose(input_img, (2, 0, 1))
|
85 |
+
# As pytorch tensor
|
86 |
+
input_img = torch.from_numpy(input_img).float()
|
87 |
+
|
88 |
+
#---------
|
89 |
+
# Label
|
90 |
+
#---------
|
91 |
+
|
92 |
+
label_path = self.label_files[index % len(self.img_files)].rstrip()
|
93 |
+
|
94 |
+
labels = None
|
95 |
+
if os.path.exists(label_path):
|
96 |
+
labels = np.loadtxt(label_path).reshape(-1, 5)
|
97 |
+
# Extract coordinates for unpadded + unscaled image
|
98 |
+
x1 = w * (labels[:, 1] - labels[:, 3]/2)
|
99 |
+
y1 = h * (labels[:, 2] - labels[:, 4]/2)
|
100 |
+
x2 = w * (labels[:, 1] + labels[:, 3]/2)
|
101 |
+
y2 = h * (labels[:, 2] + labels[:, 4]/2)
|
102 |
+
# Adjust for added padding
|
103 |
+
x1 += pad[1][0]
|
104 |
+
y1 += pad[0][0]
|
105 |
+
x2 += pad[1][0]
|
106 |
+
y2 += pad[0][0]
|
107 |
+
# Calculate ratios from coordinates
|
108 |
+
labels[:, 1] = ((x1 + x2) / 2) / padded_w
|
109 |
+
labels[:, 2] = ((y1 + y2) / 2) / padded_h
|
110 |
+
labels[:, 3] *= w / padded_w
|
111 |
+
labels[:, 4] *= h / padded_h
|
112 |
+
# Fill matrix
|
113 |
+
filled_labels = np.zeros((self.max_objects, 5))
|
114 |
+
if labels is not None:
|
115 |
+
filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
|
116 |
+
filled_labels = torch.from_numpy(filled_labels)
|
117 |
+
|
118 |
+
return img_path, input_img, filled_labels
|
119 |
+
|
120 |
+
def __len__(self):
|
121 |
+
return len(self.img_files)
|
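A short sketch of how the ImageFolder dataset above is typically consumed (the folder path is illustrative; each image is padded to a square, resized to img_size and returned channels-first):

from torch.utils.data import DataLoader
from utils.datasets import ImageFolder

# 'images' is a hypothetical directory of RGB frames
dataloader = DataLoader(ImageFolder('images', img_size=416),
                        batch_size=4, shuffle=False)

for img_paths, input_imgs in dataloader:
    # input_imgs: float tensor of shape (batch, 3, 416, 416)
    print(img_paths[0], input_imgs.shape)
    break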
utils/parse_config.py
ADDED
@@ -0,0 +1,36 @@
1 |
+
|
2 |
+
|
3 |
+
def parse_model_config(path):
|
4 |
+
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
|
5 |
+
file = open(path, 'r')
|
6 |
+
lines = file.read().split('\n')
|
7 |
+
lines = [x for x in lines if x and not x.startswith('#')]
|
8 |
+
lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
|
9 |
+
module_defs = []
|
10 |
+
for line in lines:
|
11 |
+
if line.startswith('['): # This marks the start of a new block
|
12 |
+
module_defs.append({})
|
13 |
+
module_defs[-1]['type'] = line[1:-1].rstrip()
|
14 |
+
if module_defs[-1]['type'] == 'convolutional':
|
15 |
+
module_defs[-1]['batch_normalize'] = 0
|
16 |
+
else:
|
17 |
+
key, value = line.split("=")
|
18 |
+
value = value.strip()
|
19 |
+
module_defs[-1][key.rstrip()] = value.strip()
|
20 |
+
|
21 |
+
return module_defs
|
22 |
+
|
23 |
+
def parse_data_config(path):
|
24 |
+
"""Parses the data configuration file"""
|
25 |
+
options = dict()
|
26 |
+
options['gpus'] = '0,1,2,3'
|
27 |
+
options['num_workers'] = '10'
|
28 |
+
with open(path, 'r') as fp:
|
29 |
+
lines = fp.readlines()
|
30 |
+
for line in lines:
|
31 |
+
line = line.strip()
|
32 |
+
if line == '' or line.startswith('#'):
|
33 |
+
continue
|
34 |
+
key, value = line.split('=')
|
35 |
+
options[key.strip()] = value.strip()
|
36 |
+
return options
|
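To illustrate what parse_model_config returns, a sketch with a two-block config (the path is an assumption; the real yolov3.cfg lives in the git-ignored config/ directory):

from utils.parse_config import parse_model_config

# Given a file containing, e.g.:
#   [net]
#   channels=3
#   [convolutional]
#   batch_normalize=1
#   filters=32
#   size=3
#   stride=1
#   pad=1
#   activation=leaky
# the parser returns one dict per block, with all values kept as strings:
#   [{'type': 'net', 'channels': '3'},
#    {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', ...}]
module_defs = parse_model_config('config/yolov3.cfg')
hyperparams = module_defs.pop(0)   # the leading [net] block holds the hyperparameters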
utils/utils.py
ADDED
@@ -0,0 +1,258 @@
1 |
+
from __future__ import division
|
2 |
+
import math
|
3 |
+
import time
|
4 |
+
import torch
|
5 |
+
import torch.nn as nn
|
6 |
+
import torch.nn.functional as F
|
7 |
+
from torch.autograd import Variable
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
#import matplotlib.pyplot as plt
|
11 |
+
#import matplotlib.patches as patches
|
12 |
+
|
13 |
+
|
14 |
+
def load_classes(path):
|
15 |
+
"""
|
16 |
+
Loads class labels at 'path'
|
17 |
+
"""
|
18 |
+
fp = open(path, "r")
|
19 |
+
names = fp.read().split("\n")[:-1]
|
20 |
+
return names
|
21 |
+
|
22 |
+
|
23 |
+
def weights_init_normal(m):
|
24 |
+
classname = m.__class__.__name__
|
25 |
+
if classname.find("Conv") != -1:
|
26 |
+
torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
|
27 |
+
elif classname.find("BatchNorm2d") != -1:
|
28 |
+
torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
|
29 |
+
torch.nn.init.constant_(m.bias.data, 0.0)
|
30 |
+
|
31 |
+
|
32 |
+
def compute_ap(recall, precision):
|
33 |
+
""" Compute the average precision, given the recall and precision curves.
|
34 |
+
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
|
35 |
+
|
36 |
+
# Arguments
|
37 |
+
recall: The recall curve (list).
|
38 |
+
precision: The precision curve (list).
|
39 |
+
# Returns
|
40 |
+
The average precision as computed in py-faster-rcnn.
|
41 |
+
"""
|
42 |
+
# correct AP calculation
|
43 |
+
# first append sentinel values at the end
|
44 |
+
mrec = np.concatenate(([0.0], recall, [1.0]))
|
45 |
+
mpre = np.concatenate(([0.0], precision, [0.0]))
|
46 |
+
|
47 |
+
# compute the precision envelope
|
48 |
+
for i in range(mpre.size - 1, 0, -1):
|
49 |
+
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
|
50 |
+
|
51 |
+
# to calculate area under PR curve, look for points
|
52 |
+
# where X axis (recall) changes value
|
53 |
+
i = np.where(mrec[1:] != mrec[:-1])[0]
|
54 |
+
|
55 |
+
# and sum (\Delta recall) * prec
|
56 |
+
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
|
57 |
+
return ap
|
58 |
+
|
59 |
+
|
60 |
+
def bbox_iou(box1, box2, x1y1x2y2=True):
|
61 |
+
"""
|
62 |
+
Returns the IoU of two bounding boxes
|
63 |
+
"""
|
64 |
+
if not x1y1x2y2:
|
65 |
+
# Transform from center and width to exact coordinates
|
66 |
+
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
|
67 |
+
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
|
68 |
+
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
|
69 |
+
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
|
70 |
+
else:
|
71 |
+
# Get the coordinates of bounding boxes
|
72 |
+
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
|
73 |
+
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
|
74 |
+
|
75 |
+
# get the coordinates of the intersection rectangle
|
76 |
+
inter_rect_x1 = torch.max(b1_x1, b2_x1)
|
77 |
+
inter_rect_y1 = torch.max(b1_y1, b2_y1)
|
78 |
+
inter_rect_x2 = torch.min(b1_x2, b2_x2)
|
79 |
+
inter_rect_y2 = torch.min(b1_y2, b2_y2)
|
80 |
+
# Intersection area
|
81 |
+
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
|
82 |
+
inter_rect_y2 - inter_rect_y1 + 1, min=0
|
83 |
+
)
|
84 |
+
# Union Area
|
85 |
+
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
|
86 |
+
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
|
87 |
+
|
88 |
+
iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
|
89 |
+
|
90 |
+
return iou
|
91 |
+
|
92 |
+
|
93 |
+
def bbox_iou_numpy(box1, box2):
|
94 |
+
"""Computes IoU between bounding boxes.
|
95 |
+
Parameters
|
96 |
+
----------
|
97 |
+
box1 : ndarray
|
98 |
+
(N, 4) shaped array with bboxes
|
99 |
+
box2 : ndarray
|
100 |
+
(M, 4) shaped array with bboxes
|
101 |
+
Returns
|
102 |
+
-------
|
103 |
+
: ndarray
|
104 |
+
(N, M) shaped array with IoUs
|
105 |
+
"""
|
106 |
+
area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
|
107 |
+
|
108 |
+
iw = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2]) - np.maximum(
|
109 |
+
np.expand_dims(box1[:, 0], 1), box2[:, 0]
|
110 |
+
)
|
111 |
+
ih = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3]) - np.maximum(
|
112 |
+
np.expand_dims(box1[:, 1], 1), box2[:, 1]
|
113 |
+
)
|
114 |
+
|
115 |
+
iw = np.maximum(iw, 0)
|
116 |
+
ih = np.maximum(ih, 0)
|
117 |
+
|
118 |
+
ua = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1) + area - iw * ih
|
119 |
+
|
120 |
+
ua = np.maximum(ua, np.finfo(float).eps)
|
121 |
+
|
122 |
+
intersection = iw * ih
|
123 |
+
|
124 |
+
return intersection / ua
|
125 |
+
|
126 |
+
|
127 |
+
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
|
128 |
+
"""
|
129 |
+
Removes detections with lower object confidence score than 'conf_thres' and performs
|
130 |
+
Non-Maximum Suppression to further filter detections.
|
131 |
+
Returns detections with shape:
|
132 |
+
(x1, y1, x2, y2, object_conf, class_score, class_pred)
|
133 |
+
"""
|
134 |
+
|
135 |
+
# From (center x, center y, width, height) to (x1, y1, x2, y2)
|
136 |
+
box_corner = prediction.new(prediction.shape)
|
137 |
+
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
|
138 |
+
box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
|
139 |
+
box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
|
140 |
+
box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
|
141 |
+
prediction[:, :, :4] = box_corner[:, :, :4]
|
142 |
+
|
143 |
+
output = [None for _ in range(len(prediction))]
|
144 |
+
for image_i, image_pred in enumerate(prediction):
|
145 |
+
# Filter out confidence scores below threshold
|
146 |
+
conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
|
147 |
+
image_pred = image_pred[conf_mask]
|
148 |
+
# If none are remaining => process next image
|
149 |
+
if not image_pred.size(0):
|
150 |
+
continue
|
151 |
+
# Get score and class with highest confidence
|
152 |
+
class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
|
153 |
+
# Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
|
154 |
+
detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
|
155 |
+
# Iterate through all predicted classes
|
156 |
+
unique_labels = detections[:, -1].cpu().unique()
|
157 |
+
if prediction.is_cuda:
|
158 |
+
unique_labels = unique_labels.cuda()
|
159 |
+
for c in unique_labels:
|
160 |
+
# Get the detections with the particular class
|
161 |
+
detections_class = detections[detections[:, -1] == c]
|
162 |
+
# Sort the detections by maximum objectness confidence
|
163 |
+
_, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
|
164 |
+
detections_class = detections_class[conf_sort_index]
|
165 |
+
# Perform non-maximum suppression
|
166 |
+
max_detections = []
|
167 |
+
while detections_class.size(0):
|
168 |
+
# Get detection with highest confidence and save as max detection
|
169 |
+
max_detections.append(detections_class[0].unsqueeze(0))
|
170 |
+
# Stop if we're at the last detection
|
171 |
+
if len(detections_class) == 1:
|
172 |
+
break
|
173 |
+
# Get the IOUs for all boxes with lower confidence
|
174 |
+
ious = bbox_iou(max_detections[-1], detections_class[1:])
|
175 |
+
# Remove detections with IoU >= NMS threshold
|
176 |
+
detections_class = detections_class[1:][ious < nms_thres]
|
177 |
+
|
178 |
+
max_detections = torch.cat(max_detections).data
|
179 |
+
# Add max detections to outputs
|
180 |
+
output[image_i] = (
|
181 |
+
max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
|
182 |
+
)
|
183 |
+
|
184 |
+
return output
|
185 |
+
|
186 |
+
|
187 |
+
def build_targets(
|
188 |
+
pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim
|
189 |
+
):
|
190 |
+
nB = target.size(0)
|
191 |
+
nA = num_anchors
|
192 |
+
nC = num_classes
|
193 |
+
nG = grid_size
|
194 |
+
mask = torch.zeros(nB, nA, nG, nG)
|
195 |
+
conf_mask = torch.ones(nB, nA, nG, nG)
|
196 |
+
tx = torch.zeros(nB, nA, nG, nG)
|
197 |
+
ty = torch.zeros(nB, nA, nG, nG)
|
198 |
+
tw = torch.zeros(nB, nA, nG, nG)
|
199 |
+
th = torch.zeros(nB, nA, nG, nG)
|
200 |
+
tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
|
201 |
+
tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)
|
202 |
+
|
203 |
+
nGT = 0
|
204 |
+
nCorrect = 0
|
205 |
+
for b in range(nB):
|
206 |
+
for t in range(target.shape[1]):
|
207 |
+
if target[b, t].sum() == 0:
|
208 |
+
continue
|
209 |
+
nGT += 1
|
210 |
+
# Convert to position relative to box
|
211 |
+
gx = target[b, t, 1] * nG
|
212 |
+
gy = target[b, t, 2] * nG
|
213 |
+
gw = target[b, t, 3] * nG
|
214 |
+
gh = target[b, t, 4] * nG
|
215 |
+
# Get grid box indices
|
216 |
+
gi = int(gx)
|
217 |
+
gj = int(gy)
|
218 |
+
# Get shape of gt box
|
219 |
+
gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
|
220 |
+
# Get shape of anchor box
|
221 |
+
anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
|
222 |
+
# Calculate iou between gt and anchor shapes
|
223 |
+
anch_ious = bbox_iou(gt_box, anchor_shapes)
|
224 |
+
# Where the overlap is larger than threshold set mask to zero (ignore)
|
225 |
+
conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
|
226 |
+
# Find the best matching anchor box
|
227 |
+
best_n = np.argmax(anch_ious)
|
228 |
+
# Get ground truth box
|
229 |
+
gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
|
230 |
+
# Get the best prediction
|
231 |
+
pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
|
232 |
+
# Masks
|
233 |
+
mask[b, best_n, gj, gi] = 1
|
234 |
+
conf_mask[b, best_n, gj, gi] = 1
|
235 |
+
# Coordinates
|
236 |
+
tx[b, best_n, gj, gi] = gx - gi
|
237 |
+
ty[b, best_n, gj, gi] = gy - gj
|
238 |
+
# Width and height
|
239 |
+
tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
|
240 |
+
th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
|
241 |
+
# One-hot encoding of label
|
242 |
+
target_label = int(target[b, t, 0])
|
243 |
+
tcls[b, best_n, gj, gi, target_label] = 1
|
244 |
+
tconf[b, best_n, gj, gi] = 1
|
245 |
+
|
246 |
+
# Calculate iou between ground truth and best matching prediction
|
247 |
+
iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
|
248 |
+
pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
|
249 |
+
score = pred_conf[b, best_n, gj, gi]
|
250 |
+
if iou > 0.5 and pred_label == target_label and score > 0.5:
|
251 |
+
nCorrect += 1
|
252 |
+
|
253 |
+
return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
|
254 |
+
|
255 |
+
|
256 |
+
def to_categorical(y, num_classes):
|
257 |
+
""" 1-hot encodes a tensor """
|
258 |
+
return torch.from_numpy(np.eye(num_classes, dtype="uint8")[y])
|
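A small sketch of non_max_suppression on random data, only to show the expected input and output shapes (the scores are synthetic, not from a trained model):

import torch
from utils.utils import non_max_suppression

num_classes = 80
# Raw detector output: (batch, boxes, 5 + num_classes), boxes as (cx, cy, w, h)
boxes = torch.rand(1, 100, 4) * 416
scores = torch.rand(1, 100, 1 + num_classes)   # objectness + per-class scores
prediction = torch.cat([boxes, scores], dim=-1)

detections = non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4)
# One entry per image: None, or rows of (x1, y1, x2, y2, obj_conf, class_score, class_pred)
if detections[0] is not None:
    print(detections[0].shape)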