{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import torchvision\n", "import pretrainedmodels\n", "import torch\n", "import pretrainedmodels.utils as utils\n", "import torchvision.transforms as transforms\n", "import torchvision.models as models\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "from PIL import Image\n", "from collections import OrderedDict" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'imagenet': {'url': 'http://data.lip6.fr/cadene/pretrainedmodels/nasnetalarge-a1897284.pth', 'input_space': 'RGB', 'input_size': [3, 331, 331], 'input_range': [0, 1], 'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5], 'num_classes': 1000}, 'imagenet+background': {'url': 'http://data.lip6.fr/cadene/pretrainedmodels/nasnetalarge-a1897284.pth', 'input_space': 'RGB', 'input_size': [3, 331, 331], 'input_range': [0, 1], 'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5], 'num_classes': 1001}}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Downloading: \"http://data.lip6.fr/cadene/pretrainedmodels/nasnetalarge-a1897284.pth\" to /home/dattran/.torch/models/nasnetalarge-a1897284.pth\n", "100%|██████████| 356056626/356056626 [07:27<00:00, 795221.08it/s] \n" ] } ], "source": [ "print(pretrainedmodels.pretrained_settings['nasnetalarge'])\n", "model = pretrainedmodels.__dict__['nasnetalarge'](num_classes=1000, pretrained='imagenet')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Variable containing:\n", "( 0 , 0 ,.,.) = \n", " 0.1121 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.4418 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.0000 0.0000 0.0000 ... 0.8554 0.0000 0.0000\n", " ... ⋱ ... \n", " 0.0000 0.5682 0.5166 ... 0.0301 0.0000 0.0000\n", " 0.0000 0.0921 0.2531 ... 2.0754 0.4212 0.0000\n", " 0.0000 0.0000 0.0000 ... 1.8003 0.5220 0.0000\n", "\n", "( 0 , 1 ,.,.) = \n", " 0.1536 0.1211 0.0000 ... 1.3220 0.1388 0.0000\n", " 0.4111 0.2736 0.1038 ... 0.7770 0.0000 0.0000\n", " 0.0000 0.0000 0.0000 ... 1.5591 0.0000 0.0000\n", " ... ⋱ ... \n", " 0.0000 0.0000 0.3120 ... 0.0000 0.0000 0.0000\n", " 1.2059 0.9648 0.7537 ... 0.0000 0.0000 0.0000\n", " 1.8497 0.5725 0.0000 ... 0.0000 0.0000 0.0000\n", "\n", "( 0 , 2 ,.,.) = \n", " 0.3311 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.3876 0.2232 0.1740 ... 0.0000 0.0000 0.0000\n", " ... ⋱ ... \n", " 0.0000 0.0000 0.3968 ... 0.0000 0.2945 0.0000\n", " 0.4885 0.8537 1.2278 ... 0.2380 0.6205 1.0980\n", " 2.1136 1.3682 1.5039 ... 0.9723 0.9817 0.0760\n", " ... \n", "\n", "( 0 ,1533,.,.) = \n", " 0.8787 0.9274 0.5682 ... 0.4111 0.5084 0.5470\n", " 0.5436 0.6317 0.5634 ... 0.5417 0.3694 0.4478\n", " 0.0000 0.3396 0.5478 ... 0.8584 0.5552 0.5867\n", " ... ⋱ ... \n", " 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.4349 0.2017 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.3743 0.3398 0.0000 ... 0.0000 0.0000 0.0000\n", "\n", "( 0 ,1534,.,.) = \n", " 0.1190 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.0259 0.0000 0.0000 ... 0.3597 0.0000 0.0000\n", " 0.0000 0.0000 0.0000 ... 1.4928 0.0000 0.0000\n", " ... ⋱ ... \n", " 0.0000 0.0000 0.0000 ... 1.4441 1.2858 0.6525\n", " 0.0000 0.1889 0.5281 ... 0.7508 0.9813 0.5251\n", " 0.0000 0.9832 1.3777 ... 0.0639 0.2403 0.0000\n", "\n", "( 0 ,1535,.,.) = \n", " 0.0000 0.1068 0.3845 ... 0.0000 0.0000 0.0000\n", " 0.0000 0.2751 0.7059 ... 0.0000 0.0000 0.0000\n", " 0.0000 0.0711 0.4025 ... 1.2069 1.4548 1.1041\n", " ... ⋱ ... \n", " 0.0000 0.0000 0.0000 ... 0.7915 0.3439 0.1936\n", " 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", " 0.0275 0.0000 0.0000 ... 0.0000 0.0000 0.0000\n", "[torch.FloatTensor of size 1x1536x8x8]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "load_img = utils.LoadImage()\n", "# transformations depending on the model\n", "# rescale, center crop, normalize, and others (ex: ToBGR, ToRange255)\n", "tf_img = utils.TransformImage(model) \n", "\n", "path_img = '/home/dattran/data/xray/00000013_029.png'\n", "\n", "input_img = load_img(path_img)\n", "input_tensor = tf_img(input_img) # 3x400x225 -> 3x299x299 size may differ\n", "input_tensor = input_tensor.unsqueeze(0) # 3x299x299 -> 1x3x299x299\n", "input = torch.autograd.Variable(input_tensor,\n", " requires_grad=False)\n", "model.features(input)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\u001B[0;31mInit signature:\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnn\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mMaxPool2d\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_size\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mstride\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mNone\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mpadding\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdilation\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mreturn_indices\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mFalse\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mceil_mode\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mFalse\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;31mDocstring:\u001B[0m \n", "Applies a 2D max pooling over an input signal composed of several input\n", "planes.\n", "\n", "In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,\n", "output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`\n", "can be precisely described as:\n", "\n", ".. math::\n", "\n", " \\begin{array}{ll}\n", " out(N_i, C_j, h, w) = \\max_{{m}=0}^{kH-1} \\max_{{n}=0}^{kW-1}\n", " input(N_i, C_j, stride[0] * h + m, stride[1] * w + n)\n", " \\end{array}\n", "\n", "| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides\n", " for :attr:`padding` number of points\n", "| :attr:`dilation` controls the spacing between the kernel points. It is harder to describe,\n", " but this `link`_ has a nice visualization of what :attr:`dilation` does.\n", "\n", "The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:\n", "\n", " - a single ``int`` -- in which case the same value is used for the height and width dimension\n", " - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,\n", " and the second `int` for the width dimension\n", "\n", "Args:\n", " kernel_size: the size of the window to take a max over\n", " stride: the stride of the window. Default value is :attr:`kernel_size`\n", " padding: implicit zero padding to be added on both sides\n", " dilation: a parameter that controls the stride of elements in the window\n", " return_indices: if ``True``, will return the max indices along with the outputs.\n", " Useful when Unpooling later\n", " ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape\n", "\n", "Shape:\n", " - Input: :math:`(N, C, H_{in}, W_{in})`\n", " - Output: :math:`(N, C, H_{out}, W_{out})` where\n", " :math:`H_{out} = floor((H_{in} + 2 * padding[0] - dilation[0] * (kernel\\_size[0] - 1) - 1) / stride[0] + 1)`\n", " :math:`W_{out} = floor((W_{in} + 2 * padding[1] - dilation[1] * (kernel\\_size[1] - 1) - 1) / stride[1] + 1)`\n", "\n", "Examples::\n", "\n", " >>> # pool of square window of size=3, stride=2\n", " >>> m = nn.MaxPool2d(3, stride=2)\n", " >>> # pool of non-square window\n", " >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))\n", " >>> input = autograd.Variable(torch.randn(20, 16, 50, 32))\n", " >>> output = m(input)\n", "\n", ".. _link:\n", " https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md\n", "\u001B[0;31mFile:\u001B[0m ~/miniconda2/envs/dat/lib/python3.6/site-packages/torch/nn/modules/pooling.py\n", "\u001B[0;31mType:\u001B[0m type\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "torch.nn.MaxPool2d?" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from pretrainedmodels.models.dpn import adaptive_avgmax_pool2d" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1000" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = nn.Conv2d(1000, 14,kernel_size=1, bias=True)\n", "x.in_channels" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.eval()\n", "model.training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }