Commit 25985c31 authored by Hang Zhang

sync BN

parent d40adbc4
ROOTDIR = $(CURDIR)
lint: cpplint pylint
cpplint:
tests/lint.py encoding cpp src kernel
pylint:
pylint --rcfile=$(ROOTDIR)/tests/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" encoding --ignore=_ext
......@@ -9,7 +9,7 @@ created by [Hang Zhang](http://hangzh.com/)
## Citations
**Context Encoding for Semantic Segmentation**
[Hang Zhang](http://hangzh.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html), [Jianping Shi](http://shijianping.me/), [Zhongyue Zhang](http://zhongyuezhang.com/), [Xiaogang Wang](http://www.ee.cuhk.edu.hk/~xgwang/), [Ambrish Tyagi](https://scholar.google.com/citations?user=GaSWCoUAAAAJ&hl=en), [Amit Agrawal](http://www.amitkagrawal.com/)
[Hang Zhang](http://hangzh.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html), [Jianping Shi](http://shijianping.me/), [Zhongyue Zhang](http://zhongyuezhang.com/), [Xiaogang Wang](http://www.ee.cuhk.edu.hk/~xgwang/), [Ambrish Tyagi](https://scholar.google.com/citations?user=GaSWCoUAAAAJ&hl=en), [Amit Agrawal](http://www.amitkagrawal.com/) [[arXiv]](https://arxiv.org/pdf/1803.08904.pdf)
```
@InProceedings{Zhang_2018_CVPR,
author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
......
......@@ -65,7 +65,7 @@ master_doc = 'index'
# General information about the project.
project = 'Encoding'
copyright = '2017, Hang Zhang'
copyright = '2018, Hang Zhang'
author = 'Hang Zhang'
# The version info for the project you're documenting, acts as replacement for
......
......@@ -11,15 +11,8 @@ All provided models have been verified.
.. note::
This code is provided together with the paper
* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
@InProceedings{Zhang_2018_CVPR,
author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
title = {Context Encoding for Semantic Segmentation},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2018}
}
.. automodule:: encoding.dilated
.. currentmodule:: encoding.dilated
......@@ -91,5 +84,3 @@ DenseNet
~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: densenet201
.. role:: hidden
:class: hidden-section
My NN Layers
============
NN Layers
=========
Modules
-------
Customized NN modules in the Encoding package. For Synchronized Cross-GPU Batch Normalization, please visit :class:`encoding.nn.BatchNorm2d`.
.. currentmodule:: encoding.nn
......@@ -34,17 +32,8 @@ Modules
.. autoclass:: DilatedAvgPool2d
:members:
Functions
---------
.. currentmodule:: encoding.functions
:hidden:`aggregate`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: aggregate
:hidden:`GramMatrix`
~~~~~~~~~~~~~~~~~~~~
:hidden:`dilatedavgpool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: dilatedavgpool2d
.. autoclass:: GramMatrix
:members:
.. role:: hidden
:class: hidden-section
Other Functions
===============
encoding.functions
==================
.. automodule:: encoding.functions
.. currentmodule:: encoding.functions
:hidden:`scaledL2`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: scaledL2
:hidden:`upsample`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: upsample
:hidden:`dropout`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: dropout
:hidden:`relu`
:hidden:`dilatedavgpool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: relu
:hidden:`view_each`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: view_each
:hidden:`multi_each`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: dilatedavgpool2d
.. autofunction:: multi_each
:hidden:`aggregate`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: aggregate
:hidden:`sum_each`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: sum_each
:hidden:`scaledL2`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: scaledL2
:hidden:`cat_each`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: cat_each
:hidden:`sum_square`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: sum_square
......@@ -9,8 +9,8 @@ Created by `Hang Zhang <http://hangzh.com/>`_
An optimized PyTorch package with CUDA backend.
.. todo::
A PyTorch DataParallel compatible Synchronized Cross-GPU Batch Normalization will be provided soon.
.. note::
PyTorch compatible Synchronized Cross-GPU :class:`encoding.nn.BatchNorm2d` has been released.
.. toctree::
:glob:
......@@ -34,7 +34,6 @@ An optimized PyTorch package with CUDA backend.
syncbn
parallel
dilated
nn
functions
utils
......
.. role:: hidden
:class: hidden-section
Other NN Layers
===============
.. automodule:: encoding.nn
Customized Layers
-----------------
:hidden:`GramMatrix`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: GramMatrix
:members:
:hidden:`Normalize`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Normalize
:members:
:hidden:`View`
~~~~~~~~~~~~~~
.. autoclass:: View
:members:
Standard Layers
---------------
Standard layers, as in PyTorch, but operating in :class:`encoding.parallel.SelfDataParallel` mode. Use them together with SyncBN.
:hidden:`Conv1d`
~~~~~~~~~~~~~~~~
.. autoclass:: Conv1d
:members:
:hidden:`Conv2d`
~~~~~~~~~~~~~~~~
.. autoclass:: Conv2d
:members:
:hidden:`ConvTranspose2d`
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ConvTranspose2d
:members:
:hidden:`ReLU`
~~~~~~~~~~~~~~
.. autoclass:: ReLU
:members:
:hidden:`Sigmoid`
~~~~~~~~~~~~~~~~~
.. autoclass:: Sigmoid
:members:
:hidden:`MaxPool2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxPool2d
:members:
:hidden:`AvgPool2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: AvgPool2d
:members:
:hidden:`AdaptiveAvgPool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveAvgPool2d
:members:
:hidden:`Dropout2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Dropout2d
:members:
:hidden:`Linear`
~~~~~~~~~~~~~~~~
.. autoclass:: Linear
:members:
......@@ -15,11 +15,13 @@ Install from Source
- On Linux::
pip install -r requirements.txt
python setup.py install
- On Mac OSX::
MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
pip install -r requirements.txt
MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
Citations
---------
......
......@@ -8,7 +8,7 @@ which is extending :mod:`torch.nn` and
Torch C and CUDA Backend
------------------------
Given an example of the residual operation (in a mini-batch):
Given a simple example of the residual operation (in a mini-batch):
.. math::
r_{ik} = x_i - c_k
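A minimal sketch of this broadcasted residual, assuming illustrative shapes (a batch of :math:`N` features of dimension :math:`D` against :math:`K` codewords; not the backend's actual layout)::

    import torch

    B, N, K, D = 2, 5, 4, 3
    X = torch.randn(B, N, D)                   # input features x_i
    C = torch.randn(K, D)                      # codewords c_k
    R = X.unsqueeze(2) - C.view(1, 1, K, D)    # r_{ik} = x_i - c_k, shape (B, N, K, D)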
......
Implementing Synchronized Multi-GPU Batch Normalization
=======================================================
In this tutorial, we discuss the implementation detail of Multi-GPU Batch Normalization (BN) (classic implementation: :class:`encoding.nn.BatchNorm2d` and compatible :class:`encoding.parallel.SelfDataParallel`). We will provide the training example in a later version.
In this tutorial, we discuss the implementation detail of Multi-GPU Batch Normalization (BN) (classic implementation: :class:`encoding.nn.BatchNorm2d`). We will provide the training example in a later version.
How BN works?
-------------
......@@ -17,13 +17,13 @@ BN layer was introduced in the paper `Batch Normalization: Accelerating Deep Net
where :math:`\mu=\frac{\sum_i^N x_i}{N} , \sigma = \sqrt{\frac{\sum_i^N (x_i-\mu)^2}{N}+\epsilon}` and :math:`\gamma, \beta` are the learnable parameters.
- Backward Pass:
For calculating the gradient :math:`\frac{d_\ell}{d_{x_i}}`, we need to consider the gradient from :math:`\frac{d_\ell}{d_y}` and the gradients from :math:`\frac{d_\ell}{d_\mu}` and :math:`\frac{d_\ell}{d_\sigma}`, since :math:`\mu \text{ and } \sigma` are functions of the input :math:`x_i`. We use partial derivatives in the notation:
For calculating the gradient :math:`\frac{d_\ell}{d_{x_i}}`, we need to consider the partial gradient from :math:`\frac{d_\ell}{d_y}` and the gradients from :math:`\frac{d_\ell}{d_\mu}` and :math:`\frac{d_\ell}{d_\sigma}`, since :math:`\mu \text{ and } \sigma` are functions of the input :math:`x_i`. We use partial derivatives in the notation:
.. math::
\frac{d_\ell}{d_{x_i}} = \frac{d_\ell}{d_{y_i}}\cdot\frac{d_{y_i}}{d_{x_i}} + \frac{d_\ell}{d_\mu}\cdot\frac{d_\mu}{d_{x_i}} + \frac{d_\ell}{d_\sigma}\cdot\frac{d_\sigma}{d_{x_i}}
\frac{d_\ell}{d_{x_i}} = \frac{d_\ell}{d_{y_i}}\cdot\frac{\partial_{y_i}}{\partial_{x_i}} + \frac{d_\ell}{d_\mu}\cdot\frac{d_\mu}{d_{x_i}} + \frac{d_\ell}{d_\sigma}\cdot\frac{d_\sigma}{d_{x_i}}
where :math:`\frac{d_{y_i}}{d_{x_i}}=\frac{\gamma}{\sigma}, \frac{d_\ell}{d_\mu}=-\frac{\gamma}{\sigma}\sum_i^N\frac{d_\ell}{d_{y_i}}
where :math:`\frac{\partial_{y_i}}{\partial_{x_i}}=\frac{\gamma}{\sigma}, \frac{d_\ell}{d_\mu}=-\frac{\gamma}{\sigma}\sum_i^N\frac{d_\ell}{d_{y_i}}
\text{ and } \frac{d_\sigma}{d_{x_i}}=\frac{1}{\sigma}(\frac{x_i-\mu}{N})`.
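As a quick numeric check of the formulas above, a minimal sketch (illustrative only, not the CUDA kernel) that rebuilds the gradient from the three-term decomposition and compares it against autograd::

    import torch
    from torch.autograd import Variable

    eps, N = 1e-5, 16
    gamma, beta = 2.0, 0.5
    x = Variable(torch.randn(N), requires_grad=True)
    mu = x.mean()
    sigma = (((x - mu) ** 2).mean() + eps).sqrt()
    y = gamma * (x - mu) / sigma + beta

    grad_y = Variable(torch.randn(N))                            # incoming d_l/d_y
    y.backward(grad_y)
    dmu = -(gamma / sigma) * grad_y.sum()                        # d_l/d_mu, as above
    dsigma = -(gamma / sigma ** 2) * ((x - mu) * grad_y).sum()   # d_l/d_sigma
    manual = grad_y * gamma / sigma + dmu / N + dsigma * (x - mu) / (N * sigma)
    print((x.grad - manual).abs().max())                         # ~0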
Why Synchronize BN?
......@@ -41,41 +41,27 @@ How to Synchronize?
Suppose we have :math:`K` GPUs; :math:`sum(x)_k` and :math:`sum(x^2)_k` denote the sum of elements and the sum of element squares on the :math:`k^{th}` GPU.
- Forward Pass:
We can calculate the sum of elements :math:`sum(x)=\sum x_i \text{ and sum of squares } sum(x^2)=\sum x_i^2` in each GPU, then apply the :class:`encoding.parallel.AllReduce` operation to sum across GPUs. Then calculate the global mean :math:`\mu=\frac{sum(x)}{N} \text{ and global variance } \sigma=\sqrt{\frac{sum(x^2)}{N}-\mu^2+\epsilon}`.
We can calculate the sum of elements :math:`sum(x)=\sum x_i \text{ and sum of squares } sum(x^2)=\sum x_i^2` in each GPU, then apply the :class:`encoding.parallel.allreduce` operation to sum across GPUs. Then calculate the global mean :math:`\mu=\frac{sum(x)}{N} \text{ and global variance } \sigma=\sqrt{\frac{sum(x^2)}{N}-\mu^2+\epsilon}` (sketched in code after the backward-pass notes below).
- Backward Pass:
* :math:`\frac{d_\ell}{d_{x_i}}=\frac{\gamma}{\sigma}` can be calculated locally in each GPU.
* The first term of :math:`\frac{d_\ell}{d_{x_i}}`, namely :math:`\frac{d_\ell}{d_{y_i}}\frac{\gamma}{\sigma}`, can be calculated locally in each GPU.
* Calculate the gradient of :math:`sum(x)` and :math:`sum(x^2)` individually in each GPU :math:`\frac{d_\ell}{d_{sum(x)_k}}` and :math:`\frac{d_\ell}{d_{sum(x^2)_k}}`.
* Then Sync the gradient (automatically handled by :class:`encoding.parallel.AllReduce`) and continue the backward.
Classic Implementation
~~~~~~~~~~~~~~~~~~~~~~
- Synchronized DataParallel:
Standard DataParallel pipeline of public frameworks (MXNet, PyTorch...) in each training iteration:
* duplicate the network (weights) to all the GPUs,
* split the training batch to each GPU,
* forward and backward to calculate gradient,
* update network parameters (weights) then go to next iter.
Therefore, communication across different GPUs is not supported. To address this problem, we introduce an :class:`encoding.parallel.SelfDataParallel` mode, which enables each layer to accept multi-GPU inputs directly. These self-parallel layers are provided in :class:`encoding.nn`.
- Cross GPU Autograd:
Because BN layers are used frequently in the network, such a complicated backward graph can confuse the PyTorch autograd engine. To address this problem, we provide an autograd function :class:`encoding.parallel.AllReduce` to handle the cross-GPU gradient calculation.
Comparing Performance
---------------------
- Training Time:
- Segmentation Performance:
* Then Sync the gradient (automatically handled by :class:`encoding.parallel.allreduce`) and continue the backward.
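The forward-pass aggregation above can be sketched as follows (a simplification in which CPU chunks stand in for per-GPU tensors and the allreduce is modeled as a plain Python sum)::

    import math
    import torch

    eps = 1e-5
    chunks = [torch.randn(8) for _ in range(4)]   # stand-ins for per-GPU inputs
    xsum = sum(c.sum() for c in chunks)           # allreduce of sum(x)
    xsqu = sum((c * c).sum() for c in chunks)     # allreduce of sum(x^2)
    N = sum(c.numel() for c in chunks)
    mu = xsum / N                                 # global mean
    sigma = math.sqrt(xsqu / N - mu * mu + eps)   # global std, as in the formula above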
Citation
--------
.. note::
This code is provided together with the paper; please cite our work.
* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
This code is provided together with the paper (coming soon); please cite our work.
@InProceedings{Zhang_2018_CVPR,
author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
title = {Context Encoding for Semantic Segmentation},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2018}
}
......@@ -4,8 +4,13 @@
Data Parallel
=============
- The current PyTorch DataParallel does not support multi-GPU loss calculation, which makes the GPU memory usage very inefficient. We address this issue here with CriterionDataParallel.
- :class:`encoding.parallel.SelfDataParallel` is compatible with Synchronized Batch Normalization :class:`encoding.nn.BatchNorm2d`.
- The current PyTorch DataParallel does not support multi-GPU loss calculation, which makes the GPU memory usage very unbalanced. We address this issue here with Model & CriterionDataParallel. A usage sketch follows the note below.
.. note::
This code is provided together with the paper
* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
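A hypothetical usage sketch: ``ModelDataParallel`` is assumed from the "Model & CriterionDataParallel" mention above, and ``net``, ``loss_fn`` and ``train_loader`` are placeholders::

    import encoding

    model = encoding.parallel.ModelDataParallel(net)              # outputs stay on their GPUs
    criterion = encoding.parallel.CriterionDataParallel(loss_fn)  # loss computed per GPU
    for data, target in train_loader:
        output = model(data)
        loss = criterion(output, target)
        loss.backward()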
.. automodule:: encoding.parallel
.. currentmodule:: encoding.parallel
......@@ -22,21 +27,8 @@ Data Parallel
.. autoclass:: CriterionDataParallel
:members:
:hidden:`SelfDataParallel`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: SelfDataParallel
:members:
:hidden:`AllReduce`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AllReduce
:members:
:hidden:`Broadcast`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Broadcast
:members:
:hidden:`allreduce`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: allreduce
......@@ -4,9 +4,8 @@
Synchronized BatchNorm
======================
The current BN is implemented unsynchronized across the GPUs, which is a big problem for memory-consuming tasks such as semantic segmentation, since the per-GPU mini-batch is very small.
Synchronizing the batchnorm across multiple GPUs is not easy to implement within the current DataParallel framework. We address this difficulty by making each layer 'self-parallel' (:class:`encoding.parallel.SelfDataParallel`), that is, accepting inputs from multiple GPUs. This lets us handle the synchronization across GPUs.
.. note::
The original ``Self-Parallel`` version of ``BatchNorm`` has been deprecated in favor of PyTorch Compatible :class:`encoding.nn.BatchNorm2d`.
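A minimal usage sketch, treating :class:`encoding.nn.BatchNorm2d` as a drop-in replacement for ``torch.nn.BatchNorm2d`` (the surrounding layers are illustrative)::

    import torch
    import encoding

    net = torch.nn.Sequential(
        torch.nn.Conv2d(3, 64, kernel_size=3, padding=1),
        encoding.nn.BatchNorm2d(64),   # synchronized across GPUs
        torch.nn.ReLU(inplace=True),
    )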
.. currentmodule:: encoding.nn
......@@ -44,9 +43,3 @@ Functions
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: batchnormeval
:hidden:`sum_square`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: sum_square
......@@ -24,8 +24,3 @@ Useful util functions.
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: save_checkpoint
:hidden:`progress_bar`
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: progress_bar
......@@ -5,13 +5,9 @@
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""An optimized PyTorch package with CUDA backend."""
from .version import __version__
import encoding.nn
import encoding.functions
import encoding.dilated
import encoding.parallel
import encoding.utils
from . import nn, functions, dilated, parallel, utils
"""Dilated ResNet and DenseNet"""
from .resnet import *
from .densenet import *
"""Dilated DenseNet"""
from collections import OrderedDict
import torch
from torch.autograd import Variable
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict
from .. import nn
from .. import functions as F
......@@ -74,17 +75,18 @@ def densenet161(pretrained=False, **kwargs):
class _DenseLayer(nn.Sequential):
# pylint: disable=expression-not-assigned
def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, dilation=1):
super(_DenseLayer, self).__init__()
self.add_module('norm.1', nn.BatchNorm2d(num_input_features)),
self.add_module('relu.1', nn.ReLU(inplace=True)),
self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size *
growth_rate, kernel_size=1, stride=1, bias=False)),
self.add_module('conv.1', nn.Conv2d(
num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)),
self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)),
self.add_module('relu.2', nn.ReLU(inplace=True)),
self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate,
kernel_size=3, stride=1, padding=dilation, dilation=dilation,
bias=False)),
self.add_module('conv.2', nn.Conv2d(
bn_size * growth_rate, growth_rate, kernel_size=3, stride=1,
padding=dilation, dilation=dilation, bias=False)),
self.drop_rate = drop_rate
def forward(self, x):
......@@ -92,12 +94,7 @@ class _DenseLayer(nn.Sequential):
if self.drop_rate > 0:
new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
if isinstance(x, Variable):
return torch.cat([x, new_features], 1)
elif isinstance(x, tuple) or isinstance(x, list):
return F.cat_each(x, new_features, 1)
else:
raise RuntimeError('unknown input type')
return torch.cat([x, new_features], 1)
class _DenseBlock(nn.Sequential):
......@@ -115,8 +112,8 @@ class _Transition(nn.Sequential):
self.add_module('relu', nn.ReLU(inplace=True))
self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
kernel_size=1, stride=1, bias=False))
self.add_module('pool', nn.DilatedAvgPool2d(kernel_size=2, stride=stride,
dilation=dilation))
self.add_module('pool', nn.DilatedAvgPool2d(kernel_size=2, stride=stride,
dilation=dilation))
class DenseNet(nn.Module):
......@@ -150,8 +147,8 @@ class DenseNet(nn.Module):
]))
# Each denseblock
strides = [1,2,1,1]
dilations = [1,1,2,4]
strides = [1, 2, 1, 1]
dilations = [1, 1, 2, 4]
num_features = num_init_features
for i, num_layers in enumerate(block_config):
block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
......@@ -173,8 +170,6 @@ class DenseNet(nn.Module):
def forward(self, x):
features = self.features(x)
out = F.relu(features, inplace=True)
"""
out = F.avg_pool2d(out, kernel_size=7).view(features.size(0), -1)
out = self.classifier(out)
"""
# out = F.avg_pool2d(out, kernel_size=7).view(features.size(0), -1)
# out = self.classifier(out)
return out
from .. import nn
"""Dilated ResNet"""
import math
from torch.autograd import Variable
import torch.utils.model_zoo as model_zoo
from .. import nn
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'BasicBlock', 'Bottleneck']
......@@ -22,6 +22,8 @@ def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock(nn.Module):
"""ResNet BasicBlock
"""
expansion = 1
def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, first_dilation=1):
super(BasicBlock, self).__init__()
......@@ -55,25 +57,29 @@ class BasicBlock(nn.Module):
class Bottleneck(nn.Module):
"""ResNet Bottleneck
"""
# pylint: disable=unused-argument
expansion = 4
def __init__(self, inplanes, planes, stride=1, dilation=1,
downsample=None, first_dilation=1):
def __init__(self, inplanes, planes, stride=1, dilation=1,
downsample=None, first_dilation=1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
self.conv2 = nn.Conv2d(
planes, planes, kernel_size=3, stride=stride,
padding=dilation, dilation=dilation, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1,
bias=False)
self.conv3 = nn.Conv2d(
planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dilation = dilation
self.stride = stride
def _sum_each(self, x, y):
assert(len(x)==len(y))
assert(len(x) == len(y))
z = []
for i in range(len(x)):
z.append(x[i]+y[i])
......@@ -96,11 +102,7 @@ class Bottleneck(nn.Module):
if self.downsample is not None:
residual = self.downsample(x)
if isinstance(out, Variable):
out += residual
elif isinstance(out, tuple) or isinstance(out, list):
out = self._sum_each(out, residual)
out += residual
out = self.relu(out)
return out
......@@ -108,13 +110,14 @@ class Bottleneck(nn.Module):
class ResNet(nn.Module):
"""Dilated Pre-trained ResNet Model, which preduces the stride of 8 featuremaps at conv5.
Reference:
- He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
- Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
"""
# pylint: disable=unused-variable
def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
super(ResNet, self).__init__()
......@@ -149,14 +152,14 @@ class ResNet(nn.Module):
layers = []
if dilation == 1 or dilation == 2:
layers.append(block(self.inplanes, planes, stride, dilation=1,
layers.append(block(self.inplanes, planes, stride, dilation=1,
downsample=downsample, first_dilation=dilation))
elif dilation ==4:
layers.append(block(self.inplanes, planes, stride, dilation=2,
elif dilation == 4:
layers.append(block(self.inplanes, planes, stride, dilation=2,
downsample=downsample, first_dilation=dilation))
else:
raise RuntimeError("=> unknown dilation size: {}".format(dilation))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, dilation=dilation, first_dilation=dilation))
......@@ -239,8 +242,3 @@ def resnet152(pretrained=False, **kwargs):
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
return model
if __name__ == "__main__":
model = ResNet(Bottleneck, [3, 4, 23, 3])
model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
print(model.layer4)
"""Encoding Autograd Fuctions"""
from .encoding import *
from .basic import *
from .syncbn import *
from .customize import *
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import threading
import torch
import torch.nn.functional as F
from torch.autograd import Function, Variable
__all__ = ['squeeze_each', 'view_each', 'multi_each', 'sum_each',
'cat_each', 'upsample', 'dropout', 'relu']
def squeeze_each(x, dim=None):
"""Multi-GPU version torch. squeeze()
"""
y = []
for i in range(len(x)):
if dim is None:
y.append(x[i].squeeze())
else:
y.append(x[i].squeeze(dim))
return y
def view_each(x, size):
"""Multi-GPU version torch.view
Returns a new tensor with the same data but different size.
The returned tensor shares the same data and must have the same number
of elements, but may have a different size. A tensor must be
:attr:`contiguous` to be viewed.
Args:
input: list of multi-gpu tensors
size (torch.Size or int...): Desired size
"""
y = []
for i in range(len(x)):
y.append(x[i].view(size))
return y
def multi_each(a, b):
"""Multi-GPU version multiplication
.. math::
y[i] = a[i] * b[i]
"""
y = []
for i in range(len(a)):
y.append(a[i] * b[i])
return y
def sum_each(x, y):
"""Multi-GPU version torch.add
.. math::
y[i] = a[i] + b[i]
"""
assert len(x) == len(y)
z = []
for i in range(len(x)):
z.append(x[i]+y[i])
return z
def cat_each(x1, x2, dim):
"""Multi-GPU version torch.cat
.. math::
y[i] = torch.cat(a[i], b[i], dim)
"""
assert len(x1) == len(x2)
z = []
for i in range(len(x1)):
with torch.cuda.device_of(x1[i]):
x = torch.cat((x1[i], x2[i]), dim)
z.append(x)
return z
def dict_to_list(x):
"""Converting Dict{} to list[]
"""
y = []
for i in range(len(x)):
xi = x[i]
if isinstance(xi, Exception):
raise xi
y.append(xi)
return y
def upsample(input, size=None, scale_factor=None, mode='nearest'):
"""Multi-GPU version torch.nn.functional.upsample
Upsamples the input to either the given :attr:`size` or the given
:attr:`scale_factor`
The algorithm used for upsampling is determined by :attr:`mode`.
Currently temporal, spatial and volumetric upsampling are supported, i.e.
expected inputs are 3-D, 4-D or 5-D in shape.
The input dimensions are interpreted in the form:
`mini-batch x channels x [depth] x [height] x width`
The modes available for upsampling are: `nearest`, `linear` (3D-only),
`bilinear` (4D-only), `trilinear` (5D-only)
Args:
input (Variable): input
size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
output spatial size.
scale_factor (int): multiplier for spatial size. Has to be an integer.
mode (string): algorithm used for upsampling:
'nearest' | 'linear' | 'bilinear' | 'trilinear'. Default: 'nearest'
"""
if isinstance(input, Variable):
return F.upsample(input, size=size, scale_factor=scale_factor,
mode=mode)
elif isinstance(input, tuple) or isinstance(input, list):
lock = threading.Lock()
results = {}
def _worker(i, x):
try:
with torch.cuda.device_of(x):
result = F.upsample(x, size=size, scale_factor=scale_factor, mode=mode)
with lock:
results[i] = result
except Exception as e:
with lock:
results[i] = e
# multi-threading for different gpu
threads = [threading.Thread(target=_worker, args=(i, x))
for i, x in enumerate(input)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
outputs = dict_to_list(results)
return outputs
else:
raise RuntimeError('unknown input type')
def dropout(input, p=0.5, training=False, inplace=True):
"""Multi-GPU version torch.nn.functional.droupout
The channels to zero-out are randomized on every forward call.
*Usually the input comes from Conv2d modules.*
As described in the paper
`Efficient Object Localization Using Convolutional Networks`,
if adjacent pixels within feature maps are strongly correlated
(as is normally the case in early convolution layers) then iid dropout
will not regularize the activations and will otherwise just result
in an effective learning rate decrease.
In this case, :func:`nn.Dropout2d` will help promote independence between
feature maps and should be used instead.
Args:
p (float, optional): probability of an element to be zeroed.
inplace (bool, optional): If set to True, will do this operation
in-place
Shape:
- Input: :math:`(N, C, H, W)`
- Output: :math:`(N, C, H, W)` (same shape as input)
"""
if isinstance(input, Variable):
return F.dropout(input, p, training, inplace)
elif isinstance(input, tuple) or isinstance(input, list):
lock = threading.Lock()
results = {}
def _worker(i, x):
try:
with torch.cuda.device_of(x):
result = F.dropout(x, p, training, inplace)
with lock:
results[i] = result
except Exception as e:
with lock:
results[i] = e
# multi-threading for different gpu
threads = [threading.Thread(target=_worker, args=(i, x))
for i, x in enumerate(input)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
outputs = dict_to_list(results)
return outputs
else:
raise RuntimeError('unknown input type')
def relu(input, inplace=False):
"""Multi-GPU version torch.nn.functional.relu
Applies the rectified linear unit function element-wise
:math:`{ReLU}(x)= max(0, x)`
Args:
inplace: can optionally do the operation in-place. Default: False
Shape:
- Input: :math:`(N, *)` where `*` means, any number of additional
dimensions
- Output: :math:`(N, *)`, same shape as the input
"""
if isinstance(input, Variable):
return F.relu(input, inplace)
elif isinstance(input, tuple) or isinstance(input, list):
lock = threading.Lock()
results = {}
def _worker(i, x):
try:
with torch.cuda.device_of(x):
result = F.relu(x, inplace)
with lock:
results[i] = result
except Exception as e:
with lock:
results[i] = e
# multi-threading for different gpu
threads = [threading.Thread(target=_worker, args=(i, x))
for i, x in enumerate(input)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
outputs = dict_to_list(results)
return outputs
else:
raise RuntimeError('unknown input type')
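if __name__ == '__main__':
    # Illustrative smoke test (not part of the library API): exercise the
    # list helpers on CPU stand-ins for per-GPU tensors.
    xs = [Variable(torch.randn(2, 3)) for _ in range(2)]
    assert view_each(xs, (6,))[0].size() == (6,)
    assert sum_each(xs, xs)[0].size() == (2, 3)
    assert multi_each(xs, xs)[1].size() == (2, 3)
    assert cat_each(xs, xs, 0)[0].size() == (4, 3)
    maps = [Variable(torch.randn(1, 1, 4, 4)) for _ in range(2)]
    assert upsample(maps, scale_factor=2)[0].size() == (1, 1, 8, 8)
    print('customize helpers OK')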