Commit 0f6efd80 authored by Hang Zhang

path

parent c92a7c24
@@ -6,5 +6,8 @@ build/
 data/
 docs/src/
 docs/html/
+encoding/lib/
 encoding/_ext/
 encoding.egg-info/
+experiments/recognition/
+experiments/segmentation/
@@ -9,8 +9,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
...
@@ -16,6 +16,7 @@ from torch.utils.ffi import create_extension
 lib_path = os.path.join(os.path.dirname(torch.__file__), 'lib')
 cwd = os.path.dirname(os.path.realpath(__file__))
+encoding_lib_path = os.path.join(cwd, "encoding", "lib")

 # clean the build files
 clean_cmd = ['bash', 'clean.sh']
@@ -26,11 +27,12 @@ os.environ['TORCH_BUILD_DIR'] = lib_path
 if platform.system() == 'Darwin':
     os.environ['TH_LIBRARIES'] = os.path.join(lib_path,'libTH.1.dylib')
     os.environ['THC_LIBRARIES'] = os.path.join(lib_path,'libTHC.1.dylib')
-    ENCODING_LIB = os.path.join(lib_path, 'libENCODING.dylib')
+    ENCODING_LIB = os.path.join(cwd, 'encoding/lib/libENCODING.dylib')
 else:
     os.environ['TH_LIBRARIES'] = os.path.join(lib_path,'libTH.so.1')
     os.environ['THC_LIBRARIES'] = os.path.join(lib_path,'libTHC.so.1')
-    ENCODING_LIB = os.path.join(lib_path, 'libENCODING.so')
+    ENCODING_LIB = os.path.join(cwd, 'encoding/lib/libENCODING.so')

 build_all_cmd = ['bash', 'encoding/make.sh']
 subprocess.check_call(build_all_cmd, env=dict(os.environ))
@@ -44,7 +46,7 @@ defines = [('WITH_CUDA', None)]
 with_cuda = True

 include_path = [os.path.join(lib_path, 'include'),
-                os.path.join(lib_path,'include/ENCODING'),
+                os.path.join(cwd,'encoding/kernel'),
                 os.path.join(cwd,'encoding/kernel/include'),
                 os.path.join(cwd,'encoding/src/')]
@@ -65,6 +67,7 @@ ffi = create_extension(
     include_dirs = include_path,
     extra_link_args = [
         make_relative_rpath(lib_path),
+        make_relative_rpath(encoding_lib_path),
         ENCODING_LIB,
     ],
 )
...
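The change above stops pulling libENCODING from the torch lib directory and instead links the copy built inside the package, adding a second relative rpath so the compiled `_ext` module can locate it after installation. As a minimal illustration, assuming `make_relative_rpath` behaves like the PyTorch-era build helper of the same name (an assumption for illustration, not this repo's exact code), the emitted linker flag looks like this:

```python
# Hypothetical sketch of a make_relative_rpath-style helper: the rpath is
# anchored at the extension module itself ($ORIGIN / @loader_path), so the
# installed package can find libENCODING wherever the package is copied.
import platform

def make_relative_rpath_sketch(path):
    if platform.system() == 'Darwin':
        return '-Wl,-rpath,@loader_path/' + path   # macOS loader syntax
    return '-Wl,-rpath,$ORIGIN/' + path            # ELF loader syntax

print(make_relative_rpath_sketch('encoding/lib'))
# Linux output: -Wl,-rpath,$ORIGIN/encoding/lib
```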
 #!/usr/bin/env bash
-rm -rf build/ dist/ encoding.egg-info/ encoding/build/ encoding/_ext/ __pycache__ encoding/__pycache__
+rm -rf build/ dist/ encoding.egg-info/ encoding/lib/ encoding/_ext/ __pycache__ encoding/__pycache__
@@ -4,11 +4,13 @@
 Dilated Networks
 ================

-We provide correct dilated pre-trained ResNet and DenseNet for semantic segmentation.
-For dilation of ResNet, we replace the stride of 2 Conv3x3 at begining of certain stage and update the dilation of the conv layers afterwards.
-For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d` that handles the dilation of the transition layers, then update the dilation of the conv layers afterwards.
+We provide correct dilated pre-trained ResNet and DenseNet (stride of 8) for semantic segmentation.
+For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`.
 All provided models have been verified.

+.. note::
+    This code is provided together with the paper (coming soon), please cite our work.
+
 .. automodule:: encoding.dilated
 .. currentmodule:: encoding.dilated
...
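The removed lines spelled out the ResNet recipe: drop the stride-2 convolution at the start of a late stage and dilate the following convolutions so the output stride stays at 8. A small sketch of that recipe on a single layer (torchvision-style shapes assumed, not the repo's actual conversion code):

```python
# Replace a stride-2 3x3 conv by a stride-1, dilation-2 conv; padding grows
# with the dilation so the receptive field is kept while the spatial
# resolution is no longer halved. Pre-trained weights carry over unchanged.
import torch
import torch.nn as nn
from torch.autograd import Variable

conv = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1, bias=False)
dilated = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=2,
                    dilation=2, bias=False)
dilated.weight.data.copy_(conv.weight.data)  # same weights, denser output

x = Variable(torch.randn(1, 256, 64, 64))
print(conv(x).size())     # torch.Size([1, 256, 32, 32])
print(dilated(x).size())  # torch.Size([1, 256, 64, 64])
```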
@@ -5,9 +5,7 @@ Installing PyTorch-Encoding

 Install from Source
 -------------------

-* Please follow the `PyTorch instructions <https://github.com/pytorch/pytorch#from-source>`_ to install PyTorch from Source to the ``$HOME`` directory (recommended). Or you can simply clone a copy to ``$HOME`` directory::
-
-    git clone https://github.com/pytorch/pytorch $HOME/pytorch
+* Install PyTorch from Source (recommended). Please follow the `PyTorch instructions <https://github.com/pytorch/pytorch#from-source>`_.

 * Install this package
...
@@ -9,10 +9,10 @@ Useful util functions.

 .. automodule:: encoding.utils
 .. currentmodule:: encoding.utils

-:hidden:`CosLR_Scheduler`
+:hidden:`LR_Scheduler`
 ~~~~~~~~~~~~~~~~~~~~~~~~~

-.. autoclass:: CosLR_Scheduler
+.. autoclass:: LR_Scheduler
     :members:

 :hidden:`get_optimizer`
...
@@ -75,9 +75,4 @@ IF(ENCODING_SO_VERSION)
     SOVERSION ${ENCODING_SO_VERSION})
 ENDIF(ENCODING_SO_VERSION)

-FILE(GLOB kernel-header kernel/generic/*.h)
-FILE(GLOB src-header src/generic/*.h)
-
-INSTALL(TARGETS ENCODING LIBRARY DESTINATION ${ENCODING_INSTALL_LIB_SUBDIR})
-INSTALL(FILES kernel/thc_encoding.h DESTINATION "${ENCODING_INSTALL_INCLUDE_SUBDIR}/ENCODING")
-INSTALL(FILES ${src-header} ${kernel-header} DESTINATION "${ENCODING_INSTALL_INCLUDE_SUBDIR}/ENCODING/generic")
+#INSTALL(TARGETS ENCODING LIBRARY DESTINATION ${ENCODING_INSTALL_LIB_SUBDIR})
@@ -61,7 +61,11 @@ def sum_square(input):
     return _sum_square()(input)

-class _batchnormtrain(Function):
+class _batchnorm(Function):
+    def __init__(self, training=False):
+        super(_batchnorm, self).__init__()
+        self.training = training
+
     def forward(ctx, input, gamma, beta, mean, std):
         ctx.save_for_backward(input, gamma, beta, mean, std)
         assert(input.dim()==3)
@@ -95,13 +99,13 @@ class _batchnormtrain(Function):
                 encoding_lib.Encoding_Float_batchnorm_Backward(
                     gradOutput, input, gradInput, gradGamma, gradBeta,
                     mean, invstd, gamma, beta, gradMean, gradStd,
-                    True)
+                    self.training)
         elif isinstance(input, torch.cuda.DoubleTensor):
             with torch.cuda.device_of(input):
                 encoding_lib.Encoding_Double_batchnorm_Backward(
                     gradOutput, input, gradInput, gradGamma, gradBeta,
                     mean, invstd, gamma, beta, gradMean, gradStd,
-                    True)
+                    self.training)
         else:
             raise RuntimeError('Unimplemented data type!')
         return gradInput, gradGamma, gradBeta, gradMean, gradStd
@@ -122,52 +126,7 @@ def batchnormtrain(input, gamma, beta, mean, std):
     - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
     """
-    return _batchnormtrain()(input, gamma, beta, mean, std)
-
-class _batchnormeval(Function):
-    def forward(ctx, input, gamma, beta, mean, std):
-        ctx.save_for_backward(input, gamma, beta, mean, std)
-        assert(input.dim()==3)
-        with torch.cuda.device_of(input):
-            invstd = 1.0 / std
-            output = input.new().resize_as_(input)
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_batchnorm_Forward(output,
-                    input, mean, invstd, gamma, beta)
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_batchnorm_Forward(output,
-                    input, mean, invstd, gamma, beta)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return output
-
-    def backward(ctx, gradOutput):
-        input, gamma, beta, mean, std = ctx.saved_tensors
-        invstd = 1.0 / std
-        with torch.cuda.device_of(input):
-            gradInput = gradOutput.new().resize_as_(input).zero_()
-            gradGamma = gradOutput.new().resize_as_(gamma).zero_()
-            gradBeta = gradOutput.new().resize_as_(beta).zero_()
-            gradMean = gradOutput.new().resize_as_(mean).zero_()
-            gradStd = gradOutput.new().resize_as_(std).zero_()
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_batchnorm_Backward(
-                    gradOutput, input, gradInput, gradGamma, gradBeta,
-                    mean, invstd, gamma, beta, gradMean, gradStd,
-                    False)
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_batchnorm_Backward(
-                    gradOutput, input, gradInput, gradGamma, gradBeta,
-                    mean, invstd, gamma, beta, gradMean, gradStd,
-                    False)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return gradInput, gradGamma, gradBeta, gradMean, gradStd
+    return _batchnorm(True)(input, gamma, beta, mean, std)

 def batchnormeval(input, gamma, beta, mean, std):
@@ -176,4 +135,4 @@ def batchnormeval(input, gamma, beta, mean, std):
     Please see encoding.batchnormtrain_
     """
-    return _batchnormeval()(input, gamma, beta, mean, std)
+    return _batchnorm(False)(input, gamma, beta, mean, std)
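The refactor above merges `_batchnormtrain` and `_batchnormeval` into a single `_batchnorm` Function whose `training` flag is forwarded to the CUDA backward kernel; the forward computation is identical in both modes. A pure-PyTorch reference of that shared forward (a sketch for illustration, not the CUDA kernel itself):

```python
# y = gamma * (x - mean) / std + beta, broadcast over a (N, C, L) input
# with per-channel statistics, matching the invstd = 1.0 / std convention
# used by the Function above.
import torch

def batchnorm_forward_reference(x, gamma, beta, mean, std):
    invstd = 1.0 / std
    return (x - mean.view(1, -1, 1)) * invstd.view(1, -1, 1) \
        * gamma.view(1, -1, 1) + beta.view(1, -1, 1)

x = torch.randn(2, 3, 5)
gamma, beta = torch.ones(3), torch.zeros(3)
mean, std = torch.zeros(3), torch.ones(3)
print(batchnorm_forward_reference(x, gamma, beta, mean, std).size())
# torch.Size([2, 3, 5])
```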
 #!/usr/bin/env bash
-mkdir -p encoding/build && cd encoding/build
+mkdir -p encoding/lib && cd encoding/lib

 # compile and install
 cmake ..
-make install
+make
+cd ..
@@ -8,7 +8,7 @@
 ## LICENSE file in the root directory of this source tree
 ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

+from .encoding import *
 from .syncbn import *
 from .basic import *
-from .encoding import *
 from .customize import *
@@ -11,193 +11,29 @@
 import math
 import torch
 from torch.autograd import Variable
-from torch.nn import Module
+from torch.nn import Module, Parameter
 from torch.nn import functional as F
-from torch.nn.parameter import Parameter

 from ..parallel import my_data_parallel
 from .syncbn import BatchNorm2d
-from ..functions import dilatedavgpool2d, view_each, upsample
+from ..functions import view_each, upsample
 from .basic import *

-__all__ = ['DilatedAvgPool2d', 'UpsampleConv2d', 'View', 'Sum', 'Mean',
-           'Normalize', 'Bottleneck', 'PyramidPooling']
+__all__ = ['GramMatrix', 'View', 'Sum', 'Mean', 'Normalize', 'PyramidPooling']

-class DilatedAvgPool2d(Module):
-    [full class body moved to the second module's diff below]
-
-class UpsampleConv2d(Module):
-    [full class body moved to the second module's diff below]
+class GramMatrix(Module):
+    r""" Gram Matrix for a 4D convolutional featuremaps as a mini-batch
+
+    .. math::
+        \mathcal{G} = \sum_{h=1}^{H_i}\sum_{w=1}^{W_i} \mathcal{F}_{h,w}\mathcal{F}_{h,w}^T
+    """
+    def forward(self, y):
+        (b, ch, h, w) = y.size()
+        features = y.view(b, ch, w * h)
+        features_t = features.transpose(1, 2)
+        gram = features.bmm(features_t) / (ch * h * w)
+        return gram

 class View(Module):
@@ -283,48 +119,6 @@ class Normalize(Module):
         raise RuntimeError('unknown input type')

-class Bottleneck(Module):
-    """ Pre-activation residual block
-    Identity Mapping in Deep Residual Networks
-    ref https://arxiv.org/abs/1603.05027
-    """
-    def __init__(self, inplanes, planes, stride=1,
-                 norm_layer=BatchNorm2d):
-        super(Bottleneck, self).__init__()
-        self.expansion = 4
-        if inplanes != planes*self.expansion or stride != 1:
-            self.downsample = True
-            self.residual_layer = Conv2d(inplanes, planes * self.expansion,
-                                         kernel_size=1, stride=stride)
-        else:
-            self.downsample = False
-        conv_block = []
-        conv_block += [norm_layer(inplanes),
-                       ReLU(inplace=True),
-                       Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)]
-        conv_block += [norm_layer(planes),
-                       ReLU(inplace=True),
-                       Conv2d(planes, planes, kernel_size=3, stride=stride,
-                              padding=1, bias=False)]
-        conv_block += [norm_layer(planes),
-                       ReLU(inplace=True),
-                       Conv2d(planes, planes * self.expansion, kernel_size=1,
-                              stride=1, bias=False)]
-        self.conv_block = Sequential(*conv_block)
-
-    def forward(self, x):
-        if self.downsample:
-            residual = self.residual_layer(x)
-        else:
-            residual = x
-        if isinstance(x, Variable):
-            return residual + self.conv_block(x)
-        elif isinstance(x, tuple) or isinstance(x, list):
-            return sum_each(residual, self.conv_block(x))
-        else:
-            raise RuntimeError('unknown input type')
-
 class PyramidPooling(Module):
     """
     Reference:
...
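A quick shape check of the batched Gram computation that `GramMatrix` now performs with `bmm` (illustration only):

```python
# For a (B, C, H, W) featuremap the layer returns one normalized (C, C)
# Gram matrix per sample in the mini-batch.
import torch
from torch.autograd import Variable

y = Variable(torch.randn(4, 8, 16, 16))
b, ch, h, w = y.size()
features = y.view(b, ch, h * w)
gram = features.bmm(features.transpose(1, 2)) / (ch * h * w)
print(gram.size())  # torch.Size([4, 8, 8])
```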
@@ -10,17 +10,18 @@
 import threading

 import torch
-import torch.nn as nn
+from torch.nn import Module, Parameter
 import torch.nn.functional as F
 from torch.autograd import Function, Variable

 from .._ext import encoding_lib
 from ..functions import scaledL2, aggregate
 from ..parallel import my_data_parallel
+from ..functions import dilatedavgpool2d

-__all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'GramMatrix']
+__all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'DilatedAvgPool2d', 'UpsampleConv2d']

-class Encoding(nn.Module):
+class Encoding(Module):
     r"""
     Encoding Layer: a learnable residual encoder over 3d or 4d input that
     is seen as a mini-batch.
@@ -35,6 +36,9 @@ class Encoding(Module):
     Please see the `example of training Deep TEN <./experiments/texture.html>`_.

+    Reference:
+        Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
+
     Args:
         D: dimension of the features or feature channels
         K: number of codewords
@@ -51,22 +55,19 @@ class Encoding(Module):
         >>> import encoding
         >>> import torch
         >>> import torch.nn.functional as F
-        >>> from torch.autograd import Variable, gradcheck
+        >>> from torch.autograd import Variable
         >>> B,C,H,W,K = 2,3,4,5,6
         >>> X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), requires_grad=True)
        >>> layer = encoding.Encoding(C,K).double().cuda()
         >>> E = layer(X)
-
-    Reference:
-        Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
     """
     def __init__(self, D, K):
         super(Encoding, self).__init__()
         # init codewords and smoothing factor
         self.D, self.K = D, K
-        self.codewords = nn.Parameter(torch.Tensor(K, D),
-                                      requires_grad=True)
-        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
+        self.codewords = Parameter(torch.Tensor(K, D),
+                                   requires_grad=True)
+        self.scale = Parameter(torch.Tensor(K), requires_grad=True)
         self.reset_params()

     def reset_params(self):
@@ -93,7 +94,6 @@ class Encoding(Module):
         else:
             raise RuntimeError('Encoding Layer unknown input dims!')
         # assignment weights
-        #A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
         A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
         # aggregate
         E = aggregate(A, X, self.codewords)
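The deleted line softmaxed a flattened `(B*N, K)` view and reshaped back; the kept line softmaxes over the codeword dimension directly. For orientation, here is a plain-PyTorch sketch of what `scaledL2` and `aggregate` compute, as I read the Deep TEN formulation (an assumption about the CUDA ops' semantics, not their actual source):

```python
# X: (B, N, D) features, C: (K, D) codewords, S: (K,) smoothing factors.
# scaledL2 -> s_k * ||x_i - c_k||^2; softmax over K gives assignments A;
# aggregate -> E_k = sum_i A_ik * (x_i - c_k).
import torch
import torch.nn.functional as F
from torch.autograd import Variable

B, N, D, K = 2, 20, 3, 6
X = Variable(torch.randn(B, N, D))
C = Variable(torch.randn(K, D))
S = Variable(torch.randn(K))

R = X.unsqueeze(2) - C.view(1, 1, K, D)   # residuals, (B, N, K, D)
SL = S.view(1, 1, K) * R.pow(2).sum(3)    # scaled L2 distances, (B, N, K)
A = F.softmax(SL, dim=2)                  # assignment weights, (B, N, K)
E = (A.unsqueeze(3) * R).sum(1)           # aggregated encodings, (B, K, D)
print(E.size())  # torch.Size([2, 6, 3])
```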
@@ -104,14 +104,14 @@ class Encoding(Module):
             + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
             + str(self.D) + ')'

-class EncodingShake(nn.Module):
+class EncodingShake(Module):
     def __init__(self, D, K):
         super(EncodingShake, self).__init__()
         # init codewords and smoothing factor
         self.D, self.K = D, K
-        self.codewords = nn.Parameter(torch.Tensor(K, D),
-                                      requires_grad=True)
-        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
+        self.codewords = Parameter(torch.Tensor(K, D),
+                                   requires_grad=True)
+        self.scale = Parameter(torch.Tensor(K), requires_grad=True)
         self.reset_params()

     def reset_params(self):
@@ -146,7 +146,7 @@ class EncodingShake(Module):
         # shake
         self.shake()
         # assignment weights
-        A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
+        A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
         # aggregate
         E = aggregate(A, X, self.codewords)
         # shake
@@ -159,7 +159,7 @@ class EncodingShake(Module):
             + str(self.D) + ')'

-class Inspiration(nn.Module):
+class Inspiration(Module):
     r"""
     Inspiration Layer (CoMatch Layer) enables the multi-style transfer in feed-forward network, which learns to match the target feature statistics during the training.
     This module is differentiable and can be inserted in standard feed-forward network to be learned directly from the loss function without additional supervision.
@@ -176,7 +176,7 @@ class Inspiration(Module):
     def __init__(self, C, B=1):
         super(Inspiration, self).__init__()
         # B is equal to 1 or input mini_batch
-        self.weight = nn.Parameter(torch.Tensor(1,C,C), requires_grad=True)
+        self.weight = Parameter(torch.Tensor(1,C,C), requires_grad=True)
         # non-parameter buffer
         self.G = Variable(torch.Tensor(B,C,C), requires_grad=True)
         self.C = C
@@ -198,16 +198,179 @@ class Inspiration(Module):
             + 'N x ' + str(self.C) + ')'

-class GramMatrix(nn.Module):
-    [full class body moved to the first module's diff above]
+class DilatedAvgPool2d(Module):
+    r"""We provide Dilated Average Pooling for the dilation of DenseNet as
+    in :class:`encoding.dilated.DenseNet`.
+
+    Reference::
+        We provide this code for a coming paper.
+
+    Applies a 2D average pooling over an input signal composed of several input planes.
+    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
+    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
+    can be precisely described as:
+
+    .. math::
+
+        \begin{array}{ll}
+        out(b, c, h, w) = 1 / (kH * kW) *
+        \sum_{{m}=0}^{kH-1} \sum_{{n}=0}^{kW-1}
+        input(b, c, dH * h + m, dW * w + n)
+        \end{array}
+
+    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
+    for :attr:`padding` number of points.
+
+    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
+
+        - a single ``int`` -- in which case the same value is used for the height and width dimension
+        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
+          and the second `int` for the width dimension
+
+    Args:
+        kernel_size: the size of the window
+        stride: the stride of the window. Default value is :attr:`kernel_size`
+        padding: implicit zero padding to be added on both sides
+        dilation: the dilation parameter similar to Conv2d
+
+    Shape:
+        - Input: :math:`(N, C, H_{in}, W_{in})`
+        - Output: :math:`(N, C, H_{out}, W_{out})` where
+          :math:`H_{out} = floor((H_{in} + 2 * padding[0] - kernel\_size[0]) / stride[0] + 1)`
+          :math:`W_{out} = floor((W_{in} + 2 * padding[1] - kernel\_size[1]) / stride[1] + 1)`
+
+    Examples::
+
+        >>> # pool of square window of size=3, stride=2, dilation=2
+        >>> m = nn.DilatedAvgPool2d(3, stride=2, dilation=2)
+        >>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
+        >>> output = m(input)
+    """
+    def __init__(self, kernel_size, stride=None, padding=0, dilation=1):
+        super(DilatedAvgPool2d, self).__init__()
+        self.kernel_size = kernel_size
+        self.stride = stride or kernel_size
+        self.padding = padding
+        self.dilation = dilation
+
+    def forward(self, input):
+        if isinstance(input, Variable):
+            return dilatedavgpool2d(input, self.kernel_size, self.stride,
+                                    self.padding, self.dilation)
+        elif isinstance(input, tuple) or isinstance(input, list):
+            return my_data_parallel(self, input)
+        else:
+            raise RuntimeError('unknown input type')
+
+    def __repr__(self):
+        return self.__class__.__name__ + ' (' \
+            + 'size=' + str(self.kernel_size) \
+            + ', stride=' + str(self.stride) \
+            + ', padding=' + str(self.padding) \
+            + ', dilation=' + str(self.dilation) + ')'
+
+class UpsampleConv2d(Module):
+    r"""
+    To avoid the checkerboard artifacts of standard fractionally-strided convolution, we adopt an integer-stride convolution that produces a :math:`2\times 2` output for each convolutional window.
+
+    .. image:: _static/img/upconv.png
+        :width: 50%
+        :align: center
+
+    Reference:
+        Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
+
+    Args:
+        in_channels (int): Number of channels in the input image
+        out_channels (int): Number of channels produced by the convolution
+        kernel_size (int or tuple): Size of the convolving kernel
+        stride (int or tuple, optional): Stride of the convolution. Default: 1
+        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
+        output_padding (int or tuple, optional): Zero-padding added to one side of the output. Default: 0
+        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
+        bias (bool, optional): If True, adds a learnable bias to the output. Default: True
+        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
+        scale_factor (int): scaling factor for upsampling convolution. Default: 1
+
+    Shape:
+        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
+        - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
+          :math:`H_{out} = scale * (H_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0] + output\_padding[0]`
+          :math:`W_{out} = scale * (W_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1] + output\_padding[1]`
+
+    Attributes:
+        weight (Tensor): the learnable weights of the module of shape
+            (in_channels, scale * scale * out_channels, kernel_size[0], kernel_size[1])
+        bias (Tensor): the learnable bias of the module of shape (scale * scale * out_channels)
+
+    Examples::
+
+        >>> # With square kernels and equal stride
+        >>> m = nn.UpsampleConv2d(16, 33, 3, stride=2)
+        >>> # non-square kernels and unequal stride and with padding
+        >>> m = nn.UpsampleConv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
+        >>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
+        >>> output = m(input)
+        >>> # exact output size can be also specified as an argument
+        >>> input = autograd.Variable(torch.randn(1, 16, 12, 12))
+        >>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
+        >>> upsample = nn.UpsampleConv2d(16, 16, 3, stride=2, padding=1)
+        >>> h = downsample(input)
+        >>> h.size()
+        torch.Size([1, 16, 6, 6])
+        >>> output = upsample(h, output_size=input.size())
+        >>> output.size()
+        torch.Size([1, 16, 12, 12])
+    """
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, scale_factor=1,
+                 bias=True):
+        super(UpsampleConv2d, self).__init__()
+        kernel_size = _pair(kernel_size)
+        stride = _pair(stride)
+        padding = _pair(padding)
+        dilation = _pair(dilation)
+        if in_channels % groups != 0:
+            raise ValueError('in_channels must be divisible by groups')
+        if out_channels % groups != 0:
+            raise ValueError('out_channels must be divisible by groups')
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.groups = groups
+        self.scale_factor = scale_factor
+        self.weight = Parameter(torch.Tensor(
+            out_channels * scale_factor * scale_factor,
+            in_channels // groups, *kernel_size))
+        if bias:
+            self.bias = Parameter(torch.Tensor(
+                out_channels * scale_factor * scale_factor))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        n = self.in_channels
+        for k in self.kernel_size:
+            n *= k
+        stdv = 1. / math.sqrt(n)
+        self.weight.data.uniform_(-stdv, stdv)
+        if self.bias is not None:
+            self.bias.data.uniform_(-stdv, stdv)
+
+    def forward(self, input):
+        if isinstance(input, Variable):
+            out = F.conv2d(input, self.weight, self.bias, self.stride,
+                           self.padding, self.dilation, self.groups)
+            return F.pixel_shuffle(out, self.scale_factor)
+        elif isinstance(input, tuple) or isinstance(input, list):
+            return my_data_parallel(self, input)
+        else:
+            raise RuntimeError('unknown input type')
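The design choice behind `UpsampleConv2d` in one line: a stride-1 convolution predicts `scale**2` maps per output channel, and `pixel_shuffle` rearranges them into a `scale`-times larger image, sidestepping the checkerboard artifacts of deconvolution. A tiny demonstration of the rearrangement step (illustration only):

```python
# pixel_shuffle turns (N, C * r*r, H, W) into (N, C, r*H, r*W).
import torch
import torch.nn.functional as F
from torch.autograd import Variable

scale = 2
out = Variable(torch.randn(1, 16 * scale * scale, 12, 12))
print(F.pixel_shuffle(out, scale).size())  # torch.Size([1, 16, 24, 24])
```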
@@ -27,7 +27,8 @@ __all__ = ['BatchNorm1d', 'BatchNorm2d']

 class BatchNorm1d(Module):
     r"""Synchronized Batch Normalization 1d
-    Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
+
+    `Implementation ideas <./notes/syncbn.html>`_. Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`.

     Reference::
         We provide this code for a coming paper.
@@ -221,7 +222,8 @@ class BatchNorm1d(Module):

 class BatchNorm2d(Module):
     r"""Synchronized Batch Normalization 2d
-    Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
+
+    `Implementation ideas <./notes/syncbn.html>`_. Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`.

     Reference::
         We provide this code for a coming paper.
...
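A minimal usage sketch of the synchronized layers, assuming they keep the standard `num_features` constructor and are wrapped in `SelfDataParallel` as the docstrings above request (hypothetical wiring, not an example taken from the repo):

```python
# encoding.nn.BatchNorm2d computes cross-GPU statistics, so the model is
# wrapped in encoding.parallel.SelfDataParallel instead of nn.DataParallel.
import torch.nn as nn
import encoding

model = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3, padding=1),
    encoding.nn.BatchNorm2d(8),
    nn.ReLU(inplace=True),
).cuda()
model = encoding.parallel.SelfDataParallel(model)
```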
@@ -19,6 +19,9 @@ from torch.nn.parallel.scatter_gather import scatter, scatter_kwargs, \
 from torch.nn.parallel.replicate import replicate
 from torch.nn.parallel.parallel_apply import parallel_apply

+__all__ = ['AllReduce', 'Broadcast', 'ModelDataParallel',
+           'CriterionDataParallel', 'SelfDataParallel']
+
 def nccl_all_reduce(inputs):
     # TODO, figure out why nccl all_reduce doesn't work for gradcheck
     input_size = inputs[0].size()
...
@@ -16,6 +16,8 @@ import time
 import math
 import tqdm

+__all__ = ['get_optimizer', 'LR_Scheduler', 'save_checkpoint', 'progress_bar']
+
 def get_optimizer(args, model, diff_LR=True):
     """
     Returns an optimizer for given model,
@@ -41,41 +43,56 @@ def get_optimizer(args, model, diff_LR=True):
     return optimizer

-class CosLR_Scheduler(object):
-    """Cosine Learning Rate Scheduler
-
-    .. math::
-        lr = baselr * 0.5 * (1 + cos(iter/maxiter))
-
-    Args:
-        args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs`
-        niters: number of iterations per epoch
-    """
-    def __init__(self, args, niters):
-        self.lr = args.lr
-        self.niters = niters
-        self.N = args.epochs * niters
-        self.epoch = -1
-
-    def __call__(self, optimizer, i, epoch, best_pred):
-        T = (epoch - 1) * self.niters + i
-        lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
-        if epoch > self.epoch:
-            print('\n=>Epochs %i, learning rate = %.4f, previous best ='\
-                '%.3f%%' % (epoch, lr, best_pred))
-            self.epoch = epoch
-        self._adjust_learning_rate(optimizer, lr)
+class LR_Scheduler(object):
+    """Learning Rate Scheduler
+
+    Step mode: ``lr = baselr * 0.1 ^ floor((epoch-1) / lr_step)``
+
+    Cosine mode: ``lr = baselr * 0.5 * (1 + cos(pi * iter / maxiter))``
+
+    Poly mode: ``lr = baselr * (1 - iter/maxiter) ^ 0.9``
+
+    Args:
+        args: :attr:`args.lr_scheduler` lr scheduler mode (`cos`, `poly`, `step`),
+            :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs,
+            :attr:`args.lr_step` step size for step mode
+        niters: number of iterations per epoch
+    """
+    def __init__(self, args, niters=0):
+        self.mode = args.lr_scheduler
+        print('Using {} LR Scheduler!'.format(self.mode))
+        self.lr = args.lr
+        if self.mode == 'step':
+            self.lr_step = args.lr_step
+        else:
+            self.niters = niters
+            self.N = args.epochs * niters
+        self.epoch = -1
+
+    def __call__(self, optimizer, i, epoch):
+        if self.mode == 'cos':
+            T = (epoch - 1) * self.niters + i
+            lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
+        elif self.mode == 'poly':
+            T = (epoch - 1) * self.niters + i
+            lr = self.lr * pow((1 - 1.0 * T / self.N), 0.9)
+        elif self.mode == 'step':
+            lr = self.lr * (0.1 ** ((epoch - 1) // self.lr_step))
+        else:
+            raise RuntimeError('Unknown LR scheduler!')
+        if epoch > self.epoch:
+            print('\n=>Epochs %i, learning rate = %.4f' % (
+                epoch, lr))
+            self.epoch = epoch
+        self._adjust_learning_rate(optimizer, lr)

     def _adjust_learning_rate(self, optimizer, lr):
         if len(optimizer.param_groups) == 1:
             optimizer.param_groups[0]['lr'] = lr
-        elif len(optimizer.param_groups) == 2:
+        else:
             # enlarge the lr at the head
             optimizer.param_groups[0]['lr'] = lr
-            optimizer.param_groups[1]['lr'] = lr * 10
-        else:
-            raise RuntimeError('unsupported number of param groups: {}' \
-                .format(len(optimizer.param_groups)))
+            for i in range(1, len(optimizer.param_groups)):
+                optimizer.param_groups[i]['lr'] = lr * 10

 # refer to https://github.com/xternalz/WideResNet-pytorch
...
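A hedged usage sketch of `LR_Scheduler` (the `args` attribute names follow what the class reads; the surrounding training script is assumed):

```python
import argparse
import torch
from encoding.utils import LR_Scheduler

args = argparse.Namespace(lr_scheduler='cos', lr=0.1, epochs=60, lr_step=20)
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)

niters = 100  # iterations per epoch
scheduler = LR_Scheduler(args, niters=niters)
for epoch in range(1, args.epochs + 1):
    for i in range(niters):
        scheduler(optimizer, i, epoch)  # set lr before each step
        # ... forward / backward / optimizer.step()
```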
@@ -10,8 +10,6 @@

 import io
 import os
-import re
-import sys
 import subprocess

 from setuptools import setup, find_packages
@@ -55,6 +53,10 @@ setup(
     # Exclude the build files.
     packages=find_packages(exclude=["build"]),
     # Package where to put the extensions. Has to be a prefix of build.py.
+    package_data={'encoding': [
+        'lib/*.so*', 'lib/*.dylib*',
+        'kernel/*.h', 'kernel/generic/*h',
+    ]},
     ext_package="",
     # Extensions to compile.
     cffi_modules=[
...