Commit d539ddfa authored by Hang Zhang's avatar Hang Zhang
Browse files

v0.1.0

parent 80a12ef6
...@@ -22,6 +22,9 @@ extern "C" { ...@@ -22,6 +22,9 @@ extern "C" {
#endif #endif
// float // float
#include "generic/encoding_utils.c"
#include "THC/THCGenerateFloatType.h"
#include "generic/encoding_kernel.c" #include "generic/encoding_kernel.c"
#include "THC/THCGenerateFloatType.h" #include "THC/THCGenerateFloatType.h"
...@@ -32,6 +35,9 @@ extern "C" { ...@@ -32,6 +35,9 @@ extern "C" {
#include "THC/THCGenerateFloatType.h" #include "THC/THCGenerateFloatType.h"
// double // double
#include "generic/encoding_utils.c"
#include "THC/THCGenerateDoubleType.h"
#include "generic/encoding_kernel.c" #include "generic/encoding_kernel.c"
#include "THC/THCGenerateDoubleType.h" #include "THC/THCGenerateDoubleType.h"
......
...@@ -77,16 +77,19 @@ class _ConvNd(Module): ...@@ -77,16 +77,19 @@ class _ConvNd(Module):
class Conv1d(_ConvNd): class Conv1d(_ConvNd):
r"""Applies a 1D convolution over an input signal composed of several input r"""Applies a 1D convolution over an input signal composed of several
planes. input planes.
In the simplest case, the output value of the layer with input size In the simplest case, the output value of the layer with input size
:math:`(N, C_{in}, L)` and output :math:`(N, C_{out}, L_{out})` can be :math:`(N, C_{in}, L)` and output :math:`(N, C_{out}, L_{out})` can be
precisely described as: precisely described as:
.. math:: .. math::
\begin{array}{ll} \begin{array}{ll}
out(N_i, C_{out_j}) = bias(C_{out_j}) out(N_i, C_{out_j}) = bias(C_{out_j})
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k) + \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k)
\star input(N_i, k)
\end{array} \end{array}
where :math:`\star` is the valid `cross-correlation`_ operator where :math:`\star` is the valid `cross-correlation`_ operator
| :attr:`stride` controls the stride for the cross-correlation. | :attr:`stride` controls the stride for the cross-correlation.
...@@ -155,11 +158,13 @@ class Conv2d(_ConvNd): ...@@ -155,11 +158,13 @@ class Conv2d(_ConvNd):
In the simplest case, the output value of the layer with input size In the simplest case, the output value of the layer with input size
:math:`(N, C_{in}, H, W)` and output :math:`(N, C_{out}, H_{out}, W_{out})` :math:`(N, C_{in}, H, W)` and output :math:`(N, C_{out}, H_{out}, W_{out})`
can be precisely described as: can be precisely described as:
.. math:: .. math::
\begin{array}{ll} \begin{array}{ll}
out(N_i, C_{out_j}) = bias(C_{out_j}) out(N_i, C_{out_j}) = bias(C_{out_j})
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k) + \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k)
\end{array} \end{array}
where :math:`\star` is the valid 2D `cross-correlation`_ operator where :math:`\star` is the valid 2D `cross-correlation`_ operator
| :attr:`stride` controls the stride for the cross-correlation. | :attr:`stride` controls the stride for the cross-correlation.
...@@ -414,10 +419,13 @@ class ReLU(Threshold): ...@@ -414,10 +419,13 @@ class ReLU(Threshold):
class Sigmoid(Module): class Sigmoid(Module):
"""Applies the element-wise function :math:`f(x) = 1 / ( 1 + exp(-x))` """Applies the element-wise function :math:`f(x) = 1 / ( 1 + exp(-x))`
Shape: Shape:
- Input: :math:`(N, *)` where `*` means, any number of additional - Input: :math:`(N, *)` where `*` means, any number of additional
dimensions dimensions
- Output: :math:`(N, *)`, same shape as the input - Output: :math:`(N, *)`, same shape as the input
Examples:: Examples::
>>> m = nn.Sigmoid() >>> m = nn.Sigmoid()
>>> input = autograd.Variable(torch.randn(2)) >>> input = autograd.Variable(torch.randn(2))
>>> print(input) >>> print(input)
...@@ -436,10 +444,11 @@ class Sigmoid(Module): ...@@ -436,10 +444,11 @@ class Sigmoid(Module):
class MaxPool2d(Module): class MaxPool2d(Module):
r"""Applies a 2D max pooling over an input signal composed of several input r"""Applies a 2D max pooling over an input signal composed of several
planes. input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`, In the simplest case, the output value of the layer with input size
output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)` :math:`(N, C, H, W)`, output :math:`(N, C, H_{out}, W_{out})` and
:attr:`kernel_size` :math:`(kH, kW)`
can be precisely described as: can be precisely described as:
.. math:: .. math::
...@@ -450,8 +459,8 @@ class MaxPool2d(Module): ...@@ -450,8 +459,8 @@ class MaxPool2d(Module):
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides | If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
for :attr:`padding` number of points for :attr:`padding` number of points
| :attr:`dilation` controls the spacing between the kernel points. It is harder to describe, | :attr:`dilation` controls the spacing between the kernel points. It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
but this `link`_ has a nice visualization of what :attr:`dilation` does.
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be: The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension - a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
...@@ -531,8 +540,8 @@ class AvgPool2d(Module): ...@@ -531,8 +540,8 @@ class AvgPool2d(Module):
input(N_i, C_j, stride[0] * h + m, stride[1] * w + n) input(N_i, C_j, stride[0] * h + m, stride[1] * w + n)
\end{array} \end{array}
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides | If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides for :attr:`padding` number of points
for :attr:`padding` number of points
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be: The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension - a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
......
...@@ -17,15 +17,19 @@ from torch.nn.parameter import Parameter ...@@ -17,15 +17,19 @@ from torch.nn.parameter import Parameter
from ..parallel import my_data_parallel from ..parallel import my_data_parallel
from .syncbn import BatchNorm2d from .syncbn import BatchNorm2d
from ..functions import dilatedavgpool2d from ..functions import dilatedavgpool2d, view_each, upsample
from .basic import *
__all__ = ['DilatedAvgPool2d', 'MyConvTranspose2d', 'View', 'Normalize', __all__ = ['DilatedAvgPool2d', 'UpsampleConv2d', 'View', 'Sum', 'Mean',
'Bottleneck'] 'Normalize', 'Bottleneck', 'PyramidPooling']
class DilatedAvgPool2d(Module): class DilatedAvgPool2d(Module):
r"""We provide Dilated Average Pooling for the dilation of Densenet as r"""We provide Dilated Average Pooling for the dilation of Densenet as
in :class:`encoding.dilated.DenseNet`. in :class:`encoding.dilated.DenseNet`.
Reference::
We provide this code for a coming paper.
Applies a 2D average pooling over an input signal composed of several input planes. Applies a 2D average pooling over an input signal composed of several input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`, In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
...@@ -68,8 +72,6 @@ class DilatedAvgPool2d(Module): ...@@ -68,8 +72,6 @@ class DilatedAvgPool2d(Module):
>>> input = autograd.Variable(torch.randn(20, 16, 50, 32)) >>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
>>> output = m(input) >>> output = m(input)
Reference::
comming
""" """
def __init__(self, kernel_size, stride=None, padding=0, dilation=1): def __init__(self, kernel_size, stride=None, padding=0, dilation=1):
super(DilatedAvgPool2d, self).__init__() super(DilatedAvgPool2d, self).__init__()
...@@ -79,8 +81,13 @@ class DilatedAvgPool2d(Module): ...@@ -79,8 +81,13 @@ class DilatedAvgPool2d(Module):
self.dilation = dilation self.dilation = dilation
def forward(self, input): def forward(self, input):
return dilatedavgpool2d(input, self.kernel_size, self.stride, if isinstance(input, Variable):
return dilatedavgpool2d(input, self.kernel_size, self.stride,
self.padding, self.dilation) self.padding, self.dilation)
elif isinstance(input, tuple) or isinstance(input, list):
return my_data_parallel(self, input)
else:
raise RuntimeError('unknown input type')
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + ' (' \ return self.__class__.__name__ + ' (' \
...@@ -90,13 +97,63 @@ class DilatedAvgPool2d(Module): ...@@ -90,13 +97,63 @@ class DilatedAvgPool2d(Module):
+ ', dilation=' + str(self.dilation) + ')' + ', dilation=' + str(self.dilation) + ')'
class MyConvTranspose2d(Module): class UpsampleConv2d(Module):
"""Customized Layers, discuss later r"""
To avoid the checkerboard artifacts of standard fractionally-strided convolution, we adopt an integer-stride convolution that produces :math:`2\times 2` outputs for each convolutional window.
.. image:: _static/img/upconv.png
:width: 50%
:align: center
Reference:
Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
output_padding (int or tuple, optional): Zero-padding added to one side of the output. Default: 0
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional): If True, adds a learnable bias to the output. Default: True
dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
scale_factor (int): scaling factor for upsampling convolution. Default: 1
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`
- Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
:math:`H_{out} = scale * (H_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0] + output\_padding[0]`
:math:`W_{out} = scale * (W_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1] + output\_padding[1]`
Attributes:
weight (Tensor): the learnable weights of the module of shape
(in_channels, scale * scale * out_channels, kernel_size[0], kernel_size[1])
bias (Tensor): the learnable bias of the module of shape (scale * scale * out_channels)
Examples::
>>> # With square kernels and equal stride
>>> m = nn.UpsampleConv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nn.UpsampleConv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> input = autograd.Variable(torch.randn(1, 16, 12, 12))
>>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
>>> upsample = nn.UpsampleConv2d(16, 16, 3, stride=2, padding=1)
>>> h = downsample(input)
>>> h.size()
torch.Size([1, 16, 6, 6])
>>> output = upsample(h, output_size=input.size())
>>> output.size()
torch.Size([1, 16, 12, 12])
""" """
def __init__(self, in_channels, out_channels, kernel_size, stride=1, def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, scale_factor =1, padding=0, dilation=1, groups=1, scale_factor =1,
bias=True): bias=True):
super(MyConvTranspose2d, self).__init__() super(UpsampleConv2d, self).__init__()
kernel_size = _pair(kernel_size) kernel_size = _pair(kernel_size)
stride = _pair(stride) stride = _pair(stride)
padding = _pair(padding) padding = _pair(padding)
...@@ -163,6 +220,36 @@ class View(Module): ...@@ -163,6 +220,36 @@ class View(Module):
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
class Sum(Module):
    """Reduce the input by summation along a fixed dimension.

    Accepts either a single Variable, or a tuple/list of Variables
    (one per GPU in self-parallel mode), which is dispatched through
    ``my_data_parallel``.

    Args:
        dim (int): dimension to reduce over.
        keep_dim (bool): if True, retain the reduced dimension with size 1.
    """

    def __init__(self, dim, keep_dim=False):
        super(Sum, self).__init__()
        self.dim = dim
        self.keep_dim = keep_dim

    def forward(self, input):
        if isinstance(input, Variable):
            return input.sum(self.dim, self.keep_dim)
        if isinstance(input, (tuple, list)):
            # self-parallel mode: apply to each per-GPU element
            return my_data_parallel(self, input)
        raise RuntimeError('unknown input type')
class Mean(Module):
    """Reduce the input by averaging along a fixed dimension.

    Accepts either a single Variable, or a tuple/list of Variables
    (one per GPU in self-parallel mode), which is dispatched through
    ``my_data_parallel``.

    Args:
        dim (int): dimension to reduce over.
        keep_dim (bool): if True, retain the reduced dimension with size 1.
    """

    def __init__(self, dim, keep_dim=False):
        super(Mean, self).__init__()
        self.dim = dim
        self.keep_dim = keep_dim

    def forward(self, input):
        if isinstance(input, Variable):
            return input.mean(self.dim, self.keep_dim)
        if isinstance(input, (tuple, list)):
            # self-parallel mode: apply to each per-GPU element
            return my_data_parallel(self, input)
        raise RuntimeError('unknown input type')
class Normalize(Module): class Normalize(Module):
r"""Performs :math:`L_p` normalization of inputs over specified dimension. r"""Performs :math:`L_p` normalization of inputs over specified dimension.
...@@ -189,7 +276,7 @@ class Normalize(Module): ...@@ -189,7 +276,7 @@ class Normalize(Module):
def forward(self, x): def forward(self, x):
if isinstance(x, Variable): if isinstance(x, Variable):
return F.normalize(x, self.p, self.dim) return F.normalize(x, self.p, self.dim, eps=1e-10)
elif isinstance(x, tuple) or isinstance(x, list): elif isinstance(x, tuple) or isinstance(x, list):
return my_data_parallel(self, x) return my_data_parallel(self, x)
else: else:
...@@ -214,15 +301,15 @@ class Bottleneck(Module): ...@@ -214,15 +301,15 @@ class Bottleneck(Module):
conv_block = [] conv_block = []
conv_block += [norm_layer(inplanes), conv_block += [norm_layer(inplanes),
ReLU(inplace=True), ReLU(inplace=True),
Conv2d(inplanes, planes, kernel_size=1, stride=1)] Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)]
conv_block += [norm_layer(planes), conv_block += [norm_layer(planes),
ReLU(inplace=True), ReLU(inplace=True),
Conv2d(planes, planes, kernel_size=3, stride=stride, Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1)] padding=1, bias=False)]
conv_block += [norm_layer(planes), conv_block += [norm_layer(planes),
ReLU(inplace=True), ReLU(inplace=True),
Conv2d(planes, planes * self.expansion, kernel_size=1, Conv2d(planes, planes * self.expansion, kernel_size=1,
stride=1)] stride=1, bias=False)]
self.conv_block = Sequential(*conv_block) self.conv_block = Sequential(*conv_block)
def forward(self, x): def forward(self, x):
...@@ -238,18 +325,58 @@ class Bottleneck(Module): ...@@ -238,18 +325,58 @@ class Bottleneck(Module):
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
def _get_a_var(obj): class PyramidPooling(Module):
if isinstance(obj, Variable): """
return obj Reference:
Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
if isinstance(obj, list) or isinstance(obj, tuple): """
results = map(_get_a_var, obj) def __init__(self, in_channels):
for result in results: super(PyramidPooling, self).__init__()
if isinstance(result, Variable): self.pool1 = AdaptiveAvgPool2d(1)
return result self.pool2 = AdaptiveAvgPool2d(2)
if isinstance(obj, dict): self.pool3 = AdaptiveAvgPool2d(3)
results = map(_get_a_var, obj.items()) self.pool4 = AdaptiveAvgPool2d(6)
for result in results:
if isinstance(result, Variable): out_channels = int(in_channels/4)
return result self.conv1 = Sequential(Conv2d(in_channels, out_channels, 1),
return None BatchNorm2d(out_channels),
ReLU(True))
self.conv2 = Sequential(Conv2d(in_channels, out_channels, 1),
BatchNorm2d(out_channels),
ReLU(True))
self.conv3 = Sequential(Conv2d(in_channels, out_channels, 1),
BatchNorm2d(out_channels),
ReLU(True))
self.conv4 = Sequential(Conv2d(in_channels, out_channels, 1),
BatchNorm2d(out_channels),
ReLU(True))
def _cat_each(self, x, feat1, feat2, feat3, feat4):
assert(len(x)==len(feat1))
z = []
for i in range(len(x)):
z.append( torch.cat((x[i], feat1[i], feat2[i], feat3[i], feat4[i]), 1))
return z
def forward(self, x):
if isinstance(x, Variable):
_, _, h, w = x.size()
elif isinstance(x, tuple) or isinstance(x, list):
_, _, h, w = x[0].size()
else:
raise RuntimeError('unknown input type')
feat1 = upsample(self.conv1(self.pool1(x)),(h,w),
mode='bilinear')
feat2 = upsample(self.conv2(self.pool2(x)),(h,w),
mode='bilinear')
feat3 = upsample(self.conv3(self.pool3(x)),(h,w),
mode='bilinear')
feat4 = upsample(self.conv4(self.pool4(x)),(h,w),
mode='bilinear')
if isinstance(x, Variable):
return torch.cat((x, feat1, feat2, feat3, feat4), 1)
elif isinstance(x, tuple) or isinstance(x, list):
return self._cat_each(x, feat1, feat2, feat3, feat4)
else:
raise RuntimeError('unknown input type')
...@@ -15,17 +15,17 @@ import torch.nn.functional as F ...@@ -15,17 +15,17 @@ import torch.nn.functional as F
from torch.autograd import Function, Variable from torch.autograd import Function, Variable
from .._ext import encoding_lib from .._ext import encoding_lib
from ..functions import scaledL2, aggregate, aggregateP, residual, assign from ..functions import scaledL2, aggregate
from ..parallel import my_data_parallel from ..parallel import my_data_parallel
__all__ = ['Encoding', 'Inspiration', 'GramMatrix', 'Aggregate','EncodingP'] __all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'GramMatrix']
class Encoding(nn.Module): class Encoding(nn.Module):
r""" r"""
Encoding Layer: a learnable residual encoder over 3d or 4d input that Encoding Layer: a learnable residual encoder over 3d or 4d input that
is seen as a mini-batch. is seen as a mini-batch.
.. image:: http://hangzh.com/figure/cvpr17.svg .. image:: _static/img/cvpr17.svg
:width: 50% :width: 50%
:align: center :align: center
...@@ -71,9 +71,8 @@ class Encoding(nn.Module): ...@@ -71,9 +71,8 @@ class Encoding(nn.Module):
def reset_params(self): def reset_params(self):
std1 = 1./((self.K*self.D)**(1/2)) std1 = 1./((self.K*self.D)**(1/2))
std2 = 1./((self.K)**(1/2))
self.codewords.data.uniform_(-std1, std1) self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2) self.scale.data.uniform_(-1, 0)
def forward(self, X): def forward(self, X):
if isinstance(X, tuple) or isinstance(X, list): if isinstance(X, tuple) or isinstance(X, list):
...@@ -82,7 +81,7 @@ class Encoding(nn.Module): ...@@ -82,7 +81,7 @@ class Encoding(nn.Module):
elif not isinstance(X, Variable): elif not isinstance(X, Variable):
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
# input X is a 4D tensor # input X is a 4D tensor
assert(X.size(1)==self.D,"Encoding Layer wrong channels!") assert(X.size(1)==self.D)
if X.dim() == 3: if X.dim() == 3:
# BxDxN # BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D B, N, K, D = X.size(0), X.size(2), self.K, self.D
...@@ -94,7 +93,8 @@ class Encoding(nn.Module): ...@@ -94,7 +93,8 @@ class Encoding(nn.Module):
else: else:
raise RuntimeError('Encoding Layer unknown input dims!') raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights # assignment weights
A = F.softmax(scaledL2(X, self.codewords, self.scale)) #A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
# aggregate # aggregate
E = aggregate(A, X, self.codewords) E = aggregate(A, X, self.codewords)
return E return E
...@@ -104,10 +104,65 @@ class Encoding(nn.Module): ...@@ -104,10 +104,65 @@ class Encoding(nn.Module):
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \ + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')' + str(self.D) + ')'
class EncodingShake(nn.Module):
    """Encoding layer variant with shake-style noise on the smoothing factors.

    Performs the same residual-encoding computation as :class:`Encoding`
    (scaled-L2 assignment followed by aggregation), except that
    :meth:`shake` re-randomizes the ``scale`` parameters in-place both
    before and after aggregation: U(-1, 0) noise while training, a
    constant -0.5 at evaluation time.

    Args:
        D: number of feature channels of the input.
        K: number of codewords.
    """
    def __init__(self, D, K):
        super(EncodingShake, self).__init__()
        # init codewords and smoothing factor
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D),
            requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()
    def reset_params(self):
        # Codewords ~ U(-1/sqrt(K*D), +1/sqrt(K*D)); scales ~ U(-1, 0).
        std1 = 1./((self.K*self.D)**(1/2))
        self.codewords.data.uniform_(-std1, std1)
        self.scale.data.uniform_(-1, 0)
    def shake(self):
        # Re-draw the smoothing factors in-place.
        # NOTE: mutating .data bypasses autograd, so these draws are not
        # part of the backward graph.
        if self.training:
            self.scale.data.uniform_(-1, 0)
        else:
            # Deterministic midpoint of U(-1, 0) at evaluation time.
            self.scale.data.zero_().add_(-0.5)
    def forward(self, X):
        # X: Variable of shape BxDxN (3D) or BxDxHxW (4D), or a tuple/list
        # of such Variables in self-parallel mode.
        if isinstance(X, tuple) or isinstance(X, list):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        # channel dimension must match the codeword dimension D
        assert(X.size(1)==self.D)
        if X.dim() == 3:
            # BxDxN -> BxNxD
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1,2).contiguous()
        elif X.dim() == 4:
            # BxDxHxW -> BxNxD with N = H*W
            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
            X = X.view(B,D,-1).transpose(1,2).contiguous()
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # shake before computing the assignments
        self.shake()
        # assignment weights: softmax over the K codewords for each feature
        A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
        # aggregate the (implicit) residuals with the assignment weights
        E = aggregate(A, X, self.codewords)
        # shake again after aggregation
        self.shake()
        return E
    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
            + str(self.D) + ')'
class Inspiration(nn.Module): class Inspiration(nn.Module):
r""" Inspiration Layer (for MSG-Net). r"""
Tuning the featuremap with target Gram Matrix Inspiration Layer (CoMatch Layer) enables the multi-style transfer in feed-forward network, which learns to match the target feature statistics during the training.
This module is differentiable and can be inserted into a standard feed-forward network to be learned directly from the loss function without additional supervision.
.. math:: .. math::
Y = \phi^{-1}[\phi(\mathcal{F}^T)W\mathcal{G}] Y = \phi^{-1}[\phi(\mathcal{F}^T)W\mathcal{G}]
...@@ -116,7 +171,7 @@ class Inspiration(nn.Module): ...@@ -116,7 +171,7 @@ class Inspiration(nn.Module):
training multi-style generative network for real-time transfer. training multi-style generative network for real-time transfer.
Reference: Reference:
Hang Zhang, and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)* Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
""" """
def __init__(self, C, B=1): def __init__(self, C, B=1):
super(Inspiration, self).__init__() super(Inspiration, self).__init__()
...@@ -156,76 +211,3 @@ class GramMatrix(nn.Module): ...@@ -156,76 +211,3 @@ class GramMatrix(nn.Module):
gram = features.bmm(features_t) / (ch * h * w) gram = features.bmm(features_t) / (ch * h * w)
return gram return gram
class Aggregate(nn.Module):
    r"""Aggregate residuals with their assignment weights.

    .. math::
        e_{k} = \sum_{i=1}^{N} a_{ik} r_{ik}

    Shape:
        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}`,
          :math:`R\in\mathcal{R}^{B\times N\times K\times D}`
          (:math:`B` batch, :math:`N` total number of features,
          :math:`K` number of codewords, :math:`D` feature dimension).
        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
    """
    def forward(self, A, R):
        # Tuples/lists come from the self-parallel mode (see encoding.nn).
        if isinstance(A, (tuple, list)):
            return my_data_parallel(self, A, R)
        if not isinstance(A, Variable):
            raise RuntimeError('unknown input type')
        return aggregateP(A, R)
class EncodingP(nn.Module):
    """Deprecated variant of the Encoding layer; use :class:`Encoding` instead.

    Materializes the residuals explicitly (``residual`` -> ``assign`` ->
    ``aggregateP``) rather than fusing them as :class:`Encoding` does.

    Args:
        D (int): number of feature channels of the input.
        K (int): number of codewords.
    """
    def __init__(self, D, K):
        super(EncodingP, self).__init__()
        # init codewords and smoothing factor
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D),
                                      requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()
        print('EncodingP is deprecated, please use Encoding.')

    def reset_params(self):
        # Codewords ~ U(-1/sqrt(K*D), +1/sqrt(K*D)); scales ~ U(-1/sqrt(K), +1/sqrt(K)).
        std1 = 1./((self.K*self.D)**(1/2))
        std2 = 1./((self.K)**(1/2))
        self.codewords.data.uniform_(-std1, std1)
        self.scale.data.uniform_(-std2, std2)

    def forward(self, X):
        # X: Variable of shape BxDxN (3D) or BxDxHxW (4D), or a tuple/list
        # of such Variables in self-parallel mode.
        if isinstance(X, tuple) or isinstance(X, list):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        # BUG FIX: the original `assert(cond, "msg")` asserted on a
        # two-element tuple, which is always truthy, so the channel check
        # never fired. (Same fix the sibling Encoding class received.)
        assert X.size(1) == self.D, "Encoding Layer wrong channels!"
        if X.dim() == 3:
            # BxDxN -> BxNxD
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1,2)
        elif X.dim() == 4:
            # BxDxHxW -> BxNxD with N = H*W
            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
            X = X.view(B,D,-1).transpose(1,2)
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # calculate residuals
        R = residual(X.contiguous(), self.codewords)
        # assignment weights
        A = assign(R, self.scale)
        # aggregate
        E = aggregateP(A, R)
        return E

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
            + str(self.D) + ')'
...@@ -29,6 +29,9 @@ class BatchNorm1d(Module): ...@@ -29,6 +29,9 @@ class BatchNorm1d(Module):
r"""Synchronized Batch Normalization 1d r"""Synchronized Batch Normalization 1d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn` Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for a coming paper.
Applies Batch Normalization over a 2d or 3d input that is seen as a Applies Batch Normalization over a 2d or 3d input that is seen as a
mini-batch. mini-batch.
...@@ -220,6 +223,9 @@ class BatchNorm2d(Module): ...@@ -220,6 +223,9 @@ class BatchNorm2d(Module):
r"""Synchronized Batch Normalization 2d r"""Synchronized Batch Normalization 2d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn` Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for a coming paper.
Applies Batch Normalization over a 4d input that is seen as a mini-batch Applies Batch Normalization over a 4d input that is seen as a mini-batch
of 3d inputs of 3d inputs
...@@ -411,3 +417,20 @@ class BatchNorm2d(Module): ...@@ -411,3 +417,20 @@ class BatchNorm2d(Module):
return outputs return outputs
else: else:
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
def _get_a_var(obj):
if isinstance(obj, Variable):
return obj
if isinstance(obj, list) or isinstance(obj, tuple):
results = map(_get_a_var, obj)
for result in results:
if isinstance(result, Variable):
return result
if isinstance(obj, dict):
results = map(_get_a_var, obj.items())
for result in results:
if isinstance(result, Variable):
return result
return None
...@@ -82,6 +82,9 @@ class Broadcast(Function): ...@@ -82,6 +82,9 @@ class Broadcast(Function):
class ModelDataParallel(Module): class ModelDataParallel(Module):
"""Implements data parallelism at the module level. """Implements data parallelism at the module level.
Reference::
We provide this code for a coming paper.
This container parallelizes the application of the given module by This container parallelizes the application of the given module by
splitting the input across the specified devices by chunking in the splitting the input across the specified devices by chunking in the
batch dimension. batch dimension.
...@@ -149,6 +152,9 @@ class CriterionDataParallel(Module): ...@@ -149,6 +152,9 @@ class CriterionDataParallel(Module):
Calculate loss in multiple-GPUs, which balance the memory usage for Calculate loss in multiple-GPUs, which balance the memory usage for
Semantic Segmentation. Semantic Segmentation.
Reference::
We provide this code for a coming paper.
The targets are splitted across the specified devices by chunking in The targets are splitted across the specified devices by chunking in
the batch dimension. Please use together with :class:`encoding.parallel.ModelDataParallel`. the batch dimension. Please use together with :class:`encoding.parallel.ModelDataParallel`.
""" """
...@@ -191,9 +197,12 @@ class CriterionDataParallel(Module): ...@@ -191,9 +197,12 @@ class CriterionDataParallel(Module):
class SelfDataParallel(Module): class SelfDataParallel(Module):
"""SelfDataParallel, please make sure you understand it before using. """SelfDataParallel, please make sure you understand it before using.
Reference::
We provide this code for a coming paper.
Each module in the network should be in self-parallel mode, Each module in the network should be in self-parallel mode,
which allows list of inputs from multiple GPUs. which allows list of inputs from multiple GPUs.
Please see encoding.nn for detail, use with cautious Please see :class:`encoding.nn` for detail, use with cautious
""" """
def __init__(self, module, device_ids=None, output_device=None, dim=0): def __init__(self, module, device_ids=None, output_device=None, dim=0):
super(SelfDataParallel, self).__init__() super(SelfDataParallel, self).__init__()
...@@ -211,11 +220,26 @@ class SelfDataParallel(Module): ...@@ -211,11 +220,26 @@ class SelfDataParallel(Module):
def forward(self, *inputs, **kwargs): def forward(self, *inputs, **kwargs):
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
outputs = self.module(inputs) if self.training:
return outputs # self parallel mode
outputs = self.module(inputs)
return outputs
else:
# TODO check faster?
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, \
self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return outputs
def replicate(self, module, device_ids):
return replicate(module, device_ids)
def parallel_apply(self, replicas, inputs, kwargs):
return parallel_apply(replicas, inputs, kwargs)
def scatter(self, inputs, kwargs, device_ids): def scatter(self, inputs, kwargs, device_ids):
#return my_scatter(inputs, target_gpus=device_ids)
outputs = scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) outputs = scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
return outputs return outputs
...@@ -343,3 +367,4 @@ def my_data_parallel(module, inputs, device_ids=None, \ ...@@ -343,3 +367,4 @@ def my_data_parallel(module, inputs, device_ids=None, \
outputs = my_parallel_apply(replicas, inputs, module_kwargs) outputs = my_parallel_apply(replicas, inputs, module_kwargs)
return outputs return outputs
...@@ -9,15 +9,27 @@ ...@@ -9,15 +9,27 @@
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/ */
//#include <THC/THC.h>
/* /*
#include <THC/THC.h>
#ifdef __cplusplus
extern "C" {
#endif
#define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME) #define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME)
#define THCTensor TH_CONCAT_3(TH,CReal,Tensor) #define THCTensor TH_CONCAT_3(TH,CReal,Tensor)
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME) #define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
// float
#include "generic/encoding_generic.h" #include "generic/encoding_generic.h"
#include "THC/THCGenerateFloatType.h" #include "THC/THCGenerateFloatType.h"
#include "generic/syncbn_generic.h"
#include "THC/THCGenerateFloatType.h"
#ifdef __cplusplus
}
#endif
*/ */
int Encoding_Float_scaledl2_forward(THCudaTensor *SL, int Encoding_Float_scaledl2_forward(THCudaTensor *SL,
...@@ -27,29 +39,12 @@ int Encoding_Float_scaledl2_backward( ...@@ -27,29 +39,12 @@ int Encoding_Float_scaledl2_backward(
THCudaTensor *GSL, THCudaTensor *GX, THCudaTensor *GC, THCudaTensor *GSL, THCudaTensor *GX, THCudaTensor *GC,
THCudaTensor *X, THCudaTensor *C, THCudaTensor *S); THCudaTensor *X, THCudaTensor *C, THCudaTensor *S);
int Encoding_Float_aggregateE_forward(THCudaTensor *E, THCudaTensor *A, int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *X, THCudaTensor *C); THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregateE_backward(THCudaTensor *GA, THCudaTensor *GE, int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GE,
THCudaTensor *A, THCudaTensor *X, THCudaTensor *C); THCudaTensor *A, THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *R);
int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GR,
THCudaTensor *L, THCudaTensor *A, THCudaTensor *R);
int Encoding_Float_residual_forward(THCudaTensor *R, THCudaTensor *X,
THCudaTensor *D);
int Encoding_Float_residual_backward(THCudaTensor *GR, THCudaTensor *GX,
THCudaTensor *GD);
int Encoding_Float_squaresqueeze_forward(THCudaTensor *L, THCudaTensor *R);
int Encoding_Float_squaresqueeze_backward(THCudaTensor *GL,
THCudaTensor *GR, THCudaTensor *R);
int Encoding_Float_batchnorm_Forward(THCudaTensor *output_, int Encoding_Float_batchnorm_Forward(THCudaTensor *output_,
THCudaTensor *input_, THCudaTensor *mean_, THCudaTensor *input_, THCudaTensor *mean_,
THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_); THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_);
...@@ -90,33 +85,13 @@ int Encoding_Double_scaledl2_backward( ...@@ -90,33 +85,13 @@ int Encoding_Double_scaledl2_backward(
THCudaDoubleTensor *GC, THCudaDoubleTensor *X, THCudaDoubleTensor *GC, THCudaDoubleTensor *X,
THCudaDoubleTensor *C, THCudaDoubleTensor *S); THCudaDoubleTensor *C, THCudaDoubleTensor *S);
int Encoding_Double_aggregateE_forward(THCudaDoubleTensor *E, int Encoding_Double_aggregate_forward(THCudaDoubleTensor *E,
THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C); THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C);
int Encoding_Double_aggregateE_backward(THCudaDoubleTensor *GA, int Encoding_Double_aggregate_backward(THCudaDoubleTensor *GA,
THCudaDoubleTensor *GE, THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *GE, THCudaDoubleTensor *A, THCudaDoubleTensor *X,
THCudaDoubleTensor *C); THCudaDoubleTensor *C);
int Encoding_Double_aggregate_forward(
THCudaDoubleTensor *E, THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_aggregate_backward(
THCudaDoubleTensor *GA, THCudaDoubleTensor *GR, THCudaDoubleTensor *L,
THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_residual_forward(
THCudaDoubleTensor *R, THCudaDoubleTensor *X, THCudaDoubleTensor *D);
int Encoding_Double_residual_backward(
THCudaDoubleTensor *GR, THCudaDoubleTensor *GX,
THCudaDoubleTensor *GD);
int Encoding_Double_squaresqueeze_forward(THCudaDoubleTensor *L,
THCudaDoubleTensor *R);
int Encoding_Double_squaresqueeze_backward(THCudaDoubleTensor *GL,
THCudaDoubleTensor *GR, THCudaDoubleTensor *R);
int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_, int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_,
THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_, THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_,
THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_, THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_,
...@@ -148,3 +123,4 @@ int Encoding_Double_DilatedAvgPool2d_Backward( ...@@ -148,3 +123,4 @@ int Encoding_Double_DilatedAvgPool2d_Backward(
int kH, int kW, int dH, int dW, int kH, int kW, int dH, int dW,
int padH, int padW, int padH, int padW,
int dilationH, int dilationW); int dilationH, int dilationW);
...@@ -23,7 +23,6 @@ int Encoding_(scaledl2_forward)(THCTensor *SL, ...@@ -23,7 +23,6 @@ int Encoding_(scaledl2_forward)(THCTensor *SL,
return 0; return 0;
} }
int Encoding_(scaledl2_backward)( int Encoding_(scaledl2_backward)(
THCTensor *GSL, THCTensor *GX, THCTensor *GC, THCTensor *GSL, THCTensor *GX, THCTensor *GC,
THCTensor *X, THCTensor *C, THCTensor *S) THCTensor *X, THCTensor *C, THCTensor *S)
...@@ -36,94 +35,25 @@ int Encoding_(scaledl2_backward)( ...@@ -36,94 +35,25 @@ int Encoding_(scaledl2_backward)(
return 0; return 0;
} }
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
int Encoding_(aggregateE_forward)(THCTensor *E, THCTensor *A,
THCTensor *X, THCTensor *C) THCTensor *X, THCTensor *C)
/* /*
* Aggregate operation * Aggregate operation
*/ */
{ {
Encoding_(AggregateE_Forward)(state, E, A, X, C); Encoding_(Aggregate_Forward)(state, E, A, X, C);
/* C function return number of the outputs */ /* C function return number of the outputs */
return 0; return 0;
} }
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GE,
int Encoding_(aggregateE_backward)(THCTensor *GA, THCTensor *GE,
THCTensor *A, THCTensor *X, THCTensor *C) THCTensor *A, THCTensor *X, THCTensor *C)
/* /*
* Aggregate backward operation to A * Aggregate backward operation to A
* G (dl/dR), L (dl/dE), A (assignments) * G (dl/dR), L (dl/dE), A (assignments)
*/ */
{ {
Encoding_(AggregateE_Backward)(state, GA, GE, A, X, C); Encoding_(Aggregate_Backward)(state, GA, GE, A, X, C);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *R)
/*
* Aggregate operation
*/
{
Encoding_(Aggregate_Forward)(state, E, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GR,
THCTensor *L, THCTensor *A, THCTensor *R)
/*
* Aggregate backward operation to A
* G (dl/dR), L (dl/dE), A (assignments)
*/
{
Encoding_(Aggregate_Backward)(state, GA, GR, L, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D)
/*
* Residual operation
*/
{
Encoding_(Residual_Forward)(state, R, X, D);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
THCTensor *GD)
/*
* Residual operation
*/
{
Encoding_(Residual_Backward)(state, GR, GX, GD);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R)
/*
* Residual operation
*/
{
Encoding_(SquareSqueeze_Forward)(state, L, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
THCTensor *R)
/*
* Residual operation
*/
{
Encoding_(SquareSqueeze_Backward)(state, GL, GR, R);
/* C function return number of the outputs */ /* C function return number of the outputs */
return 0; return 0;
} }
......
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/encoding_generic.h"
#else
int Encoding_(scaledl2_forward)(THCTensor *SL,
THCTensor *X, THCTensor *C, THCTensor *S);
int Encoding_(scaledl2_backward)(
THCTensor *GSL, THCTensor *GX, THCTensor *GC,
THCTensor *X, THCTensor *C, THCTensor *S);
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *X, THCTensor *C);
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GE,
THCTensor *A, THCTensor *X, THCTensor *C);
int Encoding_(aggregateP_forward)(THCTensor *E, THCTensor *A,
THCTensor *R);
int Encoding_(aggregateP_backward)(THCTensor *GA, THCTensor *GR,
THCTensor *L, THCTensor *A, THCTensor *R);
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D);
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
THCTensor *GD);
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R);
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
THCTensor *R);
#endif
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/pooling_generic.c"
#else
int Encoding_(DilatedAvgPool2d_Forward)(
THCTensor *X_, THCTensor *Y_,
int kH, int kW, int dH, int dW,
int padH, int padW,
int dilationH, int dilationW);
int Encoding_(DilatedAvgPool2d_Backward)(
THCTensor *gradX_, THCTensor *gradY_,
int kH, int kW, int dH, int dW,
int padH, int padW,
int dilationH, int dilationW);
#endif
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/syncbn_generic.h"
#else
int Encoding_(batchnorm_Forward)(THCTensor *output_, THCTensor *input_,
THCTensor *mean_, THCTensor *invstd_,
THCTensor *gamma_, THCTensor *beta_);
int Encoding_(batchnorm_Backward)(THCTensor *gradoutput_,
THCTensor *input_, THCTensor *gradinput_,
THCTensor *gradgamma_, THCTensor *gradbeta_, THCTensor *mean_,
THCTensor *invstd_, THCTensor *gamma_, THCTensor *beta_,
THCTensor *gradMean_, THCTensor *gradStd_, int train);
int Encoding_(sum_square_Forward)(THCTensor *input_,
THCTensor *sum_, THCTensor *square_);
int Encoding_(sum_square_Backward)(
THCTensor *gradInput, THCTensor *input_,
THCTensor *gradSum_, THCTensor *gradSquare_);
#endif
...@@ -14,6 +14,7 @@ import os ...@@ -14,6 +14,7 @@ import os
import sys import sys
import time import time
import math import math
import tqdm
def get_optimizer(args, model, diff_LR=True): def get_optimizer(args, model, diff_LR=True):
""" """
...@@ -44,9 +45,7 @@ class CosLR_Scheduler(object): ...@@ -44,9 +45,7 @@ class CosLR_Scheduler(object):
"""Cosine Learning Rate Scheduler """Cosine Learning Rate Scheduler
.. math:: .. math::
lr = base_lr * 0.5 * (1 + cos(T/N)) lr = baselr * 0.5 * (1 + cos(iter/maxiter))
where ``T`` is current iters and ``N`` is total iters
Args: Args:
args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs` args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs`
...@@ -62,7 +61,7 @@ class CosLR_Scheduler(object): ...@@ -62,7 +61,7 @@ class CosLR_Scheduler(object):
T = (epoch - 1) * self.niters + i T = (epoch - 1) * self.niters + i
lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi)) lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
if epoch > self.epoch: if epoch > self.epoch:
print('=>Epochs %i, learning rate = %.4f, previous best ='\ print('\n=>Epochs %i, learning rate = %.4f, previous best ='\
'%.3f%%' % (epoch, lr, best_pred)) '%.3f%%' % (epoch, lr, best_pred))
self.epoch = epoch self.epoch = epoch
self._adjust_learning_rate(optimizer, lr) self._adjust_learning_rate(optimizer, lr)
...@@ -90,12 +89,14 @@ def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'): ...@@ -90,12 +89,14 @@ def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'):
if is_best: if is_best:
shutil.copyfile(filename, directory + 'model_best.pth.tar') shutil.copyfile(filename, directory + 'model_best.pth.tar')
# refer to https://github.com/kuangliu/pytorch-cifar/blob/master/utils.py # refer to https://github.com/kuangliu/pytorch-cifar/blob/master/utils.py
_, term_width = os.popen('stty size', 'r').read().split() _, term_width = os.popen('stty size', 'r').read().split()
term_width = int(term_width) term_width = int(term_width)-1
TOTAL_BAR_LENGTH = 86. TOTAL_BAR_LENGTH = 36.
last_time = time.time() last_time = time.time()
begin_time = last_time begin_time = last_time
def progress_bar(current, total, msg=None): def progress_bar(current, total, msg=None):
"""Progress Bar for display """Progress Bar for display
""" """
......
...@@ -15,45 +15,38 @@ import sys ...@@ -15,45 +15,38 @@ import sys
import subprocess import subprocess
from setuptools import setup, find_packages from setuptools import setup, find_packages
from setuptools.command.develop import develop import setuptools.command.develop
from setuptools.command.install import install import setuptools.command.install
this_file = os.path.dirname(__file__) cwd = os.path.dirname(os.path.abspath(__file__))
def read(*names, **kwargs): # run test scrip after installation
with io.open( class install(setuptools.command.install.install):
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")
) as fp:
return fp.read()
def find_version(*file_paths):
version_file = read(*file_paths)
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
version_file, re.M)
if version_match:
return version_match.group(1)
raise RuntimeError("Unable to find version string.")
_version = find_version('encoding/__init__.py')
#extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
extra_compile_args += ['-static-libstdc++']
extra_link_args += ['-static-libstdc++']
class TestCommand(install):
"""Post-installation mode."""
def run(self): def run(self):
install.run(self) self.create_version_file()
setuptools.command.install.install.run(self)
subprocess.check_call("python test/test.py".split()) subprocess.check_call("python test/test.py".split())
@staticmethod
def create_version_file():
global version, cwd
print('-- Building version ' + version)
version_path = os.path.join(cwd, 'encoding', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
version = '0.1.0'
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
cwd=cwd).decode('ascii').strip()
version += '+' + sha[:7]
except Exception:
pass
setup( setup(
name="encoding", name="encoding",
version=_version, version=version,
description="PyTorch Encoding Layer", description="PyTorch Encoding",
url="https://github.com/zhanghang1989/PyTorch-Encoding-Layer", url="https://github.com/zhanghang1989/PyTorch-Encoding",
author="Hang Zhang", author="Hang Zhang",
author_email="zhang.hang@rutgers.edu", author_email="zhang.hang@rutgers.edu",
# Require cffi. # Require cffi.
...@@ -61,14 +54,13 @@ setup( ...@@ -61,14 +54,13 @@ setup(
setup_requires=["cffi>=1.0.0"], setup_requires=["cffi>=1.0.0"],
# Exclude the build files. # Exclude the build files.
packages=find_packages(exclude=["build"]), packages=find_packages(exclude=["build"]),
#extra_compile_args=extra_compile_args,
# Package where to put the extensions. Has to be a prefix of build.py. # Package where to put the extensions. Has to be a prefix of build.py.
ext_package="", ext_package="",
# Extensions to compile. # Extensions to compile.
cffi_modules=[ cffi_modules=[
os.path.join(this_file, "build.py:ffi") os.path.join(cwd, "build.py:ffi")
], ],
cmdclass={ cmdclass={
'install': TestCommand, 'install': install,
}, },
) )
...@@ -17,17 +17,6 @@ import torchvision.models as models ...@@ -17,17 +17,6 @@ import torchvision.models as models
EPS = 1e-6 EPS = 1e-6
def test_aggregateP():
B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
requires_grad=True)
R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (A, R)
test = gradcheck(encoding.functions.aggregateP, input, eps=1e-6, atol=1e-4)
print('Testing aggregate(): {}'.format(test))
def test_aggregate(): def test_aggregate():
B,N,K,D = 2,3,4,5 B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
...@@ -54,47 +43,6 @@ def test_scaledL2(): ...@@ -54,47 +43,6 @@ def test_scaledL2():
print('Testing scaledL2(): {}'.format(test)) print('Testing scaledL2(): {}'.format(test))
def test_assign():
B,N,K,D = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
requires_grad=True)
C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
requires_grad=True)
S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5),
requires_grad=True)
R = encoding.functions.residual(X, C)
A1 = encoding.functions.assign(R, S)
E1 = encoding.functions.aggregateP(A1, R)
A2 = F.softmax(encoding.functions.scaledL2(X,C,S))
E2 = encoding.functions.aggregate(A2, X, C)
print('Testing assign(): {}'.format((E1-E2).norm(2).data[0] < EPS))
def test_residual():
B,N,K,D = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
requires_grad=True)
C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (X, C)
test = gradcheck(encoding.functions.residual, input, eps=1e-6, atol=1e-4)
print('Testing residual(): {}'.format(test))
"""
def test_square_squeeze():
B,N,K,D = 2,3,4,5
R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (R,)
test = gradcheck(encoding.functions.square_squeeze(), input, eps=1e-6, atol=1e-4)
print('Testing square_squeeze(): {}'.format(test))
"""
def test_encoding(): def test_encoding():
B,C,H,W,K = 2,3,4,5,6 B,C,H,W,K = 2,3,4,5,6
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
...@@ -105,16 +53,6 @@ def test_encoding(): ...@@ -105,16 +53,6 @@ def test_encoding():
print('Testing encoding(): {}'.format(test)) print('Testing encoding(): {}'.format(test))
def test_encodingP():
B,C,H,W,K = 2,3,4,5,6
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
requires_grad=True)
input = (X,)
layer = encoding.nn.EncodingP(C,K).double().cuda()
test = gradcheck(layer, input, eps=1e-6, atol=1e-4)
print('Testing encodingP(): {}'.format(test))
def test_sum_square(): def test_sum_square():
B,C,H,W = 2,3,4,5 B,C,H,W = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
...@@ -146,15 +84,10 @@ def test_dilated_avgpool(): ...@@ -146,15 +84,10 @@ def test_dilated_avgpool():
if __name__ == '__main__': if __name__ == '__main__':
test_aggregateP()
test_scaledL2() test_scaledL2()
test_encoding() test_encoding()
test_aggregate() test_aggregate()
test_residual()
#test_square_squeeze()
test_encodingP()
test_sum_square() test_sum_square()
test_assign()
test_dilated_avgpool() test_dilated_avgpool()
""" """
test_dilated_densenet() test_dilated_densenet()
......
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import os
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import encoding.dilated as dresnet
import torchvision.models as orgresnet
class Dilated_ResNet(nn.Module):
    """Feature extractor wrapping a dilated ResNet-50 backbone.

    Loads the pretrained dilated ResNet-50 and, in ``forward``, runs the
    input through the stem (conv1/bn1/relu/maxpool) and the four residual
    stages, returning the final feature map.  No classifier head is applied.
    """

    def __init__(self, nclass):
        # ``nclass`` is accepted for signature parity with callers but is
        # not used by this feature extractor.
        super(Dilated_ResNet, self).__init__()
        self.pretrained = dresnet.resnet50(pretrained=True)

    def forward(self, x):
        net = self.pretrained
        # Apply the pretrained stages in their canonical order; this is
        # identical to invoking each submodule explicitly one by one.
        for stage in (net.conv1, net.bn1, net.relu, net.maxpool,
                      net.layer1, net.layer2, net.layer3, net.layer4):
            x = stage(x)
        return x
class Org_ResNet(nn.Module):
    """Feature extractor wrapping the stock torchvision ResNet-50.

    Mirrors ``Dilated_ResNet`` but uses the original (non-dilated)
    backbone so the two networks' outputs can be compared.  Returns the
    final feature map; no classifier head is applied.
    """

    def __init__(self, nclass):
        # ``nclass`` is accepted for signature parity with callers but is
        # not used by this feature extractor.
        super(Org_ResNet, self).__init__()
        self.pretrained = orgresnet.resnet50(pretrained=True)

    def forward(self, x):
        net = self.pretrained
        # Apply the pretrained stages in their canonical order; this is
        # identical to invoking each submodule explicitly one by one.
        for stage in (net.conv1, net.bn1, net.relu, net.maxpool,
                      net.layer1, net.layer2, net.layer3, net.layer4):
            x = stage(x)
        return x
def test_resnet():
    """Compare dilated and original ResNet-50 features on a random input.

    Builds both backbones in eval mode on the GPU, feeds the same random
    ImageNet-sized batch through each, and prints one slice of each
    network's output feature map for manual comparison.

    Note: requires CUDA and downloads pretrained weights on first run.
    """
    # Build each model once in eval mode on the GPU.  The original code
    # called .eval() twice per model (in the construction chain and again
    # afterwards) -- once is sufficient.
    model1 = Dilated_ResNet(10).cuda().eval()
    model2 = Org_ResNet(10).cuda().eval()
    # Single random 3x224x224 image, values in [-0.5, 0.5).
    x = Variable(torch.Tensor(1, 3, 224, 224).uniform_(-0.5, 0.5)).cuda()
    y1 = model1(x)
    y2 = model2(x)
    # Print one channel slice from each output for eyeball comparison.
    print(y1[0][1])
    print(y2[0][1])
# Run the dilated-vs-original ResNet comparison when executed as a script.
if __name__ == "__main__":
    test_resnet()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment