Commit d539ddfa authored by Hang Zhang's avatar Hang Zhang
Browse files

v0.1.0

parent 80a12ef6
......@@ -22,6 +22,9 @@ extern "C" {
#endif
// float
#include "generic/encoding_utils.c"
#include "THC/THCGenerateFloatType.h"
#include "generic/encoding_kernel.c"
#include "THC/THCGenerateFloatType.h"
......@@ -32,6 +35,9 @@ extern "C" {
#include "THC/THCGenerateFloatType.h"
// double
#include "generic/encoding_utils.c"
#include "THC/THCGenerateDoubleType.h"
#include "generic/encoding_kernel.c"
#include "THC/THCGenerateDoubleType.h"
......
......@@ -77,16 +77,19 @@ class _ConvNd(Module):
class Conv1d(_ConvNd):
r"""Applies a 1D convolution over an input signal composed of several input
planes.
r"""Applies a 1D convolution over an input signal composed of several
input planes.
In the simplest case, the output value of the layer with input size
:math:`(N, C_{in}, L)` and output :math:`(N, C_{out}, L_{out})` can be
precisely described as:
.. math::
\begin{array}{ll}
out(N_i, C_{out_j}) = bias(C_{out_j})
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k)
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k)
\star input(N_i, k)
\end{array}
where :math:`\star` is the valid `cross-correlation`_ operator
| :attr:`stride` controls the stride for the cross-correlation.
......@@ -155,11 +158,13 @@ class Conv2d(_ConvNd):
In the simplest case, the output value of the layer with input size
:math:`(N, C_{in}, H, W)` and output :math:`(N, C_{out}, H_{out}, W_{out})`
can be precisely described as:
.. math::
\begin{array}{ll}
out(N_i, C_{out_j}) = bias(C_{out_j})
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k)
\end{array}
where :math:`\star` is the valid 2D `cross-correlation`_ operator
| :attr:`stride` controls the stride for the cross-correlation.
......@@ -414,10 +419,13 @@ class ReLU(Threshold):
class Sigmoid(Module):
"""Applies the element-wise function :math:`f(x) = 1 / ( 1 + exp(-x))`
Shape:
- Input: :math:`(N, *)` where `*` means, any number of additional
dimensions
- Output: :math:`(N, *)`, same shape as the input
Examples::
>>> m = nn.Sigmoid()
>>> input = autograd.Variable(torch.randn(2))
>>> print(input)
......@@ -436,10 +444,11 @@ class Sigmoid(Module):
class MaxPool2d(Module):
r"""Applies a 2D max pooling over an input signal composed of several input
planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
r"""Applies a 2D max pooling over an input signal composed of several
input planes.
In the simplest case, the output value of the layer with input size
:math:`(N, C, H, W)`, output :math:`(N, C, H_{out}, W_{out})` and
:attr:`kernel_size` :math:`(kH, kW)`
can be precisely described as:
.. math::
......@@ -450,8 +459,8 @@ class MaxPool2d(Module):
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
for :attr:`padding` number of points
| :attr:`dilation` controls the spacing between the kernel points. It is harder to describe,
but this `link`_ has a nice visualization of what :attr:`dilation` does.
| :attr:`dilation` controls the spacing between the kernel points. It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
......@@ -531,8 +540,8 @@ class AvgPool2d(Module):
input(N_i, C_j, stride[0] * h + m, stride[1] * w + n)
\end{array}
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
for :attr:`padding` number of points
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides for :attr:`padding` number of points
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
......
......@@ -17,15 +17,19 @@ from torch.nn.parameter import Parameter
from ..parallel import my_data_parallel
from .syncbn import BatchNorm2d
from ..functions import dilatedavgpool2d
from ..functions import dilatedavgpool2d, view_each, upsample
from .basic import *
__all__ = ['DilatedAvgPool2d', 'MyConvTranspose2d', 'View', 'Normalize',
'Bottleneck']
__all__ = ['DilatedAvgPool2d', 'UpsampleConv2d', 'View', 'Sum', 'Mean',
'Normalize', 'Bottleneck', 'PyramidPooling']
class DilatedAvgPool2d(Module):
r"""We provide Dilated Average Pooling for the dilation of Densenet as
in :class:`encoding.dilated.DenseNet`.
Reference::
We provide this code for an upcoming paper.
Applies a 2D average pooling over an input signal composed of several input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
......@@ -68,8 +72,6 @@ class DilatedAvgPool2d(Module):
>>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
>>> output = m(input)
Reference::
comming
"""
def __init__(self, kernel_size, stride=None, padding=0, dilation=1):
super(DilatedAvgPool2d, self).__init__()
......@@ -79,8 +81,13 @@ class DilatedAvgPool2d(Module):
self.dilation = dilation
    def forward(self, input):
        # Single Variable: apply the dilated average pooling op directly.
        if isinstance(input, Variable):
            return dilatedavgpool2d(input, self.kernel_size, self.stride,
                self.padding, self.dilation)
        # List/tuple of per-GPU Variables: self-parallel mode, each element
        # is processed on its own device by my_data_parallel.
        elif isinstance(input, tuple) or isinstance(input, list):
            return my_data_parallel(self, input)
        else:
            raise RuntimeError('unknown input type')
def __repr__(self):
return self.__class__.__name__ + ' (' \
......@@ -90,13 +97,63 @@ class DilatedAvgPool2d(Module):
+ ', dilation=' + str(self.dilation) + ')'
class MyConvTranspose2d(Module):
"""Customized Layers, discuss later
class UpsampleConv2d(Module):
r"""
To avoid the checkerboard artifacts of standard Fractionally-strided Convolution, we adapt an integer stride convolution but producing a :math:`2\times 2` outputs for each convolutional window.
.. image:: _static/img/upconv.png
:width: 50%
:align: center
Reference:
Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
output_padding (int or tuple, optional): Zero-padding added to one side of the output. Default: 0
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional): If True, adds a learnable bias to the output. Default: True
dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
scale_factor (int): scaling factor for upsampling convolution. Default: 1
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`
- Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
:math:`H_{out} = scale * (H_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0] + output\_padding[0]`
:math:`W_{out} = scale * (W_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1] + output\_padding[1]`
Attributes:
weight (Tensor): the learnable weights of the module of shape
(in_channels, scale * scale * out_channels, kernel_size[0], kernel_size[1])
bias (Tensor): the learnable bias of the module of shape (scale * scale * out_channels)
Examples::
>>> # With square kernels and equal stride
>>> m = nn.UpsampleConv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nn.UpsampleConv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> input = autograd.Variable(torch.randn(1, 16, 12, 12))
>>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
>>> upsample = nn.UpsampleConv2d(16, 16, 3, stride=2, padding=1)
>>> h = downsample(input)
>>> h.size()
torch.Size([1, 16, 6, 6])
>>> output = upsample(h, output_size=input.size())
>>> output.size()
torch.Size([1, 16, 12, 12])
"""
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, scale_factor =1,
bias=True):
super(MyConvTranspose2d, self).__init__()
super(UpsampleConv2d, self).__init__()
kernel_size = _pair(kernel_size)
stride = _pair(stride)
padding = _pair(padding)
......@@ -163,6 +220,36 @@ class View(Module):
raise RuntimeError('unknown input type')
class Sum(Module):
    """Sum the input tensor along a fixed dimension.

    A thin :class:`Module` wrapper around :meth:`Tensor.sum` so the
    reduction can sit inside a module pipeline and participate in the
    package's self-parallel mode (list/tuple of per-GPU inputs).

    Args:
        dim (int): dimension to reduce over.
        keep_dim (bool): retain the reduced dimension with size 1.
            Default: ``False``.
    """
    def __init__(self, dim, keep_dim=False):
        super(Sum, self).__init__()
        self.dim = dim
        self.keep_dim = keep_dim

    def forward(self, input):
        if isinstance(input, Variable):
            return input.sum(self.dim, self.keep_dim)
        elif isinstance(input, (tuple, list)):
            # self-parallel mode: one input per GPU
            return my_data_parallel(self, input)
        else:
            raise RuntimeError('unknown input type')
class Mean(Module):
    """Module wrapper that averages its input over one dimension.

    Args:
        dim (int): dimension to average over.
        keep_dim (bool): retain the reduced dimension with size 1.
            Default: ``False``.
    """
    def __init__(self, dim, keep_dim=False):
        super(Mean, self).__init__()
        self.dim = dim
        self.keep_dim = keep_dim

    def forward(self, input):
        if isinstance(input, (tuple, list)):
            # one input per GPU: self-parallel mode
            return my_data_parallel(self, input)
        if isinstance(input, Variable):
            return input.mean(self.dim, self.keep_dim)
        raise RuntimeError('unknown input type')
class Normalize(Module):
r"""Performs :math:`L_p` normalization of inputs over specified dimension.
......@@ -189,7 +276,7 @@ class Normalize(Module):
def forward(self, x):
if isinstance(x, Variable):
return F.normalize(x, self.p, self.dim)
return F.normalize(x, self.p, self.dim, eps=1e-10)
elif isinstance(x, tuple) or isinstance(x, list):
return my_data_parallel(self, x)
else:
......@@ -214,15 +301,15 @@ class Bottleneck(Module):
conv_block = []
conv_block += [norm_layer(inplanes),
ReLU(inplace=True),
Conv2d(inplanes, planes, kernel_size=1, stride=1)]
Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)]
conv_block += [norm_layer(planes),
ReLU(inplace=True),
Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1)]
padding=1, bias=False)]
conv_block += [norm_layer(planes),
ReLU(inplace=True),
Conv2d(planes, planes * self.expansion, kernel_size=1,
stride=1)]
stride=1, bias=False)]
self.conv_block = Sequential(*conv_block)
def forward(self, x):
......@@ -238,18 +325,58 @@ class Bottleneck(Module):
raise RuntimeError('unknown input type')
def _get_a_var(obj):
    """Return the first Variable found in obj, searching lists, tuples and
    dict items recursively; return None when no Variable is present."""
    if isinstance(obj, Variable):
        return obj
    if isinstance(obj, (list, tuple)):
        for element in obj:
            candidate = _get_a_var(element)
            if isinstance(candidate, Variable):
                return candidate
    if isinstance(obj, dict):
        for pair in obj.items():
            candidate = _get_a_var(pair)
            if isinstance(candidate, Variable):
                return candidate
    return None
class PyramidPooling(Module):
    """Pyramid Pooling Module: pool the input at four fixed output grids
    (1x1, 2x2, 3x3, 6x6), project each pooled map to in_channels/4
    channels with a 1x1 conv + BN + ReLU, bilinearly upsample each back
    to the input resolution, and concatenate all four with the original
    input along the channel dimension (output has 2 * in_channels).

    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
    """
    def __init__(self, in_channels):
        super(PyramidPooling, self).__init__()
        # Adaptive pooling branches with fixed 1x1, 2x2, 3x3, 6x6 outputs.
        self.pool1 = AdaptiveAvgPool2d(1)
        self.pool2 = AdaptiveAvgPool2d(2)
        self.pool3 = AdaptiveAvgPool2d(3)
        self.pool4 = AdaptiveAvgPool2d(6)
        # Each branch reduces the channel count to in_channels/4.
        out_channels = int(in_channels/4)
        self.conv1 = Sequential(Conv2d(in_channels, out_channels, 1),
                                BatchNorm2d(out_channels),
                                ReLU(True))
        self.conv2 = Sequential(Conv2d(in_channels, out_channels, 1),
                                BatchNorm2d(out_channels),
                                ReLU(True))
        self.conv3 = Sequential(Conv2d(in_channels, out_channels, 1),
                                BatchNorm2d(out_channels),
                                ReLU(True))
        self.conv4 = Sequential(Conv2d(in_channels, out_channels, 1),
                                BatchNorm2d(out_channels),
                                ReLU(True))

    def _cat_each(self, x, feat1, feat2, feat3, feat4):
        # Per-device concatenation when inputs are lists (self-parallel mode).
        assert(len(x)==len(feat1))
        z = []
        for i in range(len(x)):
            z.append( torch.cat((x[i], feat1[i], feat2[i], feat3[i], feat4[i]), 1))
        return z

    def forward(self, x):
        # x is either a single Variable or a list/tuple of per-GPU Variables;
        # in the latter case h/w are taken from the first element.
        if isinstance(x, Variable):
            _, _, h, w = x.size()
        elif isinstance(x, tuple) or isinstance(x, list):
            _, _, h, w = x[0].size()
        else:
            raise RuntimeError('unknown input type')
        # NOTE(review): `upsample` is the project's functional upsample,
        # presumably taking (input, size, mode) — confirm its signature.
        feat1 = upsample(self.conv1(self.pool1(x)),(h,w),
            mode='bilinear')
        feat2 = upsample(self.conv2(self.pool2(x)),(h,w),
            mode='bilinear')
        feat3 = upsample(self.conv3(self.pool3(x)),(h,w),
            mode='bilinear')
        feat4 = upsample(self.conv4(self.pool4(x)),(h,w),
            mode='bilinear')
        if isinstance(x, Variable):
            return torch.cat((x, feat1, feat2, feat3, feat4), 1)
        elif isinstance(x, tuple) or isinstance(x, list):
            return self._cat_each(x, feat1, feat2, feat3, feat4)
        else:
            raise RuntimeError('unknown input type')
......@@ -15,17 +15,17 @@ import torch.nn.functional as F
from torch.autograd import Function, Variable
from .._ext import encoding_lib
from ..functions import scaledL2, aggregate, aggregateP, residual, assign
from ..functions import scaledL2, aggregate
from ..parallel import my_data_parallel
__all__ = ['Encoding', 'Inspiration', 'GramMatrix', 'Aggregate','EncodingP']
__all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'GramMatrix']
class Encoding(nn.Module):
r"""
Encoding Layer: a learnable residual encoder over 3d or 4d input that
is seen as a mini-batch.
.. image:: http://hangzh.com/figure/cvpr17.svg
.. image:: _static/img/cvpr17.svg
:width: 50%
:align: center
......@@ -71,9 +71,8 @@ class Encoding(nn.Module):
def reset_params(self):
std1 = 1./((self.K*self.D)**(1/2))
std2 = 1./((self.K)**(1/2))
self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2)
self.scale.data.uniform_(-1, 0)
def forward(self, X):
if isinstance(X, tuple) or isinstance(X, list):
......@@ -82,7 +81,7 @@ class Encoding(nn.Module):
elif not isinstance(X, Variable):
raise RuntimeError('unknown input type')
# input X is a 4D tensor
assert(X.size(1)==self.D,"Encoding Layer wrong channels!")
assert(X.size(1)==self.D)
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
......@@ -94,7 +93,8 @@ class Encoding(nn.Module):
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights
A = F.softmax(scaledL2(X, self.codewords, self.scale))
#A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
# aggregate
E = aggregate(A, X, self.codewords)
return E
......@@ -104,10 +104,65 @@ class Encoding(nn.Module):
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
class EncodingShake(nn.Module):
    """Encoding-layer variant whose smoothing factors are re-randomized
    ("shaken") before and after aggregation on every forward pass while
    training, and pinned to -0.5 in evaluation mode.

    Args:
        D (int): feature dimension of the input.
        K (int): number of codewords.
    """
    def __init__(self, D, K):
        super(EncodingShake, self).__init__()
        # learnable codewords (K x D) and per-codeword smoothing factors (K)
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D),
                                      requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()

    def reset_params(self):
        bound = 1. / ((self.K * self.D) ** (1 / 2))
        self.codewords.data.uniform_(-bound, bound)
        self.scale.data.uniform_(-1, 0)

    def shake(self):
        # training: fresh random scales in [-1, 0); eval: constant -0.5
        if self.training:
            self.scale.data.uniform_(-1, 0)
        else:
            self.scale.data.zero_().add_(-0.5)

    def forward(self, X):
        if isinstance(X, (tuple, list)):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        assert X.size(1) == self.D
        batch = X.size(0)
        if X.dim() == 3:
            # B x D x N -> B x N x D
            num_feat = X.size(2)
            flat = X.transpose(1, 2).contiguous()
        elif X.dim() == 4:
            # B x D x H x W -> B x (H*W) x D
            num_feat = X.size(2) * X.size(3)
            flat = X.view(batch, self.D, -1).transpose(1, 2).contiguous()
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # shake before computing assignments
        self.shake()
        # soft-assignment weights over the codewords
        A = F.softmax(
            scaledL2(flat, self.codewords, self.scale).view(batch * num_feat, -1),
            dim=1).view(batch, num_feat, self.K)
        # aggregate residuals into the encoded representation
        E = aggregate(A, flat, self.codewords)
        # shake again after aggregation
        self.shake()
        return E

    def __repr__(self):
        return '{}(N x {}=>{}x{})'.format(
            self.__class__.__name__, self.D, self.K, self.D)
class Inspiration(nn.Module):
r""" Inspiration Layer (for MSG-Net).
Tuning the featuremap with target Gram Matrix
r"""
Inspiration Layer (CoMatch Layer) enables the multi-style transfer in feed-forward network, which learns to match the target feature statistics during the training.
This module is differentiable and can be inserted into a standard feed-forward network, to be learned directly from the loss function without additional supervision.
.. math::
Y = \phi^{-1}[\phi(\mathcal{F}^T)W\mathcal{G}]
......@@ -116,7 +171,7 @@ class Inspiration(nn.Module):
training multi-style generative network for real-time transfer.
Reference:
Hang Zhang, and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
"""
def __init__(self, C, B=1):
super(Inspiration, self).__init__()
......@@ -156,76 +211,3 @@ class GramMatrix(nn.Module):
gram = features.bmm(features_t) / (ch * h * w)
return gram
class Aggregate(nn.Module):
    r"""
    Aggregate operation, aggregate the residuals (:math:`R`) with
    assignment weights (:math:`A`).

    .. math::
        e_{k} = \sum_{i=1}^{N} a_{ik} r_{ik}

    Shape:
        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` :math:`R\in\mathcal{R}^{B\times N\times K\times D}` (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number is codewords, :math:`D` is feature dimensions.)
        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
    """
    def forward(self, A, R):
        if isinstance(A, tuple) or isinstance(A, list):
            # for self-parallel mode, please see encoding.nn
            # NOTE(review): my_data_parallel is invoked with an extra
            # positional argument R here — confirm its signature accepts
            # a second input (elsewhere it is called with a single input).
            return my_data_parallel(self, A, R)
        elif not isinstance(A, Variable):
            raise RuntimeError('unknown input type')
        # delegate to the CUDA-backed aggregateP function
        return aggregateP(A, R)
class EncodingP(nn.Module):
    """Deprecated residual-encoding layer; use :class:`Encoding` instead.

    Computes explicit residuals between the (flattened) input features
    and K learnable codewords, soft-assigns each feature to codewords,
    and aggregates the weighted residuals.

    Args:
        D (int): feature dimension of the input.
        K (int): number of codewords.
    """
    def __init__(self, D, K):
        super(EncodingP, self).__init__()
        # init codewords and smoothing factor
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D),
                                      requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()
        print('EncodingP is deprecated, please use Encoding.')

    def reset_params(self):
        std1 = 1. / ((self.K * self.D) ** (1 / 2))
        std2 = 1. / ((self.K) ** (1 / 2))
        self.codewords.data.uniform_(-std1, std1)
        self.scale.data.uniform_(-std2, std2)

    def forward(self, X):
        if isinstance(X, (tuple, list)):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        # BUG FIX: the original `assert(cond, "msg")` asserted a two-element
        # tuple, which is always truthy and never fires; use a real assert.
        assert X.size(1) == self.D, "Encoding Layer wrong channels!"
        if X.dim() == 3:
            # BxDxN -> BxNxD
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1, 2)
        elif X.dim() == 4:
            # BxDxHxW -> Bx(H*W)xD
            B, N, K, D = X.size(0), X.size(2) * X.size(3), self.K, self.D
            X = X.view(B, D, -1).transpose(1, 2)
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # calculate residuals
        R = residual(X.contiguous(), self.codewords)
        # assignment weights
        A = assign(R, self.scale)
        # aggregate
        E = aggregateP(A, R)
        return E

    def __repr__(self):
        return '{}(N x {}=>{}x{})'.format(
            self.__class__.__name__, self.D, self.K, self.D)
......@@ -29,6 +29,9 @@ class BatchNorm1d(Module):
r"""Synchronized Batch Normalization 1d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for an upcoming paper.
Applies Batch Normalization over a 2d or 3d input that is seen as a
mini-batch.
......@@ -220,6 +223,9 @@ class BatchNorm2d(Module):
r"""Synchronized Batch Normalization 2d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for an upcoming paper.
Applies Batch Normalization over a 4d input that is seen as a mini-batch
of 3d inputs
......@@ -411,3 +417,20 @@ class BatchNorm2d(Module):
return outputs
else:
raise RuntimeError('unknown input type')
def _get_a_var(obj):
    """Return the first Variable found in obj, searching lists, tuples and
    dict items recursively; return None when no Variable is present."""
    if isinstance(obj, Variable):
        return obj
    if isinstance(obj, (list, tuple)):
        for element in obj:
            candidate = _get_a_var(element)
            if isinstance(candidate, Variable):
                return candidate
    if isinstance(obj, dict):
        for pair in obj.items():
            candidate = _get_a_var(pair)
            if isinstance(candidate, Variable):
                return candidate
    return None
......@@ -82,6 +82,9 @@ class Broadcast(Function):
class ModelDataParallel(Module):
"""Implements data parallelism at the module level.
Reference::
We provide this code for an upcoming paper.
This container parallelizes the application of the given module by
splitting the input across the specified devices by chunking in the
batch dimension.
......@@ -149,6 +152,9 @@ class CriterionDataParallel(Module):
Calculate loss in multiple-GPUs, which balance the memory usage for
Semantic Segmentation.
Reference::
We provide this code for an upcoming paper.
The targets are split across the specified devices by chunking in
the batch dimension. Please use together with :class:`encoding.parallel.ModelDataParallel`.
"""
......@@ -191,9 +197,12 @@ class CriterionDataParallel(Module):
class SelfDataParallel(Module):
"""SelfDataParallel, please make sure you understand it before using.
Reference::
We provide this code for an upcoming paper.
Each module in the network should be in self-parallel mode,
which allows list of inputs from multiple GPUs.
Please see encoding.nn for detail, use with cautious
Please see :class:`encoding.nn` for details; use with caution
"""
def __init__(self, module, device_ids=None, output_device=None, dim=0):
super(SelfDataParallel, self).__init__()
......@@ -211,11 +220,26 @@ class SelfDataParallel(Module):
def forward(self, *inputs, **kwargs):
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
if self.training:
# self parallel mode
outputs = self.module(inputs)
return outputs
else:
# TODO check faster?
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, \
self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return outputs
def replicate(self, module, device_ids):
return replicate(module, device_ids)
def parallel_apply(self, replicas, inputs, kwargs):
return parallel_apply(replicas, inputs, kwargs)
def scatter(self, inputs, kwargs, device_ids):
#return my_scatter(inputs, target_gpus=device_ids)
outputs = scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
return outputs
......@@ -343,3 +367,4 @@ def my_data_parallel(module, inputs, device_ids=None, \
outputs = my_parallel_apply(replicas, inputs, module_kwargs)
return outputs
......@@ -9,15 +9,27 @@
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
//#include <THC/THC.h>
/*
#include <THC/THC.h>
#ifdef __cplusplus
extern "C" {
#endif
#define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME)
#define THCTensor TH_CONCAT_3(TH,CReal,Tensor)
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
// float
#include "generic/encoding_generic.h"
#include "THC/THCGenerateFloatType.h"
#include "generic/syncbn_generic.h"
#include "THC/THCGenerateFloatType.h"
#ifdef __cplusplus
}
#endif
*/
int Encoding_Float_scaledl2_forward(THCudaTensor *SL,
......@@ -27,29 +39,12 @@ int Encoding_Float_scaledl2_backward(
THCudaTensor *GSL, THCudaTensor *GX, THCudaTensor *GC,
THCudaTensor *X, THCudaTensor *C, THCudaTensor *S);
int Encoding_Float_aggregateE_forward(THCudaTensor *E, THCudaTensor *A,
int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregateE_backward(THCudaTensor *GA, THCudaTensor *GE,
int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GE,
THCudaTensor *A, THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *R);
int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GR,
THCudaTensor *L, THCudaTensor *A, THCudaTensor *R);
int Encoding_Float_residual_forward(THCudaTensor *R, THCudaTensor *X,
THCudaTensor *D);
int Encoding_Float_residual_backward(THCudaTensor *GR, THCudaTensor *GX,
THCudaTensor *GD);
int Encoding_Float_squaresqueeze_forward(THCudaTensor *L, THCudaTensor *R);
int Encoding_Float_squaresqueeze_backward(THCudaTensor *GL,
THCudaTensor *GR, THCudaTensor *R);
int Encoding_Float_batchnorm_Forward(THCudaTensor *output_,
THCudaTensor *input_, THCudaTensor *mean_,
THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_);
......@@ -90,33 +85,13 @@ int Encoding_Double_scaledl2_backward(
THCudaDoubleTensor *GC, THCudaDoubleTensor *X,
THCudaDoubleTensor *C, THCudaDoubleTensor *S);
int Encoding_Double_aggregateE_forward(THCudaDoubleTensor *E,
int Encoding_Double_aggregate_forward(THCudaDoubleTensor *E,
THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C);
int Encoding_Double_aggregateE_backward(THCudaDoubleTensor *GA,
int Encoding_Double_aggregate_backward(THCudaDoubleTensor *GA,
THCudaDoubleTensor *GE, THCudaDoubleTensor *A, THCudaDoubleTensor *X,
THCudaDoubleTensor *C);
int Encoding_Double_aggregate_forward(
THCudaDoubleTensor *E, THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_aggregate_backward(
THCudaDoubleTensor *GA, THCudaDoubleTensor *GR, THCudaDoubleTensor *L,
THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_residual_forward(
THCudaDoubleTensor *R, THCudaDoubleTensor *X, THCudaDoubleTensor *D);
int Encoding_Double_residual_backward(
THCudaDoubleTensor *GR, THCudaDoubleTensor *GX,
THCudaDoubleTensor *GD);
int Encoding_Double_squaresqueeze_forward(THCudaDoubleTensor *L,
THCudaDoubleTensor *R);
int Encoding_Double_squaresqueeze_backward(THCudaDoubleTensor *GL,
THCudaDoubleTensor *GR, THCudaDoubleTensor *R);
int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_,
THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_,
THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_,
......@@ -148,3 +123,4 @@ int Encoding_Double_DilatedAvgPool2d_Backward(
int kH, int kW, int dH, int dW,
int padH, int padW,
int dilationH, int dilationW);
......@@ -23,7 +23,6 @@ int Encoding_(scaledl2_forward)(THCTensor *SL,
return 0;
}
int Encoding_(scaledl2_backward)(
THCTensor *GSL, THCTensor *GX, THCTensor *GC,
THCTensor *X, THCTensor *C, THCTensor *S)
......@@ -36,94 +35,25 @@ int Encoding_(scaledl2_backward)(
return 0;
}
int Encoding_(aggregateE_forward)(THCTensor *E, THCTensor *A,
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *X, THCTensor *C)
/*
* Aggregate operation
*/
{
Encoding_(AggregateE_Forward)(state, E, A, X, C);
Encoding_(Aggregate_Forward)(state, E, A, X, C);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregateE_backward)(THCTensor *GA, THCTensor *GE,
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GE,
THCTensor *A, THCTensor *X, THCTensor *C)
/*
* Aggregate backward operation to A
* G (dl/dR), L (dl/dE), A (assignments)
*/
{
Encoding_(AggregateE_Backward)(state, GA, GE, A, X, C);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *R)
/*
* Aggregate operation
*/
{
Encoding_(Aggregate_Forward)(state, E, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GR,
THCTensor *L, THCTensor *A, THCTensor *R)
/*
* Aggregate backward operation to A
* G (dl/dR), L (dl/dE), A (assignments)
*/
{
Encoding_(Aggregate_Backward)(state, GA, GR, L, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D)
/*
* Residual operation
*/
{
Encoding_(Residual_Forward)(state, R, X, D);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
THCTensor *GD)
/*
* Residual operation
*/
{
Encoding_(Residual_Backward)(state, GR, GX, GD);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R)
/*
* Residual operation
*/
{
Encoding_(SquareSqueeze_Forward)(state, L, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
THCTensor *R)
/*
* Residual operation
*/
{
Encoding_(SquareSqueeze_Backward)(state, GL, GR, R);
Encoding_(Aggregate_Backward)(state, GA, GE, A, X, C);
/* C function return number of the outputs */
return 0;
}
......
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/encoding_generic.h"
#else
/* Per-real-type declarations for the Encoding layer CUDA wrappers.
 * This file is instantiated once per type via THCGenerate*Type.h. */
/* Scaled squared-L2 distance SL between inputs X and codewords C with
 * per-codeword smoothing factors S, and the corresponding gradients. */
int Encoding_(scaledl2_forward)(THCTensor *SL,
    THCTensor *X, THCTensor *C, THCTensor *S);
int Encoding_(scaledl2_backward)(
    THCTensor *GSL, THCTensor *GX, THCTensor *GC,
    THCTensor *X, THCTensor *C, THCTensor *S);
/* Aggregate inputs X against codewords C with assignment weights A. */
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
    THCTensor *X, THCTensor *C);
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GE,
    THCTensor *A, THCTensor *X, THCTensor *C);
/* Aggregate precomputed residuals R with assignment weights A. */
int Encoding_(aggregateP_forward)(THCTensor *E, THCTensor *A,
    THCTensor *R);
int Encoding_(aggregateP_backward)(THCTensor *GA, THCTensor *GR,
    THCTensor *L, THCTensor *A, THCTensor *R);
/* Residuals R between inputs X and codewords D, and gradients. */
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D);
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
    THCTensor *GD);
/* squaresqueeze op over residuals R (forward and backward). */
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R);
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
    THCTensor *R);
#endif
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
/* NOTE(review): the guard names a .c file while the declarations suggest
 * a header — confirm the intended THC_GENERIC_FILE path. */
#define THC_GENERIC_FILE "generic/pooling_generic.c"
#else
/* Dilated 2D average pooling forward: input X_ -> pooled output Y_. */
int Encoding_(DilatedAvgPool2d_Forward)(
    THCTensor *X_, THCTensor *Y_,
    int kH, int kW, int dH, int dW,
    int padH, int padW,
    int dilationH, int dilationW);
/* Backward: gradX_ receives the gradient w.r.t. the input given gradY_. */
int Encoding_(DilatedAvgPool2d_Backward)(
    THCTensor *gradX_, THCTensor *gradY_,
    int kH, int kW, int dH, int dW,
    int padH, int padW,
    int dilationH, int dilationW);
#endif
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/syncbn_generic.h"
#else
/* Normalize input_ into output_ using precomputed mean_/invstd_ and the
 * affine parameters gamma_/beta_. */
int Encoding_(batchnorm_Forward)(THCTensor *output_, THCTensor *input_,
    THCTensor *mean_, THCTensor *invstd_,
    THCTensor *gamma_, THCTensor *beta_);
/* Gradients w.r.t. input, gamma, beta, mean and std; `train` selects
 * training-mode vs. inference-mode backward. */
int Encoding_(batchnorm_Backward)(THCTensor *gradoutput_,
    THCTensor *input_, THCTensor *gradinput_,
    THCTensor *gradgamma_, THCTensor *gradbeta_, THCTensor *mean_,
    THCTensor *invstd_, THCTensor *gamma_, THCTensor *beta_,
    THCTensor *gradMean_, THCTensor *gradStd_, int train);
/* Sum and sum-of-squares reductions of input_ (and their backward),
 * presumably used for synchronized batch statistics — see syncbn. */
int Encoding_(sum_square_Forward)(THCTensor *input_,
    THCTensor *sum_, THCTensor *square_);
int Encoding_(sum_square_Backward)(
    THCTensor *gradInput, THCTensor *input_,
    THCTensor *gradSum_, THCTensor *gradSquare_);
#endif
......@@ -14,6 +14,7 @@ import os
import sys
import time
import math
import tqdm
def get_optimizer(args, model, diff_LR=True):
"""
......@@ -44,9 +45,7 @@ class CosLR_Scheduler(object):
"""Cosine Learning Rate Scheduler
.. math::
        lr = baselr * 0.5 * (1 + cos(pi * iter / maxiter))

    where ``iter`` is the current iteration and ``maxiter`` is the total
    number of iterations
Args:
args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs`
......@@ -62,7 +61,7 @@ class CosLR_Scheduler(object):
T = (epoch - 1) * self.niters + i
lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
if epoch > self.epoch:
print('=>Epochs %i, learning rate = %.4f, previous best ='\
print('\n=>Epochs %i, learning rate = %.4f, previous best ='\
'%.3f%%' % (epoch, lr, best_pred))
self.epoch = epoch
self._adjust_learning_rate(optimizer, lr)
......@@ -90,12 +89,14 @@ def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'):
if is_best:
shutil.copyfile(filename, directory + 'model_best.pth.tar')
# refer to https://github.com/kuangliu/pytorch-cifar/blob/master/utils.py
_, term_width = os.popen('stty size', 'r').read().split()
term_width = int(term_width)
TOTAL_BAR_LENGTH = 86.
term_width = int(term_width)-1
TOTAL_BAR_LENGTH = 36.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
"""Progress Bar for display
"""
......
......@@ -15,45 +15,38 @@ import sys
import subprocess
from setuptools import setup, find_packages
from setuptools.command.develop import develop
from setuptools.command.install import install
import setuptools.command.develop
import setuptools.command.install
this_file = os.path.dirname(__file__)
cwd = os.path.dirname(os.path.abspath(__file__))
def read(*names, **kwargs):
    """Return the text of a file located relative to this script.

    The path components in *names* are joined onto this file's own
    directory; ``kwargs`` may supply an ``encoding`` (default ``utf8``).
    """
    here = os.path.dirname(__file__)
    path = os.path.join(here, *names)
    enc = kwargs.get("encoding", "utf8")
    with io.open(path, encoding=enc) as fp:
        return fp.read()
def find_version(*file_paths):
    """Extract ``__version__`` from the file at *file_paths*.

    Raises:
        RuntimeError: if no ``__version__ = '...'`` assignment is found.
    """
    contents = read(*file_paths)
    match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                      contents, re.M)
    if match is None:
        raise RuntimeError("Unable to find version string.")
    return match.group(1)
_version = find_version('encoding/__init__.py')
#extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
extra_compile_args += ['-static-libstdc++']
extra_link_args += ['-static-libstdc++']
class TestCommand(install):
"""Post-installation mode."""
# run test scrip after installation
class install(setuptools.command.install.install):
def run(self):
install.run(self)
self.create_version_file()
setuptools.command.install.install.run(self)
subprocess.check_call("python test/test.py".split())
@staticmethod
def create_version_file():
global version, cwd
print('-- Building version ' + version)
version_path = os.path.join(cwd, 'encoding', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
# Base release version; a short git SHA suffix is appended when available.
version = '0.1.0'
try:
    # Best effort: tag the build with the current commit so an installed
    # package can be traced back to a source revision.
    sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
        cwd=cwd).decode('ascii').strip()
    version += '+' + sha[:7]
except Exception:
    # Not a git checkout (e.g. building from an sdist) -- keep the plain
    # version string.
    pass
setup(
name="encoding",
version=_version,
description="PyTorch Encoding Layer",
url="https://github.com/zhanghang1989/PyTorch-Encoding-Layer",
version=version,
description="PyTorch Encoding",
url="https://github.com/zhanghang1989/PyTorch-Encoding",
author="Hang Zhang",
author_email="zhang.hang@rutgers.edu",
# Require cffi.
......@@ -61,14 +54,13 @@ setup(
setup_requires=["cffi>=1.0.0"],
# Exclude the build files.
packages=find_packages(exclude=["build"]),
#extra_compile_args=extra_compile_args,
# Package where to put the extensions. Has to be a prefix of build.py.
ext_package="",
# Extensions to compile.
cffi_modules=[
os.path.join(this_file, "build.py:ffi")
os.path.join(cwd, "build.py:ffi")
],
cmdclass={
'install': TestCommand,
'install': install,
},
)
......@@ -17,17 +17,6 @@ import torchvision.models as models
EPS = 1e-6
def test_aggregateP():
    """Gradient-check ``encoding.functions.aggregateP`` on random CUDA input.

    Uses double precision and small sizes so ``gradcheck``'s finite
    differences stay both accurate and fast.
    """
    B, N, K, D = 2, 3, 4, 5  # batch, points, codewords, feature dim
    A = Variable(torch.cuda.DoubleTensor(B, N, K).uniform_(-0.5, 0.5),
                 requires_grad=True)  # assignment weights
    R = Variable(torch.cuda.DoubleTensor(B, N, K, D).uniform_(-0.5, 0.5),
                 requires_grad=True)  # residual vectors
    input = (A, R)
    test = gradcheck(encoding.functions.aggregateP, input, eps=1e-6, atol=1e-4)
    # Bug fix: the message previously said "aggregate()" although this
    # function exercises aggregateP().
    print('Testing aggregateP(): {}'.format(test))
def test_aggregate():
B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
......@@ -54,47 +43,6 @@ def test_scaledL2():
print('Testing scaledL2(): {}'.format(test))
def test_assign():
    # Consistency check: the assign()/aggregateP() path computed on
    # residuals must produce the same encoding as the
    # softmax(scaledL2()) + aggregate() formulation.
    B,N,K,D = 2,3,4,5  # batch, points, codewords, feature dim
    X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
        requires_grad=True)  # input features
    C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
        requires_grad=True)  # codewords
    S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5),
        requires_grad=True)  # per-codeword scaling factors
    # Path 1: explicit residuals -> soft assignment -> aggregation.
    R = encoding.functions.residual(X, C)
    A1 = encoding.functions.assign(R, S)
    E1 = encoding.functions.aggregateP(A1, R)
    # Path 2: scaled L2 distances -> softmax -> aggregation.
    A2 = F.softmax(encoding.functions.scaledL2(X,C,S))
    E2 = encoding.functions.aggregate(A2, X, C)
    # The two encodings should agree up to numerical tolerance EPS.
    print('Testing assign(): {}'.format((E1-E2).norm(2).data[0] < EPS))
def test_residual():
    """Gradient-check ``encoding.functions.residual`` on random CUDA input."""
    batch, npoints, ncodes, dim = 2, 3, 4, 5
    feats = Variable(
        torch.cuda.DoubleTensor(batch, npoints, dim).uniform_(-0.5, 0.5),
        requires_grad=True)
    codewords = Variable(
        torch.cuda.DoubleTensor(ncodes, dim).uniform_(-0.5, 0.5),
        requires_grad=True)
    passed = gradcheck(encoding.functions.residual, (feats, codewords),
                       eps=1e-6, atol=1e-4)
    print('Testing residual(): {}'.format(passed))
"""
def test_square_squeeze():
B,N,K,D = 2,3,4,5
R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (R,)
test = gradcheck(encoding.functions.square_squeeze(), input, eps=1e-6, atol=1e-4)
print('Testing square_squeeze(): {}'.format(test))
"""
def test_encoding():
B,C,H,W,K = 2,3,4,5,6
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
......@@ -105,16 +53,6 @@ def test_encoding():
print('Testing encoding(): {}'.format(test))
def test_encodingP():
    """Gradient-check the EncodingP layer on a random CUDA image batch."""
    batch, chan, height, width, ncodes = 2, 3, 4, 5, 6
    inp = Variable(
        torch.cuda.DoubleTensor(batch, chan, height, width).uniform_(-0.5, 0.5),
        requires_grad=True)
    layer = encoding.nn.EncodingP(chan, ncodes).double().cuda()
    passed = gradcheck(layer, (inp,), eps=1e-6, atol=1e-4)
    print('Testing encodingP(): {}'.format(passed))
def test_sum_square():
B,C,H,W = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
......@@ -146,15 +84,10 @@ def test_dilated_avgpool():
if __name__ == '__main__':
test_aggregateP()
test_scaledL2()
test_encoding()
test_aggregate()
test_residual()
#test_square_squeeze()
test_encodingP()
test_sum_square()
test_assign()
test_dilated_avgpool()
"""
test_dilated_densenet()
......
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import os
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import encoding.dilated as dresnet
import torchvision.models as orgresnet
class Dilated_ResNet(nn.Module):
    """Feature extractor wrapping the dilated ResNet-50 backbone.

    Note: ``nclass`` is accepted for interface parity but unused here.
    """

    def __init__(self, nclass):
        super(Dilated_ResNet, self).__init__()
        # Pre-trained dilated ResNet-50 from encoding.dilated.
        self.pretrained = dresnet.resnet50(pretrained=True)

    def forward(self, x):
        """Run the ResNet stem and the four residual stages; return features."""
        backbone = self.pretrained
        x = backbone.conv1(x)
        x = backbone.bn1(x)
        x = backbone.relu(x)
        x = backbone.maxpool(x)
        for stage in (backbone.layer1, backbone.layer2,
                      backbone.layer3, backbone.layer4):
            x = stage(x)
        return x
class Org_ResNet(nn.Module):
    """Feature extractor wrapping torchvision's stock ResNet-50.

    Note: ``nclass`` is accepted for interface parity but unused here.
    """

    def __init__(self, nclass):
        super(Org_ResNet, self).__init__()
        # Pre-trained (non-dilated) ResNet-50 from torchvision.
        self.pretrained = orgresnet.resnet50(pretrained=True)

    def forward(self, x):
        """Run the ResNet stem and the four residual stages; return features."""
        net = self.pretrained
        out = net.maxpool(net.relu(net.bn1(net.conv1(x))))
        out = net.layer1(out)
        out = net.layer2(out)
        out = net.layer3(out)
        out = net.layer4(out)
        return out
def test_resnet():
    """Compare dilated vs. stock ResNet-50 activations on one random input.

    Prints one activation map from each network for manual inspection;
    requires CUDA and downloads pretrained weights on first use.
    """
    # Cleanup: .eval() was called twice per model in the original; once
    # (before moving to GPU) is sufficient.
    model1 = Dilated_ResNet(10).eval().cuda()
    model2 = Org_ResNet(10).eval().cuda()
    x = Variable(torch.Tensor(1, 3, 224, 224).uniform_(-0.5, 0.5)).cuda()
    y1 = model1(x)
    y2 = model2(x)
    print(y1[0][1])
    print(y2[0][1])
# Script entry point: run the dilated-vs-original ResNet comparison.
if __name__ == "__main__":
    test_resnet()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment