Commit d539ddfa authored by Hang Zhang's avatar Hang Zhang
Browse files

v0.1.0

parent 80a12ef6
...@@ -22,6 +22,9 @@ extern "C" { ...@@ -22,6 +22,9 @@ extern "C" {
#endif #endif
// float // float
#include "generic/encoding_utils.c"
#include "THC/THCGenerateFloatType.h"
#include "generic/encoding_kernel.c" #include "generic/encoding_kernel.c"
#include "THC/THCGenerateFloatType.h" #include "THC/THCGenerateFloatType.h"
...@@ -32,6 +35,9 @@ extern "C" { ...@@ -32,6 +35,9 @@ extern "C" {
#include "THC/THCGenerateFloatType.h" #include "THC/THCGenerateFloatType.h"
// double // double
#include "generic/encoding_utils.c"
#include "THC/THCGenerateDoubleType.h"
#include "generic/encoding_kernel.c" #include "generic/encoding_kernel.c"
#include "THC/THCGenerateDoubleType.h" #include "THC/THCGenerateDoubleType.h"
......
...@@ -77,16 +77,19 @@ class _ConvNd(Module): ...@@ -77,16 +77,19 @@ class _ConvNd(Module):
class Conv1d(_ConvNd): class Conv1d(_ConvNd):
r"""Applies a 1D convolution over an input signal composed of several input r"""Applies a 1D convolution over an input signal composed of several
planes. input planes.
In the simplest case, the output value of the layer with input size In the simplest case, the output value of the layer with input size
:math:`(N, C_{in}, L)` and output :math:`(N, C_{out}, L_{out})` can be :math:`(N, C_{in}, L)` and output :math:`(N, C_{out}, L_{out})` can be
precisely described as: precisely described as:
.. math:: .. math::
\begin{array}{ll} \begin{array}{ll}
out(N_i, C_{out_j}) = bias(C_{out_j}) out(N_i, C_{out_j}) = bias(C_{out_j})
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k) + \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k)
\star input(N_i, k)
\end{array} \end{array}
where :math:`\star` is the valid `cross-correlation`_ operator where :math:`\star` is the valid `cross-correlation`_ operator
| :attr:`stride` controls the stride for the cross-correlation. | :attr:`stride` controls the stride for the cross-correlation.
...@@ -155,11 +158,13 @@ class Conv2d(_ConvNd): ...@@ -155,11 +158,13 @@ class Conv2d(_ConvNd):
In the simplest case, the output value of the layer with input size In the simplest case, the output value of the layer with input size
:math:`(N, C_{in}, H, W)` and output :math:`(N, C_{out}, H_{out}, W_{out})` :math:`(N, C_{in}, H, W)` and output :math:`(N, C_{out}, H_{out}, W_{out})`
can be precisely described as: can be precisely described as:
.. math:: .. math::
\begin{array}{ll} \begin{array}{ll}
out(N_i, C_{out_j}) = bias(C_{out_j}) out(N_i, C_{out_j}) = bias(C_{out_j})
+ \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k) + \sum_{{k}=0}^{C_{in}-1} weight(C_{out_j}, k) \star input(N_i, k)
\end{array} \end{array}
where :math:`\star` is the valid 2D `cross-correlation`_ operator where :math:`\star` is the valid 2D `cross-correlation`_ operator
| :attr:`stride` controls the stride for the cross-correlation. | :attr:`stride` controls the stride for the cross-correlation.
...@@ -414,10 +419,13 @@ class ReLU(Threshold): ...@@ -414,10 +419,13 @@ class ReLU(Threshold):
class Sigmoid(Module): class Sigmoid(Module):
"""Applies the element-wise function :math:`f(x) = 1 / ( 1 + exp(-x))` """Applies the element-wise function :math:`f(x) = 1 / ( 1 + exp(-x))`
Shape: Shape:
- Input: :math:`(N, *)` where `*` means, any number of additional - Input: :math:`(N, *)` where `*` means, any number of additional
dimensions dimensions
- Output: :math:`(N, *)`, same shape as the input - Output: :math:`(N, *)`, same shape as the input
Examples:: Examples::
>>> m = nn.Sigmoid() >>> m = nn.Sigmoid()
>>> input = autograd.Variable(torch.randn(2)) >>> input = autograd.Variable(torch.randn(2))
>>> print(input) >>> print(input)
...@@ -436,10 +444,11 @@ class Sigmoid(Module): ...@@ -436,10 +444,11 @@ class Sigmoid(Module):
class MaxPool2d(Module): class MaxPool2d(Module):
r"""Applies a 2D max pooling over an input signal composed of several input r"""Applies a 2D max pooling over an input signal composed of several
planes. input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`, In the simplest case, the output value of the layer with input size
output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)` :math:`(N, C, H, W)`, output :math:`(N, C, H_{out}, W_{out})` and
:attr:`kernel_size` :math:`(kH, kW)`
can be precisely described as: can be precisely described as:
.. math:: .. math::
...@@ -450,8 +459,8 @@ class MaxPool2d(Module): ...@@ -450,8 +459,8 @@ class MaxPool2d(Module):
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides | If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
for :attr:`padding` number of points for :attr:`padding` number of points
| :attr:`dilation` controls the spacing between the kernel points. It is harder to describe, | :attr:`dilation` controls the spacing between the kernel points. It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
but this `link`_ has a nice visualization of what :attr:`dilation` does.
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be: The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension - a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
...@@ -531,8 +540,8 @@ class AvgPool2d(Module): ...@@ -531,8 +540,8 @@ class AvgPool2d(Module):
input(N_i, C_j, stride[0] * h + m, stride[1] * w + n) input(N_i, C_j, stride[0] * h + m, stride[1] * w + n)
\end{array} \end{array}
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides | If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides for :attr:`padding` number of points
for :attr:`padding` number of points
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be: The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension - a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
......
...@@ -17,15 +17,19 @@ from torch.nn.parameter import Parameter ...@@ -17,15 +17,19 @@ from torch.nn.parameter import Parameter
from ..parallel import my_data_parallel from ..parallel import my_data_parallel
from .syncbn import BatchNorm2d from .syncbn import BatchNorm2d
from ..functions import dilatedavgpool2d from ..functions import dilatedavgpool2d, view_each, upsample
from .basic import *
__all__ = ['DilatedAvgPool2d', 'MyConvTranspose2d', 'View', 'Normalize', __all__ = ['DilatedAvgPool2d', 'UpsampleConv2d', 'View', 'Sum', 'Mean',
'Bottleneck'] 'Normalize', 'Bottleneck', 'PyramidPooling']
class DilatedAvgPool2d(Module): class DilatedAvgPool2d(Module):
r"""We provide Dilated Average Pooling for the dilation of Densenet as r"""We provide Dilated Average Pooling for the dilation of Densenet as
in :class:`encoding.dilated.DenseNet`. in :class:`encoding.dilated.DenseNet`.
Reference::
We provide this code for a coming paper.
Applies a 2D average pooling over an input signal composed of several input planes. Applies a 2D average pooling over an input signal composed of several input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`, In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
...@@ -68,8 +72,6 @@ class DilatedAvgPool2d(Module): ...@@ -68,8 +72,6 @@ class DilatedAvgPool2d(Module):
>>> input = autograd.Variable(torch.randn(20, 16, 50, 32)) >>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
>>> output = m(input) >>> output = m(input)
Reference::
comming
""" """
def __init__(self, kernel_size, stride=None, padding=0, dilation=1): def __init__(self, kernel_size, stride=None, padding=0, dilation=1):
super(DilatedAvgPool2d, self).__init__() super(DilatedAvgPool2d, self).__init__()
...@@ -79,8 +81,13 @@ class DilatedAvgPool2d(Module): ...@@ -79,8 +81,13 @@ class DilatedAvgPool2d(Module):
self.dilation = dilation self.dilation = dilation
def forward(self, input): def forward(self, input):
return dilatedavgpool2d(input, self.kernel_size, self.stride, if isinstance(input, Variable):
return dilatedavgpool2d(input, self.kernel_size, self.stride,
self.padding, self.dilation) self.padding, self.dilation)
elif isinstance(input, tuple) or isinstance(input, list):
return my_data_parallel(self, input)
else:
raise RuntimeError('unknown input type')
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + ' (' \ return self.__class__.__name__ + ' (' \
...@@ -90,13 +97,63 @@ class DilatedAvgPool2d(Module): ...@@ -90,13 +97,63 @@ class DilatedAvgPool2d(Module):
+ ', dilation=' + str(self.dilation) + ')' + ', dilation=' + str(self.dilation) + ')'
class MyConvTranspose2d(Module): class UpsampleConv2d(Module):
"""Customized Layers, discuss later r"""
To avoid the checkerboard artifacts of standard fractionally-strided convolution, we adopt an integer-stride convolution that produces :math:`2\times 2` outputs for each convolutional window.
.. image:: _static/img/upconv.png
:width: 50%
:align: center
Reference:
Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
output_padding (int or tuple, optional): Zero-padding added to one side of the output. Default: 0
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional): If True, adds a learnable bias to the output. Default: True
dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
scale_factor (int): scaling factor for upsampling convolution. Default: 1
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`
- Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
:math:`H_{out} = scale * (H_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0] + output\_padding[0]`
:math:`W_{out} = scale * (W_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1] + output\_padding[1]`
Attributes:
weight (Tensor): the learnable weights of the module of shape
(in_channels, scale * scale * out_channels, kernel_size[0], kernel_size[1])
bias (Tensor): the learnable bias of the module of shape (scale * scale * out_channels)
Examples::
>>> # With square kernels and equal stride
>>> m = nn.UpsampleConv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nn.UpsampleConv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> input = autograd.Variable(torch.randn(1, 16, 12, 12))
>>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
>>> upsample = nn.UpsampleConv2d(16, 16, 3, stride=2, padding=1)
>>> h = downsample(input)
>>> h.size()
torch.Size([1, 16, 6, 6])
>>> output = upsample(h, output_size=input.size())
>>> output.size()
torch.Size([1, 16, 12, 12])
""" """
def __init__(self, in_channels, out_channels, kernel_size, stride=1, def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, scale_factor =1, padding=0, dilation=1, groups=1, scale_factor =1,
bias=True): bias=True):
super(MyConvTranspose2d, self).__init__() super(UpsampleConv2d, self).__init__()
kernel_size = _pair(kernel_size) kernel_size = _pair(kernel_size)
stride = _pair(stride) stride = _pair(stride)
padding = _pair(padding) padding = _pair(padding)
...@@ -163,6 +220,36 @@ class View(Module): ...@@ -163,6 +220,36 @@ class View(Module):
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
class Sum(Module):
    """Reduce the input by summation along a fixed dimension.

    Accepts either a single Variable, or a tuple/list of Variables
    (one per GPU in self-parallel mode), which is dispatched through
    ``my_data_parallel``.

    Args:
        dim (int): dimension to reduce over.
        keep_dim (bool): if True, retain the reduced dimension with size 1.
    """

    def __init__(self, dim, keep_dim=False):
        super(Sum, self).__init__()
        self.dim = dim
        self.keep_dim = keep_dim

    def forward(self, input):
        if isinstance(input, Variable):
            return input.sum(self.dim, self.keep_dim)
        if isinstance(input, (tuple, list)):
            # self-parallel mode: apply to each per-GPU element
            return my_data_parallel(self, input)
        raise RuntimeError('unknown input type')
class Mean(Module):
    """Reduce the input by averaging along a fixed dimension.

    Accepts either a single Variable, or a tuple/list of Variables
    (one per GPU in self-parallel mode), which is dispatched through
    ``my_data_parallel``.

    Args:
        dim (int): dimension to reduce over.
        keep_dim (bool): if True, retain the reduced dimension with size 1.
    """

    def __init__(self, dim, keep_dim=False):
        super(Mean, self).__init__()
        self.dim = dim
        self.keep_dim = keep_dim

    def forward(self, input):
        if isinstance(input, Variable):
            return input.mean(self.dim, self.keep_dim)
        if isinstance(input, (tuple, list)):
            # self-parallel mode: apply to each per-GPU element
            return my_data_parallel(self, input)
        raise RuntimeError('unknown input type')
class Normalize(Module): class Normalize(Module):
r"""Performs :math:`L_p` normalization of inputs over specified dimension. r"""Performs :math:`L_p` normalization of inputs over specified dimension.
...@@ -189,7 +276,7 @@ class Normalize(Module): ...@@ -189,7 +276,7 @@ class Normalize(Module):
def forward(self, x): def forward(self, x):
if isinstance(x, Variable): if isinstance(x, Variable):
return F.normalize(x, self.p, self.dim) return F.normalize(x, self.p, self.dim, eps=1e-10)
elif isinstance(x, tuple) or isinstance(x, list): elif isinstance(x, tuple) or isinstance(x, list):
return my_data_parallel(self, x) return my_data_parallel(self, x)
else: else:
...@@ -214,15 +301,15 @@ class Bottleneck(Module): ...@@ -214,15 +301,15 @@ class Bottleneck(Module):
conv_block = [] conv_block = []
conv_block += [norm_layer(inplanes), conv_block += [norm_layer(inplanes),
ReLU(inplace=True), ReLU(inplace=True),
Conv2d(inplanes, planes, kernel_size=1, stride=1)] Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)]
conv_block += [norm_layer(planes), conv_block += [norm_layer(planes),
ReLU(inplace=True), ReLU(inplace=True),
Conv2d(planes, planes, kernel_size=3, stride=stride, Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1)] padding=1, bias=False)]
conv_block += [norm_layer(planes), conv_block += [norm_layer(planes),
ReLU(inplace=True), ReLU(inplace=True),
Conv2d(planes, planes * self.expansion, kernel_size=1, Conv2d(planes, planes * self.expansion, kernel_size=1,
stride=1)] stride=1, bias=False)]
self.conv_block = Sequential(*conv_block) self.conv_block = Sequential(*conv_block)
def forward(self, x): def forward(self, x):
...@@ -238,18 +325,58 @@ class Bottleneck(Module): ...@@ -238,18 +325,58 @@ class Bottleneck(Module):
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
def _get_a_var(obj): class PyramidPooling(Module):
if isinstance(obj, Variable): """
return obj Reference:
Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
if isinstance(obj, list) or isinstance(obj, tuple): """
results = map(_get_a_var, obj) def __init__(self, in_channels):
for result in results: super(PyramidPooling, self).__init__()
if isinstance(result, Variable): self.pool1 = AdaptiveAvgPool2d(1)
return result self.pool2 = AdaptiveAvgPool2d(2)
if isinstance(obj, dict): self.pool3 = AdaptiveAvgPool2d(3)
results = map(_get_a_var, obj.items()) self.pool4 = AdaptiveAvgPool2d(6)
for result in results:
if isinstance(result, Variable): out_channels = int(in_channels/4)
return result self.conv1 = Sequential(Conv2d(in_channels, out_channels, 1),
return None BatchNorm2d(out_channels),
ReLU(True))
self.conv2 = Sequential(Conv2d(in_channels, out_channels, 1),
BatchNorm2d(out_channels),
ReLU(True))
self.conv3 = Sequential(Conv2d(in_channels, out_channels, 1),
BatchNorm2d(out_channels),
ReLU(True))
self.conv4 = Sequential(Conv2d(in_channels, out_channels, 1),
BatchNorm2d(out_channels),
ReLU(True))
def _cat_each(self, x, feat1, feat2, feat3, feat4):
assert(len(x)==len(feat1))
z = []
for i in range(len(x)):
z.append( torch.cat((x[i], feat1[i], feat2[i], feat3[i], feat4[i]), 1))
return z
def forward(self, x):
if isinstance(x, Variable):
_, _, h, w = x.size()
elif isinstance(x, tuple) or isinstance(x, list):
_, _, h, w = x[0].size()
else:
raise RuntimeError('unknown input type')
feat1 = upsample(self.conv1(self.pool1(x)),(h,w),
mode='bilinear')
feat2 = upsample(self.conv2(self.pool2(x)),(h,w),
mode='bilinear')
feat3 = upsample(self.conv3(self.pool3(x)),(h,w),
mode='bilinear')
feat4 = upsample(self.conv4(self.pool4(x)),(h,w),
mode='bilinear')
if isinstance(x, Variable):
return torch.cat((x, feat1, feat2, feat3, feat4), 1)
elif isinstance(x, tuple) or isinstance(x, list):
return self._cat_each(x, feat1, feat2, feat3, feat4)
else:
raise RuntimeError('unknown input type')
...@@ -15,17 +15,17 @@ import torch.nn.functional as F ...@@ -15,17 +15,17 @@ import torch.nn.functional as F
from torch.autograd import Function, Variable from torch.autograd import Function, Variable
from .._ext import encoding_lib from .._ext import encoding_lib
from ..functions import scaledL2, aggregate, aggregateP, residual, assign from ..functions import scaledL2, aggregate
from ..parallel import my_data_parallel from ..parallel import my_data_parallel
__all__ = ['Encoding', 'Inspiration', 'GramMatrix', 'Aggregate','EncodingP'] __all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'GramMatrix']
class Encoding(nn.Module): class Encoding(nn.Module):
r""" r"""
Encoding Layer: a learnable residual encoder over 3d or 4d input that Encoding Layer: a learnable residual encoder over 3d or 4d input that
is seen as a mini-batch. is seen as a mini-batch.
.. image:: http://hangzh.com/figure/cvpr17.svg .. image:: _static/img/cvpr17.svg
:width: 50% :width: 50%
:align: center :align: center
...@@ -71,9 +71,8 @@ class Encoding(nn.Module): ...@@ -71,9 +71,8 @@ class Encoding(nn.Module):
def reset_params(self): def reset_params(self):
std1 = 1./((self.K*self.D)**(1/2)) std1 = 1./((self.K*self.D)**(1/2))
std2 = 1./((self.K)**(1/2))
self.codewords.data.uniform_(-std1, std1) self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2) self.scale.data.uniform_(-1, 0)
def forward(self, X): def forward(self, X):
if isinstance(X, tuple) or isinstance(X, list): if isinstance(X, tuple) or isinstance(X, list):
...@@ -82,7 +81,7 @@ class Encoding(nn.Module): ...@@ -82,7 +81,7 @@ class Encoding(nn.Module):
elif not isinstance(X, Variable): elif not isinstance(X, Variable):
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
# input X is a 4D tensor # input X is a 4D tensor
assert(X.size(1)==self.D,"Encoding Layer wrong channels!") assert(X.size(1)==self.D)
if X.dim() == 3: if X.dim() == 3:
# BxDxN # BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D B, N, K, D = X.size(0), X.size(2), self.K, self.D
...@@ -94,7 +93,8 @@ class Encoding(nn.Module): ...@@ -94,7 +93,8 @@ class Encoding(nn.Module):
else: else:
raise RuntimeError('Encoding Layer unknown input dims!') raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights # assignment weights
A = F.softmax(scaledL2(X, self.codewords, self.scale)) #A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
# aggregate # aggregate
E = aggregate(A, X, self.codewords) E = aggregate(A, X, self.codewords)
return E return E
...@@ -104,10 +104,65 @@ class Encoding(nn.Module): ...@@ -104,10 +104,65 @@ class Encoding(nn.Module):
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \ + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')' + str(self.D) + ')'
class EncodingShake(nn.Module):
    """Encoding layer variant with shake-style noise on the smoothing factors.

    Performs the same residual-encoding computation as :class:`Encoding`
    (scaled-L2 assignment followed by aggregation), except that
    :meth:`shake` re-randomizes the ``scale`` parameters in-place both
    before and after aggregation: U(-1, 0) noise while training, a
    constant -0.5 at evaluation time.

    Args:
        D: number of feature channels of the input.
        K: number of codewords.
    """
    def __init__(self, D, K):
        super(EncodingShake, self).__init__()
        # init codewords and smoothing factor
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D),
            requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()
    def reset_params(self):
        # Codewords ~ U(-1/sqrt(K*D), +1/sqrt(K*D)); scales ~ U(-1, 0).
        std1 = 1./((self.K*self.D)**(1/2))
        self.codewords.data.uniform_(-std1, std1)
        self.scale.data.uniform_(-1, 0)
    def shake(self):
        # Re-draw the smoothing factors in-place.
        # NOTE: mutating .data bypasses autograd, so these draws are not
        # part of the backward graph.
        if self.training:
            self.scale.data.uniform_(-1, 0)
        else:
            # Deterministic midpoint of U(-1, 0) at evaluation time.
            self.scale.data.zero_().add_(-0.5)
    def forward(self, X):
        # X: Variable of shape BxDxN (3D) or BxDxHxW (4D), or a tuple/list
        # of such Variables in self-parallel mode.
        if isinstance(X, tuple) or isinstance(X, list):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        # channel dimension must match the codeword dimension D
        assert(X.size(1)==self.D)
        if X.dim() == 3:
            # BxDxN -> BxNxD
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1,2).contiguous()
        elif X.dim() == 4:
            # BxDxHxW -> BxNxD with N = H*W
            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
            X = X.view(B,D,-1).transpose(1,2).contiguous()
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # shake before computing the assignments
        self.shake()
        # assignment weights: softmax over the K codewords for each feature
        A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
        # aggregate the (implicit) residuals with the assignment weights
        E = aggregate(A, X, self.codewords)
        # shake again after aggregation
        self.shake()
        return E
    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
            + str(self.D) + ')'
class Inspiration(nn.Module): class Inspiration(nn.Module):
r""" Inspiration Layer (for MSG-Net). r"""
Tuning the featuremap with target Gram Matrix Inspiration Layer (CoMatch Layer) enables the multi-style transfer in feed-forward network, which learns to match the target feature statistics during the training.
This module is differentiable and can be inserted into a standard feed-forward network to be learned directly from the loss function without additional supervision.
.. math:: .. math::
Y = \phi^{-1}[\phi(\mathcal{F}^T)W\mathcal{G}] Y = \phi^{-1}[\phi(\mathcal{F}^T)W\mathcal{G}]
...@@ -116,7 +171,7 @@ class Inspiration(nn.Module): ...@@ -116,7 +171,7 @@ class Inspiration(nn.Module):
training multi-style generative network for real-time transfer. training multi-style generative network for real-time transfer.
Reference: Reference:
Hang Zhang, and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)* Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
""" """
def __init__(self, C, B=1): def __init__(self, C, B=1):
super(Inspiration, self).__init__() super(Inspiration, self).__init__()
...@@ -156,76 +211,3 @@ class GramMatrix(nn.Module): ...@@ -156,76 +211,3 @@ class GramMatrix(nn.Module):
gram = features.bmm(features_t) / (ch * h * w) gram = features.bmm(features_t) / (ch * h * w)
return gram return gram
class Aggregate(nn.Module):
    r"""Aggregate residuals with their assignment weights.

    .. math::
        e_{k} = \sum_{i=1}^{N} a_{ik} r_{ik}

    Shape:
        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}`,
          :math:`R\in\mathcal{R}^{B\times N\times K\times D}`
          (:math:`B` batch, :math:`N` total number of features,
          :math:`K` number of codewords, :math:`D` feature dimension).
        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
    """
    def forward(self, A, R):
        # Tuples/lists come from the self-parallel mode (see encoding.nn).
        if isinstance(A, (tuple, list)):
            return my_data_parallel(self, A, R)
        if not isinstance(A, Variable):
            raise RuntimeError('unknown input type')
        return aggregateP(A, R)
class EncodingP(nn.Module):
    """Deprecated variant of the Encoding layer; use :class:`Encoding` instead.

    Materializes the residuals explicitly (``residual`` -> ``assign`` ->
    ``aggregateP``) rather than fusing them as :class:`Encoding` does.

    Args:
        D (int): number of feature channels of the input.
        K (int): number of codewords.
    """
    def __init__(self, D, K):
        super(EncodingP, self).__init__()
        # init codewords and smoothing factor
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D),
                                      requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()
        print('EncodingP is deprecated, please use Encoding.')

    def reset_params(self):
        # Codewords ~ U(-1/sqrt(K*D), +1/sqrt(K*D)); scales ~ U(-1/sqrt(K), +1/sqrt(K)).
        std1 = 1./((self.K*self.D)**(1/2))
        std2 = 1./((self.K)**(1/2))
        self.codewords.data.uniform_(-std1, std1)
        self.scale.data.uniform_(-std2, std2)

    def forward(self, X):
        # X: Variable of shape BxDxN (3D) or BxDxHxW (4D), or a tuple/list
        # of such Variables in self-parallel mode.
        if isinstance(X, tuple) or isinstance(X, list):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        # BUG FIX: the original `assert(cond, "msg")` asserted on a
        # two-element tuple, which is always truthy, so the channel check
        # never fired. (Same fix the sibling Encoding class received.)
        assert X.size(1) == self.D, "Encoding Layer wrong channels!"
        if X.dim() == 3:
            # BxDxN -> BxNxD
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1,2)
        elif X.dim() == 4:
            # BxDxHxW -> BxNxD with N = H*W
            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
            X = X.view(B,D,-1).transpose(1,2)
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # calculate residuals
        R = residual(X.contiguous(), self.codewords)
        # assignment weights
        A = assign(R, self.scale)
        # aggregate
        E = aggregateP(A, R)
        return E

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
            + str(self.D) + ')'
...@@ -29,6 +29,9 @@ class BatchNorm1d(Module): ...@@ -29,6 +29,9 @@ class BatchNorm1d(Module):
r"""Synchronized Batch Normalization 1d r"""Synchronized Batch Normalization 1d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn` Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for a coming paper.
Applies Batch Normalization over a 2d or 3d input that is seen as a Applies Batch Normalization over a 2d or 3d input that is seen as a
mini-batch. mini-batch.
...@@ -220,6 +223,9 @@ class BatchNorm2d(Module): ...@@ -220,6 +223,9 @@ class BatchNorm2d(Module):
r"""Synchronized Batch Normalization 2d r"""Synchronized Batch Normalization 2d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn` Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for a coming paper.
Applies Batch Normalization over a 4d input that is seen as a mini-batch Applies Batch Normalization over a 4d input that is seen as a mini-batch
of 3d inputs of 3d inputs
...@@ -411,3 +417,20 @@ class BatchNorm2d(Module): ...@@ -411,3 +417,20 @@ class BatchNorm2d(Module):
return outputs return outputs
else: else:
raise RuntimeError('unknown input type') raise RuntimeError('unknown input type')
def _get_a_var(obj):
if isinstance(obj, Variable):
return obj
if isinstance(obj, list) or isinstance(obj, tuple):
results = map(_get_a_var, obj)
for result in results:
if isinstance(result, Variable):
return result
if isinstance(obj, dict):
results = map(_get_a_var, obj.items())
for result in results:
if isinstance(result, Variable):
return result
return None
...@@ -82,6 +82,9 @@ class Broadcast(Function): ...@@ -82,6 +82,9 @@ class Broadcast(Function):
class ModelDataParallel(Module): class ModelDataParallel(Module):
"""Implements data parallelism at the module level. """Implements data parallelism at the module level.
Reference::
We provide this code for a coming paper.
This container parallelizes the application of the given module by This container parallelizes the application of the given module by
splitting the input across the specified devices by chunking in the splitting the input across the specified devices by chunking in the
batch dimension. batch dimension.
...@@ -149,6 +152,9 @@ class CriterionDataParallel(Module): ...@@ -149,6 +152,9 @@ class CriterionDataParallel(Module):
Calculate loss in multiple-GPUs, which balance the memory usage for Calculate loss in multiple-GPUs, which balance the memory usage for
Semantic Segmentation. Semantic Segmentation.
Reference::
We provide this code for a coming paper.
The targets are splitted across the specified devices by chunking in The targets are splitted across the specified devices by chunking in
the batch dimension. Please use together with :class:`encoding.parallel.ModelDataParallel`. the batch dimension. Please use together with :class:`encoding.parallel.ModelDataParallel`.
""" """
...@@ -191,9 +197,12 @@ class CriterionDataParallel(Module): ...@@ -191,9 +197,12 @@ class CriterionDataParallel(Module):
class SelfDataParallel(Module): class SelfDataParallel(Module):
"""SelfDataParallel, please make sure you understand it before using. """SelfDataParallel, please make sure you understand it before using.
Reference::
We provide this code for a coming paper.
Each module in the network should be in self-parallel mode, Each module in the network should be in self-parallel mode,
which allows list of inputs from multiple GPUs. which allows list of inputs from multiple GPUs.
Please see encoding.nn for detail, use with cautious Please see :class:`encoding.nn` for detail, use with cautious
""" """
def __init__(self, module, device_ids=None, output_device=None, dim=0): def __init__(self, module, device_ids=None, output_device=None, dim=0):
super(SelfDataParallel, self).__init__() super(SelfDataParallel, self).__init__()
...@@ -211,11 +220,26 @@ class SelfDataParallel(Module): ...@@ -211,11 +220,26 @@ class SelfDataParallel(Module):
def forward(self, *inputs, **kwargs): def forward(self, *inputs, **kwargs):
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
outputs = self.module(inputs) if self.training:
return outputs # self parallel mode
outputs = self.module(inputs)
return outputs
else:
# TODO check faster?
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, \
self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return outputs
def replicate(self, module, device_ids):
return replicate(module, device_ids)
def parallel_apply(self, replicas, inputs, kwargs):
return parallel_apply(replicas, inputs, kwargs)
def scatter(self, inputs, kwargs, device_ids): def scatter(self, inputs, kwargs, device_ids):
#return my_scatter(inputs, target_gpus=device_ids)
outputs = scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) outputs = scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
return outputs return outputs
...@@ -343,3 +367,4 @@ def my_data_parallel(module, inputs, device_ids=None, \ ...@@ -343,3 +367,4 @@ def my_data_parallel(module, inputs, device_ids=None, \
outputs = my_parallel_apply(replicas, inputs, module_kwargs) outputs = my_parallel_apply(replicas, inputs, module_kwargs)
return outputs return outputs
...@@ -9,15 +9,27 @@ ...@@ -9,15 +9,27 @@
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/ */
//#include <THC/THC.h>
/* /*
#include <THC/THC.h>
#ifdef __cplusplus
extern "C" {
#endif
#define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME) #define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME)
#define THCTensor TH_CONCAT_3(TH,CReal,Tensor) #define THCTensor TH_CONCAT_3(TH,CReal,Tensor)
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME) #define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
// float
#include "generic/encoding_generic.h" #include "generic/encoding_generic.h"
#include "THC/THCGenerateFloatType.h" #include "THC/THCGenerateFloatType.h"
#include "generic/syncbn_generic.h"
#include "THC/THCGenerateFloatType.h"
#ifdef __cplusplus
}
#endif
*/ */
int Encoding_Float_scaledl2_forward(THCudaTensor *SL, int Encoding_Float_scaledl2_forward(THCudaTensor *SL,
...@@ -27,29 +39,12 @@ int Encoding_Float_scaledl2_backward( ...@@ -27,29 +39,12 @@ int Encoding_Float_scaledl2_backward(
THCudaTensor *GSL, THCudaTensor *GX, THCudaTensor *GC, THCudaTensor *GSL, THCudaTensor *GX, THCudaTensor *GC,
THCudaTensor *X, THCudaTensor *C, THCudaTensor *S); THCudaTensor *X, THCudaTensor *C, THCudaTensor *S);
int Encoding_Float_aggregateE_forward(THCudaTensor *E, THCudaTensor *A, int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *X, THCudaTensor *C); THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregateE_backward(THCudaTensor *GA, THCudaTensor *GE, int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GE,
THCudaTensor *A, THCudaTensor *X, THCudaTensor *C); THCudaTensor *A, THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *R);
int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GR,
THCudaTensor *L, THCudaTensor *A, THCudaTensor *R);
int Encoding_Float_residual_forward(THCudaTensor *R, THCudaTensor *X,
THCudaTensor *D);
int Encoding_Float_residual_backward(THCudaTensor *GR, THCudaTensor *GX,
THCudaTensor *GD);
int Encoding_Float_squaresqueeze_forward(THCudaTensor *L, THCudaTensor *R);
int Encoding_Float_squaresqueeze_backward(THCudaTensor *GL,
THCudaTensor *GR, THCudaTensor *R);
int Encoding_Float_batchnorm_Forward(THCudaTensor *output_, int Encoding_Float_batchnorm_Forward(THCudaTensor *output_,
THCudaTensor *input_, THCudaTensor *mean_, THCudaTensor *input_, THCudaTensor *mean_,
THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_); THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_);
...@@ -90,33 +85,13 @@ int Encoding_Double_scaledl2_backward( ...@@ -90,33 +85,13 @@ int Encoding_Double_scaledl2_backward(
THCudaDoubleTensor *GC, THCudaDoubleTensor *X, THCudaDoubleTensor *GC, THCudaDoubleTensor *X,
THCudaDoubleTensor *C, THCudaDoubleTensor *S); THCudaDoubleTensor *C, THCudaDoubleTensor *S);
int Encoding_Double_aggregateE_forward(THCudaDoubleTensor *E, int Encoding_Double_aggregate_forward(THCudaDoubleTensor *E,
THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C); THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C);
int Encoding_Double_aggregateE_backward(THCudaDoubleTensor *GA, int Encoding_Double_aggregate_backward(THCudaDoubleTensor *GA,
THCudaDoubleTensor *GE, THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *GE, THCudaDoubleTensor *A, THCudaDoubleTensor *X,
THCudaDoubleTensor *C); THCudaDoubleTensor *C);
int Encoding_Double_aggregate_forward(
THCudaDoubleTensor *E, THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_aggregate_backward(
THCudaDoubleTensor *GA, THCudaDoubleTensor *GR, THCudaDoubleTensor *L,
THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_residual_forward(
THCudaDoubleTensor *R, THCudaDoubleTensor *X, THCudaDoubleTensor *D);
int Encoding_Double_residual_backward(
THCudaDoubleTensor *GR, THCudaDoubleTensor *GX,
THCudaDoubleTensor *GD);
int Encoding_Double_squaresqueeze_forward(THCudaDoubleTensor *L,
THCudaDoubleTensor *R);
int Encoding_Double_squaresqueeze_backward(THCudaDoubleTensor *GL,
THCudaDoubleTensor *GR, THCudaDoubleTensor *R);
int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_, int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_,
THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_, THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_,
THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_, THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_,
...@@ -148,3 +123,4 @@ int Encoding_Double_DilatedAvgPool2d_Backward( ...@@ -148,3 +123,4 @@ int Encoding_Double_DilatedAvgPool2d_Backward(
int kH, int kW, int dH, int dW, int kH, int kW, int dH, int dW,
int padH, int padW, int padH, int padW,
int dilationH, int dilationW); int dilationH, int dilationW);
...@@ -23,7 +23,6 @@ int Encoding_(scaledl2_forward)(THCTensor *SL, ...@@ -23,7 +23,6 @@ int Encoding_(scaledl2_forward)(THCTensor *SL,
return 0; return 0;
} }
int Encoding_(scaledl2_backward)( int Encoding_(scaledl2_backward)(
THCTensor *GSL, THCTensor *GX, THCTensor *GC, THCTensor *GSL, THCTensor *GX, THCTensor *GC,
THCTensor *X, THCTensor *C, THCTensor *S) THCTensor *X, THCTensor *C, THCTensor *S)
...@@ -36,94 +35,25 @@ int Encoding_(scaledl2_backward)( ...@@ -36,94 +35,25 @@ int Encoding_(scaledl2_backward)(
return 0; return 0;
} }
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
int Encoding_(aggregateE_forward)(THCTensor *E, THCTensor *A,
THCTensor *X, THCTensor *C) THCTensor *X, THCTensor *C)
/* /*
* Aggregate operation * Aggregate operation
*/ */
{ {
Encoding_(AggregateE_Forward)(state, E, A, X, C); Encoding_(Aggregate_Forward)(state, E, A, X, C);
/* C function return number of the outputs */ /* C function return number of the outputs */
return 0; return 0;
} }
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GE,
int Encoding_(aggregateE_backward)(THCTensor *GA, THCTensor *GE,
THCTensor *A, THCTensor *X, THCTensor *C) THCTensor *A, THCTensor *X, THCTensor *C)
/* /*
* Aggregate backward operation to A * Aggregate backward operation to A
* G (dl/dR), L (dl/dE), A (assignments) * G (dl/dR), L (dl/dE), A (assignments)
*/ */
{ {
Encoding_(AggregateE_Backward)(state, GA, GE, A, X, C); Encoding_(Aggregate_Backward)(state, GA, GE, A, X, C);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *R)
/*
* Aggregate operation
*/
{
Encoding_(Aggregate_Forward)(state, E, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GR,
THCTensor *L, THCTensor *A, THCTensor *R)
/*
* Aggregate backward operation to A
* G (dl/dR), L (dl/dE), A (assignments)
*/
{
Encoding_(Aggregate_Backward)(state, GA, GR, L, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D)
/*
* Residual operation
*/
{
Encoding_(Residual_Forward)(state, R, X, D);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
THCTensor *GD)
/*
* Residual operation
*/
{
Encoding_(Residual_Backward)(state, GR, GX, GD);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R)
/*
* Residual operation
*/
{
Encoding_(SquareSqueeze_Forward)(state, L, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
THCTensor *R)
/*
* Residual operation
*/
{
Encoding_(SquareSqueeze_Backward)(state, GL, GR, R);
/* C function return number of the outputs */ /* C function return number of the outputs */
return 0; return 0;
} }
......
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/encoding_generic.h"
#else
int Encoding_(scaledl2_forward)(THCTensor *SL,
THCTensor *X, THCTensor *C, THCTensor *S);
int Encoding_(scaledl2_backward)(
THCTensor *GSL, THCTensor *GX, THCTensor *GC,
THCTensor *X, THCTensor *C, THCTensor *S);
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *X, THCTensor *C);
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GE,
THCTensor *A, THCTensor *X, THCTensor *C);
int Encoding_(aggregateP_forward)(THCTensor *E, THCTensor *A,
THCTensor *R);
int Encoding_(aggregateP_backward)(THCTensor *GA, THCTensor *GR,
THCTensor *L, THCTensor *A, THCTensor *R);
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D);
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
THCTensor *GD);
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R);
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
THCTensor *R);
#endif
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/pooling_generic.c"
#else
int Encoding_(DilatedAvgPool2d_Forward)(
THCTensor *X_, THCTensor *Y_,
int kH, int kW, int dH, int dW,
int padH, int padW,
int dilationH, int dilationW);
int Encoding_(DilatedAvgPool2d_Backward)(
THCTensor *gradX_, THCTensor *gradY_,
int kH, int kW, int dH, int dW,
int padH, int padW,
int dilationH, int dilationW);
#endif
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* Created by: Hang Zhang
* ECE Department, Rutgers University
* Email: zhang.hang@rutgers.edu
* Copyright (c) 2017
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree
*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*/
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/syncbn_generic.h"
#else
int Encoding_(batchnorm_Forward)(THCTensor *output_, THCTensor *input_,
THCTensor *mean_, THCTensor *invstd_,
THCTensor *gamma_, THCTensor *beta_);
int Encoding_(batchnorm_Backward)(THCTensor *gradoutput_,
THCTensor *input_, THCTensor *gradinput_,
THCTensor *gradgamma_, THCTensor *gradbeta_, THCTensor *mean_,
THCTensor *invstd_, THCTensor *gamma_, THCTensor *beta_,
THCTensor *gradMean_, THCTensor *gradStd_, int train);
int Encoding_(sum_square_Forward)(THCTensor *input_,
THCTensor *sum_, THCTensor *square_);
int Encoding_(sum_square_Backward)(
THCTensor *gradInput, THCTensor *input_,
THCTensor *gradSum_, THCTensor *gradSquare_);
#endif
...@@ -14,6 +14,7 @@ import os ...@@ -14,6 +14,7 @@ import os
import sys import sys
import time import time
import math import math
import tqdm
def get_optimizer(args, model, diff_LR=True): def get_optimizer(args, model, diff_LR=True):
""" """
...@@ -44,9 +45,7 @@ class CosLR_Scheduler(object): ...@@ -44,9 +45,7 @@ class CosLR_Scheduler(object):
"""Cosine Learning Rate Scheduler """Cosine Learning Rate Scheduler
.. math:: .. math::
lr = base_lr * 0.5 * (1 + cos(T/N)) lr = baselr * 0.5 * (1 + cos(iter/maxiter))
where ``T`` is current iters and ``N`` is total iters
Args: Args:
args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs` args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs`
...@@ -62,7 +61,7 @@ class CosLR_Scheduler(object): ...@@ -62,7 +61,7 @@ class CosLR_Scheduler(object):
T = (epoch - 1) * self.niters + i T = (epoch - 1) * self.niters + i
lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi)) lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
if epoch > self.epoch: if epoch > self.epoch:
print('=>Epochs %i, learning rate = %.4f, previous best ='\ print('\n=>Epochs %i, learning rate = %.4f, previous best ='\
'%.3f%%' % (epoch, lr, best_pred)) '%.3f%%' % (epoch, lr, best_pred))
self.epoch = epoch self.epoch = epoch
self._adjust_learning_rate(optimizer, lr) self._adjust_learning_rate(optimizer, lr)
...@@ -90,12 +89,14 @@ def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'): ...@@ -90,12 +89,14 @@ def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'):
if is_best: if is_best:
shutil.copyfile(filename, directory + 'model_best.pth.tar') shutil.copyfile(filename, directory + 'model_best.pth.tar')
# refer to https://github.com/kuangliu/pytorch-cifar/blob/master/utils.py # refer to https://github.com/kuangliu/pytorch-cifar/blob/master/utils.py
_, term_width = os.popen('stty size', 'r').read().split() _, term_width = os.popen('stty size', 'r').read().split()
term_width = int(term_width) term_width = int(term_width)-1
TOTAL_BAR_LENGTH = 86. TOTAL_BAR_LENGTH = 36.
last_time = time.time() last_time = time.time()
begin_time = last_time begin_time = last_time
def progress_bar(current, total, msg=None): def progress_bar(current, total, msg=None):
"""Progress Bar for display """Progress Bar for display
""" """
......
...@@ -15,45 +15,38 @@ import sys ...@@ -15,45 +15,38 @@ import sys
import subprocess import subprocess
from setuptools import setup, find_packages from setuptools import setup, find_packages
from setuptools.command.develop import develop import setuptools.command.develop
from setuptools.command.install import install import setuptools.command.install
this_file = os.path.dirname(__file__) cwd = os.path.dirname(os.path.abspath(__file__))
def read(*names, **kwargs): # run test scrip after installation
with io.open( class install(setuptools.command.install.install):
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")
) as fp:
return fp.read()
def find_version(*file_paths):
version_file = read(*file_paths)
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
version_file, re.M)
if version_match:
return version_match.group(1)
raise RuntimeError("Unable to find version string.")
_version = find_version('encoding/__init__.py')
#extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
extra_compile_args += ['-static-libstdc++']
extra_link_args += ['-static-libstdc++']
class TestCommand(install):
"""Post-installation mode."""
def run(self): def run(self):
install.run(self) self.create_version_file()
setuptools.command.install.install.run(self)
subprocess.check_call("python test/test.py".split()) subprocess.check_call("python test/test.py".split())
@staticmethod
def create_version_file():
global version, cwd
print('-- Building version ' + version)
version_path = os.path.join(cwd, 'encoding', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
version = '0.1.0'
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
cwd=cwd).decode('ascii').strip()
version += '+' + sha[:7]
except Exception:
pass
setup( setup(
name="encoding", name="encoding",
version=_version, version=version,
description="PyTorch Encoding Layer", description="PyTorch Encoding",
url="https://github.com/zhanghang1989/PyTorch-Encoding-Layer", url="https://github.com/zhanghang1989/PyTorch-Encoding",
author="Hang Zhang", author="Hang Zhang",
author_email="zhang.hang@rutgers.edu", author_email="zhang.hang@rutgers.edu",
# Require cffi. # Require cffi.
...@@ -61,14 +54,13 @@ setup( ...@@ -61,14 +54,13 @@ setup(
setup_requires=["cffi>=1.0.0"], setup_requires=["cffi>=1.0.0"],
# Exclude the build files. # Exclude the build files.
packages=find_packages(exclude=["build"]), packages=find_packages(exclude=["build"]),
#extra_compile_args=extra_compile_args,
# Package where to put the extensions. Has to be a prefix of build.py. # Package where to put the extensions. Has to be a prefix of build.py.
ext_package="", ext_package="",
# Extensions to compile. # Extensions to compile.
cffi_modules=[ cffi_modules=[
os.path.join(this_file, "build.py:ffi") os.path.join(cwd, "build.py:ffi")
], ],
cmdclass={ cmdclass={
'install': TestCommand, 'install': install,
}, },
) )
...@@ -17,17 +17,6 @@ import torchvision.models as models ...@@ -17,17 +17,6 @@ import torchvision.models as models
EPS = 1e-6 EPS = 1e-6
def test_aggregateP():
B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
requires_grad=True)
R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (A, R)
test = gradcheck(encoding.functions.aggregateP, input, eps=1e-6, atol=1e-4)
print('Testing aggregate(): {}'.format(test))
def test_aggregate(): def test_aggregate():
B,N,K,D = 2,3,4,5 B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
...@@ -54,47 +43,6 @@ def test_scaledL2(): ...@@ -54,47 +43,6 @@ def test_scaledL2():
print('Testing scaledL2(): {}'.format(test)) print('Testing scaledL2(): {}'.format(test))
def test_assign():
B,N,K,D = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
requires_grad=True)
C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
requires_grad=True)
S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5),
requires_grad=True)
R = encoding.functions.residual(X, C)
A1 = encoding.functions.assign(R, S)
E1 = encoding.functions.aggregateP(A1, R)
A2 = F.softmax(encoding.functions.scaledL2(X,C,S))
E2 = encoding.functions.aggregate(A2, X, C)
print('Testing assign(): {}'.format((E1-E2).norm(2).data[0] < EPS))
def test_residual():
B,N,K,D = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
requires_grad=True)
C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (X, C)
test = gradcheck(encoding.functions.residual, input, eps=1e-6, atol=1e-4)
print('Testing residual(): {}'.format(test))
"""
def test_square_squeeze():
B,N,K,D = 2,3,4,5
R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (R,)
test = gradcheck(encoding.functions.square_squeeze(), input, eps=1e-6, atol=1e-4)
print('Testing square_squeeze(): {}'.format(test))
"""
def test_encoding(): def test_encoding():
B,C,H,W,K = 2,3,4,5,6 B,C,H,W,K = 2,3,4,5,6
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
...@@ -105,16 +53,6 @@ def test_encoding(): ...@@ -105,16 +53,6 @@ def test_encoding():
print('Testing encoding(): {}'.format(test)) print('Testing encoding(): {}'.format(test))
def test_encodingP():
B,C,H,W,K = 2,3,4,5,6
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
requires_grad=True)
input = (X,)
layer = encoding.nn.EncodingP(C,K).double().cuda()
test = gradcheck(layer, input, eps=1e-6, atol=1e-4)
print('Testing encodingP(): {}'.format(test))
def test_sum_square(): def test_sum_square():
B,C,H,W = 2,3,4,5 B,C,H,W = 2,3,4,5
X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5),
...@@ -146,15 +84,10 @@ def test_dilated_avgpool(): ...@@ -146,15 +84,10 @@ def test_dilated_avgpool():
if __name__ == '__main__': if __name__ == '__main__':
test_aggregateP()
test_scaledL2() test_scaledL2()
test_encoding() test_encoding()
test_aggregate() test_aggregate()
test_residual()
#test_square_squeeze()
test_encodingP()
test_sum_square() test_sum_square()
test_assign()
test_dilated_avgpool() test_dilated_avgpool()
""" """
test_dilated_densenet() test_dilated_densenet()
......
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import os
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import encoding.dilated as dresnet
import torchvision.models as orgresnet
class Dilated_ResNet(nn.Module):
    """Feature extractor wrapping a dilated ResNet-50 backbone.

    Loads the pretrained dilated ResNet-50 and, in ``forward``, runs the
    input through the stem (conv1/bn1/relu/maxpool) and the four residual
    stages, returning the final feature map.  No classifier head is applied.
    """

    def __init__(self, nclass):
        # ``nclass`` is accepted for signature parity with callers but is
        # not used by this feature extractor.
        super(Dilated_ResNet, self).__init__()
        self.pretrained = dresnet.resnet50(pretrained=True)

    def forward(self, x):
        net = self.pretrained
        # Apply the pretrained stages in their canonical order; this is
        # identical to invoking each submodule explicitly one by one.
        for stage in (net.conv1, net.bn1, net.relu, net.maxpool,
                      net.layer1, net.layer2, net.layer3, net.layer4):
            x = stage(x)
        return x
class Org_ResNet(nn.Module):
    """Feature extractor wrapping the stock torchvision ResNet-50.

    Mirrors ``Dilated_ResNet`` but uses the original (non-dilated)
    backbone so the two networks' outputs can be compared.  Returns the
    final feature map; no classifier head is applied.
    """

    def __init__(self, nclass):
        # ``nclass`` is accepted for signature parity with callers but is
        # not used by this feature extractor.
        super(Org_ResNet, self).__init__()
        self.pretrained = orgresnet.resnet50(pretrained=True)

    def forward(self, x):
        net = self.pretrained
        # Apply the pretrained stages in their canonical order; this is
        # identical to invoking each submodule explicitly one by one.
        for stage in (net.conv1, net.bn1, net.relu, net.maxpool,
                      net.layer1, net.layer2, net.layer3, net.layer4):
            x = stage(x)
        return x
def test_resnet():
    """Compare dilated and original ResNet-50 features on a random input.

    Builds both backbones in eval mode on the GPU, feeds the same random
    ImageNet-sized batch through each, and prints one slice of each
    network's output feature map for manual comparison.

    Note: requires CUDA and downloads pretrained weights on first run.
    """
    # Build each model once in eval mode on the GPU.  The original code
    # called .eval() twice per model (in the construction chain and again
    # afterwards) -- once is sufficient.
    model1 = Dilated_ResNet(10).cuda().eval()
    model2 = Org_ResNet(10).cuda().eval()
    # Single random 3x224x224 image, values in [-0.5, 0.5).
    x = Variable(torch.Tensor(1, 3, 224, 224).uniform_(-0.5, 0.5)).cuda()
    y1 = model1(x)
    y2 = model2(x)
    # Print one channel slice from each output for eyeball comparison.
    print(y1[0][1])
    print(y2[0][1])
# Run the dilated-vs-original ResNet comparison when executed as a script.
if __name__ == "__main__":
    test_resnet()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment