"examples/git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "df6556049da332d978799a7f67d60c4f21484133"
Commit 0f6efd80 authored by Hang Zhang

path

parent c92a7c24
......@@ -6,5 +6,8 @@ build/
data/
docs/src/
docs/html/
encoding/lib/
encoding/_ext/
encoding.egg-info/
experiments/recognition/
experiments/segmentation/
......@@ -9,8 +9,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
......
......@@ -16,6 +16,7 @@ from torch.utils.ffi import create_extension
lib_path = os.path.join(os.path.dirname(torch.__file__), 'lib')
cwd = os.path.dirname(os.path.realpath(__file__))
encoding_lib_path = os.path.join(cwd, "encoding", "lib")
# clean the build files
clean_cmd = ['bash', 'clean.sh']
......@@ -26,11 +27,12 @@ os.environ['TORCH_BUILD_DIR'] = lib_path
if platform.system() == 'Darwin':
os.environ['TH_LIBRARIES'] = os.path.join(lib_path,'libTH.1.dylib')
os.environ['THC_LIBRARIES'] = os.path.join(lib_path,'libTHC.1.dylib')
ENCODING_LIB = os.path.join(lib_path, 'libENCODING.dylib')
ENCODING_LIB = os.path.join(cwd, 'encoding/lib/libENCODING.dylib')
else:
os.environ['TH_LIBRARIES'] = os.path.join(lib_path,'libTH.so.1')
os.environ['THC_LIBRARIES'] = os.path.join(lib_path,'libTHC.so.1')
ENCODING_LIB = os.path.join(lib_path, 'libENCODING.so')
ENCODING_LIB = os.path.join(cwd, 'encoding/lib/libENCODING.so')
build_all_cmd = ['bash', 'encoding/make.sh']
subprocess.check_call(build_all_cmd, env=dict(os.environ))
......@@ -44,7 +46,7 @@ defines = [('WITH_CUDA', None)]
with_cuda = True
include_path = [os.path.join(lib_path, 'include'),
os.path.join(lib_path,'include/ENCODING'),
os.path.join(cwd,'encoding/kernel'),
os.path.join(cwd,'encoding/kernel/include'),
os.path.join(cwd,'encoding/src/')]
......@@ -65,6 +67,7 @@ ffi = create_extension(
include_dirs = include_path,
extra_link_args = [
make_relative_rpath(lib_path),
make_relative_rpath(encoding_lib_path),
ENCODING_LIB,
],
)
......
#!/usr/bin/env bash
rm -rf build/ dist/ encoding.egg-info/ encoding/build/ encoding/_ext/ __pycache__ encoding/__pycache__
rm -rf build/ dist/ encoding.egg-info/ encoding/lib/ encoding/_ext/ __pycache__ encoding/__pycache__
......@@ -4,11 +4,13 @@
Dilated Networks
================
We provide correct dilated pre-trained ResNet and DenseNet for semantic segmentation.
For dilation of ResNet, we replace the stride-2 Conv3x3 at the beginning of certain stages and update the dilation of the subsequent conv layers.
For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`, which handles the dilation of the transition layers; the dilation of the subsequent conv layers is then updated.
We provide correct dilated pre-trained ResNet and DenseNet (stride of 8) for semantic segmentation.
For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`.
All provided models have been verified.
.. note::
This code is provided together with the paper (coming soon); please cite our work.
.. automodule:: encoding.dilated
.. currentmodule:: encoding.dilated
......
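The module is documented via autodoc above; a hedged construction sketch (constructor names such as ``resnet50`` are assumptions, mirroring torchvision's naming)::

    import encoding
    # dilated backbone (output stride 8) for segmentation; name assumed
    net = encoding.dilated.resnet50(pretrained=False)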
......@@ -5,9 +5,7 @@ Installing PyTorch-Encoding
Install from Source
-------------------
* Please follow the `PyTorch instructions <https://github.com/pytorch/pytorch#from-source>`_ to install PyTorch from source in the ``$HOME`` directory (recommended), or simply clone a copy to the ``$HOME`` directory::
git clone https://github.com/pytorch/pytorch $HOME/pytorch
* Install PyTorch from Source (recommended). Please follow the `PyTorch instructions <https://github.com/pytorch/pytorch#from-source>`_.
* Install this package
......
......@@ -9,10 +9,10 @@ Useful util functions.
.. automodule:: encoding.utils
.. currentmodule:: encoding.utils
:hidden:`CosLR_Scheduler`
:hidden:`LR_Scheduler`
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: CosLR_Scheduler
.. autoclass:: LR_Scheduler
:members:
:hidden:`get_optimizer`
......
......@@ -75,9 +75,4 @@ IF(ENCODING_SO_VERSION)
SOVERSION ${ENCODING_SO_VERSION})
ENDIF(ENCODING_SO_VERSION)
FILE(GLOB kernel-header kernel/generic/*.h)
FILE(GLOB src-header src/generic/*.h)
INSTALL(TARGETS ENCODING LIBRARY DESTINATION ${ENCODING_INSTALL_LIB_SUBDIR})
INSTALL(FILES kernel/thc_encoding.h DESTINATION "${ENCODING_INSTALL_INCLUDE_SUBDIR}/ENCODING")
INSTALL(FILES ${src-header} ${kernel-header} DESTINATION "${ENCODING_INSTALL_INCLUDE_SUBDIR}/ENCODING/generic")
#INSTALL(TARGETS ENCODING LIBRARY DESTINATION ${ENCODING_INSTALL_LIB_SUBDIR})
......@@ -61,7 +61,11 @@ def sum_square(input):
return _sum_square()(input)
class _batchnormtrain(Function):
class _batchnorm(Function):
def __init__(self, training=False):
super(_batchnorm, self).__init__()
self.training = training
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
assert(input.dim()==3)
......@@ -95,13 +99,13 @@ class _batchnormtrain(Function):
encoding_lib.Encoding_Float_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
True)
self.training)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
True)
self.training)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
......@@ -122,52 +126,7 @@ def batchnormtrain(input, gamma, beta, mean, std):
- Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
"""
return _batchnormtrain()(input, gamma, beta, mean, std)
class _batchnormeval(Function):
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
assert(input.dim()==3)
with torch.cuda.device_of(input):
invstd = 1.0 / std
output = input.new().resize_as_(input)
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
input, gamma, beta, mean, std = ctx.saved_tensors
invstd = 1.0 / std
with torch.cuda.device_of(input):
gradInput = gradOutput.new().resize_as_(input).zero_()
gradGamma = gradOutput.new().resize_as_(gamma).zero_()
gradBeta = gradOutput.new().resize_as_(beta).zero_()
gradMean = gradOutput.new().resize_as_(mean).zero_()
gradStd = gradOutput.new().resize_as_(std).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
False)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
False)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
return _batchnorm(True)(input, gamma, beta, mean, std)
def batchnormeval(input, gamma, beta, mean, std):
......@@ -176,4 +135,4 @@ def batchnormeval(input, gamma, beta, mean, std):
Please see encoding.batchnormtrain_
"""
return _batchnormeval()(input, gamma, beta, mean, std)
return _batchnorm(False)(input, gamma, beta, mean, std)
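With this refactor, ``batchnormtrain`` and ``batchnormeval`` share one ``_batchnorm`` Function and differ only in the ``training`` flag passed to the backward kernel. A minimal sketch (CUDA tensors; the import path and the per-channel ``(C,)`` shape of ``mean``/``std`` are assumptions, shown here as identity statistics)::

    import torch
    from torch.autograd import Variable
    from encoding.functions import batchnormtrain, batchnormeval  # path assumed

    B, C, L = 2, 3, 4
    x     = Variable(torch.cuda.FloatTensor(B, C, L).uniform_(), requires_grad=True)
    gamma = Variable(torch.cuda.FloatTensor(C).fill_(1), requires_grad=True)
    beta  = Variable(torch.cuda.FloatTensor(C).zero_(), requires_grad=True)
    mean  = Variable(torch.cuda.FloatTensor(C).zero_())   # assumed shape (C,)
    std   = Variable(torch.cuda.FloatTensor(C).fill_(1))  # assumed shape (C,)

    y_train = batchnormtrain(x, gamma, beta, mean, std)   # -> _batchnorm(True)
    y_eval  = batchnormeval(x, gamma, beta, mean, std)    # -> _batchnorm(False)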
#!/usr/bin/env bash
mkdir -p encoding/build && cd encoding/build
mkdir -p encoding/lib && cd encoding/lib
# compile and install
cmake ..
make install
cd ..
make
......@@ -8,7 +8,7 @@
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
from .encoding import *
from .syncbn import *
from .basic import *
from .encoding import *
from .customize import *
......@@ -11,193 +11,29 @@
import math
import torch
from torch.autograd import Variable
from torch.nn import Module
from torch.nn import Module, Parameter
from torch.nn import functional as F
from torch.nn.parameter import Parameter
from ..parallel import my_data_parallel
from .syncbn import BatchNorm2d
from ..functions import dilatedavgpool2d, view_each, upsample
from ..functions import view_each, upsample
from .basic import *
__all__ = ['DilatedAvgPool2d', 'UpsampleConv2d', 'View', 'Sum', 'Mean',
'Normalize', 'Bottleneck', 'PyramidPooling']
__all__ = ['GramMatrix', 'View', 'Sum', 'Mean', 'Normalize', 'PyramidPooling']
class DilatedAvgPool2d(Module):
r"""We provide Dilated Average Pooling for the dilation of Densenet as
in :class:`encoding.dilated.DenseNet`.
Reference::
We provide this code for a comming paper.
Applies a 2D average pooling over an input signal composed of several input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
can be precisely described as:
class GramMatrix(Module):
r""" Gram matrix of a 4D convolutional feature map treated as a mini-batch
.. math::
\begin{array}{ll}
out(b, c, h, w) = 1 / (kH * kW) *
\sum_{{m}=0}^{kH-1} \sum_{{n}=0}^{kW-1}
input(b, c, dH * h + m, dW * w + n)
\end{array}
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
for :attr:`padding` number of points
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
and the second `int` for the width dimension
Args:
kernel_size: the size of the window
stride: the stride of the window. Default value is :attr:`kernel_size`
padding: implicit zero padding to be added on both sides
dilation: the dilation parameter similar to Conv2d
Shape:
- Input: :math:`(N, C, H_{in}, W_{in})`
- Output: :math:`(N, C, H_{out}, W_{out})` where
:math:`H_{out} = floor((H_{in} + 2 * padding[0] - kernel\_size[0]) / stride[0] + 1)`
:math:`W_{out} = floor((W_{in} + 2 * padding[1] - kernel\_size[1]) / stride[1] + 1)`
Examples::
>>> # pool of square window of size=3, stride=2, dilation=2
>>> m = nn.DilatedAvgPool2d(3, stride=2, dilation=2)
>>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
>>> output = m(input)
"""
def __init__(self, kernel_size, stride=None, padding=0, dilation=1):
super(DilatedAvgPool2d, self).__init__()
self.kernel_size = kernel_size
self.stride = stride or kernel_size
self.padding = padding
self.dilation = dilation
def forward(self, input):
if isinstance(input, Variable):
return dilatedavgpool2d(input, self.kernel_size, self.stride,
self.padding, self.dilation)
elif isinstance(input, tuple) or isinstance(input, list):
return my_data_parallel(self, input)
else:
raise RuntimeError('unknown input type')
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ 'size=' + str(self.kernel_size) \
+ ', stride=' + str(self.stride) \
+ ', padding=' + str(self.padding) \
+ ', dilation=' + str(self.dilation) + ')'
class UpsampleConv2d(Module):
r"""
To avoid the checkerboard artifacts of standard fractionally-strided convolution, we adopt an integer-stride convolution that produces :math:`2\times 2` outputs for each convolutional window.
.. image:: _static/img/upconv.png
:width: 50%
:align: center
Reference:
Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
output_padding (int or tuple, optional): Zero-padding added to one side of the output. Default: 0
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional): If True, adds a learnable bias to the output. Default: True
dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
scale_factor (int): scaling factor for upsampling convolution. Default: 1
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`
- Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
:math:`H_{out} = scale * (H_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0] + output\_padding[0]`
:math:`W_{out} = scale * (W_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1] + output\_padding[1]`
Attributes:
weight (Tensor): the learnable weights of the module of shape
(in_channels, scale * scale * out_channels, kernel_size[0], kernel_size[1])
bias (Tensor): the learnable bias of the module of shape (scale * scale * out_channels)
Examples::
>>> # With square kernels and equal stride
>>> m = nn.UpsampleConv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nn.UpsampleConv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> input = autograd.Variable(torch.randn(1, 16, 12, 12))
>>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
>>> upsample = nn.UpsampleConv2d(16, 16, 3, stride=2, padding=1)
>>> h = downsample(input)
>>> h.size()
torch.Size([1, 16, 6, 6])
>>> output = upsample(h, output_size=input.size())
>>> output.size()
torch.Size([1, 16, 12, 12])
\mathcal{G} = \sum_{h=1}^{H_i}\sum_{w=1}^{W_i} \mathcal{F}_{h,w}\mathcal{F}_{h,w}^T
"""
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, scale_factor =1,
bias=True):
super(UpsampleConv2d, self).__init__()
kernel_size = _pair(kernel_size)
stride = _pair(stride)
padding = _pair(padding)
dilation = _pair(dilation)
if in_channels % groups != 0:
raise ValueError('in_channels must be divisible by groups')
if out_channels % groups != 0:
raise ValueError('out_channels must be divisible by groups')
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
self.scale_factor = scale_factor
self.weight = Parameter(torch.Tensor(
out_channels * scale_factor * scale_factor,
in_channels // groups, *kernel_size))
if bias:
self.bias = Parameter(torch.Tensor(out_channels *
scale_factor * scale_factor))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input):
if isinstance(input, Variable):
out = F.conv2d(input, self.weight, self.bias, self.stride,
self.padding, self.dilation, self.groups)
return F.pixel_shuffle(out, self.scale_factor)
elif isinstance(input, tuple) or isinstance(input, list):
return my_data_parallel(self, input)
else:
raise RuntimeError('unknown input type')
def forward(self, y):
(b, ch, h, w) = y.size()
features = y.view(b, ch, w * h)
features_t = features.transpose(1, 2)
gram = features.bmm(features_t) / (ch * h * w)
return gram
class View(Module):
......@@ -283,48 +119,6 @@ class Normalize(Module):
raise RuntimeError('unknown input type')
class Bottleneck(Module):
""" Pre-activation residual block
Identity Mapping in Deep Residual Networks
ref https://arxiv.org/abs/1603.05027
"""
def __init__(self, inplanes, planes, stride=1,
norm_layer=BatchNorm2d):
super(Bottleneck, self).__init__()
self.expansion = 4
if inplanes != planes*self.expansion or stride !=1 :
self.downsample = True
self.residual_layer = Conv2d(inplanes, planes * self.expansion,
kernel_size=1, stride=stride)
else:
self.downsample = False
conv_block = []
conv_block += [norm_layer(inplanes),
ReLU(inplace=True),
Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)]
conv_block += [norm_layer(planes),
ReLU(inplace=True),
Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)]
conv_block += [norm_layer(planes),
ReLU(inplace=True),
Conv2d(planes, planes * self.expansion, kernel_size=1,
stride=1, bias=False)]
self.conv_block = Sequential(*conv_block)
def forward(self, x):
if self.downsample:
residual = self.residual_layer(x)
else:
residual = x
if isinstance(x, Variable):
return residual + self.conv_block(x)
elif isinstance(x, tuple) or isinstance(x, list):
return sum_each(residual, self.conv_block(x))
else:
raise RuntimeError('unknown input type')
class PyramidPooling(Module):
"""
Reference:
......
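``GramMatrix`` (moved into this file above, and listed in its ``__all__``) computes per-sample style statistics; a short usage sketch::

    import torch
    from torch.autograd import Variable
    from encoding.nn import GramMatrix

    feat = Variable(torch.randn(4, 64, 32, 32))   # (B, C, H, W) featuremap
    gram = GramMatrix()(feat)                     # (B, C, C), normalized by C*H*W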
......@@ -10,17 +10,18 @@
import threading
import torch
import torch.nn as nn
from torch.nn import Module, Parameter
import torch.nn.functional as F
from torch.autograd import Function, Variable
from .._ext import encoding_lib
from ..functions import scaledL2, aggregate
from ..parallel import my_data_parallel
from ..functions import dilatedavgpool2d
__all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'GramMatrix']
__all__ = ['Encoding', 'EncodingShake', 'Inspiration', 'DilatedAvgPool2d', 'UpsampleConv2d']
class Encoding(nn.Module):
class Encoding(Module):
r"""
Encoding Layer: a learnable residual encoder over a 3D or 4D input that
is seen as a mini-batch.
......@@ -35,6 +36,9 @@ class Encoding(nn.Module):
Please see the `example of training Deep TEN <./experiments/texture.html>`_.
Reference:
Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
Args:
D: dimension of the features or feature channels
K: number of codewords
......@@ -51,22 +55,19 @@ class Encoding(nn.Module):
>>> import encoding
>>> import torch
>>> import torch.nn.functional as F
>>> from torch.autograd import Variable, gradcheck
>>> from torch.autograd import Variable
>>> B,C,H,W,K = 2,3,4,5,6
>>> X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), requires_grad=True)
>>> layer = encoding.Encoding(C,K).double().cuda()
>>> E = layer(X)
Reference:
Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
"""
def __init__(self, D, K):
super(Encoding, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
self.codewords = Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.scale = Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
def reset_params(self):
......@@ -93,7 +94,6 @@ class Encoding(nn.Module):
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights
#A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
# aggregate
E = aggregate(A, X, self.codewords)
......@@ -104,14 +104,14 @@ class Encoding(nn.Module):
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
class EncodingShake(nn.Module):
class EncodingShake(Module):
def __init__(self, D, K):
super(EncodingShake, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
self.codewords = Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.scale = Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
def reset_params(self):
......@@ -146,7 +146,7 @@ class EncodingShake(nn.Module):
# shake
self.shake()
# assignment weights
A = F.softmax(scaledL2(X, self.codewords, self.scale).view(B*N,-1), dim=1).view(B,N,K)
A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
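# softmax over dim=2 of the (B, N, K) assignments; equivalent to the old
# view(B*N, -1) / softmax(dim=1) / view(B, N, K) round-trip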
# aggregate
E = aggregate(A, X, self.codewords)
# shake
......@@ -159,7 +159,7 @@ class EncodingShake(nn.Module):
+ str(self.D) + ')'
class Inspiration(nn.Module):
class Inspiration(Module):
r"""
Inspiration Layer (CoMatch Layer) enables multi-style transfer in a feed-forward network by learning to match the target feature statistics during training.
This module is differentiable and can be inserted into a standard feed-forward network to be learned directly from the loss function without additional supervision.
......@@ -176,7 +176,7 @@ class Inspiration(nn.Module):
def __init__(self, C, B=1):
super(Inspiration, self).__init__()
# B equals 1 or the input mini-batch size
self.weight = nn.Parameter(torch.Tensor(1,C,C), requires_grad=True)
self.weight = Parameter(torch.Tensor(1,C,C), requires_grad=True)
# non-parameter buffer
self.G = Variable(torch.Tensor(B,C,C), requires_grad=True)
self.C = C
......@@ -198,16 +198,179 @@ class Inspiration(nn.Module):
+ 'N x ' + str(self.C) + ')'
class GramMatrix(nn.Module):
r""" Gram Matrix for a 4D convolutional featuremaps as a mini-batch
class DilatedAvgPool2d(Module):
r"""We provide Dilated Average Pooling for the dilation of Densenet as
in :class:`encoding.dilated.DenseNet`.
Reference::
We provide this code for a comming paper.
Applies a 2D average pooling over an input signal composed of several input planes.
In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
can be precisely described as:
.. math::
\mathcal{G} = \sum_{h=1}^{H_i}\sum_{w=1}^{W_i} \mathcal{F}_{h,w}\mathcal{F}_{h,w}^T
\begin{array}{ll}
out(b, c, h, w) = 1 / (kH * kW) *
\sum_{{m}=0}^{kH-1} \sum_{{n}=0}^{kW-1}
input(b, c, dH * h + m, dW * w + n)
\end{array}
| If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
for :attr:`padding` number of points
The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
- a single ``int`` -- in which case the same value is used for the height and width dimension
- a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
and the second `int` for the width dimension
Args:
kernel_size: the size of the window
stride: the stride of the window. Default value is :attr:`kernel_size`
padding: implicit zero padding to be added on both sides
dilation: the dilation parameter similar to Conv2d
Shape:
- Input: :math:`(N, C, H_{in}, W_{in})`
- Output: :math:`(N, C, H_{out}, W_{out})` where
:math:`H_{out} = floor((H_{in} + 2 * padding[0] - kernel\_size[0]) / stride[0] + 1)`
:math:`W_{out} = floor((W_{in} + 2 * padding[1] - kernel\_size[1]) / stride[1] + 1)`
Examples::
>>> # pool of square window of size=3, stride=2, dilation=2
>>> m = nn.DilatedAvgPool2d(3, stride=2, dilation=2)
>>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
>>> output = m(input)
"""
def forward(self, y):
(b, ch, h, w) = y.size()
features = y.view(b, ch, w * h)
features_t = features.transpose(1, 2)
gram = features.bmm(features_t) / (ch * h * w)
return gram
def __init__(self, kernel_size, stride=None, padding=0, dilation=1):
super(DilatedAvgPool2d, self).__init__()
self.kernel_size = kernel_size
self.stride = stride or kernel_size
self.padding = padding
self.dilation = dilation
def forward(self, input):
if isinstance(input, Variable):
return dilatedavgpool2d(input, self.kernel_size, self.stride,
self.padding, self.dilation)
elif isinstance(input, tuple) or isinstance(input, list):
return my_data_parallel(self, input)
else:
raise RuntimeError('unknown input type')
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ 'size=' + str(self.kernel_size) \
+ ', stride=' + str(self.stride) \
+ ', padding=' + str(self.padding) \
+ ', dilation=' + str(self.dilation) + ')'
class UpsampleConv2d(Module):
r"""
To avoid the checkerboard artifacts of standard fractionally-strided convolution, we adopt an integer-stride convolution that produces :math:`2\times 2` outputs for each convolutional window.
.. image:: _static/img/upconv.png
:width: 50%
:align: center
Reference:
Hang Zhang and Kristin Dana. "Multi-style Generative Network for Real-time Transfer." *arXiv preprint arXiv:1703.06953 (2017)*
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
output_padding (int or tuple, optional): Zero-padding added to one side of the output. Default: 0
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional): If True, adds a learnable bias to the output. Default: True
dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
scale_factor (int): scaling factor for upsampling convolution. Default: 1
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`
- Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
:math:`H_{out} = scale * (H_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0] + output\_padding[0]`
:math:`W_{out} = scale * (W_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1] + output\_padding[1]`
Attributes:
weight (Tensor): the learnable weights of the module of shape
(in_channels, scale * scale * out_channels, kernel_size[0], kernel_size[1])
bias (Tensor): the learnable bias of the module of shape (scale * scale * out_channels)
Examples::
>>> # With square kernels and equal stride
>>> m = nn.UpsampleConv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nn.UpsampleConv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> input = autograd.Variable(torch.randn(1, 16, 12, 12))
>>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
>>> upsample = nn.UpsampleConv2d(16, 16, 3, stride=2, padding=1)
>>> h = downsample(input)
>>> h.size()
torch.Size([1, 16, 6, 6])
>>> output = upsample(h, output_size=input.size())
>>> output.size()
torch.Size([1, 16, 12, 12])
"""
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, scale_factor =1,
bias=True):
super(UpsampleConv2d, self).__init__()
kernel_size = _pair(kernel_size)
stride = _pair(stride)
padding = _pair(padding)
dilation = _pair(dilation)
if in_channels % groups != 0:
raise ValueError('in_channels must be divisible by groups')
if out_channels % groups != 0:
raise ValueError('out_channels must be divisible by groups')
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
self.scale_factor = scale_factor
self.weight = Parameter(torch.Tensor(
out_channels * scale_factor * scale_factor,
in_channels // groups, *kernel_size))
if bias:
self.bias = Parameter(torch.Tensor(out_channels *
scale_factor * scale_factor))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input):
if isinstance(input, Variable):
out = F.conv2d(input, self.weight, self.bias, self.stride,
self.padding, self.dilation, self.groups)
return F.pixel_shuffle(out, self.scale_factor)
elif isinstance(input, tuple) or isinstance(input, list):
return my_data_parallel(self, input)
else:
raise RuntimeError('unknown input type')
......@@ -27,7 +27,8 @@ __all__ = ['BatchNorm1d', 'BatchNorm2d']
class BatchNorm1d(Module):
r"""Synchronized Batch Normalization 1d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
`Implementation ideas <./notes/syncbn.html>`_. Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
Reference::
We provide this code for a coming paper.
......@@ -221,7 +222,8 @@ class BatchNorm1d(Module):
class BatchNorm2d(Module):
r"""Synchronized Batch Normalization 2d
Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`
`Implementation ideas <./notes/syncbn.html>`_. Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`.
Reference::
We provide this code for a coming paper.
......
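As the docstrings above note, these layers should be paired with :class:`encoding.parallel.SelfDataParallel`; a hedged sketch (the wrapper is assumed to take a module, like ``nn.DataParallel``)::

    import encoding
    # net uses encoding.nn.BatchNorm2d layers internally
    net = encoding.parallel.SelfDataParallel(net)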
......@@ -19,6 +19,9 @@ from torch.nn.parallel.scatter_gather import scatter, scatter_kwargs, \
from torch.nn.parallel.replicate import replicate
from torch.nn.parallel.parallel_apply import parallel_apply
__all__ = ['AllReduce', 'Broadcast', 'ModelDataParallel',
'CriterionDataParallel', 'SelfDataParallel']
def nccl_all_reduce(inputs):
# TODO, figure out why nccl all_reduce doesn't work for gradcheck
input_size = inputs[0].size()
......
......@@ -16,6 +16,8 @@ import time
import math
import tqdm
__all__ = ['get_optimizer', 'LR_Scheduler', 'save_checkpoint', 'progress_bar']
def get_optimizer(args, model, diff_LR=True):
"""
Returns an optimizer for given model,
......@@ -41,41 +43,56 @@ def get_optimizer(args, model, diff_LR=True):
return optimizer
class CosLR_Scheduler(object):
"""Cosine Learning Rate Scheduler
class LR_Scheduler(object):
"""Learning Rate Scheduler
Step mode: ``lr = baselr * 0.1 ^ {floor((epoch-1) / lr_step)}``
.. math::
lr = baselr * 0.5 * (1 + cos(\pi \cdot iter/maxiter))
Cosine mode: ``lr = baselr * 0.5 * (1 + cos(pi * iter/maxiter))``
Poly mode: ``lr = baselr * (1 - iter/maxiter) ^ 0.9``
Args:
args: base learning rate :attr:`args.lr`, number of epochs :attr:`args.epochs`
args: :attr:`args.lr_scheduler` lr scheduler mode (`step`, `cos`, `poly`), :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs, :attr:`args.lr_step` step size for `step` mode
niters: number of iterations per epoch
"""
def __init__(self, args, niters):
def __init__(self, args, niters=0):
self.mode = args.lr_scheduler
print('Using {} LR Scheduler!'.format(self.mode))
self.lr = args.lr
self.niters = niters
self.N = args.epochs * niters
if self.mode == 'step':
self.lr_step = args.lr_step
else:
self.niters = niters
self.N = args.epochs * niters
self.epoch = -1
def __call__(self, optimizer, i, epoch, best_pred):
T = (epoch - 1) * self.niters + i
lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
def __call__(self, optimizer, i, epoch):
if self.mode == 'cos':
T = (epoch - 1) * self.niters + i
lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi))
elif self.mode == 'poly':
T = (epoch - 1) * self.niters + i
lr = self.lr * pow((1 - 1.0 * T / self.N), 0.9)
elif self.mode == 'step':
lr = self.lr * (0.1 ** ((epoch - 1) // self.lr_step))
else:
raise RuntimeError('Unknown LR scheduler!')
if epoch > self.epoch:
print('\n=>Epochs %i, learning rate = %.4f, previous best ='\
'%.3f%%' % (epoch, lr, best_pred))
print('\n=>Epochs %i, learning rate = %.4f' % (
epoch, lr))
self.epoch = epoch
self._adjust_learning_rate(optimizer, lr)
def _adjust_learning_rate(self, optimizer, lr):
if len(optimizer.param_groups) == 1:
optimizer.param_groups[0]['lr'] = lr
elif len(optimizer.param_groups) == 2:
else:
# enlarge the lr at the head
optimizer.param_groups[0]['lr'] = lr
optimizer.param_groups[1]['lr'] = lr * 10
else:
raise RuntimeError('unsupported number of param groups: {}' \
.format(len(optimizer.param_groups)))
for i in range(1,len(optimizer.param_groups)):
optimizer.param_groups[i]['lr'] = lr * 10
# refer to https://github.com/xternalz/WideResNet-pytorch
......
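For reference, the rewritten scheduler is driven once per iteration; a minimal training-loop sketch (``args`` fields as in the docstring; ``train_loader`` and ``optimizer`` assumed)::

    scheduler = LR_Scheduler(args, niters=len(train_loader))
    for epoch in range(1, args.epochs + 1):
        for i, (input, target) in enumerate(train_loader):
            scheduler(optimizer, i, epoch)  # cos/poly update per iteration; step per epoch
            # ... forward, backward, optimizer.step() ...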
......@@ -10,8 +10,6 @@
import io
import os
import re
import sys
import subprocess
from setuptools import setup, find_packages
......@@ -55,6 +53,10 @@ setup(
# Exclude the build files.
packages=find_packages(exclude=["build"]),
# Package where to put the extensions. Has to be a prefix of build.py.
package_data={'encoding': [
'lib/*.so*', 'lib/*.dylib*',
'kernel/*.h', 'kernel/generic/*h',
]},
ext_package="",
# Extensions to compile.
cffi_modules=[
......