Commit 30be3799 authored by Hang Zhang

update docs

parent 79d52ff9
.. role:: hidden
:class: hidden-section
encoding
========
.. automodule:: encoding
Modules
-------
.. automodule:: encoding.modules
.. currentmodule:: encoding.modules
.. currentmodule:: encoding
:hidden:`Encoding`
~~~~~~~~~~~~~~~~~~
.. autoclass:: Encoding
:members:
.. automodule:: encoding.functions
:hidden:`Aggregate`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Aggregate
:members:
Functions
---------
:hidden:`aggregate`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: aggregate
:members:
.. automodule:: encoding.syncbn
@@ -6,7 +6,7 @@
:github_url: https://github.com/zhanghang1989/PyTorch-Encoding
Encoding documentation
===================================
======================
PyTorch-Encoding is a GPU-optimized PyTorch package that provides the Encoding Layer and Synchronized Batch Normalization.
@@ -18,7 +18,7 @@ PyTorch-Encoding is an optimized PyTorch package using GPU, including Encoding L
notes/*
.. toctree::
:maxdepth: 1
:maxdepth: 2
:caption: Package Reference
encoding
@@ -8,416 +8,7 @@
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import threading
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function, Variable
from ._ext import encoding_lib
class aggregateE(Function):
def forward(self, A, X, C):
# A \in(BxNxK), X \in(BxNxD), C \in(KxD) => E \in(BxKxD)
self.save_for_backward(A, X, C)
B, N, K = A.size()
D = X.size(2)
with torch.cuda.device_of(A):
E = A.new(B,K,D)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregateE_forward(E, A, X, C)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregateE_forward(E, A, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return E
def backward(self, gradE):
A, X, C = self.saved_tensors
with torch.cuda.device_of(A):
gradA = A.new().resize_as_(A)
gradX = A.new().resize_as_(X)
gradC = A.new().resize_as_(C)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregateE_backward(gradA,
gradE, A, X, C)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregateE_backward(gradA,
gradE, A, X, C)
else:
raise RuntimeError('Unimplemented data type!')
gradX.copy_(torch.bmm(A, gradE))
gradC.copy_((-gradE*A.sum(1).unsqueeze(2)).sum(0))
return gradA, gradX, gradC
class ScaledL2(Function):
def forward(self, X, C, S):
B,N,D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
SL = X.new(B,N,K)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_scaledl2_forward(SL, X, C, S)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_scaledl2_forward(SL, X, C, S)
else:
raise RuntimeError('Unimplemented data type!')
self.save_for_backward(X, C, S, SL)
return SL
def backward(self, gradSL):
X, C, S, SL = self.saved_tensors
K = C.size(0)
with torch.cuda.device_of(X):
gradX = X.new().resize_as_(X)
gradC = X.new().resize_as_(C)
gradS = X.new().resize_as_(S)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_scaledl2_backward(gradSL,
gradX, gradC, X, C, S)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_scaledl2_backward(gradSL,
gradX, gradC, X, C, S)
else:
raise RuntimeError('Unimplemented data type!')
gradS.copy_((gradSL*(SL/S.view(1,1,K))).sum(0).sum(0))
return gradX, gradC, gradS
class Encoding(nn.Module):
def __init__(self, D, K):
super(Encoding, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
def reset_params(self):
std1 = 1./((self.K*self.D)**0.5)
std2 = 1./(self.K**0.5)
self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2)
def forward(self, X):
# input X is a 4D tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights
A = F.softmax(ScaledL2()(X, self.codewords, self.scale))
# aggregate
E = aggregateE()(A, X, self.codewords)
return E
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class aggregate(Function):
def forward(self, A, R):
# A \in(BxNxK), R \in(BxNxKxD) => E \in(BxKxD)
self.save_for_backward(A, R)
B, N, K, D = R.size()
with torch.cuda.device_of(A):
E = A.new(B,K,D)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregate_forward(E, A, R)
else:
raise RuntimeError('Unimplemented data type!')
return E
def backward(self, gradE):
A, R = self.saved_tensors
with torch.cuda.device_of(A):
gradA = A.new().resize_as_(A)
gradR = R.new().resize_as_(R)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregate_backward(gradA,
gradR, gradE, A, R)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregate_backward(gradA,
gradR, gradE, A, R)
else:
raise RuntimeError('Unimplemented data type!')
return gradA, gradR
class residual(Function):
def forward(self, X, C):
# X \in(BxNxD), C \in(KxD) => R \in(BxNxKxD)
B, N, D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
R = X.new(B,N,K,D)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_residual_forward(R, X, C)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_residual_forward(R, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return R
def backward(self, gradR):
B, N, K, D = gradR.size()
with torch.cuda.device_of(gradR):
gradX = gradR.new(B,N,D)
gradD = gradR.new(K,D)
if isinstance(gradR, torch.cuda.FloatTensor):
with torch.cuda.device_of(gradR):
encoding_lib.Encoding_Float_residual_backward(gradR,
gradX, gradD)
elif isinstance(gradR, torch.cuda.DoubleTensor):
with torch.cuda.device_of(gradR):
encoding_lib.Encoding_Double_residual_backward(gradR,
gradX, gradD)
else:
raise RuntimeError('Unimplemented data type!')
return gradX, gradD
class square_squeeze(Function):
def forward(self, R):
B, N, K, D = R.size()
with torch.cuda.device_of(R):
L = R.new(B,N,K)
if isinstance(R, torch.cuda.FloatTensor):
with torch.cuda.device_of(R):
encoding_lib.Encoding_Float_squaresqueeze_forward(L, R)
elif isinstance(R, torch.cuda.DoubleTensor):
with torch.cuda.device_of(R):
encoding_lib.Encoding_Double_squaresqueeze_forward(L, R)
else:
raise RuntimeError('Unimplemented data type!')
self.save_for_backward(L, R)
return L
def backward(self, gradL):
L, R = self.saved_tensors
B, N, K, D = R.size()
with torch.cuda.device_of(R):
gradR = R.new(B,N,K,D)
if isinstance(R, torch.cuda.FloatTensor):
with torch.cuda.device_of(gradL):
encoding_lib.Encoding_Float_squaresqueeze_backward(gradL,
gradR, R)
elif isinstance(R, torch.cuda.DoubleTensor):
with torch.cuda.device_of(gradL):
encoding_lib.Encoding_Double_squaresqueeze_backward(gradL,
gradR, R)
else:
raise RuntimeError('Unimplemented data type!')
return gradR
def assign(R, S):
L = square_squeeze()(R)
K = S.size(0)
SL = L * S.view(1,1,K)
return F.softmax(SL)
class Aggregate(nn.Module):
def forward(self, A, R):
return aggregate()(A, R)
class EncodingP(nn.Module):
def __init__(self, D, K):
super(EncodingP, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
def reset_params(self):
std1 = 1./((self.K*self.D)**0.5)
std2 = 1./(self.K**0.5)
self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2)
def forward(self, X):
# input X is a 4D tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2)
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2)
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# calculate residuals
R = residual()(X.contiguous(), self.codewords)
# assignment weights
A = assign(R, self.scale)
# aggregate
E = aggregate()(A, R)
return E
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class sum_square(Function):
def forward(ctx, input):
ctx.save_for_backward(input)
B,C,H,W = input.size()
with torch.cuda.device_of(input):
xsum = input.new().resize_(C).zero_()
xsquare = input.new().resize_(C).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_sum_square_Forward(
input.view(B,C,-1), xsum, xsquare)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_sum_square_Forward(
input.view(B,C,-1), xsum, xsquare)
else:
raise RuntimeError('Unimplemented data type!')
return xsum, xsquare
def backward(ctx, gradSum, gradSquare):
input, = ctx.saved_tensors
B,C,H,W = input.size()
with torch.cuda.device_of(input):
gradInput = input.new().resize_(B,C,H*W).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_sum_square_Backward(
gradInput, input.view(B,C,-1), gradSum, gradSquare)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_sum_square_Backward(
gradInput, input.view(B,C,-1), gradSum, gradSquare)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput.view(B,C,H,W)
class batchnormtrain(Function):
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
assert(input.dim()==3)
with torch.cuda.device_of(input):
invstd = 1.0 / std
output = input.new().resize_as_(input)
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
input, gamma, beta, mean, std = ctx.saved_tensors
invstd = 1.0 / std
with torch.cuda.device_of(input):
gradInput = gradOutput.new().resize_as_(input).zero_()
gradGamma = gradOutput.new().resize_as_(gamma).zero_()
gradBeta = gradOutput.new().resize_as_(beta).zero_()
gradMean = gradOutput.new().resize_as_(mean).zero_()
gradStd = gradOutput.new().resize_as_(std).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
True)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
True)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
class batchnormeval(Function):
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
assert(input.dim()==3)
with torch.cuda.device_of(input):
invstd = 1.0 / std
output = input.new().resize_as_(input)
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
input, gamma, beta, mean, std = ctx.saved_tensors
invstd = 1.0 / std
with torch.cuda.device_of(input):
gradInput = gradOutput.new().resize_as_(input).zero_()
gradGamma = gradOutput.new().resize_as_(gamma).zero_()
gradBeta = gradOutput.new().resize_as_(beta).zero_()
gradMean = gradOutput.new().resize_as_(mean).zero_()
gradStd = gradOutput.new().resize_as_(std).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
False)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
False)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
from .functions import *
from .modules import *
from .syncbn import *
from .aggregate import *
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import threading
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function, Variable
from .._ext import encoding_lib
class aggregate(Function):
r"""
Aggregation operation: aggregates the residuals of the inputs (:math:`X`) with respect to the codewords (:math:`C`), weighted by the assignment weights (:math:`A`).
.. math::
e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - c_k)
Shape:
- Input: :math:`A\in\mathcal{R}^{B\times N\times K}` :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` (where :math:`B` is the batch size, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
- Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
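Examples:
A minimal gradcheck sketch of the expected call pattern; it mirrors the accompanying tests and assumes a CUDA device with double-precision tensors.
>>> import encoding
>>> import torch
>>> from torch.autograd import Variable, gradcheck
>>> B,N,K,D = 2,3,4,5
>>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True)
>>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True)
>>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True)
>>> gradcheck(encoding.aggregate(), (A, X, C), eps=1e-6, atol=1e-4)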
"""
def forward(self, A, X, C):
# A \in(BxNxK), X \in(BxNxD), C \in(KxD) => E \in(BxKxD)
self.save_for_backward(A, X, C)
B, N, K = A.size()
D = X.size(2)
with torch.cuda.device_of(A):
E = A.new(B,K,D)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregateE_forward(E, A, X, C)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregateE_forward(E, A, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return E
def backward(self, gradE):
A, X, C = self.saved_tensors
with torch.cuda.device_of(A):
gradA = A.new().resize_as_(A)
gradX = A.new().resize_as_(X)
gradC = A.new().resize_as_(C)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregateE_backward(gradA,
gradE, A, X, C)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregateE_backward(gradA,
gradE, A, X, C)
else:
raise RuntimeError('Unimplemented data type!')
gradX.copy_(torch.bmm(A, gradE))
gradC.copy_((-gradE*A.sum(1).unsqueeze(2)).sum(0))
return gradA, gradX, gradC
class ScaledL2(Function):
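r"""
Scaled squared L2 distance: computes :math:`SL_{ik} = s_k\,\|x_i - c_k\|^2` for features :math:`X\in\mathcal{R}^{B\times N\times D}`, codewords :math:`C\in\mathcal{R}^{K\times D}`, and per-codeword scales :math:`S\in\mathcal{R}^{K}`; its softmax gives the assignment weights used by :class:`Encoding`.
"""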
def forward(self, X, C, S):
B,N,D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
SL = X.new(B,N,K)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_scaledl2_forward(SL, X, C, S)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_scaledl2_forward(SL, X, C, S)
else:
raise RuntimeError('Unimplemented data type!')
self.save_for_backward(X, C, S, SL)
return SL
def backward(self, gradSL):
X, C, S, SL = self.saved_tensors
K = C.size(0)
with torch.cuda.device_of(X):
gradX = X.new().resize_as_(X)
gradC = X.new().resize_as_(C)
gradS = X.new().resize_as_(S)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_scaledl2_backward(gradSL,
gradX, gradC, X, C, S)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_scaledl2_backward(gradSL,
gradX, gradC, X, C, S)
else:
raise RuntimeError('Unimplemented data type!')
gradS.copy_((gradSL*(SL/S.view(1,1,K))).sum(0).sum(0))
return gradX, gradC, gradS
class aggregateP(Function):
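r"""
Residual-based aggregation kept for :class:`Aggregate` and the deprecated ``EncodingP``: aggregates explicit residuals :math:`R\in\mathcal{R}^{B\times N\times K\times D}` with assignment weights :math:`A\in\mathcal{R}^{B\times N\times K}` into :math:`E\in\mathcal{R}^{B\times K\times D}`.
"""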
def forward(self, A, R):
# A \in(BxNxK), R \in(BxNxKxD) => E \in(BxKxD)
self.save_for_backward(A, R)
B, N, K, D = R.size()
with torch.cuda.device_of(A):
E = A.new(B,K,D)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregate_forward(E, A, R)
else:
raise RuntimeError('Unimplemented data type!')
return E
def backward(self, gradE):
A, R = self.saved_tensors
with torch.cuda.device_of(A):
gradA = A.new().resize_as_(A)
gradR = R.new().resize_as_(R)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregate_backward(gradA,
gradR, gradE, A, R)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregate_backward(gradA,
gradR, gradE, A, R)
else:
raise RuntimeError('Unimplemented data type!')
return gradA, gradR
class residual(Function):
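r"""
Pairwise residuals :math:`r_{ik} = x_i - c_k` between features :math:`X\in\mathcal{R}^{B\times N\times D}` and codewords :math:`C\in\mathcal{R}^{K\times D}`, giving :math:`R\in\mathcal{R}^{B\times N\times K\times D}`.
"""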
def forward(self, X, C):
# X \in(BxNxD), C \in(KxD) => R \in(BxNxKxD)
B, N, D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
R = X.new(B,N,K,D)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_residual_forward(R, X, C)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_residual_forward(R, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return R
def backward(self, gradR):
B, N, K, D = gradR.size()
with torch.cuda.device_of(gradR):
gradX = gradR.new(B,N,D)
gradD = gradR.new(K,D)
if isinstance(gradR, torch.cuda.FloatTensor):
with torch.cuda.device_of(gradR):
encoding_lib.Encoding_Float_residual_backward(gradR,
gradX, gradD)
elif isinstance(gradR, torch.cuda.DoubleTensor):
with torch.cuda.device_of(gradR):
encoding_lib.Encoding_Double_residual_backward(gradR,
gradX, gradD)
else:
raise RuntimeError('Unimplemented data type!')
return gradX, gradD
class square_squeeze(Function):
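r"""
Squared norms of the residuals: reduces :math:`R\in\mathcal{R}^{B\times N\times K\times D}` to :math:`L_{ik}=\|r_{ik}\|^2` of shape :math:`B\times N\times K`.
"""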
def forward(self, R):
B, N, K, D = R.size()
with torch.cuda.device_of(R):
L = R.new(B,N,K)
if isinstance(R, torch.cuda.FloatTensor):
with torch.cuda.device_of(R):
encoding_lib.Encoding_Float_squaresqueeze_forward(L, R)
elif isinstance(R, torch.cuda.DoubleTensor):
with torch.cuda.device_of(R):
encoding_lib.Encoding_Double_squaresqueeze_forward(L, R)
else:
raise RuntimeError('Unimplemented data type!')
self.save_for_backward(L, R)
return L
def backward(self, gradL):
L, R = self.saved_tensors
B, N, K, D = R.size()
with torch.cuda.device_of(R):
gradR = R.new(B,N,K,D)
if isinstance(R, torch.cuda.FloatTensor):
with torch.cuda.device_of(gradL):
encoding_lib.Encoding_Float_squaresqueeze_backward(gradL,
gradR, R)
elif isinstance(R, torch.cuda.DoubleTensor):
with torch.cuda.device_of(gradL):
encoding_lib.Encoding_Double_squaresqueeze_backward(gradL,
gradR, R)
else:
raise RuntimeError('Unimplemented data type!')
return gradR
def assign(R, S):
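r"""
Soft-assignment weights from residuals: given :math:`R\in\mathcal{R}^{B\times N\times K\times D}` and per-codeword scales :math:`S\in\mathcal{R}^{K}`, returns :math:`A_{ik}=\mathrm{softmax}_k\left(s_k\,\|r_{ik}\|^2\right)` of shape :math:`B\times N\times K`.
"""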
L = square_squeeze()(R)
K = S.size(0)
SL = L * S.view(1,1,K)
return F.softmax(SL)
from .encoding import *
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import threading
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function, Variable
from .._ext import encoding_lib
from ..functions import *
class Encoding(nn.Module):
r"""
Encoding Layer: a learnable residual encoder over a 3D or 4D input that is seen as a mini-batch.
.. math::
a_{ik} = \frac{\exp(-\beta\|x_{i}-c_k\|^2)}{\sum_{j=1}^K \exp(-\beta\|x_{i}-c_j\|^2)}
Args:
D: dimension of the features (number of feature channels)
K: number of codewords
Shape:
- Input: :math:`X\in\mathcal{R}^{B\times N\times D}` or :math:`\mathcal{R}^{B\times D\times H\times W}` (where :math:`B` is the batch size and :math:`N` is the total number of features, i.e. :math:`H\times W` for 4D input.)
- Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
Attributes:
codewords (Tensor): the learnable codewords of shape (:math:`K\times D`)
scale (Tensor): the learnable scale factor of visual centers
Examples:
>>> import encoding
>>> import torch
>>> import torch.nn.functional as F
>>> from torch.autograd import Variable, gradcheck
>>> B,C,H,W,K = 2,3,4,5,6
>>> X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), requires_grad=True)
>>> layer = encoding.Encoding(C,K).double().cuda()
>>> E = layer(X)
Reference:
Zhang, Hang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
"""
def __init__(self, D, K):
super(Encoding, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
def reset_params(self):
std1 = 1./((self.K*self.D)**0.5)
std2 = 1./(self.K**0.5)
self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2)
def forward(self, X):
# input X is a 4D tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights
A = F.softmax(ScaledL2()(X, self.codewords, self.scale))
# aggregate
E = aggregate()(A, X, self.codewords)
return E
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
class Aggregate(nn.Module):
r"""
Aggregation operation: aggregates the residuals (:math:`R`) with the assignment weights (:math:`A`).
.. math::
e_{k} = \sum_{i=1}^{N} a_{ik} (r_{ik})
Shape:
- Input: :math:`A\in\mathcal{R}^{B\times N\times K}` :math:`R\in\mathcal{R}^{B\times N\times K\times D}` (where :math:`B` is the batch size, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
- Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
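Examples:
A minimal usage sketch following the shapes used in the accompanying tests; it assumes a CUDA device, and ``Aggregate`` is referenced through the top-level package as in the docs.
>>> import encoding
>>> import torch
>>> from torch.autograd import Variable
>>> B,N,K,D = 2,3,4,5
>>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True)
>>> R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5), requires_grad=True)
>>> E = encoding.Aggregate()(A, R)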
"""
def forward(self, A, R):
return aggregateP()(A, R)
class EncodingP(nn.Module):
def __init__(self, D, K):
super(EncodingP, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
print('EncodingP is deprecated, please use Encoding.')
def reset_params(self):
std1 = 1./((self.K*self.D)**0.5)
std2 = 1./(self.K**0.5)
self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2)
def forward(self, X):
# input X is a 4D tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2)
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2)
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# calculate residuals
R = residual()(X.contiguous(), self.codewords)
# assignment weights
A = assign(R, self.scale)
# aggregate
E = aggregateP()(A, R)
return E
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import threading
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function, Variable
from ._ext import encoding_lib
class sum_square(Function):
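r"""
Per-channel sum and sum of squares of a :math:`B\times C\times H\times W` input, returned as two tensors of size :math:`C`; these are the batch statistics gathered for synchronized batch normalization.
"""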
def forward(ctx, input):
ctx.save_for_backward(input)
B,C,H,W = input.size()
with torch.cuda.device_of(input):
xsum = input.new().resize_(C).zero_()
xsquare = input.new().resize_(C).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_sum_square_Forward(
input.view(B,C,-1), xsum, xsquare)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_sum_square_Forward(
input.view(B,C,-1), xsum, xsquare)
else:
raise RuntimeError('Unimplemented data type!')
return xsum, xsquare
def backward(ctx, gradSum, gradSquare):
input, = ctx.saved_tensors
B,C,H,W = input.size()
with torch.cuda.device_of(input):
gradInput = input.new().resize_(B,C,H*W).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_sum_square_Backward(
gradInput, input.view(B,C,-1), gradSum, gradSquare)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_sum_square_Backward(
gradInput, input.view(B,C,-1), gradSum, gradSquare)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput.view(B,C,H,W)
class batchnormtrain(Function):
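r"""
Applies batch normalization to a 3D input (:math:`B\times C\times N`) using externally supplied per-channel ``mean`` and ``std`` together with the affine parameters ``gamma`` and ``beta``. Training-mode variant: the backward pass also returns gradients with respect to ``mean`` and ``std``, so statistics computed outside this function (e.g. across devices) receive gradients.
"""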
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
assert(input.dim()==3)
with torch.cuda.device_of(input):
invstd = 1.0 / std
output = input.new().resize_as_(input)
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
input, gamma, beta, mean, std = ctx.saved_tensors
invstd = 1.0 / std
with torch.cuda.device_of(input):
gradInput = gradOutput.new().resize_as_(input).zero_()
gradGamma = gradOutput.new().resize_as_(gamma).zero_()
gradBeta = gradOutput.new().resize_as_(beta).zero_()
gradMean = gradOutput.new().resize_as_(mean).zero_()
gradStd = gradOutput.new().resize_as_(std).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
True)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
True)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
class batchnormeval(Function):
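r"""
Evaluation-mode counterpart of :class:`batchnormtrain`: the forward pass is identical, but the backward kernel is invoked with the training flag set to ``False`` (the supplied statistics are expected to be fixed running estimates).
"""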
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
assert(input.dim()==3)
with torch.cuda.device_of(input):
invstd = 1.0 / std
output = input.new().resize_as_(input)
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
input, gamma, beta, mean, std = ctx.saved_tensors
invstd = 1.0 / std
with torch.cuda.device_of(input):
gradInput = gradOutput.new().resize_as_(input).zero_()
gradGamma = gradOutput.new().resize_as_(gamma).zero_()
gradBeta = gradOutput.new().resize_as_(beta).zero_()
gradMean = gradOutput.new().resize_as_(mean).zero_()
gradStd = gradOutput.new().resize_as_(std).zero_()
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
False)
elif isinstance(input, torch.cuda.DoubleTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Double_batchnorm_Backward(
gradOutput, input, gradInput, gradGamma, gradBeta,
mean, invstd, gamma, beta, gradMean, gradStd,
False)
else:
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
@@ -13,18 +13,18 @@ import torch
import torch.nn.functional as F
from torch.autograd import Variable, gradcheck
def test_aggregate():
def test_aggregateP():
B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
requires_grad=True)
R = Variable(torch.cuda.DoubleTensor(B,N,K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (A, R)
test = gradcheck(encoding.aggregate(), input, eps=1e-6, atol=1e-4)
test = gradcheck(encoding.aggregateP(), input, eps=1e-6, atol=1e-4)
print('Testing aggregateP(): {}'.format(test))
def test_aggregateE():
def test_aggregate():
B,N,K,D = 2,3,4,5
A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
requires_grad=True)
@@ -33,8 +33,8 @@ def test_aggregateE():
C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
requires_grad=True)
input = (A, X, C)
test = gradcheck(encoding.aggregateE(), input, eps=1e-6, atol=1e-4)
print('Testing aggregateE(): {}'.format(test))
test = gradcheck(encoding.aggregate(), input, eps=1e-6, atol=1e-4)
print('Testing aggregate(): {}'.format(test))
def test_ScaledL2():
@@ -61,10 +61,10 @@ def test_assign():
R = encoding.residual()(X, C)
A1 = encoding.assign(R, S)
E1 = encoding.aggregate()(A1, R)
E1 = encoding.aggregateP()(A1, R)
A2 = F.softmax(encoding.ScaledL2()(X,C,S))
E2 = encoding.aggregateE()(A2, X, C)
E2 = encoding.aggregate()(A2, X, C)
print('E1', E1)
print('E2', E2)
@@ -120,7 +120,7 @@ def test_sum_square():
if __name__ == '__main__':
test_aggregateE()
test_aggregateP()
test_ScaledL2()
test_encoding()
test_aggregate()
@@ -129,3 +129,4 @@ if __name__ == '__main__':
test_square_squeeze()
test_encodingP()
test_sum_square()