"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "8d477daed507801a50dc9f285c982b1c8051ae2d"
Commit 528210a3 authored by Hang Zhang's avatar Hang Zhang
Browse files

update docs

parent 30be3799
@@ -72,8 +72,7 @@ author = 'Hang Zhang'
# built documents.
#
# The short X.Y version.
-# TODO: change to [:2] at v1.0
-version = '0.0.1'
+version = 'master (0.0.1)'
# The full version, including alpha/beta/rc tags.
# TODO: verify this works as expected
release = 'master'
@@ -115,7 +114,7 @@ html_theme_options = {
    'logo_only': True,
}
-html_logo = '_static/img/favicon.png'
+html_logo = '_static/img/icon.png'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
...
.. role:: hidden
    :class: hidden-section
-encoding
-========
+Encoding Layer
+==============
.. automodule:: encoding
@@ -33,4 +33,16 @@ Functions
.. autoclass:: aggregate
    :members:
:hidden:`scaledL2`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: scaledL2
:members:
:hidden:`residual`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: residual
:members:
@@ -8,7 +8,7 @@
Encoding documentation
======================
-PyTorch-Encoding is an optimized PyTorch package using GPU, including Encoding Layer, Synchronized Batch Normalization.
+PyTorch-Encoding is an optimized PyTorch package using the GPU, including the Encoding Layer and Multi-GPU Synchronized Batch Normalization.
.. toctree::
    :glob:
@@ -18,10 +18,11 @@
    notes/*
.. toctree::
-    :maxdepth: 2
+    :maxdepth: 3
    :caption: Package Reference
    encoding
syncbn
Indices and tables
...
@@ -7,3 +7,104 @@ which is extending :mod:`torch.nn` and
Torch C and CUDA Backend
------------------------
Consider, as an example, the residual operation (over a mini-batch):
.. math::
r_{ik} = x_i - c_k
where the inputs are :math:`X=\{x_1,...,x_N\}` and :math:`C=\{c_1,...,c_K\}`, and the output is :math:`R=\{r_{ik}\}`.
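For reference (an editor's sketch, not part of the original notes), the same operation can be written in a few lines of plain PyTorch using ``expand``-based broadcasting, which is handy for checking the CUDA kernel against a known-good result::

    def residual_reference(X, C):
        # X: (B, N, D) input tensor, C: (K, D) codewords -> R: (B, N, K, D)
        B, N, D = X.size()
        K = C.size(0)
        # expand X over the codeword axis and C over the batch/feature axes
        return X.unsqueeze(2).expand(B, N, K, D) - C.view(1, 1, K, D).expand(B, N, K, D)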
- Add the CUDA kernel function and expose a C API in the generic file ``encoding/kernel/generic/encoding_kernel.c``, using Torch generic files::
__global__ void Encoding_(Residual_Forward_kernel) (
THCDeviceTensor<real, 4> R,
THCDeviceTensor<real, 3> X,
THCDeviceTensor<real, 2> D)
/*
* residual forward kernel function
*/
{
/* declarations of the variables */
int b, k, d, i, K;
/* Get the index and channels */
b = blockIdx.z;
d = blockIdx.x * blockDim.x + threadIdx.x;
i = blockIdx.y * blockDim.y + threadIdx.y;
K = R.getSize(2);
/* boundary check for output */
if (d >= X.getSize(2) || i >= X.getSize(1)) return;
/* main operation */
for(k=0; k<K; k++) {
R[b][i][k][d] = X[b][i][d].ldg() - D[k][d].ldg();
}
}
void Encoding_(Residual_Forward)(
THCState *state, THCTensor *R_, THCTensor *X_, THCTensor *D_)
/*
* residual forward
*/
{
/* Check the GPU index and tensor dims*/
THCTensor_(checkGPU)(state, 3, R_, X_, D_);
if (THCTensor_(nDimension)(state, R_) != 4 ||
THCTensor_(nDimension)(state, X_) != 3 ||
THCTensor_(nDimension)(state, D_) != 2)
THError("Encoding: incorrect input dims. \n");
/* Device tensors */
THCDeviceTensor<real, 4> R = devicetensor<4>(state, R_);
THCDeviceTensor<real, 3> X = devicetensor<3>(state, X_);
THCDeviceTensor<real, 2> D = devicetensor<2>(state, D_);
/* kernel function */
cudaStream_t stream = THCState_getCurrentStream(state);
dim3 threads(16, 16);
dim3 blocks(X.getSize(2)/16+1, X.getSize(1)/16+1,
X.getSize(0));
Encoding_(Residual_Forward_kernel)<<<blocks, threads, 0, stream>>>(R, X, D);
THCudaCheck(cudaGetLastError());
}
- Add the corresponding function header to ``encoding/kernel/generic/encoding_kernel.h``::
void Encoding_(Residual_Forward)(
THCState *state, THCTensor *R_, THCTensor *X_, THCTensor *D_);
- Add a CFFI function to ``encoding/src/generic/encoding_generic.c``, which calls the C API we just wrote::
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D)
/*
* Residual operation
*/
{
Encoding_(Residual_Forward)(state, R, X, D);
    /* the C function returns the number of outputs */
return 0;
}
- Add the corresponding function header to ``encoding/src/encoding_lib.h``::
int Encoding_Float_residual_forward(THCudaTensor *R, THCudaTensor *X,
THCudaTensor *D);
- Finally, call this function from Python::
class residual(Function):
def forward(self, X, C):
        # X \in (BxNxD), C \in (KxD) => R \in (BxNxKxD)
B, N, D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
R = X.new(B,N,K,D)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_residual_forward(R, X, C)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_residual_forward(R, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return R
- Note that this is just an example; you also need to implement the backward function for the ``residual`` operation (see the sketch below).
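A plain-PyTorch sketch of such a backward pass (an editor's illustration derived directly from :math:`r_{ik} = x_i - c_k`, not the package's CUDA implementation)::

    # assumes: from torch.autograd import Function
    class residual_ref(Function):
        def forward(self, X, C):
            # X \in (BxNxD), C \in (KxD) => R \in (BxNxKxD)
            B, N, D = X.size()
            K = C.size(0)
            return X.unsqueeze(2).expand(B, N, K, D) - C.view(1, 1, K, D).expand(B, N, K, D)

        def backward(self, gradR):
            B, N, K, D = gradR.size()
            # dL/dX[b,i,d] = sum over k of dL/dR[b,i,k,d]
            gradX = gradR.sum(2).contiguous().view(B, N, D)
            # dL/dC[k,d] = -sum over b and i of dL/dR[b,i,k,d]
            gradC = -gradR.contiguous().view(B * N, K, D).sum(0).view(K, D)
            return gradX, gradC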
.. role:: hidden
:class: hidden-section
Synchronized BatchNorm
======================
The current BN is implemented unsynchronized across the GPUs (each GPU normalizes with only the statistics of its own sub-batch), which is a big problem for memory-consuming tasks such as Semantic Segmentation, since the per-GPU mini-batch is very small.
Synchronizing the BatchNorm across multiple GPUs is not easy to implement within the current DataParallel framework. We address this difficulty by making each layer 'self-parallel', that is, accepting inputs from multiple GPUs, so that different layers can be handled separately when synchronizing them across GPUs.
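The mechanics can be summarized with a small sketch (an editor's illustration in plain PyTorch, not the package's actual multi-GPU code): each GPU first computes the per-channel sum and sum of squares of its own slice (which is what :class:`sum_square` below provides), the partial statistics are summed across devices, and the resulting global mean and standard deviation are then used for normalization on every device::

    def global_batchnorm_stats(xsums, xsqusums, counts, eps=1e-5):
        # xsums, xsqusums: lists of per-GPU per-channel sums and sums of squares
        # counts: number of elements behind each per-GPU statistic
        total = float(sum(counts))
        s = sum(xsums)       # elementwise sum over devices -> per-channel sum
        ss = sum(xsqusums)   # per-channel sum of squares
        mean = s / total
        var = ss / total - mean * mean   # E[x^2] - E[x]^2
        std = (var + eps).sqrt()
        return mean, std

The per-channel ``mean`` and ``std`` computed this way can then be fed to :class:`batchnormtrain` on each device, so that all devices normalize with the statistics of the whole mini-batch.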
.. currentmodule:: encoding
Functions
---------
:hidden:`batchnormtrain`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: batchnormtrain
:members:
:hidden:`batchnormeval`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: batchnormeval
:members:
:hidden:`sum_square`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: sum_square
:members:
-from .aggregate import *
+from .aggregate import aggregate, scaledL2, aggregateP, residual, square_squeeze, assign
@@ -28,6 +28,14 @@ class aggregate(Function):
        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` (where :math:`B` is the batch size, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
Examples:
>>> B,N,K,D = 2,3,4,5
>>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True)
>>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True)
>>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True)
>>> func = encoding.aggregate()
>>> E = func(A, X, C)
""" """
def forward(self, A, X, C): def forward(self, A, X, C):
# A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD) # A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD)
...@@ -67,7 +75,18 @@ class aggregate(Function): ...@@ -67,7 +75,18 @@ class aggregate(Function):
return gradA, gradX, gradC return gradA, gradX, gradC
class ScaledL2(Function): class scaledL2(Function):
r"""
scaledL2 distance
.. math::
sl_{ik} = s_k \|x_i-c_k\|^2
Shape:
        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` (where :math:`B` is the batch size, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
- Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
"""
    def forward(self, X, C, S):
        B,N,D = X.size()
        K = C.size(0)
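(Editor's note, not part of this commit: the scaled-L2 distance above reduces to a few broadcasted tensor operations, which is convenient as a reference implementation when testing the CUDA path.)

    # editor's sketch: reference for sl[b,i,k] = s[k] * ||x[b,i] - c[k]||^2
    def scaledL2_reference(X, C, S):
        B, N, D = X.size()
        K = C.size(0)
        R = X.unsqueeze(2).expand(B, N, K, D) - C.view(1, 1, K, D).expand(B, N, K, D)
        return S.view(1, 1, K).expand(B, N, K) * R.pow(2).sum(3).view(B, N, K)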
@@ -140,6 +159,17 @@ class aggregateP(Function):
class residual(Function):
r"""
Calculate residuals over a mini-batch
.. math::
r_{ik} = x_i - c_k
Shape:
        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` (where :math:`B` is the batch size, :math:`N` is the total number of features, :math:`K` is the number of codewords, and :math:`D` is the feature dimension.)
- Output: :math:`R\in\mathcal{R}^{B\times N\times K\times D}`
"""
    def forward(self, X, C):
        # X \in (BxNxD), C \in (KxD) => R \in (BxNxKxD)
        B, N, D = X.size()
...
@@ -48,7 +48,7 @@ class Encoding(nn.Module):
        >>> E = layer(X)
    Reference:
-        Zhang, Hang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
+        Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
    """
    def __init__(self, D, K):
        super(Encoding, self).__init__()
@@ -79,7 +79,7 @@ class Encoding(nn.Module):
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # assignment weights
-        A = F.softmax(ScaledL2()(X, self.codewords, self.scale))
+        A = F.softmax(scaledL2()(X, self.codewords, self.scale))
        # aggregate
        E = aggregate()(A, X, self.codewords)
        return E
...
@@ -16,6 +16,9 @@ from torch.autograd import Function, Variable
from ._ext import encoding_lib
class sum_square(Function):
r"""
Calculate sum of elements and sum of squares for Batch Normalization.
"""
    def forward(ctx, input):
        ctx.save_for_backward(input)
        B,C,H,W = input.size()
@@ -53,6 +56,20 @@ class sum_square(Function):
class batchnormtrain(Function):
r"""Applies Batch Normalization over a 3d input that is seen as a
mini-batch.
    .. _encoding.batchnormtrain:
.. math::
y = \frac{x - \mu[x]}{ \sqrt{var[x] + \epsilon}} * \gamma + \beta
Shape:
- Input: :math:`(N, C)` or :math:`(N, C, L)`
- Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
"""
    def forward(ctx, input, gamma, beta, mean, std):
        ctx.save_for_backward(input, gamma, beta, mean, std)
        assert(input.dim()==3)
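(Editor's aside, not part of this commit: the formula above can be cross-checked with a plain per-channel computation; the sketch assumes ``std`` already incorporates the epsilon term, matching the ``forward(ctx, input, gamma, beta, mean, std)`` signature.)

    # editor's sketch: reference for y = (x - mean) / std * gamma + beta on a (B, C, L) input
    def batchnorm_reference(x, gamma, beta, mean, std):
        B, C, L = x.size()
        xn = (x - mean.view(1, C, 1).expand(B, C, L)) / std.view(1, C, 1).expand(B, C, L)
        return xn * gamma.view(1, C, 1).expand(B, C, L) + beta.view(1, C, 1).expand(B, C, L)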
@@ -99,6 +116,11 @@ class batchnormtrain(Function):
class batchnormeval(Function):
r"""Applies Batch Normalization over a 3d input that is seen as a
mini-batch.
Please see encoding.batchnormtrain_
"""
    def forward(ctx, input, gamma, beta, mean, std):
        ctx.save_for_backward(input, gamma, beta, mean, std)
        assert(input.dim()==3)
...
@@ -37,7 +37,7 @@ def test_aggregate():
    print('Testing aggregate(): {}'.format(test))
-def test_ScaledL2():
+def test_scaledL2():
    B,N,K,D = 2,3,4,5
    X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
                 requires_grad=True)
@@ -46,8 +46,8 @@ def test_scaledL2():
    S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5),
                 requires_grad=True)
    input = (X, C, S)
-    test = gradcheck(encoding.ScaledL2(), input, eps=1e-6, atol=1e-4)
-    print('Testing ScaledL2(): {}'.format(test))
+    test = gradcheck(encoding.scaledL2(), input, eps=1e-6, atol=1e-4)
+    print('Testing scaledL2(): {}'.format(test))
def test_assign():
@@ -63,7 +63,7 @@ def test_assign():
    A1 = encoding.assign(R, S)
    E1 = encoding.aggregateP()(A1, R)
-    A2 = F.softmax(encoding.ScaledL2()(X,C,S))
+    A2 = F.softmax(encoding.scaledL2()(X,C,S))
    E2 = encoding.aggregate()(A2, X, C)
    print('E1', E1)
@@ -121,7 +121,7 @@ def test_sum_square():
if __name__ == '__main__':
    test_aggregateP()
-    test_ScaledL2()
+    test_scaledL2()
    test_encoding()
    test_aggregate()
    test_residual()
...