"docs/git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "e7cb18c10b7b15e11b4c0f7994ad62379debdc38"
Commit 1177a80b authored by Hang Zhang

memory-efficient implementation and test script

parent 8dd870b1
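The test script added by this commit is not reproduced below (its diff is collapsed). Purely as an illustration, a gradient-check style test for the new Functions might look like the following sketch; the import path, tensor sizes, and the lambda wrappers are assumptions, not code from the commit.

import torch
from torch.autograd import Variable, gradcheck
from encoding import aggregateE, ScaledL2  # hypothetical import path

# small double-precision CUDA tensors keep the numerical check stable
B, N, K, D = 2, 3, 4, 5
A = Variable(torch.cuda.DoubleTensor(B, N, K).uniform_(), requires_grad=True)
X = Variable(torch.cuda.DoubleTensor(B, N, D).uniform_(), requires_grad=True)
C = Variable(torch.cuda.DoubleTensor(K, D).uniform_(), requires_grad=True)
S = Variable(torch.cuda.DoubleTensor(K).uniform_(), requires_grad=True)

# fresh Function instances per call, as required by old-style autograd Functions
print(gradcheck(lambda a, x, c: aggregateE()(a, x, c), (A, X, C)))
print(gradcheck(lambda x, c, s: ScaledL2()(x, c, s), (X, C, S)))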
......@@ -28,9 +28,11 @@ else:
os.environ['THC_LIBRARIES'] = os.path.join(lib_path,'libTHC.so.1')
ENCODING_LIB = os.path.join(lib_path, 'libENCODING.so')
clean_cmd = ['bash', 'clean.sh']
subprocess.check_call(clean_cmd)
build_all_cmd = ['bash', 'encoding/make.sh']
if subprocess.call(build_all_cmd, env=dict(os.environ)) != 0:
sys.exit(1)
subprocess.check_call(build_all_cmd, env=dict(os.environ))
sources = ['encoding/src/encoding_lib.cpp']
headers = ['encoding/src/encoding_lib.h']
......
......@@ -10,55 +10,253 @@
import threading
import torch
import torch.cuda.nccl as nccl
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function, Variable
from torch.nn.parameter import Parameter
from ._ext import encoding_lib
class aggregateE(Function):
def forward(self, A, X, C):
# A \in(BxNxK), X \in(BxNxD), C \in(KxD) => E \in(BxKxD)
self.save_for_backward(A, X, C)
B, N, K = A.size()
D = X.size(2)
with torch.cuda.device_of(A):
E = A.new(B,K,D)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregateE_forward(E, A, X, C)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregateE_forward(E, A, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return E
def backward(self, gradE):
A, X, C = self.saved_tensors
with torch.cuda.device_of(A):
gradA = A.new().resize_as_(A)
gradX = A.new().resize_as_(X)
gradC = A.new().resize_as_(C)
if isinstance(A, torch.cuda.FloatTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregateE_backward(gradA,
gradE, A, X, C)
elif isinstance(A, torch.cuda.DoubleTensor):
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregateE_backward(gradA,
gradE, A, X, C)
else:
raise RuntimeError('Unimplemented data type!')
gradX.copy_(torch.bmm(A, gradE))
gradC.copy_((-gradE*A.sum(1).unsqueeze(2)).sum(0))
return gradA, gradX, gradC
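For reference only, a dense pure-PyTorch equivalent of what aggregateE appears to compute; its semantics are implied by the backward formulas above, and this sketch is not part of the commit.

def aggregate_e_reference(A, X, C):
    # E[b, k] = sum_i A[b, i, k] * (X[b, i] - C[k])
    B, N, K = A.size()
    D = X.size(2)
    R = X.unsqueeze(2).expand(B, N, K, D) \
        - C.view(1, 1, K, D).expand(B, N, K, D)
    return (A.unsqueeze(3).expand(B, N, K, D) * R).sum(1).view(B, K, D)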
class ScaledL2(Function):
def forward(self, X, C, S):
B,N,D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
SL = X.new(B,N,K)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_scaledl2_forward(SL, X, C, S)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_scaledl2_forward(SL, X, C, S)
else:
raise RuntimeError('Unimplemented data type!')
self.save_for_backward(X, C, S, SL)
return SL
def backward(self, gradSL):
X, C, S, SL = self.saved_tensors
K = C.size(0)
with torch.cuda.device_of(X):
gradX = X.new().resize_as_(X)
gradC = X.new().resize_as_(C)
gradS = X.new().resize_as_(S)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_scaledl2_backward(gradSL,
gradX, gradC, X, C, S)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_scaledl2_backward(gradSL,
gradX, gradC, X, C, S)
else:
raise RuntimeError('Unimplemented data type!')
gradS.copy_((gradSL*(SL/S.view(1,1,K))).sum(0).sum(0))
return gradX, gradC, gradS
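Again for reference, the assumed semantics of the fused scaled-L2 kernel written in plain PyTorch (a sketch, not part of the commit): SL[b, i, k] = S[k] * ||X[b, i] - C[k]||^2.

def scaled_l2_reference(X, C, S):
    B, N, D = X.size()
    K = C.size(0)
    R = X.unsqueeze(2).expand(B, N, K, D) \
        - C.view(1, 1, K, D).expand(B, N, K, D)
    return R.pow(2).sum(3).view(B, N, K) * S.view(1, 1, K).expand(B, N, K)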
class Encoding(nn.Module):
def __init__(self, D, K):
super(Encoding, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.reset_params()
def reset_params(self):
std1 = 1./((self.K*self.D)**0.5)  # 0.5 avoids Python 2 integer division
std2 = 1./((self.K)**0.5)
self.codewords.data.uniform_(-std1, std1)
self.scale.data.uniform_(-std2, std2)
def forward(self, X):
# input X is a 4D tensor
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2).contiguous()
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2).contiguous()
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# assignment weights
A = F.softmax(ScaledL2()(X, self.codewords, self.scale))
# aggregate
E = aggregateE()(A, X, self.codewords)
return E
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
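A hypothetical usage sketch of the fused layer; the device, sizes, and Variable wrapper are assumptions based on the forward() above, not code from the commit.

layer = Encoding(D=128, K=32).cuda()
x = Variable(torch.randn(2, 128, 8, 8).cuda())  # B x D x H x W
e = layer(x)                                    # B x K x D, here 2 x 32 x 128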
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class aggregate(Function):
def forward(self, A, R):
# A \in(BxNxK), R \in(BxNxKxD) => E \in(BxKxD)
self.save_for_backward(A, R)
B, N, K, D = R.size()
E = A.new(B,K,D)
# TODO support cpu backend
with torch.cuda.device_of(A):
E = A.new(B,K,D)
if isinstance(A, torch.cuda.FloatTensor):
encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
elif isinstance(A, torch.cuda.DoubleTensor):
encoding_lib.Encoding_Double_aggregate_forward(E, A, R)
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregate_forward(E, A, R)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return E
def backward(self, gradE):
A, R = self.saved_tensors
gradA = A.new().resize_as_(A)
gradR = R.new().resize_as_(R)
with torch.cuda.device_of(A):
gradA = A.new().resize_as_(A)
gradR = R.new().resize_as_(R)
if isinstance(A, torch.cuda.FloatTensor):
encoding_lib.Encoding_Float_aggregate_backward(gradA, gradR, gradE,
A, R)
with torch.cuda.device_of(A):
encoding_lib.Encoding_Float_aggregate_backward(gradA,
gradR, gradE, A, R)
elif isinstance(A, torch.cuda.DoubleTensor):
encoding_lib.Encoding_Double_aggregate_backward(gradA, gradR, gradE,
A, R)
with torch.cuda.device_of(A):
encoding_lib.Encoding_Double_aggregate_backward(gradA,
gradR, gradE, A, R)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return gradA, gradR
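For comparison with aggregateE above, a dense pure-PyTorch sketch of what aggregate computes over an explicit residual tensor (not part of the commit).

def aggregate_reference(A, R):
    # E[b, k] = sum_i A[b, i, k] * R[b, i, k]
    B, N, K, D = R.size()
    return (A.unsqueeze(3).expand(B, N, K, D) * R).sum(1).view(B, K, D)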
class residual(Function):
def forward(self, X, C):
# X \in(BxNxD), C \in(KxD) => R \in(BxNxKxD)
B, N, D = X.size()
K = C.size(0)
with torch.cuda.device_of(X):
R = X.new(B,N,K,D)
if isinstance(X, torch.cuda.FloatTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Float_residual_forward(R, X, C)
elif isinstance(X, torch.cuda.DoubleTensor):
with torch.cuda.device_of(X):
encoding_lib.Encoding_Double_residual_forward(R, X, C)
else:
raise RuntimeError('Unimplemented data type!')
return R
def backward(self, gradR):
B, N, K, D = gradR.size()
with torch.cuda.device_of(gradR):
gradX = gradR.new(B,N,D)
gradD = gradR.new(K,D)
if isinstance(gradR, torch.cuda.FloatTensor):
with torch.cuda.device_of(gradR):
encoding_lib.Encoding_Float_residual_backward(gradR,
gradX, gradD)
elif isinstance(gradR, torch.cuda.DoubleTensor):
with torch.cuda.device_of(gradR):
encoding_lib.Encoding_Double_residual_backward(gradR,
gradX, gradD)
else:
raise RuntimeError('Unimplemented data type!')
return gradX, gradD
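residual() replaces the broadcasted subtraction that the old EncodingP.forward performed in Python (the dense expression removed further below); an equivalent sketch, not part of the commit:

def residual_reference(X, C):
    # R[b, i, k] = X[b, i] - C[k]
    B, N, D = X.size()
    K = C.size(0)
    return X.contiguous().view(B, N, 1, D).expand(B, N, K, D) \
        - C.view(1, 1, K, D).expand(B, N, K, D)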
class square_squeeze(Function):
def forward(self, R):
B, N, K, D = R.size()
with torch.cuda.device_of(R):
L = R.new(B,N,K)
if isinstance(R, torch.cuda.FloatTensor):
with torch.cuda.device_of(R):
encoding_lib.Encoding_Float_squaresqueeze_forward(L, R)
elif isinstance(R, torch.cuda.DoubleTensor):
with torch.cuda.device_of(R):
encoding_lib.Encoding_Double_squaresqueeze_forward(L, R)
else:
raise RuntimeError('Unimplemented data type!')
self.save_for_backward(L, R)
return L
def backward(self, gradL):
L, R = self.saved_tensors
B, N, K, D = R.size()
with torch.cuda.device_of(R):
gradR = R.new(B,N,K,D)
if isinstance(R, torch.cuda.FloatTensor):
with torch.cuda.device_of(gradL):
encoding_lib.Encoding_Float_squaresqueeze_backward(gradL,
gradR, R)
elif isinstance(R, torch.cuda.DoubleTensor):
with torch.cuda.device_of(gradL):
encoding_lib.Encoding_Double_squaresqueeze_backward(gradL,
gradR, R)
else:
raise RuntimeError('Unimplemented data type!')
return gradR
def assign(R, S):
L = square_squeeze()(R)
K = S.size(0)
SL = L * S.view(1,1,K)
return F.softmax(SL)
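assign() therefore computes soft assignment weights A[b, i, k] = softmax_k(S[k] * ||R[b, i, k]||^2). An equivalent dense sketch that reshapes to 2D before the softmax, as the removed EncodingP code below did (illustration only, not part of the commit):

def assign_reference(R, S):
    B, N, K, D = R.size()
    L = R.pow(2).sum(3).view(B, N, K)
    SL = L * S.view(1, 1, K).expand_as(L)
    return F.softmax(SL.view(B * N, K)).view(B, N, K)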
class Aggregate(nn.Module):
def forward(self, A, R):
return aggregate()(A, R)
class Encoding(nn.Module):
class EncodingP(nn.Module):
def __init__(self, D, K):
super(Encoding, self).__init__()
super(EncodingP, self).__init__()
# init codewords and smoothing factor
self.D, self.K = D, K
self.codewords = nn.Parameter(torch.Tensor(K, D), requires_grad=True)
self.codewords = nn.Parameter(torch.Tensor(K, D),
requires_grad=True)
self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
self.softmax = nn.Softmax()
self.reset_params()
def reset_params(self):
......@@ -69,34 +267,33 @@ class Encoding(nn.Module):
def forward(self, X):
# input X is a 4D tensor
assert(X.size(1)==self.D,"Encoding Layer incompatible input channels!")
unpacked = False
assert X.size(1) == self.D, "Encoding Layer wrong channels!"
if X.dim() == 3:
unpacked = True
X = X.unsqueeze(0)
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
# reshape input
X = X.view(B,D,-1).transpose(1,2)
# BxDxN
B, N, K, D = X.size(0), X.size(2), self.K, self.D
X = X.transpose(1,2)
elif X.dim() == 4:
# BxDxHxW
B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
X = X.view(B,D,-1).transpose(1,2)
else:
raise RuntimeError('Encoding Layer unknown input dims!')
# calculate residuals
R = X.contiguous().view(B,N,1,D).expand(B,N,K,D) - self.codewords.view(
1,1,K,D).expand(B,N,K,D)
R = residual()(X.contiguous(), self.codewords)
# assignment weights
A = R
A = A.pow(2).sum(3).view(B,N,K)
A = A*self.scale.view(1,1,K).expand_as(A)
A = self.softmax(A.view(B*N,K)).view(B,N,K)
A = assign(R, self.scale)
# aggregate
E = aggregate()(A, R)
if unpacked:
E = E.squeeze(0)
return E
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' + str(self.D) + ')'
+ 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+ str(self.D) + ')'
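The fused Encoding path above avoids allocating the B x N x K x D residual tensor that EncodingP still builds explicitly via residual(). Rough arithmetic with illustrative sizes (the numbers are not from the commit) shows why that matters:

B, N, K, D = 16, 64 * 64, 32, 128
floats = B * N * K * D             # 268,435,456 elements for R alone
print(floats * 4 / 1024.0 ** 3)    # ~1.0 GiB in float32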
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class sum_square(Function):
def forward(ctx, input):
ctx.save_for_backward(input)
......@@ -113,7 +310,7 @@ class sum_square(Function):
encoding_lib.Encoding_Double_sum_square_Forward(
input.view(B,C,-1), xsum, xsquare)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return xsum, xsquare
def backward(ctx, gradSum, gradSquare):
......@@ -121,8 +318,6 @@ class sum_square(Function):
B,C,H,W = input.size()
with torch.cuda.device_of(input):
gradInput = input.new().resize_(B,C,H*W).zero_()
# gradSum.view(1,C,1,1).expand_as(input) + \
# 2*gradSquare.view(1,C,1,1).expand_as(input)*input
if isinstance(input, torch.cuda.FloatTensor):
with torch.cuda.device_of(input):
encoding_lib.Encoding_Float_sum_square_Backward(
......@@ -132,9 +327,10 @@ class sum_square(Function):
encoding_lib.Encoding_Double_sum_square_Backward(
gradInput, input.view(B,C,-1), gradSum, gradSquare)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return gradInput.view(B,C,H,W)
class batchnormtrain(Function):
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
......@@ -151,7 +347,7 @@ class batchnormtrain(Function):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
......@@ -177,9 +373,10 @@ class batchnormtrain(Function):
mean, invstd, gamma, beta, gradMean, gradStd,
True)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
class batchnormeval(Function):
def forward(ctx, input, gamma, beta, mean, std):
ctx.save_for_backward(input, gamma, beta, mean, std)
......@@ -196,7 +393,7 @@ class batchnormeval(Function):
encoding_lib.Encoding_Double_batchnorm_Forward(output,
input, mean, invstd, gamma, beta)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return output
def backward(ctx, gradOutput):
......@@ -221,6 +418,6 @@ class batchnormeval(Function):
mean, invstd, gamma, beta, gradMean, gradStd,
False)
else:
raise RuntimeError('unimplemented')
raise RuntimeError('Unimplemented data type!')
return gradInput, gradGamma, gradBeta, gradMean, gradStd
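sum_square reduces per-channel sums and sums of squares in a single pass. A hedged sketch, not from this commit, of how per-channel mean and inverse standard deviation could then be derived for the batch-norm Functions; eps and the exact downstream composition are assumptions:

def channel_stats(x, eps=1e-5):
    # x: B x C x H x W input; xsum, xsquare: per-channel reductions (assumed shape C)
    B, C, H, W = x.size()
    xsum, xsquare = sum_square()(x)
    n = B * H * W
    mean = xsum / n
    var = xsquare / n - mean * mean
    invstd = 1.0 / torch.sqrt(var + eps)
    return mean, invstd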
(The diff for one additional changed file is collapsed and not shown here.)
......@@ -12,28 +12,54 @@
#define THC_GENERIC_FILE "generic/encoding_kernel.h"
#else
void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
THCTensor *A_, THCTensor *R_);
void Encoding_(AggregateE_Forward)(THCState *state, THCTensor *E_,
THCTensor *A_, THCTensor *X_, THCTensor *C_);
void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *GA_,
THCTensor *GR_, THCTensor *L_, THCTensor *A_, THCTensor *R_);
void Encoding_(AggregateE_Backward)(THCState *state, THCTensor *GA_,
THCTensor *GE_, THCTensor *A_, THCTensor *X_, THCTensor *C_);
void Encoding_(ScaledL2_Forward)( THCState *state, THCTensor *SL_,
THCTensor *X_, THCTensor *C_, THCTensor *S_);
void Encoding_(ScaledL2_Backward)(
THCState *state, THCTensor *GSL_, THCTensor *GX_, THCTensor *GC_,
THCTensor *X_, THCTensor *C_, THCTensor *S_);
void Encoding_(Aggregate_Forward)(
THCState *state, THCTensor *E_, THCTensor *A_, THCTensor *R_);
void Encoding_(Aggregate_Backward)(
THCState *state, THCTensor *GA_, THCTensor *GR_, THCTensor *L_,
THCTensor *A_, THCTensor *R_);
void Encoding_(Residual_Forward)(
THCState *state, THCTensor *R_, THCTensor *X_, THCTensor *D_);
void Encoding_(Residual_Backward)(
THCState *state, THCTensor *GR_, THCTensor *GX_, THCTensor *GD_);
void Encoding_(SquareSqueeze_Forward)(
THCState *state, THCTensor *L_, THCTensor *R_);
void Encoding_(SquareSqueeze_Backward)(
THCState *state, THCTensor *GL_, THCTensor *GR_, THCTensor *R_);
void Encoding_(BatchNorm_Forward)(THCState *state,
THCTensor *output_, THCTensor *input_,
THCTensor *mean_, THCTensor *invstd_,
THCTensor *gamma_, THCTensor *beta_);
THCTensor *output_, THCTensor *input_,
THCTensor *mean_, THCTensor *invstd_,
THCTensor *gamma_, THCTensor *beta_);
void Encoding_(BatchNorm_Backward)(THCState *state,
THCTensor *gradoutput_, THCTensor *input_, THCTensor *gradinput_,
THCTensor *gradgamma_, THCTensor *gradbeta_, THCTensor *mean_,
THCTensor *invstd_, THCTensor *gamma_, THCTensor *beta_,
THCTensor *gradMean_, THCTensor *gradStd_, int train);
THCTensor *gradoutput_, THCTensor *input_, THCTensor *gradinput_,
THCTensor *gradgamma_, THCTensor *gradbeta_, THCTensor *mean_,
THCTensor *invstd_, THCTensor *gamma_, THCTensor *beta_,
THCTensor *gradMean_, THCTensor *gradStd_, int train);
void Encoding_(Sum_Square_Forward)(THCState *state,
THCTensor *input_, THCTensor *sum_, THCTensor *square_);
THCTensor *input_, THCTensor *sum_, THCTensor *square_);
void Encoding_(Sum_Square_Backward)(THCState *state,
THCTensor *gradInput, THCTensor *input_,
THCTensor *gradSum_, THCTensor *gradSquare_);
THCTensor *gradInput, THCTensor *input_,
THCTensor *gradSum_, THCTensor *gradSquare_);
#endif
......@@ -20,52 +20,107 @@
#include "THC/THCGenerateFloatType.h"
*/
int Encoding_Float_scaledl2_forward(THCudaTensor *SL,
THCudaTensor *X, THCudaTensor *C, THCudaTensor *S);
int Encoding_Float_scaledl2_backward(
THCudaTensor *GSL, THCudaTensor *GX, THCudaTensor *GC,
THCudaTensor *X, THCudaTensor *C, THCudaTensor *S);
int Encoding_Float_aggregateE_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregateE_backward(THCudaTensor *GA, THCudaTensor *GE,
THCudaTensor *A, THCudaTensor *X, THCudaTensor *C);
int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
THCudaTensor *R);
int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GR,
THCudaTensor *L, THCudaTensor *A, THCudaTensor *R);
int Encoding_Float_residual_forward(THCudaTensor *R, THCudaTensor *X,
THCudaTensor *D);
int Encoding_Float_residual_backward(THCudaTensor *GR, THCudaTensor *GX,
THCudaTensor *GD);
int Encoding_Float_squaresqueeze_forward(THCudaTensor *L, THCudaTensor *R);
int Encoding_Float_squaresqueeze_backward(THCudaTensor *GL,
THCudaTensor *GR, THCudaTensor *R);
int Encoding_Float_batchnorm_Forward(THCudaTensor *output_,
THCudaTensor *input_, THCudaTensor *mean_,
THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_);
THCudaTensor *input_, THCudaTensor *mean_,
THCudaTensor *invstd_, THCudaTensor *gamma_, THCudaTensor *beta_);
int Encoding_Float_batchnorm_Backward(THCudaTensor *gradoutput_,
THCudaTensor *input_, THCudaTensor *gradinput_,
THCudaTensor *gradgamma_, THCudaTensor *gradbeta_,
THCudaTensor *mean_, THCudaTensor *invstd_,
THCudaTensor *gamma_,THCudaTensor *beta_,
THCudaTensor *gradMean_, THCudaTensor *gradStd_, int train);
THCudaTensor *input_, THCudaTensor *gradinput_,
THCudaTensor *gradgamma_, THCudaTensor *gradbeta_,
THCudaTensor *mean_, THCudaTensor *invstd_,
THCudaTensor *gamma_,THCudaTensor *beta_,
THCudaTensor *gradMean_, THCudaTensor *gradStd_, int train);
int Encoding_Float_sum_square_Forward(THCudaTensor *input_,
THCudaTensor *sum_, THCudaTensor *square_);
THCudaTensor *sum_, THCudaTensor *square_);
void Encoding_Float_sum_square_Backward(
THCudaTensor *gradInput, THCudaTensor *input_,
THCudaTensor *gradSum_, THCudaTensor *gradSquare_);
THCudaTensor *gradInput, THCudaTensor *input_,
THCudaTensor *gradSum_, THCudaTensor *gradSquare_);
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int Encoding_Double_scaledl2_forward(THCudaDoubleTensor *SL,
THCudaDoubleTensor *X, THCudaDoubleTensor *C, THCudaDoubleTensor *S);
int Encoding_Double_scaledl2_backward(
THCudaDoubleTensor *GSL, THCudaDoubleTensor *GX,
THCudaDoubleTensor *GC, THCudaDoubleTensor *X,
THCudaDoubleTensor *C, THCudaDoubleTensor *S);
int Encoding_Double_aggregateE_forward(THCudaDoubleTensor *E,
THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C);
int Encoding_Double_aggregateE_backward(THCudaDoubleTensor *GA,
THCudaDoubleTensor *GE, THCudaDoubleTensor *A, THCudaDoubleTensor *X,
THCudaDoubleTensor *C);
int Encoding_Double_aggregate_forward(
THCudaDoubleTensor *E, THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_aggregate_backward(
THCudaDoubleTensor *GA, THCudaDoubleTensor *GR, THCudaDoubleTensor *L,
THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_residual_forward(
THCudaDoubleTensor *R, THCudaDoubleTensor *X, THCudaDoubleTensor *D);
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int Encoding_Double_residual_backward(
THCudaDoubleTensor *GR, THCudaDoubleTensor *GX,
THCudaDoubleTensor *GD);
int Encoding_Double_aggregate_forward(THCudaDoubleTensor *E,
THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_squaresqueeze_forward(THCudaDoubleTensor *L,
THCudaDoubleTensor *R);
int Encoding_Double_aggregate_backward(THCudaDoubleTensor *GA,
THCudaDoubleTensor *GR, THCudaDoubleTensor *L,
THCudaDoubleTensor *A, THCudaDoubleTensor *R);
int Encoding_Double_squaresqueeze_backward(THCudaDoubleTensor *GL,
THCudaDoubleTensor *GR, THCudaDoubleTensor *R);
int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_,
THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_,
THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_, THCudaDoubleTensor *beta_);
THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_,
THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_,
THCudaDoubleTensor *beta_);
int Encoding_Double_batchnorm_Backward(THCudaDoubleTensor *gradoutput_,
THCudaDoubleTensor *input_, THCudaDoubleTensor *gradinput_,
THCudaDoubleTensor *gradgamma_, THCudaDoubleTensor *gradbeta_,
THCudaDoubleTensor *mean_, THCudaDoubleTensor *invstd_,
THCudaDoubleTensor *gamma_, THCudaDoubleTensor *beta_,
THCudaDoubleTensor *gradMean_, THCudaDoubleTensor *gradStd_, int train);
THCudaDoubleTensor *input_, THCudaDoubleTensor *gradinput_,
THCudaDoubleTensor *gradgamma_, THCudaDoubleTensor *gradbeta_,
THCudaDoubleTensor *mean_, THCudaDoubleTensor *invstd_,
THCudaDoubleTensor *gamma_, THCudaDoubleTensor *beta_,
THCudaDoubleTensor *gradMean_, THCudaDoubleTensor *gradStd_,
int train);
int Encoding_Double_sum_square_Forward(THCudaDoubleTensor *input_,
THCudaDoubleTensor *sum_, THCudaDoubleTensor *square_);
THCudaDoubleTensor *sum_, THCudaDoubleTensor *square_);
void Encoding_Double_sum_square_Backward(
THCudaDoubleTensor *gradInput, THCudaDoubleTensor *input_,
THCudaDoubleTensor *gradSum_, THCudaDoubleTensor *gradSquare_);
THCudaDoubleTensor *gradInput, THCudaDoubleTensor *input_,
THCudaDoubleTensor *gradSum_, THCudaDoubleTensor *gradSquare_);
......@@ -12,15 +12,65 @@
#define THC_GENERIC_FILE "generic/encoding_generic.c"
#else
int Encoding_(scaledl2_forward)(THCTensor *SL,
THCTensor *X, THCTensor *C, THCTensor *S)
/*
* ScaledL2 operation
*/
{
Encoding_(ScaledL2_Forward)(state, SL, X, C, S);
/* C function return number of the outputs */
return 0;
}
int Encoding_(scaledl2_backward)(
THCTensor *GSL, THCTensor *GX, THCTensor *GC,
THCTensor *X, THCTensor *C, THCTensor *S)
/*
* ScaledL2 operation
*/
{
Encoding_(ScaledL2_Backward)(state, GSL, GX, GC, X, C, S);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregateE_forward)(THCTensor *E, THCTensor *A,
THCTensor *X, THCTensor *C)
/*
* Aggregate operation
*/
{
Encoding_(AggregateE_Forward)(state, E, A, X, C);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregateE_backward)(THCTensor *GA, THCTensor *GE,
THCTensor *A, THCTensor *X, THCTensor *C)
/*
* Aggregate backward operation to A
* GA (dl/dA), GE (dl/dE), A (assignments)
*/
{
Encoding_(AggregateE_Backward)(state, GA, GE, A, X, C);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A,
THCTensor *R)
/*
* Aggregate operation
*/
{
Encoding_(Aggregate_Forward)(state, E, A, R);
/* C function return number of the outputs */
return 0;
Encoding_(Aggregate_Forward)(state, E, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GR,
......@@ -30,11 +80,54 @@ int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GR,
* G (dl/dR), L (dl/dE), A (assignments)
*/
{
Encoding_(Aggregate_Backward)(state, GA, GR, L, A, R);
/* C function return number of the outputs */
return 0;
Encoding_(Aggregate_Backward)(state, GA, GR, L, A, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D)
/*
* Residual operation
*/
{
Encoding_(Residual_Forward)(state, R, X, D);
/* C function return number of the outputs */
return 0;
}
int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX,
THCTensor *GD)
/*
* Residual operation
*/
{
Encoding_(Residual_Backward)(state, GR, GX, GD);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R)
/*
* SquareSqueeze operation
*/
{
Encoding_(SquareSqueeze_Forward)(state, L, R);
/* C function return number of the outputs */
return 0;
}
int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR,
THCTensor *R)
/*
* SquareSqueeze operation
*/
{
Encoding_(SquareSqueeze_Backward)(state, GL, GR, R);
/* C function return number of the outputs */
return 0;
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int Encoding_(batchnorm_Forward)(THCTensor *output_, THCTensor *input_,
THCTensor *mean_, THCTensor *invstd_,
THCTensor *gamma_, THCTensor *beta_)
......
......@@ -10,19 +10,26 @@
import os
import sys
import subprocess
from setuptools import setup, find_packages
import build
from setuptools.command.develop import develop
from setuptools.command.install import install
this_file = os.path.dirname(__file__)
extra_compile_args = ['-std=c++11', '-Wno-write-strings']
#extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
extra_compile_args += ['-static-libstdc++']
extra_link_args += ['-static-libstdc++']
class TestCommand(install):
"""Post-installation mode."""
def run(self):
install.run(self)
subprocess.check_call("python test/test.py".split())
setup(
name="encoding",
version="0.0.1",
......@@ -35,11 +42,14 @@ setup(
setup_requires=["cffi>=1.0.0"],
# Exclude the build files.
packages=find_packages(exclude=["build"]),
extra_compile_args=extra_compile_args,
#extra_compile_args=extra_compile_args,
# Package where to put the extensions. Has to be a prefix of build.py.
ext_package="",
# Extensions to compile.
cffi_modules=[
os.path.join(this_file, "build.py:ffi")
],
cmdclass={
'install': TestCommand,
},
)