Commit 24a25dd3 authored by rusty1s's avatar rusty1s
Browse files

Basis backward implementation (GPU)

parent 1b777c39
...@@ -65,6 +65,7 @@ ...@@ -65,6 +65,7 @@
v = CODE; \ v = CODE; \
tmp *= v; \ tmp *= v; \
} \ } \
\
g += tmp * gradBasisData[e * gradBasis->stride[0] + s * gradBasis->stride[1]]; \ g += tmp * gradBasisData[e * gradBasis->stride[0] + s * gradBasis->stride[1]]; \
} \ } \
g *= kernelSizeData[d] - M * isOpenSplineData[d]; \ g *= kernelSizeData[d] - M * isOpenSplineData[d]; \
......
...@@ -31,7 +31,7 @@ inline real THTensor_(cubic)(real v, int64_t kMod) { ...@@ -31,7 +31,7 @@ inline real THTensor_(cubic)(real v, int64_t kMod) {
inline real THTensor_(gradCubic)(real v, int64_t kMod) { inline real THTensor_(gradCubic)(real v, int64_t kMod) {
if (kMod == 0) return (-v * v + 2 * v - 1) / 2; if (kMod == 0) return (-v * v + 2 * v - 1) / 2;
else if (kMod == 1) return (3 * v * v -4 * v) / 2; else if (kMod == 1) return (3 * v * v - 4 * v) / 2;
else if (kMod == 2) return (-3 * v * v + 2 * v + 1) / 2; else if (kMod == 2) return (-3 * v * v + 2 * v + 1) / 2;
else return v * v / 2; else return v * v / 2;
} }
......
#include "THCBasis.h" #include "THCBasis.h"
#include "THCBasisForward.cuh" #include "THCBasisForward.cuh"
#include "THCBasisBackward.cuh"
template<typename T> template<typename T>
__global__ void linearBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t>weightIndex, __global__ void linearBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t>weightIndex,
TensorInfo<T> pseudo, int64_t *kernelSize, TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) { uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n, THC_TENSOR_BASIS_FORWARD_KERNEL(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
v = BasisForward<T>::linear(v, kMod); BasisForward<T>::linear(v, kMod))
)
} }
template<typename T> template<typename T>
...@@ -16,8 +16,7 @@ __global__ void quadraticBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int6 ...@@ -16,8 +16,7 @@ __global__ void quadraticBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int6
TensorInfo<T> pseudo, int64_t *kernelSize, TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) { uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n, THC_TENSOR_BASIS_FORWARD_KERNEL(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
v = BasisForward<T>::quadratic(v, kMod); BasisForward<T>::quadratic(v, kMod))
)
} }
template<typename T> template<typename T>
...@@ -25,8 +24,34 @@ __global__ void cubicBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> ...@@ -25,8 +24,34 @@ __global__ void cubicBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t>
TensorInfo<T> pseudo, int64_t *kernelSize, TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) { uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n, THC_TENSOR_BASIS_FORWARD_KERNEL(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
v = BasisForward<T>::cubic(v, kMod); BasisForward<T>::cubic(v, kMod))
) }
// Backward pass of the linear (degree M = 1) B-spline basis on the GPU.
// Expands THC_TENSOR_BASIS_BACKWARD_KERNEL, which for each (example,
// dimension) entry of `pseudo` accumulates gradients from `gradBasis` into
// `self`, using BasisForward for the other dimensions and BasisBackward
// (the basis derivative) for the current one.
template<typename T>
__global__ void linearBasisBackwardKernel(TensorInfo<T> self, TensorInfo<T> gradBasis,
                                          TensorInfo<T> pseudo, int64_t *kernelSize,
                                          uint8_t *isOpenSpline, ptrdiff_t n) {
  THC_TENSOR_BASIS_BACKWARD_KERNEL(1, self, gradBasis, pseudo, kernelSize, isOpenSpline, n,
                                   BasisForward<T>::linear(v, kMod),
                                   BasisBackward<T>::linear(v, kMod))
}
// Backward pass of the quadratic (degree M = 2) B-spline basis on the GPU.
// Same accumulation scheme as the linear kernel; see
// THC_TENSOR_BASIS_BACKWARD_KERNEL for the per-element computation.
template<typename T>
__global__ void quadraticBasisBackwardKernel(TensorInfo<T> self, TensorInfo<T> gradBasis,
                                             TensorInfo<T> pseudo, int64_t *kernelSize,
                                             uint8_t *isOpenSpline, ptrdiff_t n) {
  THC_TENSOR_BASIS_BACKWARD_KERNEL(2, self, gradBasis, pseudo, kernelSize, isOpenSpline, n,
                                   BasisForward<T>::quadratic(v, kMod),
                                   BasisBackward<T>::quadratic(v, kMod))
}
// Backward pass of the cubic (degree M = 3) B-spline basis on the GPU.
// Same accumulation scheme as the linear kernel; see
// THC_TENSOR_BASIS_BACKWARD_KERNEL for the per-element computation.
// Fix: the scraped diff fused the closing brace line ("} }"); the kernel
// body closes with a single brace like its linear/quadratic siblings.
template<typename T>
__global__ void cubicBasisBackwardKernel(TensorInfo<T> self, TensorInfo<T> gradBasis,
                                         TensorInfo<T> pseudo, int64_t *kernelSize,
                                         uint8_t *isOpenSpline, ptrdiff_t n) {
  THC_TENSOR_BASIS_BACKWARD_KERNEL(3, self, gradBasis, pseudo, kernelSize, isOpenSpline, n,
                                   BasisForward<T>::cubic(v, kMod),
                                   BasisBackward<T>::cubic(v, kMod))
}
#include "generic/THCBasis.cu" #include "generic/THCBasis.cu"
......
#ifndef THC_BASIS_BACKWARD_INC
#define THC_BASIS_BACKWARD_INC

#include "common.cuh"
#include "THCNumerics.cuh"

// Host-side dispatch macro for the basis backward pass. Asserts that all
// five tensors live on the same GPU, unpacks TensorInfo structs / raw data
// pointers, and launches kernel NAME via KERNEL_REAL_RUN with one thread
// per element of `pseudo` (i.e. per (example, dimension) pair).
#define THC_TENSOR_BASIS_BACKWARD(NAME, state, self, gradBasis, pseudo, kernelSize, \
                                  isOpenSpline) { \
  THCAssertSameGPU( \
    THCTensor_(checkGPU)(state, 5, self, gradBasis, pseudo, kernelSize, isOpenSpline)); \
\
  TensorInfo<real> selfInfo = THCTensor_(getTensorInfo)(state, self); \
  TensorInfo<real> gradBasisInfo = THCTensor_(getTensorInfo)(state, gradBasis); \
  TensorInfo<real> pseudoInfo = THCTensor_(getTensorInfo)(state, pseudo); \
  int64_t *kernelSizeData = THCudaLongTensor_data(state, kernelSize); \
  uint8_t *isOpenSplineData = THCudaByteTensor_data(state, isOpenSpline); \
\
  KERNEL_REAL_RUN(NAME, THCTensor_(nElement)(state, pseudo), selfInfo, gradBasisInfo, pseudoInfo, \
                  kernelSizeData, isOpenSplineData); \
}
// Device-side backward computation, expanded inside each *BasisBackwardKernel.
// Thread i handles entry (e, d) = (i / dims, i % dims) of `self` and computes
//   g = sum_s gradBasis[e, s] * GRAD_CODE(v_d) * prod_{d' != d} CODE(v_{d'})
// then writes g * (kernelSize[d] - M * isOpenSpline[d]) to self[e, d] —
// the chain rule applied to the forward basis product over dimensions.
// M is the spline degree; CODE / GRAD_CODE evaluate the forward basis and
// its derivative on the fractional coordinate v, with kMod selecting the
// polynomial piece from the base-(M+1) digits of the summation index s.
// All arithmetic goes through THCNumerics<T> / ScalarConvert so the macro
// also instantiates for half precision.
#define THC_TENSOR_BASIS_BACKWARD_KERNEL(M, self, gradBasis, pseudo, kernelSize, isOpenSpline, \
                                         N, CODE, GRAD_CODE) { \
  KERNEL_LOOP(i, N) { \
    ptrdiff_t e = i / self.size[1], d = i % self.size[1], s, dIt, dOther; \
    int64_t kMod; \
    T g = ScalarConvert<int, T>::to(0), v, tmp; \
    for (s = 0; s < gradBasis.size[1]; s++) { \
      /* derivative factor for the dimension d this thread owns */ \
      kMod = (s / (ptrdiff_t) pow(M + 1, d)) % (M + 1); \
      v = pseudo.data[e * pseudo.stride[0] + d * pseudo.stride[1]]; \
      v = THCNumerics<T>::mul(v, ScalarConvert<int64_t, T>::to(kernelSize[d] - M * isOpenSpline[d])); \
      /* keep only the fractional part of the scaled coordinate */ \
      v = THCNumerics<T>::sub(v, ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(v))); \
      v = GRAD_CODE; \
      tmp = v; \
\
      /* forward-basis factors for every other dimension (d skipped) */ \
      for (dIt = 1; dIt < pseudo.size[1]; dIt++) { \
        dOther = dIt - (d >= dIt); \
        kMod = (s / (ptrdiff_t) pow(M + 1, dOther)) % (M + 1); \
        v = pseudo.data[e * pseudo.stride[0] + dOther * pseudo.stride[1]]; \
        v = THCNumerics<T>::mul(v, ScalarConvert<int64_t, T>::to(kernelSize[dOther] - M * isOpenSpline[dOther])); \
        v = THCNumerics<T>::sub(v, ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(v))); \
        v = CODE; \
        tmp = THCNumerics<T>::mul(tmp, v); \
      } \
\
      tmp = THCNumerics<T>::mul(tmp, gradBasis.data[e * gradBasis.stride[0] + s * gradBasis.stride[1]]); \
      g = THCNumerics<T>::add(g, tmp); \
    } \
    /* scale by d(scaled coordinate)/d(pseudo), as in the forward transform */ \
    g = THCNumerics<T>::mul(g, ScalarConvert<int64_t, T>::to(kernelSize[d] - M * isOpenSpline[d])); \
    self.data[e * self.stride[0] + d * self.stride[1]] = g; \
  } \
}
// Derivatives of the B-spline basis functions (counterpart to BasisForward),
// written with THCNumerics/ScalarConvert so they instantiate for half as
// well as float/double. `v` is the fractional coordinate and `kMod` selects
// the polynomial piece — NOTE(review): presumably the kernel-index offset
// modulo (degree + 1) as computed in THC_TENSOR_BASIS_BACKWARD_KERNEL;
// confirm against the CPU implementation.
template<typename T>
struct BasisBackward {
  static inline __device__ T linear(T v, int64_t kMod) {
    // 2 * kMod - 1  (constant slope; v is unused for the linear basis)
    return ScalarConvert<int64_t, T>::to(2 * kMod - 1);
  }

  static inline __device__ T quadratic(T v, int64_t kMod) {
    if (kMod == 0) {
      // v - 1
      return THCNumerics<T>::sub(v, ScalarConvert<int, T>::to(1));
    }
    else if (kMod == 1) {
      // -2 * v + 1
      T tmp = THCNumerics<T>::mul(ScalarConvert<int, T>::to(-2), v);
      return THCNumerics<T>::add(tmp, ScalarConvert<int, T>::to(1));
    }
    // v
    else return v;
  }

  static inline __device__ T cubic(T v, int64_t kMod) {
    if (kMod == 0) {
      // (-v * v + 2 * v - 1) / 2
      T tmp1 = THCNumerics<T>::mul(THCNumerics<T>::neg(v), v);
      T tmp2 = THCNumerics<T>::mul(ScalarConvert<int, T>::to(2), v);
      tmp1 = THCNumerics<T>::sub(THCNumerics<T>::add(tmp1, tmp2), ScalarConvert<int, T>::to(1));
      return THCNumerics<T>::div(tmp1, ScalarConvert<int, T>::to(2));
    }
    else if (kMod == 1) {
      // (3 * v * v - 4 * v) / 2
      T tmp = THCNumerics<T>::mul(ScalarConvert<int, T>::to(3), THCNumerics<T>::mul(v, v));
      tmp = THCNumerics<T>::sub(tmp, THCNumerics<T>::mul(ScalarConvert<int, T>::to(4), v));
      return THCNumerics<T>::div(tmp, ScalarConvert<int, T>::to(2));
    }
    else if (kMod == 2) {
      // (-3 * v * v + 2 * v + 1) / 2
      T tmp = THCNumerics<T>::mul(ScalarConvert<int, T>::to(-3), THCNumerics<T>::mul(v, v));
      tmp = THCNumerics<T>::add(tmp, THCNumerics<T>::mul(ScalarConvert<int, T>::to(2), v));
      tmp = THCNumerics<T>::add(tmp, ScalarConvert<int, T>::to(1));
      return THCNumerics<T>::div(tmp, ScalarConvert<int, T>::to(2));
    }
    else {
      // v * v / 2;
      return THCNumerics<T>::div(THCNumerics<T>::mul(v, v), ScalarConvert<int, T>::to(2));
    }
  }
};
#endif // THC_BASIS_BACKWARD_INC
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
int64_t *kernelSizeData = THCudaLongTensor_data(state, kernelSize); \ int64_t *kernelSizeData = THCudaLongTensor_data(state, kernelSize); \
uint8_t *isOpenSplineData = THCudaByteTensor_data(state, isOpenSpline); \ uint8_t *isOpenSplineData = THCudaByteTensor_data(state, isOpenSpline); \
\ \
KERNEL_REAL_RUN(NAME, THCTensor_(nElement)(state, basis), basisInfo, \ KERNEL_REAL_RUN(NAME, THCTensor_(nElement)(state, basis), basisInfo, weightIndexInfo, \
weightIndexInfo, pseudoInfo, kernelSizeData, isOpenSplineData); \ pseudoInfo, kernelSizeData, isOpenSplineData); \
} }
#define THC_TENSOR_BASIS_FORWARD_KERNEL(M, basis, weightIndex, pseudo, kernelSize, isOpenSpline, \ #define THC_TENSOR_BASIS_FORWARD_KERNEL(M, basis, weightIndex, pseudo, kernelSize, isOpenSpline, \
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
wiOffset *= kernelSize[d]; \ wiOffset *= kernelSize[d]; \
\ \
v = THCNumerics<T>::sub(v, ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(v))); \ v = THCNumerics<T>::sub(v, ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(v))); \
CODE \ v = CODE; \
b = THCNumerics<T>::mul(b, v); \ b = THCNumerics<T>::mul(b, v); \
} \ } \
\ \
......
...@@ -24,4 +24,25 @@ void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis, ...@@ -24,4 +24,25 @@ void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis,
isOpenSpline) isOpenSpline)
} }
// Host entry point for the linear-basis backward pass: writes the gradient
// w.r.t. `pseudo` into `self`, given upstream `gradBasis`. Dispatches
// linearBasisBackwardKernel via THC_TENSOR_BASIS_BACKWARD.
void THCTensor_(linearBasisBackward)(THCState *state, THCTensor *self, THCTensor *gradBasis,
                                     THCTensor *pseudo, THCudaLongTensor *kernelSize,
                                     THCudaByteTensor *isOpenSpline) {
  THC_TENSOR_BASIS_BACKWARD(linearBasisBackwardKernel, state, self, gradBasis, pseudo, kernelSize,
                            isOpenSpline)
}
// Host entry point for the quadratic-basis backward pass; see
// THCTensor_(linearBasisBackward) for the argument contract.
void THCTensor_(quadraticBasisBackward)(THCState *state, THCTensor *self, THCTensor *gradBasis,
                                        THCTensor *pseudo, THCudaLongTensor *kernelSize,
                                        THCudaByteTensor *isOpenSpline) {
  THC_TENSOR_BASIS_BACKWARD(quadraticBasisBackwardKernel, state, self, gradBasis, pseudo,
                            kernelSize, isOpenSpline)
}
// Host entry point for the cubic-basis backward pass; see
// THCTensor_(linearBasisBackward) for the argument contract.
void THCTensor_(cubicBasisBackward)(THCState *state, THCTensor *self, THCTensor *gradBasis,
                                    THCTensor *pseudo, THCudaLongTensor *kernelSize,
                                    THCudaByteTensor *isOpenSpline) {
  THC_TENSOR_BASIS_BACKWARD(cubicBasisBackwardKernel, state, self, gradBasis, pseudo, kernelSize,
                            isOpenSpline)
}
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
...@@ -15,8 +15,16 @@ void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis, ...@@ -15,8 +15,16 @@ void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo, THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline); THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCTensor_(linearBasisBackward)(THCState *state, THCTensor *basis, void THCTensor_(linearBasisBackward)(THCState *state, THCTensor *self, THCTensor *gradBasis,
THCudaLongTensor *weightIndex, THCTensor *pseudo, THCTensor *pseudo, THCudaLongTensor *kernelSize,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline); THCudaByteTensor *isOpenSpline);
void THCTensor_(quadraticBasisBackward)(THCState *state, THCTensor *self, THCTensor *gradBasis,
THCTensor *pseudo, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline);
void THCTensor_(cubicBasisBackward)(THCState *state, THCTensor *self, THCTensor *gradBasis,
THCTensor *pseudo, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline);
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
...@@ -23,16 +23,19 @@ void THCCTensor_(cubicBasisForward)(THCTensor *basis, THCudaLongTensor *weightIn ...@@ -23,16 +23,19 @@ void THCCTensor_(cubicBasisForward)(THCTensor *basis, THCudaLongTensor *weightIn
void THCCTensor_(linearBasisBackward)(THCTensor *self, THCTensor *gradBasis, THCTensor *pseudo, void THCCTensor_(linearBasisBackward)(THCTensor *self, THCTensor *gradBasis, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) { THCudaByteTensor *isOpenSpline) {
THCTensor_(linearBasisBackward)(state, self, gradBasis, pseudo, kernelSize, isOpenSpline);
} }
void THCCTensor_(quadraticBasisBackward)(THCTensor *self, THCTensor *gradBasis, THCTensor *pseudo, void THCCTensor_(quadraticBasisBackward)(THCTensor *self, THCTensor *gradBasis, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) { THCudaByteTensor *isOpenSpline) {
THCTensor_(quadraticBasisBackward)(state, self, gradBasis, pseudo, kernelSize, isOpenSpline);
} }
void THCCTensor_(cubicBasisBackward)(THCTensor *self, THCTensor *gradBasis, THCTensor *pseudo, void THCCTensor_(cubicBasisBackward)(THCTensor *self, THCTensor *gradBasis, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) { THCudaByteTensor *isOpenSpline) {
THCTensor_(cubicBasisBackward)(state, self, gradBasis, pseudo, kernelSize, isOpenSpline);
} }
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
...@@ -30,7 +30,7 @@ tests = [{ ...@@ -30,7 +30,7 @@ tests = [{
@pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests)))) @pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests))))
def test_spline_basis_cpu(tensor, i): def test_spline_basis_forward_cpu(tensor, i):
data = tests[i] data = tests[i]
pseudo = getattr(torch, tensor)(data['pseudo']) pseudo = getattr(torch, tensor)(data['pseudo'])
...@@ -44,7 +44,7 @@ def test_spline_basis_cpu(tensor, i): ...@@ -44,7 +44,7 @@ def test_spline_basis_cpu(tensor, i):
@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA') @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
@pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests)))) @pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests))))
def test_spline_basis_gpu(tensor, i): # pragma: no cover def test_spline_basis_forward_gpu(tensor, i): # pragma: no cover
data = tests[i] data = tests[i]
pseudo = getattr(torch.cuda, tensor)(data['pseudo']) pseudo = getattr(torch.cuda, tensor)(data['pseudo'])
...@@ -56,12 +56,24 @@ def test_spline_basis_gpu(tensor, i): # pragma: no cover ...@@ -56,12 +56,24 @@ def test_spline_basis_gpu(tensor, i): # pragma: no cover
assert weight_index.cpu().tolist() == data['weight_index'] assert weight_index.cpu().tolist() == data['weight_index']
def test_spline_basis_grad_cpu(): @pytest.mark.parametrize('degree', implemented_degrees.keys())
def test_spline_basis_backward_cpu(degree):
kernel_size = torch.LongTensor([5, 5, 5]) kernel_size = torch.LongTensor([5, 5, 5])
is_open_spline = torch.ByteTensor([1, 0, 1]) is_open_spline = torch.ByteTensor([1, 0, 1])
pseudo = torch.DoubleTensor(4, 3).uniform_(0, 1) pseudo = torch.DoubleTensor(4, 3).uniform_(0, 1)
pseudo = Variable(pseudo, requires_grad=True) pseudo = Variable(pseudo, requires_grad=True)
for degree in implemented_degrees.keys(): op = SplineBasis(degree, kernel_size, is_open_spline)
op = SplineBasis(degree, kernel_size, is_open_spline) assert gradcheck(op, (pseudo, ), eps=1e-6, atol=1e-4) is True
assert gradcheck(op, (pseudo, ), eps=1e-6, atol=1e-4) is True
@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
@pytest.mark.parametrize('degree', implemented_degrees.keys())
def test_spline_basis_backward_gpu(degree):
    """Numerically gradient-check the CUDA basis backward pass per degree."""
    kernel_size = torch.cuda.LongTensor([5, 5, 5])
    is_open_spline = torch.cuda.ByteTensor([1, 0, 1])
    # NOTE(review): pseudo is (4, 1) here while the CPU test uses (4, 3) —
    # presumably to keep the GPU gradcheck cheap; confirm this is intentional.
    pseudo = torch.cuda.DoubleTensor(4, 1).uniform_(0, 1)
    pseudo = Variable(pseudo, requires_grad=True)
    op = SplineBasis(degree, kernel_size, is_open_spline)
    assert gradcheck(op, (pseudo, ), eps=1e-6, atol=1e-4) is True
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment