#include "THCBasis.h"

#include "common.cuh"
#include "THCNumerics.cuh"

// NOTE(review): the template parameter/argument lists in this file
// (template<typename T>, TensorInfo<...>, THCNumerics<T>, ScalarConvert<In, Out>)
// were restored from how each value is used. Confirm them against the
// declarations in common.cuh and THCNumerics.cuh.

/*
 * Host-side launcher shared by the generic basis-forward entry points.
 *
 * Asserts that all five tensors live on the same GPU, wraps `basis`,
 * `weightIndex` and `pseudo` into TensorInfo descriptors, fetches the raw data
 * pointers of `kernelSize` (int64) and `isOpenSpline` (uint8), and hands
 * everything to KERNEL_REAL_RUN together with the element count of `basis`.
 *
 * NAME is the kernel to run. KERNEL_REAL_RUN is not defined in this file
 * (presumably it comes from common.cuh - confirm how it forwards the element
 * count to the kernel's trailing `n` parameter).
 */
#define THC_TENSOR_BASIS_FORWARD(NAME, state, basis, weightIndex, pseudo, kernelSize, \
                                 isOpenSpline) { \
  THCAssertSameGPU( \
    THCTensor_(checkGPU)(state, 5, basis, weightIndex, pseudo, kernelSize, isOpenSpline)); \
  \
  TensorInfo<real> basisInfo = THCTensor_(getTensorInfo)(state, basis); \
  TensorInfo<int64_t> weightIndexInfo = THCudaLongTensor_getTensorInfo(state, weightIndex); \
  TensorInfo<real> pseudoInfo = THCTensor_(getTensorInfo)(state, pseudo); \
  int64_t *kernelSizeData = THCudaLongTensor_data(state, kernelSize); \
  uint8_t *isOpenSplineData = THCudaByteTensor_data(state, isOpenSpline); \
  \
  KERNEL_REAL_RUN(NAME, THCTensor_(nElement)(state, basis), basisInfo, \
                  weightIndexInfo, pseudoInfo, kernelSizeData, isOpenSplineData); \
}

/*
 * Device-side body shared by all basis-forward kernels.
 *
 * M     spline degree; each pseudo dimension contributes M + 1 candidates.
 * N     number of flat indices handed to KERNEL_LOOP.
 * CODE  statements run once per pseudo dimension. At that point `v` holds the
 *       fractional part of the scaled pseudo-coordinate and `kMod` the
 *       base-(M + 1) digit of `s` for the current dimension; CODE may rewrite
 *       `v` before it is multiplied into the running product `b`.
 *
 * For each flat index i:
 *   e = i / basis.size[1]   (row of basis / weightIndex / pseudo)
 *   s = i % basis.size[1]   (column of basis / weightIndex)
 * and for every pseudo dimension d:
 *   - kMod is the d-th base-(M + 1) digit of s,
 *   - v = pseudo[e][d] * (kernelSize[d] - M * isOpenSpline[d]),
 *   - wi accumulates ((integer part of v + kMod) % kernelSize[d]) in a
 *     mixed-radix layout whose radices are kernelSize[0..d-1],
 *   - v is reduced to v - (integer part of v), then transformed by CODE.
 * The product of the per-dimension v values is stored in basis[e][s] and wi in
 * weightIndex[e][s].
 *
 * Only block comments may appear inside the macro body or inside CODE; a
 * line comment would swallow whatever follows it on a joined line.
 */
#define THC_TENSOR_BASIS_FORWARD_KERNEL(M, basis, weightIndex, pseudo, kernelSize, isOpenSpline, \
                                        N, CODE) { \
  KERNEL_LOOP(i, N) { \
    ptrdiff_t e = i / basis.size[1], s = i % basis.size[1], d; \
    int64_t k = s, kMod, wi = 0, wiOffset = 1; \
    T b = ScalarConvert<int, T>::to(1), v; \
    \
    for (d = 0; d < pseudo.size[1]; d++) { \
      kMod = k % (M + 1); \
      k /= M + 1; \
      \
      v = pseudo.data[e * pseudo.stride[0] + d * pseudo.stride[1]]; \
      v = THCNumerics<T>::mul(v, ScalarConvert<int64_t, T>::to(kernelSize[d] - M * isOpenSpline[d])); \
      \
      wi += ((ScalarConvert<T, int64_t>::to(v) + kMod) % kernelSize[d]) * wiOffset; \
      wiOffset *= kernelSize[d]; \
      \
      v = THCNumerics<T>::sub(v, ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(v))); \
      CODE \
      b = THCNumerics<T>::mul(b, v); \
    } \
    \
    basis.data[e * basis.stride[0] + s * basis.stride[1]] = b; \
    weightIndex.data[e * weightIndex.stride[0] + s * weightIndex.stride[1]] = wi; \
  } \
}

/*
 * Degree-1 basis forward pass.
 *
 * Per dimension the fractional coordinate v is replaced by
 *   1 - v - kMod + 2 * v * kMod
 * which evaluates to (1 - v) for kMod == 0 and to v for kMod == 1.
 *
 * basis / weightIndex are written, pseudo / kernelSize / isOpenSpline are read;
 * n is the number of flat indices to process.
 */
template<typename T>
__global__ void linearBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> weightIndex,
                                         TensorInfo<T> pseudo, int64_t *kernelSize,
                                         uint8_t *isOpenSpline, ptrdiff_t n) {
  THC_TENSOR_BASIS_FORWARD_KERNEL(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
    /* 1 - v - kMod + 2 * v * kMod */
    T tmp1 = THCNumerics<T>::sub(ScalarConvert<int, T>::to(1), v);
    tmp1 = THCNumerics<T>::sub(tmp1, ScalarConvert<int64_t, T>::to(kMod));
    T tmp2 = THCNumerics<T>::mul(ScalarConvert<int, T>::to(2), v);
    tmp2 = THCNumerics<T>::mul(tmp2, ScalarConvert<int64_t, T>::to(kMod));
    v = THCNumerics<T>::add(tmp1, tmp2);
  )
}

/*
 * Degree-2 basis forward pass.
 *
 * NOTE(review): no degree-specific CODE is supplied, so the stored basis value
 * is the plain product of the fractional coordinates. This looks like a
 * placeholder - confirm before relying on the output.
 */
template<typename T>
__global__ void quadraticBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> weightIndex,
                                            TensorInfo<T> pseudo, int64_t *kernelSize,
                                            uint8_t *isOpenSpline, ptrdiff_t n) {
  THC_TENSOR_BASIS_FORWARD_KERNEL(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
    /* printf("DRIN"); */
  )
}

/*
 * Degree-3 basis forward pass.
 *
 * NOTE(review): no degree-specific CODE is supplied, so the stored basis value
 * is the plain product of the fractional coordinates. This looks like a
 * placeholder - confirm before relying on the output.
 */
template<typename T>
__global__ void cubicBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> weightIndex,
                                        TensorInfo<T> pseudo, int64_t *kernelSize,
                                        uint8_t *isOpenSpline, ptrdiff_t n) {
  THC_TENSOR_BASIS_FORWARD_KERNEL(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
    /* printf("DRIN"); */
  )
}

#include "generic/THCBasis.cu"
#include "THC/THCGenerateFloatTypes.h"