Commit 36ed7951 authored by rusty1s

new aten build

parent 7f7b2b0a
@@ -2,7 +2,4 @@ include LICENSE
 include build.py
 include build.sh
-recursive-include torch_spline_conv/src *
-recursive-include torch_spline_conv/kernel *
 recursive-exclude torch_spline_conv/_ext *
| Library | Meaning |
|---------|------------------------------------|
| TH | **T**orc**H** |
| THC | **T**orc**H** **C**uda |
| THCC | **T**orc**H** **C**uda **C**onnect |
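
A quick illustration of how these prefixes turn into the concrete symbols in the headers below (the `Real` placeholder is expanded once per floating-point type by the `THGenerateFloatTypes.h` / `THCGenerateFloatTypes.h` passes). This is only a naming sketch; the macro is the one defined later in this commit and the expanded names are the ones declared further down:

```c
/* Connector macro as defined below in this commit. */
#define THCCTensor_(NAME) TH_CONCAT_4(THCC,Real,Tensor_,NAME)

/* With Real = Float:
 *   THTensor_(linearBasisForward)   -> THFloatTensor_linearBasisForward    (TH,   CPU)
 *   THCCTensor_(linearBasisForward) -> THCCFloatTensor_linearBasisForward  (THCC, CUDA entry point)
 */
```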
#include <TH/TH.h>
#define TH_TENSOR_BASIS_FORWARD(M, basis, weightIndex, pseudo, kernelSize, isOpenSpline, CODE) { \
real *basisData = THTensor_(data)(basis); \
int64_t *weightIndexData = THLongTensor_data(weightIndex); \
real *pseudoData = THTensor_(data)(pseudo); \
int64_t *kernelSizeData = THLongTensor_data(kernelSize); \
uint8_t *isOpenSplineData = THByteTensor_data(isOpenSpline); \
\
ptrdiff_t e, s, d; \
int64_t k, kMod, wi, wiOffset; \
real b, v; \
for (e = 0; e < THTensor_(size)(pseudo, 0); e++) { \
for (s = 0; s < THTensor_(size)(basis, 1); s++) { \
k = s; b = 1; wi = 0; wiOffset = 1; \
for (d = 0; d < THTensor_(size)(pseudo, 1); d++) { \
kMod = k % (M + 1); \
k /= M + 1; \
\
v = pseudoData[e * pseudo->stride[0] + d * pseudo->stride[1]]; \
v *= kernelSizeData[d] - M * isOpenSplineData[d]; \
\
wi += (((int64_t) v + kMod) % kernelSizeData[d]) * wiOffset; \
wiOffset *= kernelSizeData[d]; \
\
v -= floor(v); \
CODE \
b *= v; \
} \
basisData[e * basis->stride[0] + s * basis->stride[1]] = b; \
weightIndexData[e * weightIndex->stride[0] + s * weightIndex->stride[1]] = wi; \
} \
} \
}
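
In formula form, a sketch of what the macro accumulates: for every edge $e$ and every of the $(M+1)^{\dim}$ segment combinations $s$, with $k_d(s)$ the $d$-th digit of $s$ in base $M+1$ and $b_k$ the degree-$M$ B-spline segment supplied via `CODE`,

```latex
v_{e,d} = \mathrm{pseudo}_{e,d}\,\bigl(\mathrm{kernelSize}_d - M\cdot\mathrm{isOpenSpline}_d\bigr), \qquad
B_{e,s} = \prod_{d} b_{k_d(s)}\!\bigl(v_{e,d} - \lfloor v_{e,d}\rfloor\bigr),

\mathrm{wi}_{e,s} = \sum_{d}\Bigl(\bigl(\lfloor v_{e,d}\rfloor + k_d(s)\bigr)\bmod \mathrm{kernelSize}_d\Bigr)\cdot\prod_{d' < d}\mathrm{kernelSize}_{d'}.
```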
#include "generic/THBasis.c"
#include "THGenerateFloatTypes.h"
void THFloatTensor_linearBasisForward( THFloatTensor *basis, THLongTensor *weightIndex, THFloatTensor *pseudo, THLongTensor *kernelSize, THByteTensor *isOpenSpline);
void THDoubleTensor_linearBasisForward(THDoubleTensor *basis, THLongTensor *weightIndex, THDoubleTensor *pseudo, THLongTensor *kernelSize, THByteTensor *isOpenSpline);
void THFloatTensor_quadraticBasisForward( THFloatTensor *basis, THLongTensor *weightIndex, THFloatTensor *pseudo, THLongTensor *kernelSize, THByteTensor *isOpenSpline);
void THDoubleTensor_quadraticBasisForward(THDoubleTensor *basis, THLongTensor *weightIndex, THDoubleTensor *pseudo, THLongTensor *kernelSize, THByteTensor *isOpenSpline);
void THFloatTensor_cubicBasisForward( THFloatTensor *basis, THLongTensor *weightIndex, THFloatTensor *pseudo, THLongTensor *kernelSize, THByteTensor *isOpenSpline);
void THDoubleTensor_cubicBasisForward(THDoubleTensor *basis, THLongTensor *weightIndex, THDoubleTensor *pseudo, THLongTensor *kernelSize, THByteTensor *isOpenSpline);
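
A minimal host-side usage sketch for the CPU entry points above. Assumptions (illustrative only, not part of the commit): one edge, two pseudo-coordinate dimensions, quadratic basis, so the basis/weightIndex tensors need $(2+1)^2 = 9$ columns; all tensor names and sizes here are made up for the example.

```c
#include <TH/TH.h>
#include <stdio.h>

int main(void) {
  /* One edge (E = 1), two dimensions (d = 2), quadratic basis (M = 2) -> S = (M+1)^d = 9. */
  THFloatTensor *pseudo       = THFloatTensor_newWithSize2d(1, 2);
  THFloatTensor *basis        = THFloatTensor_newWithSize2d(1, 9);
  THLongTensor  *weightIndex  = THLongTensor_newWithSize2d(1, 9);
  THLongTensor  *kernelSize   = THLongTensor_newWithSize1d(2);
  THByteTensor  *isOpenSpline = THByteTensor_newWithSize1d(2);

  THFloatTensor_set2d(pseudo, 0, 0, 0.25f);   /* pseudo-coordinates in [0, 1] */
  THFloatTensor_set2d(pseudo, 0, 1, 0.75f);
  THLongTensor_set1d(kernelSize, 0, 5);
  THLongTensor_set1d(kernelSize, 1, 5);
  THByteTensor_set1d(isOpenSpline, 0, 1);     /* open splines in both dimensions */
  THByteTensor_set1d(isOpenSpline, 1, 1);

  THFloatTensor_quadraticBasisForward(basis, weightIndex, pseudo, kernelSize, isOpenSpline);
  printf("B[0,0] = %f, wi[0,0] = %lld\n",
         THFloatTensor_get2d(basis, 0, 0),
         (long long) THLongTensor_get2d(weightIndex, 0, 0));

  THFloatTensor_free(pseudo); THFloatTensor_free(basis);
  THLongTensor_free(weightIndex); THLongTensor_free(kernelSize);
  THByteTensor_free(isOpenSpline);
  return 0;
}
```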
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/THBasis.c"
#else
void THTensor_(linearBasisForward)(THTensor *basis, THLongTensor *weightIndex, THTensor *pseudo,
THLongTensor *kernelSize, THByteTensor *isOpenSpline) {
TH_TENSOR_BASIS_FORWARD(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline,
v = 1 - v - kMod + 2 * v * kMod;
)
}
void THTensor_(quadraticBasisForward)(THTensor *basis, THLongTensor *weightIndex, THTensor *pseudo,
THLongTensor *kernelSize, THByteTensor *isOpenSpline) {
TH_TENSOR_BASIS_FORWARD(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline,
if (kMod == 0) v = 0.5 * v * v - v + 0.5;
else if (kMod == 1) v = -v * v + v + 0.5;
else v = 0.5 * v * v;
)
}
void THTensor_(cubicBasisForward)(THTensor *basis, THLongTensor *weightIndex, THTensor *pseudo,
THLongTensor *kernelSize, THByteTensor *isOpenSpline) {
TH_TENSOR_BASIS_FORWARD(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline,
if (kMod == 0) { v = (1 - v); v = v * v * v / 6.0; }
else if (kMod == 1) v = (3 * v * v * v - 6 * v * v + 4) / 6;
else if (kMod == 2) v = (-3 * v * v * v + 3 * v * v + 3 * v + 1) / 6;
else v = v * v * v / 6;
)
}
#endif // TH_GENERIC_FILE
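
Written out, the `CODE` branches above are the segments of the uniform B-spline basis of degree $M$ evaluated at the fractional part $v \in [0,1)$; for $M=1$ the branch-free expression $1 - v - k + 2vk$ collapses to the two linear cases. This only restates the polynomials already in the code:

```latex
\begin{aligned}
M=1:&\quad b_0(v) = 1 - v, \qquad b_1(v) = v \\
M=2:&\quad b_0(v) = \tfrac12 (1-v)^2, \quad b_1(v) = -v^2 + v + \tfrac12, \quad b_2(v) = \tfrac12 v^2 \\
M=3:&\quad b_0(v) = \tfrac16 (1-v)^3, \quad b_1(v) = \tfrac16 (3v^3 - 6v^2 + 4), \\
    &\quad b_2(v) = \tfrac16 (-3v^3 + 3v^2 + 3v + 1), \quad b_3(v) = \tfrac16 v^3
\end{aligned}
```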
#include "THCBasis.cu"
#ifndef THC_INC
#define THC_INC
#include "THCBasis.h"
#endif // THC_INC
#include "THCBasis.h"
#include "common.cuh"
#include "THCNumerics.cuh"
#define THC_TENSOR_BASIS_FORWARD(NAME, state, basis, weightIndex, pseudo, kernelSize, \
isOpenSpline) { \
THCAssertSameGPU( \
THCTensor_(checkGPU)(state, 5, basis, weightIndex, pseudo, kernelSize, isOpenSpline)); \
\
TensorInfo<real> basisInfo = THCTensor_(getTensorInfo)(state, basis); \
TensorInfo<int64_t> weightIndexInfo = THCudaLongTensor_getTensorInfo(state, weightIndex); \
TensorInfo<real> pseudoInfo = THCTensor_(getTensorInfo)(state, pseudo); \
int64_t *kernelSizeData = THCudaLongTensor_data(state, kernelSize); \
uint8_t *isOpenSplineData = THCudaByteTensor_data(state, isOpenSpline); \
\
KERNEL_REAL_RUN(NAME, THCTensor_(nElement)(state, basis), basisInfo, \
weightIndexInfo, pseudoInfo, kernelSizeData, isOpenSplineData); \
}
#define THC_TENSOR_BASIS_FORWARD_KERNEL(M, basis, weightIndex, pseudo, kernelSize, isOpenSpline, \
N, CODE) { \
KERNEL_LOOP(i, N) { \
ptrdiff_t e = i / basis.size[1], s = i % basis.size[1], d; \
int64_t k = s, kMod, wi = 0, wiOffset = 1; \
T b = ScalarConvert<int, T>::to(1), v; \
\
for (d = 0; d < pseudo.size[1]; d++) { \
kMod = k % (M + 1); \
k /= M + 1; \
\
v = pseudo.data[e * pseudo.stride[0] + d * pseudo.stride[1]]; \
v = THCNumerics<T>::mul(v, ScalarConvert<int64_t, T>::to(kernelSize[d] - M * isOpenSpline[d])); \
\
wi += ((ScalarConvert<T, int64_t>::to(v) + kMod) % kernelSize[d]) * wiOffset; \
wiOffset *= kernelSize[d]; \
\
v = THCNumerics<T>::sub(v, ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(v))); \
CODE \
b = THCNumerics<T>::mul(b, v); \
} \
\
basis.data[e * basis.stride[0] + s * basis.stride[1]] = b; \
weightIndex.data[e * weightIndex.stride[0] + s * weightIndex.stride[1]] = wi; \
} \
}
template<typename T>
__global__ void linearBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> weightIndex,
TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
// 1 - v - kMod + 2 * v * kMod
T tmp1 = THCNumerics<T>::sub(ScalarConvert<int, T>::to(1), v);
tmp1 = THCNumerics<T>::sub(tmp1, ScalarConvert<int64_t, T>::to(kMod));
T tmp2 = THCNumerics<T>::mul(ScalarConvert<int, T>::to(2), v);
tmp2 = THCNumerics<T>::mul(tmp2, ScalarConvert<int64_t, T>::to(kMod));
v = THCNumerics<T>::add(tmp1, tmp2);
)
}
template<typename T>
__global__ void quadraticBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> weightIndex,
TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
/* Debug placeholder left in this commit: the quadratic basis polynomial is not applied to v here
   (compare the kMod branches in generic/THBasis.c); a hedged GPU sketch follows after the cubic kernel. */
)
}
template<typename T>
__global__ void cubicBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> weightIndex,
TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
/* Debug placeholder left in this commit: the cubic basis polynomial is likewise not applied to v here. */
)
}
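
As referenced in the comments above, a hedged sketch of what the quadratic `CODE` block could look like on the GPU, mirroring the CPU polynomial and using only the `THCNumerics`/`ScalarConvert` helpers defined in the numerics header below. This is illustrative only and not part of the commit:

```cuda
/* Possible CODE body for THC_TENSOR_BASIS_FORWARD_KERNEL(2, ...) -- illustrative only. */
T vv   = THCNumerics<T>::mul(v, v);
T half = ScalarConvert<float, T>::to(0.5f);
if (kMod == 0)      v = THCNumerics<T>::add(THCNumerics<T>::sub(THCNumerics<T>::mul(half, vv), v), half); /* 0.5*v*v - v + 0.5 */
else if (kMod == 1) v = THCNumerics<T>::add(THCNumerics<T>::sub(v, vv), half);                            /* -v*v + v + 0.5    */
else                v = THCNumerics<T>::mul(half, vv);                                                    /* 0.5*v*v           */
```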
#include "generic/THCBasis.cu"
#include "THC/THCGenerateFloatTypes.h"
#ifndef THC_BASIS_INC
#define THC_BASIS_INC
#include <THC/THC.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#include "generic/THCBasis.h"
#include "THC/THCGenerateFloatTypes.h"
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // THC_BASIS_INC
#ifndef THC_NUMERICS_INC
#define THC_NUMERICS_INC
#include <THC/THCHalf.h>
#ifdef CUDA_HALF_TENSOR
#ifdef __CUDA_ARCH__
#define h2f(A) __half2float(A)
#define f2h(A) __float2half(A)
#else // __CUDA_ARCH__
#define h2f(A) THC_half2float(A)
#define f2h(A) THC_float2half(A)
#endif // __CUDA_ARCH__
#endif // CUDA_HALF_TENSOR
template<typename T>
struct THCNumerics {
static inline __host__ __device__ T add(T a, T b) { return a + b; }
static inline __host__ __device__ T sub(T a, T b) { return a - b; }
static inline __host__ __device__ T mul(T a, T b) { return a * b; }
};
#ifdef CUDA_HALF_TENSOR
template<>
struct THCNumerics<half> {
static inline __host__ __device__ half add(half a, half b) { return f2h(h2f(a) + h2f(b)); }
static inline __host__ __device__ half sub(half a, half b) { return f2h(h2f(a) - h2f(b)); }
static inline __host__ __device__ half mul(half a, half b) { return f2h(h2f(a) * h2f(b)); }
};
#endif // CUDA_HALF_TENSOR
template <typename In, typename Out>
struct ScalarConvert {
static __host__ __device__ Out to(const In v) { return (Out) v; }
};
#ifdef CUDA_HALF_TENSOR
template <typename Out>
struct ScalarConvert<half, Out> {
static __host__ __device__ Out to(const half v) { return (Out) h2f(v); }
};
template <typename In>
struct ScalarConvert<In, half> {
static __host__ __device__ half to(const In v) { return f2h((float) v); }
};
#endif // CUDA_HALF_TENSOR
#endif // THC_NUMERICS_INC
#ifndef THC_COMMON_INC
#define THC_COMMON_INC
#define KERNEL_LOOP(I, N) \
for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
const int MAX_DIMS = 25;
const int NUM_THREADS = 1024;
inline int GET_BLOCKS(int N) {
return (N + NUM_THREADS - 1) / NUM_THREADS;
}
#define KERNEL_REAL_RUN(NAME, N, ...) \
int grid = GET_BLOCKS(N); \
cudaStream_t stream = THCState_getCurrentStream(state); \
NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
THCudaCheck(cudaGetLastError())
template<typename T>
struct TensorInfo {
T *data;
int dims;
int size[MAX_DIMS];
int stride[MAX_DIMS];
};
#include "generic/common.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_COMMON_INC
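
A quick sanity check of the launch arithmetic above: `GET_BLOCKS` rounds up to full blocks of `NUM_THREADS`, and `KERNEL_LOOP` is a grid-stride loop, so each thread covers several elements whenever the grid is smaller than the problem size. A minimal host-only illustration (not part of the extension):

```c
#include <stdio.h>

/* Same arithmetic as GET_BLOCKS in common.cuh, reproduced for a host-only check. */
static const int NUM_THREADS = 1024;
static int get_blocks(int n) { return (n + NUM_THREADS - 1) / NUM_THREADS; }

int main(void) {
  printf("%d %d %d %d\n",
         get_blocks(1),     /* 1 */
         get_blocks(1024),  /* 1 */
         get_blocks(1025),  /* 2 */
         get_blocks(5000)); /* 5 */
  return 0;
}
```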
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCBasis.cu"
#else
void THCTensor_(linearBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline) {
THC_TENSOR_BASIS_FORWARD(linearBasisForwardKernel, state, basis, weightIndex, pseudo, kernelSize,
isOpenSpline)
}
void THCTensor_(quadraticBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) {
THC_TENSOR_BASIS_FORWARD(quadraticBasisForwardKernel, state, basis, weightIndex, pseudo,
kernelSize, isOpenSpline)
}
void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline) {
THC_TENSOR_BASIS_FORWARD(cubicBasisForwardKernel, state, basis, weightIndex, pseudo, kernelSize,
isOpenSpline)
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCBasis.h"
#else
void THCTensor_(linearBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCTensor_(quadraticBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline);
void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cuh"
#else
TensorInfo<real> THCTensor_(getTensorInfo)(THCState *state, THCTensor *tensor) {
TensorInfo<real> tensorInfo = TensorInfo<real>();
tensorInfo.data = THCTensor_(data)(state, tensor);
tensorInfo.dims = THCTensor_(nDimension)(state, tensor);
for (ptrdiff_t d = 0; d < tensorInfo.dims; d++) {
tensorInfo.size[d] = THCTensor_(size)(state, tensor, d);
tensorInfo.stride[d] = THCTensor_(stride)(state, tensor, d);
}
return tensorInfo;
}
#endif // THC_GENERIC_FILE
#include <THC/THC.h>
#include "THC.h"
#define THCCTensor_(NAME) TH_CONCAT_4(THCC,Real,Tensor_,NAME)
extern THCState *state;
#include "generic/THCCBasis.c"
#include "THCGenerateFloatTypes.h"
void THCCFloatTensor_linearBasisForward( THCudaTensor *basis, THCudaLongTensor *weightIndex, THCudaTensor *pseudo, THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCCDoubleTensor_linearBasisForward(THCudaDoubleTensor *basis, THCudaLongTensor *weightIndex, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCCFloatTensor_quadraticBasisForward( THCudaTensor *basis, THCudaLongTensor *weightIndex, THCudaTensor *pseudo, THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCCDoubleTensor_quadraticBasisForward(THCudaDoubleTensor *basis, THCudaLongTensor *weightIndex, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCCFloatTensor_cubicBasisForward( THCudaTensor *basis, THCudaLongTensor *weightIndex, THCudaTensor *pseudo, THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
void THCCDoubleTensor_cubicBasisForward(THCudaDoubleTensor *basis, THCudaLongTensor *weightIndex, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCCBasis.c"
#else
void THCCTensor_(linearBasisForward)(THCTensor *basis, THCudaLongTensor *weightIndex,
THCTensor *pseudo, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) {
THCTensor_(linearBasisForward)(state, basis, weightIndex, pseudo, kernelSize, isOpenSpline);
}
void THCCTensor_(quadraticBasisForward)(THCTensor *basis, THCudaLongTensor *weightIndex,
THCTensor *pseudo, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) {
THCTensor_(quadraticBasisForward)(state, basis, weightIndex, pseudo, kernelSize, isOpenSpline);
}
void THCCTensor_(cubicBasisForward)(THCTensor *basis, THCudaLongTensor *weightIndex,
THCTensor *pseudo, THCudaLongTensor *kernelSize,
THCudaByteTensor *isOpenSpline) {
THCTensor_(cubicBasisForward)(state, basis, weightIndex, pseudo, kernelSize, isOpenSpline);
}
#endif // THC_GENERIC_FILE
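
For Real = Float, and assuming the usual THC convention that the float instantiation of `THCTensor` is spelled `THCudaTensor`, the connector above expands to roughly the following. This is an expansion sketch, not additional source in this commit; `state` is the global `THCState` declared `extern` above and supplied by the FFI build:

```c
/* Expansion sketch for Real = Float (names assume standard TH/THC generation). */
void THCCFloatTensor_linearBasisForward(THCudaTensor *basis, THCudaLongTensor *weightIndex,
                                        THCudaTensor *pseudo, THCudaLongTensor *kernelSize,
                                        THCudaByteTensor *isOpenSpline) {
  THCudaTensor_linearBasisForward(state, basis, weightIndex, pseudo, kernelSize, isOpenSpline);
}
```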