"tests/python/pytorch/graphbolt/test_dataloader.py" did not exist on "2968c9b247314dcb0ff64d00416654e99ca01de7"
Commit c99021ca authored by rusty1s's avatar rusty1s
Browse files

added quadratic and cubic impl

parent 36ed7951
...@@ -46,16 +46,72 @@ ...@@ -46,16 +46,72 @@
} }
template<typename T> template<typename T>
__global__ void linearBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t>weightIndex, struct BasisForward {
TensorInfo<T> pseudo, int64_t *kernelSize, static inline __device__ T linear(T v, int64_t kMod) {
uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
// 1 - v - kMod + 2 * v * kMod // 1 - v - kMod + 2 * v * kMod
T tmp1 = THCNumerics<T>::sub(ScalarConvert<int, T>::to(1), v); T tmp1 = THCNumerics<T>::sub(ScalarConvert<int, T>::to(1), v);
tmp1 = THCNumerics<T>::sub(tmp1, ScalarConvert<int64_t, T>::to(kMod)); tmp1 = THCNumerics<T>::sub(tmp1, ScalarConvert<int64_t, T>::to(kMod));
T tmp2 = THCNumerics<T>::mul(ScalarConvert<int, T>::to(2), v); T tmp2 = THCNumerics<T>::mul(ScalarConvert<int, T>::to(2), v);
tmp2 = THCNumerics<T>::mul(tmp2, ScalarConvert<int64_t, T>::to(kMod)); tmp2 = THCNumerics<T>::mul(tmp2, ScalarConvert<int64_t, T>::to(kMod));
v = THCNumerics<T>::add(tmp1, tmp2); return THCNumerics<T>::add(tmp1, tmp2);
}
/**
 * Evaluates one polynomial piece of the quadratic (degree-2) B-spline basis.
 *
 * @param v     Point at which the piece is evaluated.
 *              NOTE(review): presumably the fractional pseudo-coordinate in
 *              [0, 1) produced by the enclosing kernel macro -- confirm.
 * @param kMod  Selects the piece:
 *                0         ->  0.5 * v * v - v + 0.5
 *                1         -> -v * v + v + 0.5
 *                otherwise ->  0.5 * v * v
 * @return The value of the selected piece at v.
 *
 * The three pieces sum to 1 for every v, which is a quick sanity check on
 * the signs of the constant terms.
 */
static inline __device__ T quadratic(T v, int64_t kMod) {
  const T oneHalf = ScalarConvert<float, T>::to(0.5f);
  if (kMod == 0) {
    // 0.5 * v * v - v + 0.5
    T tmp = THCNumerics<T>::mul(THCNumerics<T>::mul(oneHalf, v), v);
    // Subtract v first, THEN add 0.5. Writing this as sub(tmp, add(v, 0.5))
    // would yield 0.5 * v * v - v - 0.5, i.e. the wrong sign on the constant.
    return THCNumerics<T>::add(THCNumerics<T>::sub(tmp, v), oneHalf);
  }
  else if (kMod == 1) {
    // -v * v + v + 0.5
    T tmp = THCNumerics<T>::mul(THCNumerics<T>::neg(v), v);
    return THCNumerics<T>::add(THCNumerics<T>::add(tmp, v), oneHalf);
  }
  else {
    // 0.5 * v * v
    return THCNumerics<T>::mul(oneHalf, THCNumerics<T>::mul(v, v));
  }
}
/**
 * Evaluates one polynomial piece of the cubic (degree-3) B-spline basis.
 *
 * @param v     Point at which the piece is evaluated.
 * @param kMod  Selects the piece:
 *                0         -> (1 - v)^3 / 6
 *                1         -> (3 v^3 - 6 v^2 + 4) / 6
 *                2         -> (-3 v^3 + 3 v^2 + 3 v + 1) / 6
 *                otherwise -> v^3 / 6
 * @return The value of the selected piece at v.
 */
static inline __device__ T cubic(T v, int64_t kMod) {
  const T six = ScalarConvert<int, T>::to(6);
  switch (kMod) {
    case 0: {
      // (1 - v) * (1 - v) * (1 - v) / 6
      const T oneMinusV = THCNumerics<T>::sub(ScalarConvert<int, T>::to(1), v);
      const T cube = THCNumerics<T>::mul(THCNumerics<T>::mul(oneMinusV, oneMinusV), oneMinusV);
      return THCNumerics<T>::div(cube, six);
    }
    case 1: {
      // (3 * v * v * v - 6 * v * v + 4) / 6
      const T vSquared = THCNumerics<T>::mul(v, v);
      const T cubicTerm = THCNumerics<T>::mul(ScalarConvert<int, T>::to(3),
                                              THCNumerics<T>::mul(vSquared, v));
      const T squareTerm = THCNumerics<T>::mul(six, vSquared);
      const T numerator = THCNumerics<T>::add(THCNumerics<T>::sub(cubicTerm, squareTerm),
                                              ScalarConvert<int, T>::to(4));
      return THCNumerics<T>::div(numerator, six);
    }
    case 2: {
      // (-3 * v * v * v + 3 * v * v + 3 * v + 1) / 6
      const T three = ScalarConvert<int, T>::to(3);
      const T vSquared = THCNumerics<T>::mul(v, v);
      const T cubicTerm = THCNumerics<T>::mul(ScalarConvert<int, T>::to(-3),
                                              THCNumerics<T>::mul(vSquared, v));
      const T squareTerm = THCNumerics<T>::mul(three, vSquared);
      const T linearTerm = THCNumerics<T>::mul(three, v);
      T numerator = THCNumerics<T>::add(THCNumerics<T>::add(cubicTerm, squareTerm), linearTerm);
      numerator = THCNumerics<T>::add(numerator, ScalarConvert<int, T>::to(1));
      return THCNumerics<T>::div(numerator, six);
    }
    default: {
      // v * v * v / 6
      const T cube = THCNumerics<T>::mul(THCNumerics<T>::mul(v, v), v);
      return THCNumerics<T>::div(cube, six);
    }
  }
}
};
// Forward kernel for the linear basis. The whole body is produced by the
// THC_TENSOR_BASIS_FORWARD_KERNEL macro; the last macro argument is the
// statement executed on the macro's `v` / `kMod` variables, here replacing v
// with BasisForward<T>::linear(v, kMod).
// NOTE(review): the leading literal 1 is presumably the spline degree (the
// quadratic and cubic kernels pass 2 and 3) -- confirm against the macro.
template<typename T>
__global__ void linearBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t>weightIndex,
TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(1, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
v = BasisForward<T>::linear(v, kMod);
) )
} }
...@@ -64,7 +120,7 @@ __global__ void quadraticBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int6 ...@@ -64,7 +120,7 @@ __global__ void quadraticBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int6
TensorInfo<T> pseudo, int64_t *kernelSize, TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) { uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n, THC_TENSOR_BASIS_FORWARD_KERNEL(2, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
/* printf("DRIN"); */ v = BasisForward<T>::quadratic(v, kMod);
) )
} }
...@@ -73,7 +129,7 @@ __global__ void cubicBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t> ...@@ -73,7 +129,7 @@ __global__ void cubicBasisForwardKernel(TensorInfo<T> basis, TensorInfo<int64_t>
TensorInfo<T> pseudo, int64_t *kernelSize, TensorInfo<T> pseudo, int64_t *kernelSize,
uint8_t *isOpenSpline, ptrdiff_t n) { uint8_t *isOpenSpline, ptrdiff_t n) {
THC_TENSOR_BASIS_FORWARD_KERNEL(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n, THC_TENSOR_BASIS_FORWARD_KERNEL(3, basis, weightIndex, pseudo, kernelSize, isOpenSpline, n,
/* printf("DRIN"); */ v = BasisForward<T>::cubic(v, kMod);
) )
} }
......
...@@ -18,6 +18,8 @@ struct THCNumerics { ...@@ -18,6 +18,8 @@ struct THCNumerics {
static inline __host__ __device__ T add(T a, T b) { return a + b; } static inline __host__ __device__ T add(T a, T b) { return a + b; }
static inline __host__ __device__ T sub(T a, T b) { return a - b; } static inline __host__ __device__ T sub(T a, T b) { return a - b; }
static inline __host__ __device__ T mul(T a, T b) { return a * b; } static inline __host__ __device__ T mul(T a, T b) { return a * b; }
static inline __host__ __device__ T div(T a, T b) { return a / b; }
static inline __host__ __device__ T neg(T a) { return -a; }
}; };
#ifdef CUDA_HALF_TENSOR #ifdef CUDA_HALF_TENSOR
...@@ -26,6 +28,8 @@ struct THCNumerics<half> { ...@@ -26,6 +28,8 @@ struct THCNumerics<half> {
static inline __host__ __device__ half add(half a, half b) { return f2h(h2f(a) + h2f(b)); } static inline __host__ __device__ half add(half a, half b) { return f2h(h2f(a) + h2f(b)); }
static inline __host__ __device__ half sub(half a, half b) { return f2h(h2f(a) - h2f(b)); } static inline __host__ __device__ half sub(half a, half b) { return f2h(h2f(a) - h2f(b)); }
static inline __host__ __device__ half mul(half a, half b) { return f2h(h2f(a) * h2f(b)); } static inline __host__ __device__ half mul(half a, half b) { return f2h(h2f(a) * h2f(b)); }
static inline __host__ __device__ half div(half a, half b) { return f2h(h2f(a) / h2f(b)); }
static inline __host__ __device__ half neg(half a) { return f2h(-h2f(a)); }
}; };
#endif // CUDA_HALF_TENSOR #endif // CUDA_HALF_TENSOR
......
...@@ -15,4 +15,8 @@ void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis, ...@@ -15,4 +15,8 @@ void THCTensor_(cubicBasisForward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo, THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline); THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
// Declares the linear-basis backward entry point.
// NOTE(review): the parameter list is identical to the cubicBasisForward
// declaration (basis, weightIndex, pseudo, kernelSize, isOpenSpline) --
// confirm that the backward pass does not need a separate gradient tensor.
void THCTensor_(linearBasisBackward)(THCState *state, THCTensor *basis,
THCudaLongTensor *weightIndex, THCTensor *pseudo,
THCudaLongTensor *kernelSize, THCudaByteTensor *isOpenSpline);
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment