"vscode:/vscode.git/clone" did not exist on "26c2e0bd35feb7f958924269ccfba6331a1dadbc"
Commit b3091036 authored by rusty1s's avatar rusty1s
Browse files

impl of tensorinfo

parent 9d0fa071
......@@ -37,7 +37,8 @@ def test_scatter_max(str):
assert input.grad.data.tolist() == expected_grad_input
@pytest.mark.parametrize('str', tensor_strs)
# @pytest.mark.parametrize('str', tensor_strs)
@pytest.mark.parametrize('str', ['FloatTensor'])
def test_scatter_cuda_max(str):
input = [[2, 0, 1, 4, 3], [0, 2, 1, 3, 4]]
index = [[4, 5, 4, 2, 3], [0, 0, 2, 2, 1]]
......
// Grid-stride loop: each thread starts at its flat global index and hops by
// the total number of launched threads, so any grid size covers all n items.
#define KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x)
const int MAX_DIMS = 25;      // maximum tensor rank TensorInfo can hold
const int NUM_THREADS = 1024; // threads per block for kernel launches
// Number of blocks needed to cover n elements (ceiling division by NUM_THREADS).
inline int GET_BLOCKS(const int n) {
  const int rounded_up = n + NUM_THREADS - 1;
  return rounded_up / NUM_THREADS;
}
// Plain-value view of a tensor: raw data pointer plus per-dimension size and
// stride, with fixed-capacity arrays so the whole struct can be passed by
// value as a CUDA kernel argument.
template <typename T>
struct TensorInfo {
  TensorInfo(T *t, int d, int sz[MAX_DIMS], int st[MAX_DIMS]) {
    data = t;
    dims = d;
    for (int dim = 0; dim < d; dim++) {
      size[dim] = sz[dim];
      stride[dim] = st[dim];
    }
  }
  T *data;             // pointer to the first element
  int dims;            // number of valid entries in size/stride
  int size[MAX_DIMS];  // extent of each dimension
  int stride[MAX_DIMS];// element stride of each dimension
};
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cu"
#else
// Validate the tensor arguments of a scatter op:
//  * output and input (2 real tensors) must live on the same GPU,
//  * index (1 long tensor) must be on that GPU as well,
//  * output's rank must fit the fixed-size TensorInfo arrays (MAX_DIMS).
void thc_(check)(THCState *state, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, output, input));
THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 1, index));
THArgCheck(THCTensor_(nDimension)(state, output) <= MAX_DIMS, 1, "Tensor too large or too many dimensions");
}
// Build a kernel-passable TensorInfo<real> snapshot (data pointer, rank,
// per-dimension sizes and strides) from a THCTensor.
TensorInfo<real> thc_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  real *data = THCTensor_(data)(state, tensor);
  const int nDims = THCTensor_(nDimension)(state, tensor);
  int size[MAX_DIMS];
  int stride[MAX_DIMS];
  for (int d = 0; d < nDims; d++) {
    size[d] = THCTensor_(size)(state, tensor, d);
    stride[d] = THCTensor_(stride)(state, tensor, d);
  }
  return TensorInfo<real>(data, nDims, size, stride);
}
#endif
......@@ -2,42 +2,40 @@
#define THC_GENERIC_FILE "generic/kernel.cu"
#else
// Validate the tensor arguments of a scatter op (same contract as thc_(check)
// in generic/common.cu): output/input/index on the same GPU, output rank
// bounded by MAX_DIMS.
void check(THCState *state, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
  // Counts must match the number of tensors passed: two real tensors here,
  // one long tensor below (the original had the 1 and 2 swapped).
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, output, input));
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 1, index));
  THArgCheck(THCTensor_(nDimension)(state, output) <= MAX_DIMS, 1, "Tensor too large or too many dimensions");
}
// Scatter-multiply along `dim`. Placeholder: arguments are validated but the
// kernel is not implemented yet (debug printf only).
void scatter_(mul)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
  // Single validation call; the original also called check() and computed an
  // unused element count / block dim (leftovers from an earlier revision).
  thc_(check)(state, output, index, input);
  printf("mul");
}
// Scatter-divide along `dim`. Placeholder: arguments are validated but the
// kernel is not implemented yet (debug printf only).
void scatter_(div)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, input);
  printf("div");
}
// Scatter-mean along `dim`; `num_output` will hold the per-slot counts.
// Placeholder: arguments are validated but the kernel is not implemented yet.
void scatter_(mean)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input, THCTensor *num_output) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, input);
  printf("mean");
}
// Scatter-max along `dim`: writes maxima into `output` and the argmax indices
// into `arg_output`. Launches one thread per index element; maxKernel uses a
// grid-stride loop, so GET_BLOCKS/NUM_THREADS need not divide n evenly.
void scatter_(max)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input, THCudaLongTensor *arg_output) {
  // Single validation call; the duplicate check() and stray printf("max")
  // from the old revision are removed.
  thc_(check)(state, output, index, input);
  const int n = THCudaLongTensor_nElement(state, index);
  TensorInfo<real> outputInfo = thc_(getTensorInfo)(state, output);
  TensorInfo<int64_t> indexInfo = thc_getTensorInfo_Long(state, index);
  TensorInfo<real> inputInfo = thc_(getTensorInfo)(state, input);
  TensorInfo<int64_t> argOutputInfo = thc_getTensorInfo_Long(state, arg_output);
  // Dims = -1 selects the fully-dynamic indexing path of maxKernel.
  maxKernel<real, -1><<<GET_BLOCKS(n), NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(outputInfo, indexInfo, inputInfo, argOutputInfo, dim, n);
  // Kernel launches fail silently otherwise; surface bad-config errors now.
  THCudaCheck(cudaGetLastError());
}
// Scatter-min along `dim`. Placeholder: arguments are validated but the
// kernel is not implemented yet (debug printf only).
void scatter_(min)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input, THCudaLongTensor *arg_output) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, input);
  printf("min");
}
// Backward pass of the index/gather step. Placeholder: arguments are
// validated but the kernel is not implemented yet (debug printf only).
void index_backward(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *grad, THCudaLongTensor *arg_grad) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, grad);
  printf("index_backward");
}
......
#include <THC/THC.h>
#include "kernel.h"
#include "common.cuh"
// Name-mangling helpers: map the generic names used in generic/*.cu onto
// per-type symbols when THCGenerateAllTypes.h re-includes those files.
#define scatter_(NAME) TH_CONCAT_4(scatter_, NAME, _kernel_, Real)
#define index_backward TH_CONCAT_2(index_backward_kernel_, Real)
#define check TH_CONCAT_2(check_kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)
// NOTE: MAX_DIMS and NUM_THREADS are intentionally NOT defined here.
// common.cuh already provides them as `const int` (25 / 1024); the old macro
// NUM_THREADS = 32 * 16 (= 512, and unparenthesized) silently disagreed with
// GET_BLOCKS, which divides by the 1024-thread constant.
#include "generic/common.cu"
#include "THCGenerateAllTypes.h"
// Scatter-max kernel: one logical thread per index element, walking a
// grid-stride loop over the n entries. Body is an unimplemented stub in this
// commit — it only iterates; the max/argmax writes are still TODO.
// Dims is a compile-time rank specialization (-1 presumably means "dynamic
// rank" — confirm once the body is implemented).
template <typename Real, int Dims>
__global__ void maxKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, TensorInfo<int64_t> arg_output, const int dim, const int n) {
KERNEL_LOOP(i, n) {
}
}
#include "generic/kernel.cu"
#include "THCGenerateAllTypes.h"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment