Commit 9a1f7817 authored by rusty1s

clean up

parent e6638edd
@@ -3,4 +3,4 @@ cpu_tensors = [
     'FloatTensor', 'DoubleTensor'
 ]
-cuda_tensors = ['cuda.{}'.format(t) for t in cpu_tensors + ['HalfTensor']]
+gpu_tensors = ['cuda.{}'.format(t) for t in cpu_tensors + ['HalfTensor']]
@@ -5,7 +5,7 @@ import torch
 import numpy as np
 from torch_cluster import graclus_cluster
-from .tensor import cpu_tensors
+from .tensor import cpu_tensors, gpu_tensors

 tests = [{
     'row': [0, 0, 1, 1, 1, 2, 2, 2, 3, 3],
@@ -20,6 +20,7 @@ tests = [{
 def assert_correct_graclus(row, col, cluster):
     row, col, cluster = row.numpy(), col.numpy(), cluster.numpy()
+    n_nodes = cluster.shape[0]

     # Every node was assigned a cluster.
     assert cluster.min() >= 0
@@ -28,6 +29,9 @@ def assert_correct_graclus(row, col, cluster):
     _, count = np.unique(cluster, return_counts=True)
     assert (count > 2).max() == 0

+    # Cluster value is minimal.
+    assert (cluster <= np.arange(n_nodes, dtype=row.dtype)).sum() == n_nodes
+
     # Corresponding clusters must be adjacent.
     for n in range(cluster.shape[0]):
         x = cluster[col[row == n]] == cluster[n]  # Neighbors with same cluster
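The new "minimal" assertion encodes the convention that each cluster is labeled by the smallest node index it contains. A toy NumPy illustration (not part of the commit):

```python
import numpy as np

n_nodes = 4

# Labeling that satisfies the convention: every cluster id is the
# smallest node index inside that cluster.
cluster = np.array([0, 0, 2, 2])
assert (cluster <= np.arange(n_nodes)).sum() == n_nodes

# Labeling that violates it: cluster {0, 1} is labeled 1, not 0.
bad = np.array([1, 1, 2, 2])
assert (bad <= np.arange(n_nodes)).sum() < n_nodes
```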
@@ -50,9 +54,16 @@ def test_graclus_cluster_cpu(tensor, i):
     assert_correct_graclus(row, col, cluster)

-def test_graclus_cluster_gpu():
-    row = torch.cuda.LongTensor([0, 0, 1, 1, 1, 2, 2, 2, 3, 3])
-    col = torch.cuda.LongTensor([1, 2, 0, 2, 3, 0, 1, 3, 1, 2])
-    cluster = graclus_cluster(row, col)
-    print(cluster.cpu().tolist())
+@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
+@pytest.mark.parametrize('tensor,i', product(gpu_tensors, range(len(tests))))
+def test_graclus_cluster_gpu(tensor, i):
+    data = tests[i]
+    row = torch.cuda.LongTensor(data['row'])
+    col = torch.cuda.LongTensor(data['col'])
+    weight = data['weight']
+    weight = weight if weight is None else getattr(torch.cuda, tensor)(weight)
+    cluster = graclus_cluster(row, col, weight)
+    assert_correct_graclus(row, col, cluster)
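The parametrized test exercises the same call pattern as plain usage of the package; a minimal CPU sketch (the weight values are made up for illustration):

```python
import torch
from torch_cluster import graclus_cluster

row = torch.LongTensor([0, 0, 1, 1, 1, 2, 2, 2, 3, 3])
col = torch.LongTensor([1, 2, 0, 2, 3, 0, 1, 3, 1, 2])
weight = torch.FloatTensor([1, 2, 1, 3, 2, 2, 3, 1, 2, 1])  # hypothetical edge weights

cluster = graclus_cluster(row, col, weight)  # `weight` may also be None
```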
template <typename a, int Dims>
struct IndexToOffset {
  // Maps linear element index `i` to a storage offset in `t1`, treating `i`
  // as an index over every dimension except the innermost one (the caller
  // advances over the last dimension itself).
  static __device__ void compute(int i, const TensorInfo<a>& t1, int* t1Offset) {
    int curDimIndex;
    for (int d = Dims - 2; d >= 0; d--) {
      curDimIndex = i % t1.size[d];
      *t1Offset += curDimIndex * t1.stride[d];
      i /= t1.size[d];
    }
  }
};

// Specialization for dimensionality only known at runtime (Dims == -1).
template <typename a>
struct IndexToOffset<a, -1> {
  static __device__ void compute(int i, const TensorInfo<a>& t1, int* t1Offset) {
    int curDimIndex;
    for (int d = t1.dims - 2; d >= 0; d--) {
      curDimIndex = i % t1.size[d];
      *t1Offset += curDimIndex * t1.stride[d];
      i /= t1.size[d];
    }
  }
};
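A Python mirror of the offset computation may make the stride arithmetic easier to follow; this is a sketch, not part of the commit:

```python
def index_to_offset(i, size, stride):
    """Map linear index `i` over all dims except the last to a storage
    offset, mirroring IndexToOffset::compute."""
    offset = 0
    for d in range(len(size) - 2, -1, -1):
        offset += (i % size[d]) * stride[d]
        i //= size[d]
    return offset

# For a contiguous [N, D] tensor (stride = [D, 1]), point i starts at i * D:
assert index_to_offset(3, size=[5, 2], stride=[2, 1]) == 6
```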
const int MAX_DIMS = 25;
const int NUM_THREADS = 1024;
inline int GET_BLOCKS(const int n) {
  return (n + NUM_THREADS - 1) / NUM_THREADS;  // ceil(n / NUM_THREADS)
}
template<typename T>
struct TensorInfo {
  // Shallow view of a THC tensor: raw data pointer plus size/stride metadata
  // copied into fixed-size arrays so the struct can be passed to kernels by value.
  TensorInfo(T *t, int d, int sz[MAX_DIMS], int st[MAX_DIMS]) {
    data = t; dims = d;
    for (int i = 0; i < dims; i++) {
      size[i] = sz[i];
      stride[i] = st[i];
    }
  }

  T *data;
  int dims;
  int size[MAX_DIMS];
  int stride[MAX_DIMS];
};
#define KERNEL_LOOP(I, N) \
  for (int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
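GET_BLOCKS and KERNEL_LOOP together implement a standard grid-stride loop: the launch covers N elements with ceil(N / 1024) blocks, and each thread strides over any leftover work. A Python emulation of the indices one thread visits (illustrative only):

```python
def kernel_loop(block_idx, thread_idx, block_dim, grid_dim, n):
    """Indices visited by one CUDA thread in KERNEL_LOOP(i, n)."""
    i = block_idx * block_dim + thread_idx
    while i < n:
        yield i
        i += block_dim * grid_dim

# Thread 0 of block 0, launched with 2 blocks of 4 threads over n = 10:
assert list(kernel_loop(0, 0, 4, 2, 10)) == [0, 8]
```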
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cu"
#else
TensorInfo<real> thc_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  real *data = THCTensor_(data)(state, tensor);
  int dims = THCTensor_(nDimension)(state, tensor);
  int size[MAX_DIMS]; int stride[MAX_DIMS];
  for (int i = 0; i < dims; i++) {
    size[i] = THCTensor_(size)(state, tensor, i);
    stride[i] = THCTensor_(stride)(state, tensor, i);
  }
  return TensorInfo<real>(data, dims, size, stride);
}
#endif
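On the Python side the same metadata is just the tensor's sizes and strides; a hedged sketch of what thc_(getTensorInfo) gathers:

```python
import torch

def tensor_info(t):
    """Collect what the TensorInfo struct stores: dimensionality plus
    per-dimension sizes and element strides."""
    return {'dims': t.dim(), 'size': list(t.size()), 'stride': list(t.stride())}

assert tensor_info(torch.zeros(4, 3)) == {'dims': 2, 'size': [4, 3], 'stride': [3, 1]}
```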
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/grid.cu"
#else
void cluster_(grid)(THCState *state, int C, THCudaLongTensor *output, THCTensor *position, THCTensor *size, THCudaLongTensor *count) {
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, position, size));
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 2, output, count));
  THArgCheck(THCTensor_(nDimension)(state, position) <= MAX_DIMS, 1, "Tensor too large or too many dimensions");

  int64_t *outputData = THCudaLongTensor_data(state, output);
  TensorInfo<real> positionInfo = thc_(getTensorInfo)(state, position);
  real *sizeData = THCTensor_(data)(state, size);
  int64_t *countData = THCudaLongTensor_data(state, count);

  const int N = THCudaLongTensor_nElement(state, output);
  int grid = GET_BLOCKS(N);
  cudaStream_t stream = THCState_getCurrentStream(state);

  // Dispatch on dimensionality so the common cases get unrolled offset math.
  switch (positionInfo.dims) {
    case 1: gridKernel<real, 1><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    case 2: gridKernel<real, 2><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    case 3: gridKernel<real, 3><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    case 4: gridKernel<real, 4><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    default: gridKernel<real, -1><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
  }
  THCudaCheck(cudaGetLastError());
}
#endif
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/serial.cu"
#else
void cluster_(serial)(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCTensor *weight) {
  // Not yet implemented: the GPU matching path is stubbed out in this commit.
}
#endif
#include <THC.h>
#include "grid.h"
#include "common.cuh"
#include "THCIndex.cuh"
/* With Real = Float these expand to e.g. cluster_grid_kernel_Float and
   thc_getTensorInfo_Float, matching the extern "C" declarations below. */
#define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)
#include "generic/common.cu"
#include "THCGenerateAllTypes.h"
template<typename Real, int Dims>
__global__ void gridKernel(int64_t *output, TensorInfo<Real> position, Real *size, int64_t *count, const int C, const int N) {
  KERNEL_LOOP(i, N) {
    int positionOffset = 0; int tmp = C; int64_t c = 0;
    IndexToOffset<Real, Dims>::compute(i, position, &positionOffset);
    // Encode the D-dimensional cell coordinate of point i into a single id:
    // tmp walks down the mixed-radix place values C / (count[0] * ... * count[d]).
    for (int d = 0; d < position.size[position.dims - 1]; d++) {
      tmp = tmp / count[d];
      c += tmp * (int64_t) (position.data[positionOffset + d] / size[d]);
    }
    output[i] = c;
  }
}
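A NumPy sketch of the same cell-id math, with C = prod(count); the function and parameter names here are mine, not the commit's:

```python
import numpy as np

def grid_cluster_ids(position, cell_size, count):
    """position: [N, D] array; count[d]: cells along dim d; returns [N] ids."""
    C = int(np.prod(count))
    ids = np.empty(len(position), dtype=np.int64)
    for n, p in enumerate(position):
        tmp, c = C, 0
        for d in range(len(count)):
            tmp //= count[d]                      # cells per step along dim d
            c += tmp * int(p[d] // cell_size[d])  # cell coordinate along dim d
        ids[n] = c
    return ids

# Two points in a 2x2 grid of unit cells:
pos = np.array([[0.5, 0.5], [1.5, 0.5]])
assert grid_cluster_ids(pos, [1.0, 1.0], [2, 2]).tolist() == [0, 2]
```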
#include "generic/grid.cu"
#include "THCGenerateFloatType.h"
#include "generic/grid.cu"
#include "THCGenerateDoubleType.h"
#include "generic/grid.cu"
#include "THCGenerateByteType.h"
#include "generic/grid.cu"
#include "THCGenerateCharType.h"
#include "generic/grid.cu"
#include "THCGenerateShortType.h"
#include "generic/grid.cu"
#include "THCGenerateIntType.h"
#include "generic/grid.cu"
#include "THCGenerateLongType.h"
#ifdef __cplusplus
extern "C" {
#endif
void cluster_grid_kernel_Float (THCState *state, int C, THCudaLongTensor *output, THCudaTensor *position, THCudaTensor *size, THCudaLongTensor *count);
void cluster_grid_kernel_Double(THCState *state, int C, THCudaLongTensor *output, THCudaDoubleTensor *position, THCudaDoubleTensor *size, THCudaLongTensor *count);
void cluster_grid_kernel_Byte (THCState *state, int C, THCudaLongTensor *output, THCudaByteTensor *position, THCudaByteTensor *size, THCudaLongTensor *count);
void cluster_grid_kernel_Char (THCState *state, int C, THCudaLongTensor *output, THCudaCharTensor *position, THCudaCharTensor *size, THCudaLongTensor *count);
void cluster_grid_kernel_Short (THCState *state, int C, THCudaLongTensor *output, THCudaShortTensor *position, THCudaShortTensor *size, THCudaLongTensor *count);
void cluster_grid_kernel_Int (THCState *state, int C, THCudaLongTensor *output, THCudaIntTensor *position, THCudaIntTensor *size, THCudaLongTensor *count);
void cluster_grid_kernel_Long (THCState *state, int C, THCudaLongTensor *output, THCudaLongTensor *position, THCudaLongTensor *size, THCudaLongTensor *count);
#ifdef __cplusplus
}
#endif
#include <THC.h>
#include "THCTensorRandom.h"
#include "serial.h"
#include <curand.h>
#include <curand_kernel.h>
#include "common.cuh"
#define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)
__global__ void assignColorKernel(curandStateMtgp32 *state, int64_t *color, const int n, uint8_t *done) {
  KERNEL_LOOP(i, n) {
    if (color[i] < 0) {  // Node not yet matched.
      color[i] = 0; //(int64_t) (curand_uniform(&state[blockIdx.x]) <= 0.53406) - 2;
      *done = 0;
    }
  }
}

int assignColor(THCState *state, THCudaLongTensor *color) {
  curandStateMtgp32 *gen_states = THCRandom_generatorStates(state);
  int64_t *colorVec = THCudaLongTensor_data(state, color);
  const int n = THCudaLongTensor_nElement(state, color);

  // Device-side flag: set to 1, cleared by the kernel if any node still
  // needed a color this round.
  uint8_t done;
  uint8_t *d_done;
  cudaMalloc(&d_done, sizeof(uint8_t));
  cudaMemset(d_done, 1, sizeof(uint8_t));

  assignColorKernel<<<GET_BLOCKS(n), NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(gen_states, colorVec, n, d_done);

  cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);
  cudaFree(d_done);
  return done;
}
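The commented-out line hints at the intended randomized step: each unmatched node flips a coin with bias p = 0.53406 and becomes blue (-1) or red (-2). A PyTorch sketch of that proposal as I read it (the color conventions are inferred from the kernel, not stated in the commit):

```python
import torch

def assign_color(color, p=0.53406):
    """One proposal round: nodes with color < 0 draw blue (-1) with
    probability p, else red (-2). Returns True once no node needed a color,
    mirroring assignColor's `done` flag."""
    unmatched = color < 0
    if not bool(unmatched.any()):
        return True
    draw = (torch.rand(int(unmatched.sum())) <= p).long() - 2  # -1 or -2
    color[unmatched] = draw
    return False
```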
/* GENERATE_KERNEL1(generate_bernoulli, double, double p, double, curand_uniform_double, x <= p) */
/* #define GENERATE_KERNEL1(NAME, T, ARG1, CURAND_T, CURAND_FUNC, TRANSFORM) \ */
/* __global__ void NAME(curandStateMtgp32 *state, int size, T *result, ARG1) \ */
/* { \ */
/* int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x; \ */
/* int rounded_size = THCCeilDiv(size, BLOCK_SIZE) * BLOCK_SIZE; \ */
/* for (int i = idx; i < rounded_size; i += BLOCK_SIZE * MAX_NUM_BLOCKS) { \ */
/* CURAND_T x = CURAND_FUNC(&state[blockIdx.x]); \ */
/* if (i < size) { \ */
/* T y = TRANSFORM; \ */
/* result[i] = y; \ */
/* } \ */
/* } \ */
/* } */
void cluster_serial_kernel(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree) {
  // Not yet implemented: placeholder for the type-independent matching driver.
}
#include "generic/serial.cu"
#include "THCGenerateFloatType.h"
#include "generic/serial.cu"
#include "THCGenerateDoubleType.h"
#include "generic/serial.cu"
#include "THCGenerateByteType.h"
#include "generic/serial.cu"
#include "THCGenerateCharType.h"
#include "generic/serial.cu"
#include "THCGenerateShortType.h"
#include "generic/serial.cu"
#include "THCGenerateIntType.h"
#include "generic/serial.cu"
#include "THCGenerateLongType.h"
#ifdef __cplusplus
extern "C" {
#endif
void cluster_serial_kernel(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree);
void cluster_serial_kernel_Float (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaTensor *weight);
void cluster_serial_kernel_Double(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaDoubleTensor *weight);
void cluster_serial_kernel_Byte (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaByteTensor *weight);
void cluster_serial_kernel_Char (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaCharTensor *weight);
void cluster_serial_kernel_Short (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaShortTensor *weight);
void cluster_serial_kernel_Int (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaIntTensor *weight);
void cluster_serial_kernel_Long (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaLongTensor *weight);
#ifdef __cplusplus
}
#endif
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/grid_cpu.c"
#else
void cluster_(grid)(int C, THLongTensor *output, THTensor *position, THTensor *size, THLongTensor *count) {
  real *size_data = size->storage->data + size->storageOffset;
  int64_t *count_data = count->storage->data + count->storageOffset;
  int64_t D = THLongTensor_nElement(count), d, c, tmp;
  // Walk output and the last dimension of position in lockstep, encoding each
  // point's grid cell coordinates into one mixed-radix id (CPU twin of gridKernel).
  TH_TENSOR_DIM_APPLY2(int64_t, output, real, position, THTensor_(nDimension)(position) - 1,
    tmp = C; c = 0;
    for (d = 0; d < D; d++) {
      tmp = tmp / *(count_data + d);
      c += tmp * (int64_t) (*(position_data + d * position_stride) / *(size_data + d));
    }
    output_data[0] = c;
  )
}
#endif
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/grid_cuda.c"
#else
void cluster_(grid)(int C, THCudaLongTensor *output, THCTensor *position, THCTensor *size, THCudaLongTensor *count) {
  cluster_kernel_(grid)(state, C, output, position, size, count);
}
#endif
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/serial_cpu.c"
#else
void cluster_(serial)(THLongTensor *output, THLongTensor *row, THLongTensor *col, THLongTensor *degree, THTensor *weight) {
  real *weight_data = weight->storage->data + weight->storageOffset;
  real weight_value, w;
  int64_t d, c;
  SERIAL(output, row, col, degree,
    weight_value = 0;
    for (d = 0; d < degree_data[row_value]; d++) { // Iterate over neighbors.
      c = col_data[e + d];
      w = weight_data[e + d];
      if (output_data[c] < 0 && w >= weight_value) {
        // Neighbor is unmatched and edge has a higher weight.
        col_value = c;
        weight_value = w;
      }
    }
  )
}
#endif
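The inner loop above scans a node's adjacency slice (edges e .. e + degree[u] - 1 in the CSR-like layout the SERIAL macro walks) for the unmatched neighbor with the heaviest edge. The same logic extracted into standalone Python for readability (`e` and the other names are assumptions from context, since SERIAL is defined elsewhere):

```python
def pick_heaviest_unmatched(u, e, degree, col, weight, output):
    """Return the unmatched neighbor of `u` with maximum edge weight,
    or -1 if every neighbor is already matched (output[c] >= 0)."""
    best, best_w = -1, 0.0
    for d in range(degree[u]):
        c = col[e + d]
        w = weight[e + d]
        if output[c] < 0 and w >= best_w:
            best, best_w = c, w
    return best
```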
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/serial_cuda.c"
#else
void cluster_(serial)(THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCTensor *weight) {
  int done = assignColor(state, output);  // WIP: only the coloring step is wired up so far.
}
#endif
#include <TH/TH.h>
#define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _, Real)
#include "generic/grid_cpu.c"
#include "THGenerateAllTypes.h"
void cluster_grid_Float (int C, THLongTensor *output, THFloatTensor *position, THFloatTensor *size, THLongTensor *count);
void cluster_grid_Double(int C, THLongTensor *output, THDoubleTensor *position, THDoubleTensor *size, THLongTensor *count);
void cluster_grid_Byte (int C, THLongTensor *output, THByteTensor *position, THByteTensor *size, THLongTensor *count);
void cluster_grid_Char (int C, THLongTensor *output, THCharTensor *position, THCharTensor *size, THLongTensor *count);
void cluster_grid_Short (int C, THLongTensor *output, THShortTensor *position, THShortTensor *size, THLongTensor *count);
void cluster_grid_Int (int C, THLongTensor *output, THIntTensor *position, THIntTensor *size, THLongTensor *count);
void cluster_grid_Long (int C, THLongTensor *output, THLongTensor *position, THLongTensor *size, THLongTensor *count);
#include <THC/THC.h>
#include "grid.h"
#define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _cuda_, Real)
#define cluster_kernel_(NAME) TH_CONCAT_4(cluster_, NAME, _kernel_, Real)
extern THCState *state;
#include "generic/grid_cuda.c"
#include "THCGenerateFloatType.h"
#include "generic/grid_cuda.c"
#include "THCGenerateDoubleType.h"
#include "generic/grid_cuda.c"
#include "THCGenerateByteType.h"
#include "generic/grid_cuda.c"
#include "THCGenerateCharType.h"
#include "generic/grid_cuda.c"
#include "THCGenerateShortType.h"
#include "generic/grid_cuda.c"
#include "THCGenerateIntType.h"
#include "generic/grid_cuda.c"
#include "THCGenerateLongType.h"
void cluster_grid_cuda_Float (int C, THCudaLongTensor *output, THCudaTensor *position, THCudaTensor *size, THCudaLongTensor *count);
void cluster_grid_cuda_Double(int C, THCudaLongTensor *output, THCudaDoubleTensor *position, THCudaDoubleTensor *size, THCudaLongTensor *count);
void cluster_grid_cuda_Byte (int C, THCudaLongTensor *output, THCudaByteTensor *position, THCudaByteTensor *size, THCudaLongTensor *count);
void cluster_grid_cuda_Char (int C, THCudaLongTensor *output, THCudaCharTensor *position, THCudaCharTensor *size, THCudaLongTensor *count);
void cluster_grid_cuda_Short (int C, THCudaLongTensor *output, THCudaShortTensor *position, THCudaShortTensor *size, THCudaLongTensor *count);
void cluster_grid_cuda_Int (int C, THCudaLongTensor *output, THCudaIntTensor *position, THCudaIntTensor *size, THCudaLongTensor *count);
void cluster_grid_cuda_Long (int C, THCudaLongTensor *output, THCudaLongTensor *position, THCudaLongTensor *size, THCudaLongTensor *count);