Smarter cumDegree: compute cumulative degrees directly from the row index instead of a cumsum over degree

bd050f90 · rusty1s · 69f59517 · bd050f90 · bd050f90 · bd050f90
Commit bd050f90 authored Apr 06, 2018 by rusty1s
5 changed files
--- a/aten/THC/THCColor.cuh
+++ b/aten/THC/THCColor.cuh
@@ -3,30 +3,30 @@

 #include "common.cuh"

-#include <curand.h>
-#include <curand_kernel.h>
-
 #define BLUE_PROBABILITY 0.53406

-__global__ void colorKernel(int64_t *self, curandStateMtgp32 *state, uint8_t *done,
-                                  ptrdiff_t nNodes) {
+__global__ void colorKernel(int64_t *self, int64_t *bernoulli, uint8_t *done, ptrdiff_t nNodes) {
  KERNEL_LOOP(i, nNodes) {
    if (self[i] < 0) {
-      self[i] = (curand_uniform(&state[0]) < BLUE_PROBABILITY) - 2;  // blue = -1, red = -2
+      self[i] = bernoulli[i] - 2;
      *done = 0;
    }
  }
 }

 int THCTensor_color(THCState *state, THCudaLongTensor *self) {
+  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
+  THCudaLongTensor *bernoulli = THCudaLongTensor_newWithSize1d(state, nNodes);
+  THCudaLongTensor_bernoulli(state, bernoulli, BLUE_PROBABILITY);
+
+  int64_t *selfData = THCudaLongTensor_data(state, self);
+  int64_t *bernoulliData = THCudaLongTensor_data(state, bernoulli);
+
  uint8_t* d_done;
  cudaMalloc(&d_done, sizeof(uint8_t));
  cudaMemset(d_done, 1, sizeof(uint8_t));

-  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
-  int64_t *selfData = THCudaLongTensor_data(state, self);
-
-  KERNEL_RUN(colorKernel, nNodes, selfData, THCRandom_generatorStates(state), d_done);
+  KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData, d_done);

  uint8_t done;
  cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);

--- a/aten/THC/THCDegree.cuh
+++ b/aten/THC/THCDegree.cuh
 #ifndef THC_DEGREE_INC
 #define THC_DEGREE_INC

+#include "common.cuh"
 #include "THCNumerics.cuh"

+template<typename T>
+__global__ void cumDegreeKernel(T *self, int64_t *index, ptrdiff_t nEdges) {
+  KERNEL_LOOP(i, nEdges) {
+    int64_t r = index[i];
+    if (r != index[i+1]) { self[r] = ScalarConvert<int, T>::to(i + 1); }
+  }
+}
+
 #include "generic/THCDegree.cuh"
 #include "THC/THCGenerateAllTypes.h"


--- a/aten/THC/THCGraclus.cu
+++ b/aten/THC/THCGraclus.cu
@@ -20,12 +20,12 @@ void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor
  THCudaLongTensor_degree(state, degree, row);

  THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes);
-  THCudaLongTensor_cumsum(state, cumDegree, degree, 0);
+  THCudaLongTensor_cumDegree(state, cumDegree, row);

  while(!THCTensor_color(state, self)) {
    THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
    THCTensor_response(state, self, prop, row, col, degree, cumDegree);
-  };
+  }

  THCudaLongTensor_free(state, prop);
  THCudaLongTensor_free(state, degree);

--- a/aten/THC/generic/THCDegree.cuh
+++ b/aten/THC/generic/THCDegree.cuh
@@ -14,4 +14,13 @@ void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *inde
  THCTensor_(free)(state, one);
 }

+void THCTensor_(cumDegree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
+  ptrdiff_t nEdges = THCudaLongTensor_nElement(state, index);
+
+  real *selfData = THCTensor_(data)(state, self);
+  int64_t *indexData = THCudaLongTensor_data(state, index);
+
+  KERNEL_RUN(cumDegreeKernel, nEdges - 1, selfData, indexData);
+}
+
 #endif  // THC_GENERIC_FILE
--- a/torch_cluster/graclus.py
+++ b/torch_cluster/graclus.py
@@ -5,10 +5,10 @@ from .utils.ffi import graclus
 def graclus_cluster(row, col, weight=None, num_nodes=None):
    num_nodes = row.max() + 1 if num_nodes is None else num_nodes

-    row, col = randperm(row, col)
    if row.is_cuda:
        row, col = sort_row(row, col)
    else:
+        row, col = randperm(row, col)
        row, col = randperm_sort_row(row, col, num_nodes)

    cluster = row.new(num_nodes)