rename and bugfixes

8c15ed64 · rusty1s · bebd72f3 · 8c15ed64 · 8c15ed64 · bebd72f3
Commit 8c15ed64 authored Apr 06, 2018 by rusty1s
20 changed files
--- a/aten/THC/THC.cu
+++ b/aten/THC/THC.cu
-#include "THCGreedy.cu"
+#include "THCGraclus.cu"
 #include "THCGrid.cu"
--- a/aten/THC/THC.h
+++ b/aten/THC/THC.h
 #ifndef THC_INC
 #define THC_INC

-#include "THCGreedy.h"
+#include "THCGraclus.h"
 #include "THCGrid.h"

 #endif
--- a/aten/THC/THCColor.cu
+++ b/aten/THC/THCColor.cu
-#include <curand.h>
-#include <curand_kernel.h>
-
-#include "common.cuh"
-
-__global__ void assignColorKernel(int64_t *color, curandStateMtgp32 *state, uint8_t *done,
-                                  ptrdiff_t nNodes) {
-  KERNEL_LOOP(i, nNodes) {
-    if (color[i] < 0) {
-      color[i] = (curand_uniform(&state[0]) < 0.53406) - 2;  // blue = -1, red = -2
-      *done = 0;
-    }
-  }
-}
-
-int THCGreedy_assignColor(THCState *state, THCudaLongTensor *color) {
-  int64_t *colorData = THCudaLongTensor_data(state, color);
-  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, color);
-  uint8_t* d_done; cudaMalloc(&d_done, sizeof(uint8_t)); cudaMemset(d_done, 1, sizeof(uint8_t));
-  KERNEL_RUN(assignColorKernel, nNodes, colorData, THCRandom_generatorStates(state), d_done);
-  uint8_t done; cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);
-  cudaFree(d_done);
-  return done;
-}
--- a/aten/THC/THCColor.cuh
+++ b/aten/THC/THCColor.cuh
+#ifndef THC_COLOR_INC
+#define THC_COLOR_INC
+
+#include "common.cuh"
+
+#include <curand.h>
+#include <curand_kernel.h>
+
+#define BLUE_PROBABILITY 0.53406
+
+// Gives every still-uncolored entry of `self` (value < 0) a random color:
+// -1 (blue) with probability BLUE_PROBABILITY, -2 (red) otherwise.
+// Entries that are already >= 0 are left untouched.
+// `*done` is cleared to 0 as soon as any entry receives a color.
+//
+// NOTE(review): every thread draws from `state[0]`. cuRAND documents the MTGP32
+// device API as one state per block with at most 256 threads per block -- confirm
+// that the launch configuration used by KERNEL_RUN is compatible with that.
+__global__ void colorKernel(int64_t *self, curandStateMtgp32 *state, uint8_t *done,
+                            ptrdiff_t nNodes) {
+  KERNEL_LOOP(idx, nNodes) {
+    if (self[idx] >= 0) { continue; }  // Entry is not negative; nothing to color.
+    const bool isBlue = curand_uniform(&state[0]) < BLUE_PROBABILITY;
+    self[idx] = isBlue ? -1 : -2;  // blue = -1, red = -2
+    *done = 0;  // At least one entry was colored in this launch.
+  }
+}
+
+// Launches colorKernel over all elements of `self`, assigning a random color
+// (-1 or -2) to every element that is currently negative.
+//
+// Returns 1 when no element needed a color (including the empty tensor) and 0 when
+// at least one element was colored by this call.
+//
+// Every CUDA runtime call is checked. On failure the temporary device flag is
+// released first and the error is then raised through THCudaCheck.
+int THCTensor_color(THCState *state, THCudaLongTensor *self) {
+  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
+  if (nNodes == 0) { return 1; }  // Nothing to color; also avoids an empty launch.
+
+  int64_t *selfData = THCudaLongTensor_data(state, self);
+
+  // One-byte device flag: starts at 1, the kernel clears it on any assignment.
+  uint8_t *d_done = NULL;
+  THCudaCheck(cudaMalloc(&d_done, sizeof(uint8_t)));
+
+  uint8_t done = 1;
+  cudaError_t err = cudaMemset(d_done, 1, sizeof(uint8_t));
+  if (err == cudaSuccess) {
+    KERNEL_RUN(colorKernel, nNodes, selfData, THCRandom_generatorStates(state), d_done);
+    err = cudaGetLastError();  // Kernel launches report errors only through this call.
+  }
+  if (err == cudaSuccess) {
+    // Copy the flag back to the host.
+    err = cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);
+  }
+
+  cudaFree(d_done);  // Release before raising so the flag is not leaked on error.
+  THCudaCheck(err);
+
+  return done;
+}
+
+#endif  // THC_COLOR_INC
--- a/aten/THC/THCDegree.cu
+++ b/aten/THC/THCDegree.cu
-#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
-
-#include "generic/THCDegree.cu"
-#include "THC/THCGenerateAllTypes.h"
--- a/aten/THC/THCDegree.cuh
+++ b/aten/THC/THCDegree.cuh
+#ifndef THC_DEGREE_INC
+#define THC_DEGREE_INC
+
+#include "THCNumerics.cuh"
+
+#include "generic/THCDegree.cuh"
+#include "THC/THCGenerateAllTypes.h"
+
+#endif  // THC_DEGREE_INC
--- a/aten/THC/THCGreedy.cu
+++ b/aten/THC/THCGreedy.cu
-#include "THCGreedy.h"
+#include "THCGraclus.h"

 #include "common.cuh"
-#include "THCDegree.cu"
-#include "THCColor.cu"
-#include "THCPropose.cu"
-#include "THCResponse.cu"
+#include "THCDegree.cuh"
+#include "THCColor.cuh"
+#include "THCPropose.cuh"
+#include "THCResponse.cuh"

-void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
-               THCudaLongTensor *col) {
-  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 3, cluster, row, col));
+void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
+                       THCudaLongTensor *col) {
+  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 3, self, row, col));

-  int nNodes = THCudaLongTensor_nElement(state, cluster);
+  int nNodes = THCudaLongTensor_nElement(state, self);
+  THCudaLongTensor_fill(state, self, -1);

-  THCudaLongTensor_fill(state, cluster, -1);
-  THCudaLongTensor *prop = THCudaLongTensor_newClone(state, cluster);
+  THCudaLongTensor *prop = THCudaLongTensor_newWithSize1d(state, nNodes);
+  THCudaLongTensor_fill(state, prop, -1);

  THCudaLongTensor *degree = THCudaLongTensor_newWithSize1d(state, nNodes);
  THCudaLongTensor_degree(state, degree, row);
@@ -21,15 +22,19 @@ void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row
  THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes);
  THCudaLongTensor_cumsum(state, cumDegree, degree, 0);

-  while(!THCGreedy_assignColor(state, cluster)) {
-    THCGreedy_propose(state, cluster, prop, row, col, degree, cumDegree);
-    THCGreedy_response(state, cluster, prop, row, col, degree, cumDegree);
-  };
+  THCTensor_color(state, self);
+  THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
+  THCTensor_response(state, self, prop, row, col, degree, cumDegree);
+
+  /* while(!THCTensor_assignColor(state, self)) { */
+  /*   THCTensor_propose(state, self, prop, row, col, degree, cumDegree); */
+  /*   THCTensor_response(state, self, prop, row, col, degree, cumDegree); */
+  /* }; */

  THCudaLongTensor_free(state, prop);
  THCudaLongTensor_free(state, degree);
  THCudaLongTensor_free(state, cumDegree);
 }

-#include "generic/THCGreedy.cu"
+#include "generic/THCGraclus.cu"
 #include "THC/THCGenerateAllTypes.h"
--- a/aten/THC/THCGreedy.h
+++ b/aten/THC/THCGreedy.h
-#ifndef THC_GREEDY_INC
-#define THC_GREEDY_INC
+#ifndef THC_GRACLUS_INC
+#define THC_GRACLUS_INC

 #include <THC/THC.h>

-#define THCGreedy_ TH_CONCAT_3(TH,CReal,Greedy)
-
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus

-void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
-               THCudaLongTensor *col);
+void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
+                       THCudaLongTensor *col);

-#include "generic/THCGreedy.h"
+#include "generic/THCGraclus.h"
 #include "THC/THCGenerateAllTypes.h"

 #ifdef __cplusplus
 }
 #endif  // __cplusplus

-#endif  // THC_GREEDY_INC
+#endif  // THC_GRACLUS_INC
--- a/aten/THC/THCGrid.cu
+++ b/aten/THC/THCGrid.cu
@@ -4,7 +4,7 @@
 #include "THCNumerics.cuh"

 template<typename T>
-__global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size,
+__global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
                           int64_t *count, ptrdiff_t nNodes) {
  KERNEL_LOOP(i, nNodes) {
    T *pos = posInfo.data + i * posInfo.stride[0];
@@ -13,7 +13,7 @@ __global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size,
      value += coef * THCNumerics<T>::floor(THCNumerics<T>::div(pos[d], size[d]));
      coef *= count[d];
    }
-    cluster[i] = value;
+    self[i] = value;
  }
 }


--- a/aten/THC/THCGrid.h
+++ b/aten/THC/THCGrid.h
@@ -3,8 +3,6 @@

 #include <THC/THC.h>

-#define THCGrid_ TH_CONCAT_3(TH,CReal,Grid)
-
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus

--- a/aten/THC/THCNumerics.cuh
+++ b/aten/THC/THCNumerics.cuh
 #ifndef THC_NUMERICS_INC
 #define THC_NUMERICS_INC

-#include "THC/THCHalf.h"
-
-template<typename T>
-struct THCNumerics {
-  static inline __host__ __device__ T div(T a, T b) { return a / b; }
-  static inline __host__ __device__ int floor(T a) { return a; }
-};
+#include <THC/THCHalf.h>

 #ifdef CUDA_HALF_TENSOR
 #ifdef __CUDA_ARCH__
@@ -16,7 +10,16 @@ struct THCNumerics {
 #else  // CUDA_ARCH__
 #define h2f(A) THC_half2float(A)
 #define f2h(A) THC_float2half(A)
-#endif
+#endif  // __CUDA_ARCH__
+#endif  // CUDA_HALF_TENSOR
+
+// Generic scalar helpers for the kernels; a `half` specialization follows when
+// CUDA_HALF_TENSOR is defined.
+template<typename T>
+struct THCNumerics {
+  // Returns a / b using T's native division.
+  static inline __host__ __device__ T div(T a, T b) { return a / b; }
+
+  // Returns the largest integer that is not greater than `a`.
+  // A plain conversion to int truncates toward zero, which is one too high for
+  // negative non-integral values (e.g. -0.5 -> 0), so step down in that case.
+  static inline __host__ __device__ int floor(T a) {
+    int truncated = (int) a;
+    return ((T) truncated > a) ? truncated - 1 : truncated;
+  }
+};
+
+#ifdef CUDA_HALF_TENSOR
 template<>
 struct THCNumerics<half> {
  static inline __host__ __device__ half div(half a, half b) { return f2h(h2f(a) / h2f(b)); }
@@ -24,4 +27,21 @@ struct THCNumerics<half> {
 };
 #endif  // CUDA_HALF_TENSOR

+// Generic scalar conversion: turns a value of type In into type Out with a plain cast.
+template <typename In, typename Out>
+struct ScalarConvert {
+  static __host__ __device__ Out to(const In v) {
+    const Out converted = (Out) v;
+    return converted;
+  }
+};
+
+#ifdef CUDA_HALF_TENSOR
+// half -> Out: widen to float via h2f, then cast to the target type.
+template <typename Out>
+struct ScalarConvert<half, Out> {
+  static __host__ __device__ Out to(const half v) { return (Out) h2f(v); }
+};
+
+// In -> half: cast to float, then narrow via f2h.
+template <typename In>
+struct ScalarConvert<In, half> {
+  static __host__ __device__ half to(const In v) { return f2h((float) v); }
+};
+
+// half -> half: identity. Both partial specializations above match
+// ScalarConvert<half, half> equally well, so without this explicit
+// specialization that instantiation would be ambiguous.
+template <>
+struct ScalarConvert<half, half> {
+  static __host__ __device__ half to(const half v) { return v; }
+};
+#endif  // CUDA_HALF_TENSOR
+
 #endif  // THC_NUMERICS_INC
--- a/aten/THC/THCPropose.cu
+++ b/aten/THC/THCPropose.cu
+#ifndef THC_PROPOSE_INC
+#define THC_PROPOSE_INC
+
 #include "common.cuh"

 __global__ void proposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
                              int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
  KERNEL_LOOP(i, nNodes) {
-    if (color[i] != -1) continue;  // Only visit blue nodes.
-    ptrdiff_t c;
+    if (color[i] != -1) { continue; }  // Only visit blue nodes.
+    ptrdiff_t c; bool isDead = true;
    for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) {
      c = col[e];
-      if (color[c] == -2) {  // Red neighbor found.
-        prop[i] = c;  // Propose neighbor.
-        break;
-      }
+      if (isDead && color[c] < 0) { isDead = false; }  // Unmatched neighbor found.
+      if (color[c] == -2) { prop[i] = c; break; }  // Propose to first red neighbor.
    }
-    if (prop[i] < 0) color[i] = i;  // Mark node as dead.
+    if (isDead) { color[i] = i; }  // Mark node as dead.
  }
 }

-void THCGreedy_propose(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
+void THCTensor_propose(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
                       THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
                       THCudaLongTensor *cumDegree) {
  KERNEL_RUN(proposeKernel, THCudaLongTensor_nElement(state, color),
             THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop),
             THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col),
-             THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree))
+             THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree));
 }
+
+#endif  // THC_PROPOSE_INC
--- a/aten/THC/THCResponse.cu
+++ b/aten/THC/THCResponse.cu
+#ifndef THC_RESPONSE_INC
+#define THC_RESPONSE_INC
+
 #include "common.cuh"

+    /* if (color[i] != -1) { continue; }  // Only visit blue nodes. */
+    /* ptrdiff_t c; bool isDead = true; */
+    /* for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) { */
+    /*   c = col[e]; */
+    /*   if (isDead && color[c] < 0) { isDead = false; }  // Unmatched neighbor found. */
+    /*   if (color[c] == -2) { prop[i] = c; break; }  // Propose to first red neighbor. */
+    /* } */
+    /* if (isDead) { color[i] = i; }  // Mark node as dead. */
+
 __global__ void responseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
                               int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
  KERNEL_LOOP(i, nNodes) {
-    if (color[i] != -2) continue; // Only visit red nodes.
-    ptrdiff_t c; int64_t neighborColor, minValue;
-    bool isDead = true;
+    if (color[i] != -2) { continue; }  // Only visit red nodes.
+    /* ptrdiff_t c;  // int64_t neighborColor, minValue; */
+    /* bool isDead = true; */
    for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) {
-      c = col[e];
-      neighborColor = color[c];
-      if (neighborColor == -1 && prop[c] == i) {  // Blue neighbor found which proposed to node i.
-        minValue = min(i, c);
-        color[i] = minValue;
-        color[c] = minValue;
-        break;
-      }
-      if (neighborColor < 0) isDead = false;
+    /*   c = col[e]; */
+    /*   neighborColor = color[c]; */
+    /*   if (neighborColor == -1 && prop[c] == i) {  // Blue neighbor found which proposed to node i. */
+    /*     minValue = min(i, c); */
+    /*     color[i] = minValue; */
+    /*     color[c] = minValue; */
+    /*     break; */
+    /*   } */
+    /*   if (neighborColor < 0) isDead = false; */
    }
-    if (isDead && color[i] < 0) color[i] = i;  // Mark node as dead.
+    /* if (isDead && color[i] < 0) color[i] = i;  // Mark node as dead. */
  }
 }

-void THCGreedy_response(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
+void THCTensor_response(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
                        THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
                        THCudaLongTensor *cumDegree) {
  KERNEL_RUN(responseKernel, THCudaLongTensor_nElement(state, color),
             THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop),
             THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col),
-             THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree))
+             THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree));
 }
+
+#endif  // THC_RESPONSE_INC
--- a/aten/THC/common.cuh
+++ b/aten/THC/common.cuh
 #ifndef THC_COMMON_INC
 #define THC_COMMON_INC

-#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
-
 #define KERNEL_LOOP(I, N) \
  for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)


--- a/aten/THC/generic/THCDegree.cu
+++ b/aten/THC/generic/THCDegree.cu
 #ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "generic/THCDegree.cu"
-
+#define THC_GENERIC_FILE "generic/THCDegree.cuh"
 #else
+
 void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
  int nEdges = THCudaLongTensor_nElement(state, index);
+
  THCTensor *one = THCTensor_(newWithSize1d)(state, nEdges);
-  THCTensor_(fill)(state, one, 1);
+  THCTensor_(fill)(state, one, ScalarConvert<int, real>::to(1));

-  THCTensor_(fill)(state, self, 0);
+  THCTensor_(fill)(state, self, ScalarConvert<int, real>::to(0));
  THCTensor_(scatterAdd)(state, self, 0, index, one);

  THCTensor_(free)(state, one);

--- a/aten/THC/generic/THCGraclus.cu
+++ b/aten/THC/generic/THCGraclus.cu
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCGraclus.cu"
+#else
+
+void THCTensor_(graclus)(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
+                         THCudaLongTensor *col, THCTensor *weight) {
+}  // Empty body: this weight-taking variant currently performs no work.
+
+#endif  // THC_GENERIC_FILE
--- a/aten/THC/generic/THCGraclus.h
+++ b/aten/THC/generic/THCGraclus.h
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCGraclus.h"
+#else
+
+void THCTensor_(graclus)(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
+                         THCudaLongTensor *col, THCTensor *weight);
+
+#endif  // THC_GENERIC_FILE
--- a/aten/THC/generic/THCGreedy.cu
+++ b/aten/THC/generic/THCGreedy.cu
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "generic/THCGreedy.cu"
-#else
-
-void THCGreedy_(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
-                THCudaLongTensor *col, THCTensor *weight) {
-  printf("THCGreedy dynamic drin");
-}
-
-#endif  // THC_GENERIC_FILE
--- a/aten/THC/generic/THCGreedy.h
+++ b/aten/THC/generic/THCGreedy.h
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "generic/THCGreedy.h"
-#else
-
-void THCGreedy_(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
-                THCudaLongTensor *col, THCTensor *weight);
-
-#endif  // THC_GENERIC_FILE
--- a/aten/THC/generic/THCGrid.cu
+++ b/aten/THC/generic/THCGrid.cu
@@ -2,17 +2,17 @@
 #define THC_GENERIC_FILE "generic/THCGrid.cu"
 #else

-void THCGrid_(THCState *state, THCudaLongTensor *cluster, THCTensor *pos, THCTensor *size,
-              THCudaLongTensor *count) {
-  THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, cluster, pos, size, count));
+void THCTensor_(grid)(THCState *state, THCudaLongTensor *self, THCTensor *pos, THCTensor *size,
+                      THCudaLongTensor *count) {
+  THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, self, pos, size, count));

-  int64_t *clusterData = THCudaLongTensor_data(state, cluster);
+  int64_t *selfData = THCudaLongTensor_data(state, self);
  TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos);
  real *sizeData = THCTensor_(data)(state, size);
  int64_t *countData = THCudaLongTensor_data(state, count);

-  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, cluster);
-  KERNEL_REAL_RUN(gridKernel, nNodes, clusterData, posInfo, sizeData, countData);
+  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
+  KERNEL_REAL_RUN(gridKernel, nNodes, selfData, posInfo, sizeData, countData);
 }

 #endif  // THC_GENERIC_FILE