graclus cuda, cleanup old code

920cc934 · rusty1s · d2cc3162 · d2cc3162 · d2cc3162 · d2cc3162
Commit 920cc934 authored Aug 23, 2018 by rusty1s
20 changed files
--- a/aten/README.md
+++ b/aten/README.md
-| Library | Meaning                            |
-|---------|------------------------------------|
-| TH      | **T**orc**H**                      |
-| THC     | **T**orc**H** **C**uda             |
-| THCC    | **T**orc**H** **C**uda **C**onnect |
--- a/aten/TH/THGraclus.c
+++ b/aten/TH/THGraclus.c
-#include <TH/TH.h>
-
-#define TH_TENSOR_GRACLUS(self, row, col, PRESELECT, SELECT) { /* greedy edge matching; PRESELECT and SELECT are statement fragments supplied by the caller */ \
-  THLongTensor_fill(self, -1); /* -1 marks a node as not matched yet */ \
-  int64_t *selfData = THLongTensor_data(self); \
-  int64_t *rowData = THLongTensor_data(row); \
-  int64_t *colData = THLongTensor_data(col); \
-  ptrdiff_t e = 0, nEdges = THLongTensor_nElement(row); /* e is the current edge position; SELECT may read it */ \
-  int64_t rowValue, colValue, matchedValue, value; \
-  while(e < nEdges) { /* one iteration per run of consecutive edges with the same row value (assumes equal row values are stored contiguously - TODO confirm with callers) */ \
-    rowValue = rowData[e]; \
-    matchedValue = rowValue; /* default partner is the node itself */ \
-    PRESELECT /* per-node setup injected by the caller */ \
-    if (selfData[rowValue] < 0) { /* only a still-unmatched node picks a partner */ \
-      do { \
-        colValue = colData[e]; \
-        SELECT /* caller code; may assign matchedValue and may break out before e++ runs */ \
-        e++; \
-      } while(e < nEdges && rowData[e] == rowValue); \
-      value = rowValue < matchedValue ? rowValue : matchedValue; /* both ends get the smaller of the two node indices */ \
-      selfData[rowValue] = value; \
-      selfData[matchedValue] = value; \
-    } \
-    while(e < nEdges && rowData[e] == rowValue) e++; /* skip the edges of this node that were not consumed above */ \
-  } \
-}
-
-void THTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col) { /* unweighted variant: takes no weight tensor */
-  TH_TENSOR_GRACLUS(self, row, col, {}, /* PRESELECT is an empty block */
-    if (selfData[colValue] < 0) { matchedValue = colValue; break; } /* take the first neighbour that is still unmatched and stop scanning */
-  )
-}
-
-#include "generic/THGraclus.c"
-#include "THGenerateAllTypes.h"
--- a/aten/TH/THGraclus.h
+++ b/aten/TH/THGraclus.h
-void       THTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col);
-
-void   THByteTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col,   THByteTensor *weight);
-void   THCharTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col,   THCharTensor *weight);
-void  THShortTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col,  THShortTensor *weight);
-void    THIntTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col,    THIntTensor *weight);
-void   THLongTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col,   THLongTensor *weight);
-void  THFloatTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col,  THFloatTensor *weight);
-void THDoubleTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THDoubleTensor *weight);
--- a/aten/TH/THGrid.c
+++ b/aten/TH/THGrid.c
-#include <TH/TH.h>
-
-#include "generic/THGrid.c"
-#include "THGenerateAllTypes.h"
--- a/aten/TH/THGrid.h
+++ b/aten/TH/THGrid.h
-void   THByteTensor_grid(THLongTensor *self,   THByteTensor *pos,   THByteTensor *size, THLongTensor *count);
-void   THCharTensor_grid(THLongTensor *self,   THCharTensor *pos,   THCharTensor *size, THLongTensor *count);
-void  THShortTensor_grid(THLongTensor *self,  THShortTensor *pos,  THShortTensor *size, THLongTensor *count);
-void    THIntTensor_grid(THLongTensor *self,    THIntTensor *pos,    THIntTensor *size, THLongTensor *count);
-void   THLongTensor_grid(THLongTensor *self,   THLongTensor *pos,   THLongTensor *size, THLongTensor *count);
-void  THFloatTensor_grid(THLongTensor *self,  THFloatTensor *pos,  THFloatTensor *size, THLongTensor *count);
-void THDoubleTensor_grid(THLongTensor *self, THDoubleTensor *pos, THDoubleTensor *size, THLongTensor *count);
--- a/aten/TH/generic/THGraclus.c
+++ b/aten/TH/generic/THGraclus.c
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THGraclus.c"
-#else
-
-void THTensor_(graclus)(THLongTensor *self, THLongTensor *row, THLongTensor *col, THTensor *weight) { /* weighted variant, written against the generic `real` scalar type */
-  real *weightData = THTensor_(data)(weight); /* read at edge position e below, i.e. one weight per edge */
-  real maxWeight, tmp;
-  TH_TENSOR_GRACLUS(self, row, col, maxWeight = 0;, /* PRESELECT resets the running maximum for every node */
-    tmp = weightData[e];
-    if (selfData[colValue] < 0 && tmp > maxWeight) { matchedValue = colValue; maxWeight = tmp; } /* keep the heaviest unmatched neighbour; strict '>' against an initial 0 means edges with weight <= 0 are never selected */
-  )
-}
-
-#endif  // TH_GENERIC_FILE
--- a/aten/TH/generic/THGrid.c
+++ b/aten/TH/generic/THGrid.c
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THGrid.c"
-#else
-
-void THTensor_(grid)(THLongTensor *self, THTensor *pos, THTensor *size,
-                     THLongTensor *count) {
-  // For every row of `pos`, fold the per-dimension cell numbers
-  // (position / cell size, truncated to int64_t) into a single index and
-  // store it in `self`. `pos` is walked through its strides; `self`, `size`
-  // and `count` are indexed directly through their data pointers.
-  int64_t *out = THLongTensor_data(self);
-  int64_t *cellsPerDim = THLongTensor_data(count);
-  real *cellSize = THTensor_(data)(size);
-  real *point = THTensor_(data)(pos);
-  int64_t rowStride = THTensor_(stride)(pos, 0);
-  int64_t colStride = THTensor_(stride)(pos, 1);
-
-  ptrdiff_t node, dim;
-  int64_t scale, index;
-  for (node = 0; node < THTensor_(size)(pos, 0); node++, point += rowStride) {
-    index = 0;
-    scale = 1;
-    for (dim = 0; dim < THTensor_(size)(pos, 1); dim++) {
-      // Contribution of this dimension, weighted by the product of the
-      // cell counts of all previous dimensions.
-      index += scale * (int64_t)(point[dim * colStride] / cellSize[dim]);
-      scale *= cellsPerDim[dim];
-    }
-    out[node] = index;
-  }
-}
-
-#endif // TH_GENERIC_FILE
--- a/aten/THC/THC.cu
+++ b/aten/THC/THC.cu
-#include "THCGraclus.cu"
-#include "THCGrid.cu"
--- a/aten/THC/THC.h
+++ b/aten/THC/THC.h
-#ifndef THC_INC
-#define THC_INC
-
-#include "THCGraclus.h"
-#include "THCGrid.h"
-
-#endif  // THC_INC
--- a/aten/THC/THCColor.cuh
+++ b/aten/THC/THCColor.cuh
-#ifndef THC_COLOR_INC
-#define THC_COLOR_INC
-
-#include "common.cuh"
-
-#define BLUE_PROB 0.53406  // probability passed to the Bernoulli draw in THCudaLongTensor_color; a drawn 1 becomes colour -1
-
-__device__ int d_done;  // device-global flag: set to 1 by initDoneKernel, cleared to 0 by colorKernel
-__global__ void initDoneKernel() { d_done = 1; }  // launched with a single thread before each colouring pass
-
-__global__ void colorKernel(int64_t *self, uint8_t *bernoulli, ptrdiff_t nNodes) {
-  // Give a fresh colour to every node whose entry in `self` is negative.
-  KERNEL_LOOP(node, nNodes) {
-    if (self[node] >= 0) { continue; }
-    // A byte b becomes colour b - 2 (0 -> -2, 1 -> -1).
-    self[node] = bernoulli[node] - 2;
-    // At least one node was coloured, so the host must run another round.
-    d_done = 0;
-  }
-}
-
-// Runs one colouring pass over `self`: every negative entry is replaced by a
-// random colour (-1 or -2). Returns 1 when no entry was negative (nothing
-// left to colour) and 0 otherwise.
-//
-// The flag reset, the colouring kernel and the flag read-back are all issued
-// on the THC current stream so that they execute in order, and every CUDA
-// call is checked.
-int THCudaLongTensor_color(THCState *state, THCudaLongTensor *self) {
-  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
-  // Nothing to colour: report "done" instead of launching an empty grid.
-  if (nNodes == 0) { return 1; }
-
-  cudaStream_t colorStream = THCState_getCurrentStream(state);
-
-  // Reset the device flag on the same stream the colouring kernel uses.
-  initDoneKernel<<<1, 1, 0, colorStream>>>();
-  THCudaCheck(cudaGetLastError());
-
-  THCudaByteTensor *bernoulli = THCudaByteTensor_newWithSize1d(state, nNodes);
-  THCudaByteTensor_bernoulli(state, bernoulli, BLUE_PROB);
-
-  int64_t *selfData = THCudaLongTensor_data(state, self);
-  uint8_t *bernoulliData = THCudaByteTensor_data(state, bernoulli);
-
-  KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData);
-
-  THCudaByteTensor_free(state, bernoulli);
-
-  // Read the flag back after the kernel, ordered by the stream, and wait for
-  // the copy to land before using the host value.
-  int done = 0;
-  THCudaCheck(cudaMemcpyFromSymbolAsync(&done, d_done, sizeof(done), 0,
-                                        cudaMemcpyDeviceToHost, colorStream));
-  THCudaCheck(cudaStreamSynchronize(colorStream));
-
-  return done;
-}
-
-#endif  // THC_COLOR_INC
--- a/aten/THC/THCDegree.cuh
+++ b/aten/THC/THCDegree.cuh
-#ifndef THC_DEGREE_INC
-#define THC_DEGREE_INC
-
-#include "common.cuh"
-#include "THCNumerics.cuh"
-
-// Writes, for every value r that occurs in `index`, the position one past the
-// last edge of the run of r into self[r]. Entries of `self` whose index never
-// occurs are left untouched.
-// NOTE(review): this only yields cumulative degrees when equal values of
-// `index` are stored contiguously and in ascending order - confirm with callers.
-template<typename T>
-__global__ void cumDegreeKernel(T *self, int64_t *index, ptrdiff_t nEdges) {
-  KERNEL_LOOP(i, nEdges) {
-    int64_t r = index[i];
-    // Edge i closes the run of r when it is the very last edge or the next
-    // edge carries a different index. The first test short-circuits, so
-    // index[i + 1] is never read past the end.
-    if (i + 1 == nEdges || r != index[i + 1]) {
-      // Convert from ptrdiff_t so large edge positions are not narrowed to int.
-      self[r] = ScalarConvert<ptrdiff_t, T>::to(i + 1);
-    }
-  }
-}
-
-#include "generic/THCDegree.cuh"
-#include "THC/THCGenerateAllTypes.h"
-
-#endif  // THC_DEGREE_INC
--- a/aten/THC/THCGraclus.cu
+++ b/aten/THC/THCGraclus.cu
-#include "THCGraclus.h"
-
-#include "common.cuh"
-#include "THCDegree.cuh"
-#include "THCColor.cuh"
-#include "THCPropose.cuh"
-#include "THCResponse.cuh"
-
-#define THC_TENSOR_GRACLUS(state, self, row, CODE) { /* allocates the scratch tensors, runs CODE, then frees them */ \
-  int nNodes = THCudaLongTensor_nElement(state, self); /* one entry of self per node; NOTE(review): element count is stored in an int */ \
-  THCudaLongTensor_fill(state, self, -1); /* every node starts with a negative (unmatched) entry */ \
-\
-  THCudaLongTensor *prop = THCudaLongTensor_newWithSize1d(state, nNodes); /* proposal target per node */ \
-  THCudaLongTensor_fill(state, prop, -1); /* -1 = no proposal made */ \
-\
-  THCudaLongTensor *degree = THCudaLongTensor_newWithSize1d(state, nNodes); \
-  THCudaLongTensor_degree(state, degree, row); /* number of edges per value of row */ \
-\
-  THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes); /* not filled here: only the entries written by cumDegree are defined */ \
-  THCudaLongTensor_cumDegree(state, cumDegree, row); /* end position of each node's run of edges */ \
-\
-  CODE /* caller-supplied statements; may use prop, degree and cumDegree */ \
-\
-  THCudaLongTensor_free(state, prop); \
-  THCudaLongTensor_free(state, degree); \
-  THCudaLongTensor_free(state, cumDegree); \
-}
-
-void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
-                       THCudaLongTensor *col) { /* unweighted GPU variant: takes no weight tensor */
-  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 3, self, row, col)); /* all three tensors must be on the same GPU */
-
-  THC_TENSOR_GRACLUS(state, self, row,
-    while(!THCudaLongTensor_color(state, self)) { /* repeat until a colouring pass finds no negative entry */
-      THCTensor_propose(state, self, prop, row, col, degree, cumDegree); /* blue nodes (-1) propose to a red neighbour */
-      THCTensor_response(state, self, prop, row, col, degree, cumDegree); /* red nodes (-2) accept a proposal */
-    }
-  )
-}
-
-#include "generic/THCGraclus.cu"
-#include "THC/THCGenerateAllTypes.h"
--- a/aten/THC/THCGraclus.h
+++ b/aten/THC/THCGraclus.h
-#ifndef THC_GRACLUS_INC
-#define THC_GRACLUS_INC
-
-#include <THC/THC.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
-                       THCudaLongTensor *col);
-
-#include "generic/THCGraclus.h"
-#include "THC/THCGenerateAllTypes.h"
-
-#ifdef __cplusplus
-}
-#endif  // __cplusplus
-
-#endif  // THC_GRACLUS_INC
--- a/aten/THC/THCGrid.cu
+++ b/aten/THC/THCGrid.cu
-#include "THCGrid.h"
-
-#include "common.cuh"
-#include "THCNumerics.cuh"
-
-// Computes one flattened grid-cell index per row of `pos` and stores it in
-// self[i]: the per-dimension cell numbers (position / cell size, converted to
-// int64_t) are combined with the running product of `count` as weight.
-// `pos` is addressed through the sizes and strides in `posInfo`; `size` and
-// `count` are read with one entry per dimension.
-template<typename T>
-__global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
-                           int64_t *count, ptrdiff_t nNodes) {
-  KERNEL_LOOP(i, nNodes) {
-    T *pos = posInfo.data + i * posInfo.stride[0];
-    int64_t coef = 1, value = 0;
-    // d counts dimensions; only the access into `pos` is scaled by the column
-    // stride, so `size` and `count` stay aligned with the dimension number.
-    for (ptrdiff_t d = 0; d < posInfo.size[1]; d++) {
-      T cell = THCNumerics<T>::div(pos[d * posInfo.stride[1]], size[d]);
-      value += coef * ScalarConvert<T, int64_t>::to(cell);
-      coef *= count[d];
-    }
-    self[i] = value;
-  }
-}
-
-#include "generic/THCGrid.cu"
-#include "THC/THCGenerateAllTypes.h"
--- a/aten/THC/THCGrid.h
+++ b/aten/THC/THCGrid.h
-#ifndef THC_GRID_INC
-#define THC_GRID_INC
-
-#include <THC/THC.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-#include "generic/THCGrid.h"
-#include "THC/THCGenerateAllTypes.h"
-
-#ifdef __cplusplus
-}
-#endif  // __cplusplus
-
-#endif  // THC_GRID_INC
--- a/aten/THC/THCNumerics.cuh
+++ b/aten/THC/THCNumerics.cuh
-#ifndef THC_NUMERICS_INC
-#define THC_NUMERICS_INC
-
-#include <THC/THCHalf.h>
-
-#ifdef CUDA_HALF_TENSOR
-#ifdef __CUDA_ARCH__
-#define h2f(A) __half2float(A)  // device compilation pass: use the CUDA intrinsics
-#define f2h(A) __float2half(A)
-#else  // !__CUDA_ARCH__
-#define h2f(A) THC_half2float(A)  // host compilation pass: use the THC conversion helpers
-#define f2h(A) THC_float2half(A)
-#endif  // __CUDA_ARCH__
-#endif  // CUDA_HALF_TENSOR
-
-// Arithmetic helpers for scalar types that support the built-in operators.
-template<typename T>
-struct THCNumerics {
-  // Quotient lhs / rhs.
-  static inline __host__ __device__ T div(T lhs, T rhs) {
-    return lhs / rhs;
-  }
-
-  // True when lhs >= rhs.
-  static inline __host__ __device__ bool gte(T lhs, T rhs) {
-    return lhs >= rhs;
-  }
-};
-
-#ifdef CUDA_HALF_TENSOR
-// Half-precision specialisation: operands are widened to float first.
-template<>
-struct THCNumerics<half> {
-  // Divide in float, then convert the quotient back to half.
-  static inline __host__ __device__ half div(half a, half b) {
-    const float numerator = h2f(a);
-    const float denominator = h2f(b);
-    return f2h(numerator / denominator);
-  }
-
-  // Compare the float values of both operands.
-  static inline __host__ __device__ bool gte(half a, half b) {
-    const float left = h2f(a);
-    const float right = h2f(b);
-    return left >= right;
-  }
-};
-#endif  // CUDA_HALF_TENSOR
-
-// Generic scalar conversion: a plain C-style cast from In to Out.
-template <typename In, typename Out>
-struct ScalarConvert {
-  static __host__ __device__ Out to(const In value) {
-    return (Out) value;
-  }
-};
-
-#ifdef CUDA_HALF_TENSOR
-// Conversion from half: widen to float, then cast to the target type.
-template <typename Out>
-struct ScalarConvert<half, Out> {
-  static __host__ __device__ Out to(const half value) {
-    const float widened = h2f(value);
-    return (Out) widened;
-  }
-};
-
-// Conversion to half: cast the source to float, then convert to half.
-template <typename In>
-struct ScalarConvert<In, half> {
-  static __host__ __device__ half to(const In value) {
-    const float asFloat = (float) value;
-    return f2h(asFloat);
-  }
-};
-#endif  // CUDA_HALF_TENSOR
-
-#endif  // THC_NUMERICS_INC
--- a/aten/THC/THCPropose.cuh
+++ b/aten/THC/THCPropose.cuh
-#ifndef THC_PROPOSE_INC
-#define THC_PROPOSE_INC
-
-#include "common.cuh"
-#include "THCNumerics.cuh"
-
-// Proposal step: every blue node (colour -1) scans its edges and proposes to
-// the first red neighbour (colour -2) by writing it to prop[node]. A blue node
-// that sees no neighbour with a negative colour is marked dead by storing its
-// own index as its colour.
-__global__ void proposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
-                              int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
-  KERNEL_LOOP(node, nNodes) {
-    if (color[node] == -1) {
-      // The edges of `node` occupy [last - degree[node], last).
-      const ptrdiff_t last = cumDegree[node];
-      bool hasUnmatchedNeighbor = false;
-      for (ptrdiff_t edge = last - degree[node]; edge < last; edge++) {
-        const ptrdiff_t neighbor = col[edge];
-        if (color[neighbor] < 0) { hasUnmatchedNeighbor = true; }
-        if (color[neighbor] == -2) {
-          prop[node] = neighbor;
-          break;
-        }
-      }
-      if (!hasUnmatchedNeighbor) { color[node] = node; }
-    }
-  }
-}
-
-// Weighted proposal step: every blue node (colour -1) proposes to the red
-// neighbour (colour -2) reached over the heaviest edge, where an edge only
-// qualifies if its weight is >= the running maximum (which starts at 0).
-// prop[node] is always written and is -1 when no neighbour qualified. A blue
-// node that sees no neighbour with a negative colour is marked dead.
-template<typename T>
-__global__ void weightedProposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
-                                      T *weight, int64_t *degree, int64_t *cumDegree,
-                                      ptrdiff_t nNodes) {
-  KERNEL_LOOP(node, nNodes) {
-    if (color[node] == -1) {
-      // The edges of `node` occupy [last - degree[node], last).
-      const ptrdiff_t last = cumDegree[node];
-      bool hasUnmatchedNeighbor = false;
-      T bestWeight = ScalarConvert<int, T>::to(0);
-      int64_t bestNeighbor = -1;
-      for (ptrdiff_t edge = last - degree[node]; edge < last; edge++) {
-        const ptrdiff_t neighbor = col[edge];
-        const T edgeWeight = weight[edge];
-        if (color[neighbor] < 0) { hasUnmatchedNeighbor = true; }
-        if (color[neighbor] == -2 && THCNumerics<T>::gte(edgeWeight, bestWeight)) {
-          bestNeighbor = neighbor;
-          bestWeight = edgeWeight;
-        }
-      }
-      prop[node] = bestNeighbor;
-      if (!hasUnmatchedNeighbor) { color[node] = node; }
-    }
-  }
-}
-
-// Host wrapper: launches proposeKernel with one loop index per element of
-// `color`, on the THC current stream (via KERNEL_RUN).
-void THCTensor_propose(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
-                       THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
-                       THCudaLongTensor *cumDegree) {
-  int64_t *colorData = THCudaLongTensor_data(state, color);
-  int64_t *propData = THCudaLongTensor_data(state, prop);
-  int64_t *rowData = THCudaLongTensor_data(state, row);
-  int64_t *colData = THCudaLongTensor_data(state, col);
-  int64_t *degreeData = THCudaLongTensor_data(state, degree);
-  int64_t *cumDegreeData = THCudaLongTensor_data(state, cumDegree);
-
-  KERNEL_RUN(proposeKernel, THCudaLongTensor_nElement(state, color),
-             colorData, propData, rowData, colData, degreeData, cumDegreeData);
-}
-
-#include "generic/THCPropose.cuh"
-#include "THC/THCGenerateAllTypes.h"
-
-#endif  // THC_PROPOSE_INC
--- a/aten/THC/THCResponse.cuh
+++ b/aten/THC/THCResponse.cuh
-#ifndef THC_RESPONSE_INC
-#define THC_RESPONSE_INC
-
-#include "common.cuh"
-
-// Response step: every red node (colour -2) scans its edges and matches with
-// the first blue neighbour (colour -1) whose proposal targets it; both ends
-// then receive the smaller of the two node indices as colour. A red node that
-// sees no neighbour with a negative colour is marked dead by storing its own
-// index as its colour.
-__global__ void responseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
-                               int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
-  KERNEL_LOOP(node, nNodes) {
-    if (color[node] == -2) {
-      // The edges of `node` occupy [last - degree[node], last).
-      const ptrdiff_t last = cumDegree[node];
-      bool hasUnmatchedNeighbor = false;
-      for (ptrdiff_t edge = last - degree[node]; edge < last; edge++) {
-        const ptrdiff_t neighbor = col[edge];
-        if (color[neighbor] < 0) { hasUnmatchedNeighbor = true; }
-        if (color[neighbor] == -1 && prop[neighbor] == node) {
-          const ptrdiff_t cluster = min(node, neighbor);
-          color[node] = cluster;
-          color[neighbor] = cluster;
-          break;
-        }
-      }
-      if (!hasUnmatchedNeighbor) { color[node] = node; }
-    }
-  }
-}
-
-// Weighted response step: every red node (colour -2) picks, among the blue
-// neighbours (colour -1) that proposed to it, the one reached over the
-// heaviest edge (an edge qualifies if its weight is >= the running maximum,
-// which starts at 0). Both ends of the chosen edge receive the smaller node
-// index as colour. A red node that sees no neighbour with a negative colour
-// is marked dead.
-template<typename T>
-__global__ void weightedResponseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
-                                       T *weight, int64_t *degree, int64_t *cumDegree,
-                                       ptrdiff_t nNodes) {
-  KERNEL_LOOP(node, nNodes) {
-    if (color[node] == -2) {
-      // The edges of `node` occupy [last - degree[node], last).
-      const ptrdiff_t last = cumDegree[node];
-      bool hasUnmatchedNeighbor = false;
-      T bestWeight = ScalarConvert<int, T>::to(0);
-      ptrdiff_t bestNeighbor = -1;
-      for (ptrdiff_t edge = last - degree[node]; edge < last; edge++) {
-        const ptrdiff_t neighbor = col[edge];
-        const T edgeWeight = weight[edge];
-        if (color[neighbor] < 0) { hasUnmatchedNeighbor = true; }
-        if (color[neighbor] == -1 && prop[neighbor] == node &&
-            THCNumerics<T>::gte(edgeWeight, bestWeight)) {
-          bestNeighbor = neighbor;
-          bestWeight = edgeWeight;
-        }
-      }
-      if (bestNeighbor >= 0) {
-        color[node] = min(node, bestNeighbor);
-        color[bestNeighbor] = min(node, bestNeighbor);
-      }
-      if (!hasUnmatchedNeighbor) { color[node] = node; }
-    }
-  }
-}
-
-// Host wrapper: launches responseKernel with one loop index per element of
-// `color`, on the THC current stream (via KERNEL_RUN).
-void THCTensor_response(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
-                        THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
-                        THCudaLongTensor *cumDegree) {
-  int64_t *colorData = THCudaLongTensor_data(state, color);
-  int64_t *propData = THCudaLongTensor_data(state, prop);
-  int64_t *rowData = THCudaLongTensor_data(state, row);
-  int64_t *colData = THCudaLongTensor_data(state, col);
-  int64_t *degreeData = THCudaLongTensor_data(state, degree);
-  int64_t *cumDegreeData = THCudaLongTensor_data(state, cumDegree);
-
-  KERNEL_RUN(responseKernel, THCudaLongTensor_nElement(state, color),
-             colorData, propData, rowData, colData, degreeData, cumDegreeData);
-}
-
-#include "generic/THCResponse.cuh"
-#include "THC/THCGenerateAllTypes.h"
-
-#endif  // THC_RESPONSE_INC
--- a/aten/THC/common.cuh
+++ b/aten/THC/common.cuh
-#ifndef THC_COMMON_INC
-#define THC_COMMON_INC
-
-// Loop header for kernels: I starts at the global thread index and advances by
-// the total number of launched threads until it reaches N. The index and the
-// step are widened to ptrdiff_t before multiplying so they cannot wrap in
-// 32-bit arithmetic, and N is parenthesized so any expression can be passed.
-#define KERNEL_LOOP(I, N) \
-  for (ptrdiff_t I = (ptrdiff_t) blockIdx.x * blockDim.x + threadIdx.x; I < (N); \
-       I += (ptrdiff_t) blockDim.x * gridDim.x)
-
-const int MAX_DIMS = 25;
-const int NUM_THREADS = 1024;
-
-// Number of blocks of NUM_THREADS threads needed to cover N elements.
-// Always returns at least 1, so an empty problem still produces a valid launch
-// configuration (the kernel loop then simply does not iterate). The ceiling
-// division is written so that it cannot overflow for large N.
-inline int GET_BLOCKS(int N) {
-  if (N <= 0) { return 1; }
-  return (N - 1) / NUM_THREADS + 1;
-}
-
-// Launches kernel NAME over N elements on the THC current stream, passing N as
-// the last kernel argument, and checks the launch for errors. Requires a
-// variable named `state` (THCState *) in the calling scope. The body is wrapped
-// in do { } while (0), so `grid` and `stream` stay local and the macro can be
-// used more than once per function; call it as a statement with a trailing ';'.
-#define KERNEL_RUN(NAME, N, ...) \
-  do { \
-    int grid = GET_BLOCKS(N); \
-    cudaStream_t stream = THCState_getCurrentStream(state); \
-    NAME<<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
-    THCudaCheck(cudaGetLastError()); \
-  } while (0)
-
-// Same as a plain kernel launch over N elements on the THC current stream, but
-// instantiates the kernel template NAME with the generic scalar type `real`.
-// N is passed as the last kernel argument and the launch is checked for errors.
-// Requires `state` (THCState *) in the calling scope. The body is wrapped in
-// do { } while (0), so `grid` and `stream` stay local and the macro can be used
-// more than once per function; call it as a statement with a trailing ';'.
-#define KERNEL_REAL_RUN(NAME, N, ...) \
-  do { \
-    int grid = GET_BLOCKS(N); \
-    cudaStream_t stream = THCState_getCurrentStream(state); \
-    NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
-    THCudaCheck(cudaGetLastError()); \
-  } while (0)
-
-template<typename T>
-struct TensorInfo {  // plain aggregate describing a tensor; passed to kernels by value (see gridKernel)
-  T *data;  // pointer to the first element
-  int dims;  // presumably the number of valid entries in size/stride - TODO confirm against the code that fills it
-  int size[MAX_DIMS];  // extent per dimension
-  int stride[MAX_DIMS];  // element stride per dimension
-};
-
-#include "generic/common.cuh"
-#include "THC/THCGenerateAllTypes.h"
-
-#endif  // THC_COMMON_INC
--- a/aten/THC/generic/THCDegree.cuh
+++ b/aten/THC/generic/THCDegree.cuh
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "generic/THCDegree.cuh"
-#else
-
-// Counts how often every value occurs in `index` and stores the counts in
-// `self`: self is zero-filled, then a tensor of ones (one per element of
-// `index`) is scatter-added into it along dimension 0.
-void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
-  // Keep the element count in ptrdiff_t, as cumDegree does, so it is not
-  // narrowed to int.
-  ptrdiff_t nEdges = THCudaLongTensor_nElement(state, index);
-
-  THCTensor *one = THCTensor_(newWithSize1d)(state, nEdges);
-  THCTensor_(fill)(state, one, ScalarConvert<int, real>::to(1));
-
-  THCTensor_(fill)(state, self, ScalarConvert<int, real>::to(0));
-  THCTensor_(scatterAdd)(state, self, 0, index, one);
-
-  THCTensor_(free)(state, one);
-}
-
-// Launches cumDegreeKernel over all elements of `index`, which writes into
-// `self` the end position of every run of equal index values. Entries of
-// `self` whose index never occurs in `index` are not written.
-void THCTensor_(cumDegree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
-  ptrdiff_t nEdges = THCudaLongTensor_nElement(state, index);
-  // No edges means there is nothing to write; skip the launch, which would
-  // otherwise be issued with an empty grid.
-  if (nEdges == 0) { return; }
-
-  real *selfData = THCTensor_(data)(state, self);
-  int64_t *indexData = THCudaLongTensor_data(state, index);
-
-  KERNEL_REAL_RUN(cumDegreeKernel, nEdges, selfData, indexData);
-}
-
-#endif  // THC_GENERIC_FILE