Commit 8c15ed64 authored by rusty1s
Browse files

rename and bugfixes

parent bebd72f3
#include "THCGreedy.cu" #include "THCGraclus.cu"
#include "THCGrid.cu" #include "THCGrid.cu"
#ifndef THC_INC #ifndef THC_INC
#define THC_INC #define THC_INC
#include "THCGreedy.h" #include "THCGraclus.h"
#include "THCGrid.h" #include "THCGrid.h"
#endif #endif
#include <curand.h>
#include <curand_kernel.h>
#include "common.cuh"
// Gives a random color to every entry of `color` that is still negative.
//
// color  - per-node values; negative entries are overwritten with -1 (blue)
//          or -2 (red), non-negative entries are left untouched.
// state  - cuRAND MTGP32 generator states; only state[0] is used.
// done   - single flag; cleared to 0 as soon as any entry gets colored.
// nNodes - number of entries in `color`.
//
// NOTE(review): every thread draws from state[0]. cuRAND documents MTGP32
// states as per-block (at most 256 threads each) -- confirm that the launch
// configuration behind KERNEL_RUN makes this sharing safe.
__global__ void assignColorKernel(int64_t *color, curandStateMtgp32 *state, uint8_t *done,
                                  ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    // Skip entries that already hold a non-negative value.
    if (color[node] >= 0) { continue; }

    // A draw below the threshold makes the node blue, otherwise red.
    const bool isBlue = curand_uniform(&state[0]) < 0.53406;
    color[node] = isBlue ? -1 : -2;  // blue = -1, red = -2

    // At least one node was colored in this pass.
    *done = 0;
  }
}
// Runs assignColorKernel over all elements of `color` and reports whether
// anything had to be colored.
//
// state - THC state; supplies the random generator states for the kernel.
// color - long tensor whose negative entries are recolored by the kernel.
//
// Returns the flag read back from the device: 1 if the kernel colored no
// entry, 0 if at least one entry was colored.
int THCGreedy_assignColor(THCState *state, THCudaLongTensor *color) {
  int64_t *colorData = THCudaLongTensor_data(state, color);
  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, color);

  // One-byte device flag, preset to 1; the kernel clears it when it colors a node.
  // Every CUDA call is checked so a failure is reported through THC instead of
  // continuing with an invalid pointer or an unset result.
  uint8_t *d_done = NULL;
  THCudaCheck(cudaMalloc(&d_done, sizeof(uint8_t)));
  THCudaCheck(cudaMemset(d_done, 1, sizeof(uint8_t)));

  KERNEL_RUN(assignColorKernel, nNodes, colorData, THCRandom_generatorStates(state), d_done);

  // Blocking copy back to the host; `done` starts from a defined value.
  uint8_t done = 1;
  THCudaCheck(cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost));
  THCudaCheck(cudaFree(d_done));
  return done;
}
#ifndef THC_COLOR_INC
#define THC_COLOR_INC
#include "common.cuh"
#include <curand.h>
#include <curand_kernel.h>
#define BLUE_PROBABILITY 0.53406
// Gives a random color to every entry of `self` that is still negative.
//
// self   - per-node values; negative entries are overwritten with -1 (blue)
//          or -2 (red), non-negative entries are left untouched.
// state  - cuRAND MTGP32 generator states; only state[0] is used.
// done   - single flag; cleared to 0 as soon as any entry gets colored.
// nNodes - number of entries in `self`.
//
// NOTE(review): every thread draws from state[0]. cuRAND documents MTGP32
// states as per-block (at most 256 threads each) -- confirm that the launch
// configuration behind KERNEL_RUN makes this sharing safe.
__global__ void colorKernel(int64_t *self, curandStateMtgp32 *state, uint8_t *done,
                            ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    // Skip entries that already hold a non-negative value.
    if (self[node] >= 0) { continue; }

    // A draw below BLUE_PROBABILITY makes the node blue, otherwise red.
    const bool isBlue = curand_uniform(&state[0]) < BLUE_PROBABILITY;
    self[node] = isBlue ? -1 : -2;  // blue = -1, red = -2

    // At least one node was colored in this pass.
    *done = 0;
  }
}
// Runs colorKernel over all elements of `self` and reports whether anything
// had to be colored.
//
// state - THC state; supplies the random generator states for the kernel.
// self  - long tensor whose negative entries are recolored by the kernel.
//
// Returns the flag read back from the device: 1 if the kernel colored no
// entry, 0 if at least one entry was colored.
int THCTensor_color(THCState *state, THCudaLongTensor *self) {
  // One-byte device flag, preset to 1; the kernel clears it when it colors a node.
  // Every CUDA call is checked so a failure is reported through THC instead of
  // continuing with an invalid pointer or an unset result.
  uint8_t *d_done = NULL;
  THCudaCheck(cudaMalloc(&d_done, sizeof(uint8_t)));
  THCudaCheck(cudaMemset(d_done, 1, sizeof(uint8_t)));

  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
  int64_t *selfData = THCudaLongTensor_data(state, self);

  KERNEL_RUN(colorKernel, nNodes, selfData, THCRandom_generatorStates(state), d_done);

  // Blocking copy back to the host; `done` starts from a defined value.
  uint8_t done = 1;
  THCudaCheck(cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost));
  THCudaCheck(cudaFree(d_done));
  return done;
}
#endif // THC_COLOR_INC
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
#include "generic/THCDegree.cu"
#include "THC/THCGenerateAllTypes.h"
#ifndef THC_DEGREE_INC
#define THC_DEGREE_INC
#include "THCNumerics.cuh"
#include "generic/THCDegree.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_DEGREE_INC
#include "THCGreedy.h" #include "THCGraclus.h"
#include "common.cuh" #include "common.cuh"
#include "THCDegree.cu" #include "THCDegree.cuh"
#include "THCColor.cu" #include "THCColor.cuh"
#include "THCPropose.cu" #include "THCPropose.cuh"
#include "THCResponse.cu" #include "THCResponse.cuh"
void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row, void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col) { THCudaLongTensor *col) {
THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 3, cluster, row, col)); THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 3, self, row, col));
int nNodes = THCudaLongTensor_nElement(state, cluster); int nNodes = THCudaLongTensor_nElement(state, self);
THCudaLongTensor_fill(state, self, -1);
THCudaLongTensor_fill(state, cluster, -1); THCudaLongTensor *prop = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor *prop = THCudaLongTensor_newClone(state, cluster); THCudaLongTensor_fill(state, prop, -1);
THCudaLongTensor *degree = THCudaLongTensor_newWithSize1d(state, nNodes); THCudaLongTensor *degree = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_degree(state, degree, row); THCudaLongTensor_degree(state, degree, row);
...@@ -21,15 +22,19 @@ void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row ...@@ -21,15 +22,19 @@ void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row
THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes); THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_cumsum(state, cumDegree, degree, 0); THCudaLongTensor_cumsum(state, cumDegree, degree, 0);
while(!THCGreedy_assignColor(state, cluster)) { THCTensor_color(state, self);
THCGreedy_propose(state, cluster, prop, row, col, degree, cumDegree); THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
THCGreedy_response(state, cluster, prop, row, col, degree, cumDegree); THCTensor_response(state, self, prop, row, col, degree, cumDegree);
};
/* while(!THCTensor_assignColor(state, self)) { */
/* THCTensor_propose(state, self, prop, row, col, degree, cumDegree); */
/* THCTensor_response(state, self, prop, row, col, degree, cumDegree); */
/* }; */
THCudaLongTensor_free(state, prop); THCudaLongTensor_free(state, prop);
THCudaLongTensor_free(state, degree); THCudaLongTensor_free(state, degree);
THCudaLongTensor_free(state, cumDegree); THCudaLongTensor_free(state, cumDegree);
} }
#include "generic/THCGreedy.cu" #include "generic/THCGraclus.cu"
#include "THC/THCGenerateAllTypes.h" #include "THC/THCGenerateAllTypes.h"
#ifndef THC_GREEDY_INC #ifndef THC_GRACLUS_INC
#define THC_GREEDY_INC #define THC_GRACLUS_INC
#include <THC/THC.h> #include <THC/THC.h>
#define THCGreedy_ TH_CONCAT_3(TH,CReal,Greedy)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif // __cplusplus #endif // __cplusplus
void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row, void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col); THCudaLongTensor *col);
#include "generic/THCGreedy.h" #include "generic/THCGraclus.h"
#include "THC/THCGenerateAllTypes.h" #include "THC/THCGenerateAllTypes.h"
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif // __cplusplus #endif // __cplusplus
#endif // THC_GREEDY_INC #endif // THC_GRACLUS_INC
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include "THCNumerics.cuh" #include "THCNumerics.cuh"
template<typename T> template<typename T>
__global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size, __global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
int64_t *count, ptrdiff_t nNodes) { int64_t *count, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
T *pos = posInfo.data + i * posInfo.stride[0]; T *pos = posInfo.data + i * posInfo.stride[0];
...@@ -13,7 +13,7 @@ __global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size, ...@@ -13,7 +13,7 @@ __global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size,
value += coef * THCNumerics<T>::floor(THCNumerics<T>::div(pos[d], size[d])); value += coef * THCNumerics<T>::floor(THCNumerics<T>::div(pos[d], size[d]));
coef *= count[d]; coef *= count[d];
} }
cluster[i] = value; self[i] = value;
} }
} }
......
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
#include <THC/THC.h> #include <THC/THC.h>
#define THCGrid_ TH_CONCAT_3(TH,CReal,Grid)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif // __cplusplus #endif // __cplusplus
......
#ifndef THC_NUMERICS_INC #ifndef THC_NUMERICS_INC
#define THC_NUMERICS_INC #define THC_NUMERICS_INC
#include "THC/THCHalf.h" #include <THC/THCHalf.h>
// Scalar arithmetic helpers for element type T, callable from host and device.
template<typename T>
struct THCNumerics {
  // Returns a / b using T's own division operator
  // (for integer T this truncates toward zero).
  static inline __host__ __device__ T div(T a, T b) { return a / b; }

  // Returns the largest integer that is not greater than `a`.
  // A plain conversion to int truncates toward zero, which rounds negative
  // non-integral values up (e.g. -0.5 -> 0); step down by one in that case.
  static inline __host__ __device__ int floor(T a) {
    int truncated = (int) a;
    return (truncated > a) ? truncated - 1 : truncated;
  }
};
#ifdef CUDA_HALF_TENSOR #ifdef CUDA_HALF_TENSOR
#ifdef __CUDA_ARCH__ #ifdef __CUDA_ARCH__
...@@ -16,7 +10,16 @@ struct THCNumerics { ...@@ -16,7 +10,16 @@ struct THCNumerics {
#else // CUDA_ARCH__ #else // CUDA_ARCH__
#define h2f(A) THC_half2float(A) #define h2f(A) THC_half2float(A)
#define f2h(A) THC_float2half(A) #define f2h(A) THC_float2half(A)
#endif #endif // CUDA_ARCH__
#endif // CUDA_HALF_TENSOR
// Scalar arithmetic helpers for element type T, callable from host and device.
template<typename T>
struct THCNumerics {
  // Returns a / b using T's own division operator
  // (for integer T this truncates toward zero).
  static inline __host__ __device__ T div(T a, T b) { return a / b; }

  // Returns the largest integer that is not greater than `a`.
  // A plain conversion to int truncates toward zero, which rounds negative
  // non-integral values up (e.g. -0.5 -> 0); step down by one in that case.
  static inline __host__ __device__ int floor(T a) {
    int truncated = (int) a;
    return (truncated > a) ? truncated - 1 : truncated;
  }
};
#ifdef CUDA_HALF_TENSOR
template<> template<>
struct THCNumerics<half> { struct THCNumerics<half> {
static inline __host__ __device__ half div(half a, half b) { return f2h(h2f(a) / h2f(b)); } static inline __host__ __device__ half div(half a, half b) { return f2h(h2f(a) / h2f(b)); }
...@@ -24,4 +27,21 @@ struct THCNumerics<half> { ...@@ -24,4 +27,21 @@ struct THCNumerics<half> {
}; };
#endif // CUDA_HALF_TENSOR #endif // CUDA_HALF_TENSOR
// Generic scalar conversion: turns a value of type In into type Out with a
// plain C-style cast. Callable from host and device.
template <typename In, typename Out>
struct ScalarConvert {
  static __host__ __device__ Out to(const In v) {
    const Out converted = (Out) v;
    return converted;
  }
};
#ifdef CUDA_HALF_TENSOR
// Conversion from half: the value is first widened through the h2f macro and
// the result is then cast to Out.
template <typename Out>
struct ScalarConvert<half, Out> {
  static __host__ __device__ Out to(const half v) {
    const float widened = h2f(v);
    return (Out) widened;
  }
};
// Conversion to half: the input is first cast to float and then narrowed
// through the f2h macro.
template <typename In>
struct ScalarConvert<In, half> {
  static __host__ __device__ half to(const In v) {
    const float asFloat = (float) v;
    return f2h(asFloat);
  }
};
#endif // CUDA_HALF_TENSOR
#endif // THC_NUMERICS_INC #endif // THC_NUMERICS_INC
#ifndef THC_PROPOSE_INC
#define THC_PROPOSE_INC
#include "common.cuh" #include "common.cuh"
__global__ void proposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col, __global__ void proposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) { int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
if (color[i] != -1) continue; // Only visit blue nodes. if (color[i] != -1) { continue; } // Only visit blue nodes.
ptrdiff_t c; ptrdiff_t c; bool isDead = true;
for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) { for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) {
c = col[e]; c = col[e];
if (color[c] == -2) { // Red neighbor found. if (isDead && color[c] < 0) { isDead = false; } // Unmatched neighbor found.
prop[i] = c; // Propose neighbor. if (color[c] == -2) { prop[i] = c; break; } // Propose to first red neighbor.
break;
}
} }
if (prop[i] < 0) color[i] = i; // Mark node as dead. if (isDead) { color[i] = i; } // Mark node as dead.
} }
} }
void THCGreedy_propose(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop, void THCTensor_propose(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
THCudaLongTensor *cumDegree) { THCudaLongTensor *cumDegree) {
KERNEL_RUN(proposeKernel, THCudaLongTensor_nElement(state, color), KERNEL_RUN(proposeKernel, THCudaLongTensor_nElement(state, color),
THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop), THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop),
THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col), THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col),
THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree)) THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree));
} }
#endif // THC_PROPOSE_INC
#ifndef THC_RESPONSE_INC
#define THC_RESPONSE_INC
#include "common.cuh" #include "common.cuh"
/* if (color[i] != -1) { continue; } // Only visit blue nodes. */
/* ptrdiff_t c; bool isDead = true; */
/* for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) { */
/* c = col[e]; */
/* if (isDead && color[c] < 0) { isDead = false; } // Unmatched neighbor found. */
/* if (color[c] == -2) { prop[i] = c; break; } // Propose to first red neighbor. */
/* } */
/* if (isDead) { color[i] = i; } // Mark node as dead. */
__global__ void responseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col, __global__ void responseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) { int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
if (color[i] != -2) continue; // Only visit red nodes. if (color[i] != -2) { continue; } // Only visit red nodes.
ptrdiff_t c; int64_t neighborColor, minValue; /* ptrdiff_t c; // int64_t neighborColor, minValue; */
bool isDead = true; /* bool isDead = true; */
for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) { for (ptrdiff_t e = cumDegree[i] - degree[i]; e < cumDegree[i]; e++) {
c = col[e]; /* c = col[e]; */
neighborColor = color[c]; /* neighborColor = color[c]; */
if (neighborColor == -1 && prop[c] == i) { // Blue neighbor found which proposed to node i. /* if (neighborColor == -1 && prop[c] == i) { // Blue neighbor found which proposed to node i. */
minValue = min(i, c); /* minValue = min(i, c); */
color[i] = minValue; /* color[i] = minValue; */
color[c] = minValue; /* color[c] = minValue; */
break; /* break; */
} /* } */
if (neighborColor < 0) isDead = false; /* if (neighborColor < 0) isDead = false; */
} }
if (isDead && color[i] < 0) color[i] = i; // Mark node as dead. /* if (isDead && color[i] < 0) color[i] = i; // Mark node as dead. */
} }
} }
void THCGreedy_response(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop, void THCTensor_response(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
THCudaLongTensor *cumDegree) { THCudaLongTensor *cumDegree) {
KERNEL_RUN(responseKernel, THCudaLongTensor_nElement(state, color), KERNEL_RUN(responseKernel, THCudaLongTensor_nElement(state, color),
THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop), THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop),
THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col), THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col),
THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree)) THCudaLongTensor_data(state, degree), THCudaLongTensor_data(state, cumDegree));
} }
#endif // THC_RESPONSE_INC
#ifndef THC_COMMON_INC #ifndef THC_COMMON_INC
#define THC_COMMON_INC #define THC_COMMON_INC
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
#define KERNEL_LOOP(I, N) \ #define KERNEL_LOOP(I, N) \
for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x) for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
......
#ifndef THC_GENERIC_FILE #ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCDegree.cu" #define THC_GENERIC_FILE "generic/THCDegree.cuh"
#else #else
void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *index) { void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
int nEdges = THCudaLongTensor_nElement(state, index); int nEdges = THCudaLongTensor_nElement(state, index);
THCTensor *one = THCTensor_(newWithSize1d)(state, nEdges); THCTensor *one = THCTensor_(newWithSize1d)(state, nEdges);
THCTensor_(fill)(state, one, 1); THCTensor_(fill)(state, one, ScalarConvert<int, real>::to(1));
THCTensor_(fill)(state, self, 0); THCTensor_(fill)(state, self, ScalarConvert<int, real>::to(0));
THCTensor_(scatterAdd)(state, self, 0, index, one); THCTensor_(scatterAdd)(state, self, 0, index, one);
THCTensor_(free)(state, one); THCTensor_(free)(state, one);
......
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGraclus.cu"
#else
// Weighted graclus variant, named through the THCTensor_ macro for the current
// THC_GENERIC_FILE pass. The body is empty in this revision: none of the
// arguments (self, row, col, weight) are read or written.
void THCTensor_(graclus)(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col, THCTensor *weight) {
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGraclus.h"
#else
// Prototype of the weighted graclus variant, named through the THCTensor_
// macro. Takes the THC state, three long tensors (self, row, col) and a
// tensor `weight` of the generic THCTensor type; returns nothing.
void THCTensor_(graclus)(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col, THCTensor *weight);
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGreedy.cu"
#else
// Weighted greedy variant, named through the THCGreedy_ macro.
// Stub: it only prints a fixed marker string to standard output; none of the
// arguments (cluster, row, col, weight) are read or written.
void THCGreedy_(THCState *state,
                THCudaLongTensor *cluster,
                THCudaLongTensor *row,
                THCudaLongTensor *col,
                THCTensor *weight) {
  printf("%s", "THCGreedy dynamic drin");
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGreedy.h"
#else
// Prototype of the weighted greedy variant, named through the THCGreedy_
// macro. Takes the THC state, three long tensors (cluster, row, col) and a
// tensor `weight` of the generic THCTensor type; returns nothing.
void THCGreedy_(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
THCudaLongTensor *col, THCTensor *weight);
#endif // THC_GENERIC_FILE
...@@ -2,17 +2,17 @@ ...@@ -2,17 +2,17 @@
#define THC_GENERIC_FILE "generic/THCGrid.cu" #define THC_GENERIC_FILE "generic/THCGrid.cu"
#else #else
void THCGrid_(THCState *state, THCudaLongTensor *cluster, THCTensor *pos, THCTensor *size, void THCTensor_(grid)(THCState *state, THCudaLongTensor *self, THCTensor *pos, THCTensor *size,
THCudaLongTensor *count) { THCudaLongTensor *count) {
THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, cluster, pos, size, count)); THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, self, pos, size, count));
int64_t *clusterData = THCudaLongTensor_data(state, cluster); int64_t *selfData = THCudaLongTensor_data(state, self);
TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos); TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos);
real *sizeData = THCTensor_(data)(state, size); real *sizeData = THCTensor_(data)(state, size);
int64_t *countData = THCudaLongTensor_data(state, count); int64_t *countData = THCudaLongTensor_data(state, count);
ptrdiff_t nNodes = THCudaLongTensor_nElement(state, cluster); ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
KERNEL_REAL_RUN(gridKernel, nNodes, clusterData, posInfo, sizeData, countData); KERNEL_REAL_RUN(gridKernel, nNodes, selfData, posInfo, sizeData, countData);
} }
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment