Commit 920cc934 authored by rusty1s
Browse files

graclus cuda, cleanup old code

parent d2cc3162
| Library | Meaning |
|---------|------------------------------------|
| TH | **T**orc**H** |
| THC | **T**orc**H** **C**uda |
| THCC | **T**orc**H** **C**uda **C**onnect |
#include <TH/TH.h>
/*
 * Greedy edge-matching skeleton shared by the CPU graclus variants.
 *
 * What the expansion does:
 *   1. Fills `self` with -1 (used below as "not yet assigned").
 *   2. Walks the edge list (row[e], col[e]) front to back, treating every run
 *      of consecutive edges with the same row[e] as one node's neighbourhood.
 *   3. PRESELECT is pasted once per run, before the node is inspected.
 *   4. If the run's node is still unassigned, SELECT is pasted once per edge
 *      of the run with `colValue` (= col[e]) and `e` in scope. SELECT may
 *      overwrite `matchedValue` and may `break` out of the per-run do/while.
 *   5. The node and `matchedValue` both receive the smaller of their two
 *      indices. `matchedValue` starts out as the node itself, so a node for
 *      which SELECT picked nothing is assigned its own index.
 *   6. Edges of the run that were not consumed are skipped.
 *
 * Locals visible to PRESELECT / SELECT: selfData, rowData, colData, e, nEdges,
 * rowValue, colValue, matchedValue, value.
 *
 * NOTE(review): runs are detected by comparing adjacent entries only, so equal
 * `row` values presumably have to be stored contiguously - confirm with callers.
 * NOTE(review): row/col values index `self` without a bounds check.
 * NOTE(review): a node that never occurs in `row` and is never chosen as a
 * partner keeps the value -1.
 */
#define TH_TENSOR_GRACLUS(self, row, col, PRESELECT, SELECT) { \
THLongTensor_fill(self, -1); \
int64_t *selfData = THLongTensor_data(self); \
int64_t *rowData = THLongTensor_data(row); \
int64_t *colData = THLongTensor_data(col); \
ptrdiff_t e = 0, nEdges = THLongTensor_nElement(row); \
int64_t rowValue, colValue, matchedValue, value; \
while(e < nEdges) { \
rowValue = rowData[e]; \
matchedValue = rowValue; \
PRESELECT \
if (selfData[rowValue] < 0) { \
do { \
colValue = colData[e]; \
SELECT \
e++; \
} while(e < nEdges && rowData[e] == rowValue); \
value = rowValue < matchedValue ? rowValue : matchedValue; \
selfData[rowValue] = value; \
selfData[matchedValue] = value; \
} \
while(e < nEdges && rowData[e] == rowValue) e++; \
} \
}
/*
 * Unweighted CPU graclus: every still-unassigned node is paired with the first
 * neighbour (in edge order) that is itself still unassigned.
 *
 * The PRESELECT slot is an empty block. The SELECT fragment records the
 * neighbour and `break`s, which leaves the per-run do/while inside
 * TH_TENSOR_GRACLUS; the macro then skips the rest of that node's edges.
 *
 * self: output tensor, overwritten by the macro (filled with -1 first).
 * row, col: edge list; edge e connects row[e] to col[e].
 */
void THTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col) {
TH_TENSOR_GRACLUS(self, row, col, {},
if (selfData[colValue] < 0) { matchedValue = colValue; break; }
)
}
#include "generic/THGraclus.c"
#include "THGenerateAllTypes.h"
/* Unweighted CPU graclus matching over the edge list (row, col); writes into self. */
void THTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col);
/*
 * Weighted CPU graclus matching, one entry point per weight element type.
 * The generic implementation reads `weight` with the edge index, i.e. one
 * weight per (row[e], col[e]) pair.
 */
void THByteTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THByteTensor *weight);
void THCharTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THCharTensor *weight);
void THShortTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THShortTensor *weight);
void THIntTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THIntTensor *weight);
void THLongTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THLongTensor *weight);
void THFloatTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THFloatTensor *weight);
void THDoubleTensor_graclus(THLongTensor *self, THLongTensor *row, THLongTensor *col, THDoubleTensor *weight);
#include <TH/TH.h>
#include "generic/THGrid.c"
#include "THGenerateAllTypes.h"
/*
 * CPU grid clustering, one entry point per position element type.
 * For every row of `pos` the generic implementation divides each coordinate by
 * the matching entry of `size`, truncates to int64_t and folds the per-dimension
 * results into one index using the entries of `count` as mixed-radix bases.
 * The index for row n is written to self[n].
 */
void THByteTensor_grid(THLongTensor *self, THByteTensor *pos, THByteTensor *size, THLongTensor *count);
void THCharTensor_grid(THLongTensor *self, THCharTensor *pos, THCharTensor *size, THLongTensor *count);
void THShortTensor_grid(THLongTensor *self, THShortTensor *pos, THShortTensor *size, THLongTensor *count);
void THIntTensor_grid(THLongTensor *self, THIntTensor *pos, THIntTensor *size, THLongTensor *count);
void THLongTensor_grid(THLongTensor *self, THLongTensor *pos, THLongTensor *size, THLongTensor *count);
void THFloatTensor_grid(THLongTensor *self, THFloatTensor *pos, THFloatTensor *size, THLongTensor *count);
void THDoubleTensor_grid(THLongTensor *self, THDoubleTensor *pos, THDoubleTensor *size, THLongTensor *count);
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/THGraclus.c"
#else
/*
 * Weighted CPU graclus for the current `real` type.
 *
 * PRESELECT resets `maxWeight` to 0 at the start of every node's edge run.
 * SELECT reads the weight of the current edge (weightData[e]) and remembers the
 * neighbour if it is still unassigned and its weight is strictly greater than
 * the best seen so far. There is no early `break`, so the whole run is scanned.
 *
 * NOTE(review): because the threshold starts at 0 and the comparison is strict,
 * edges with weight <= 0 are never selected - confirm that this is intended.
 */
void THTensor_(graclus)(THLongTensor *self, THLongTensor *row, THLongTensor *col, THTensor *weight) {
real *weightData = THTensor_(data)(weight);
real maxWeight, tmp;
TH_TENSOR_GRACLUS(self, row, col, maxWeight = 0;,
tmp = weightData[e];
if (selfData[colValue] < 0 && tmp > maxWeight) { matchedValue = colValue; maxWeight = tmp; }
)
}
#endif // TH_GENERIC_FILE
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/THGrid.c"
#else
/*
 * Maps every row of `pos` to a single flattened grid-cell index.
 *
 * For row n and column d the coordinate is divided by size[d] and truncated to
 * int64_t; the per-column results are combined as a mixed-radix number whose
 * bases are count[0], count[1], ... (column 0 is the least significant digit).
 * The result is stored in self[n].
 *
 * `pos` is addressed through its dimension-0 and dimension-1 strides; `self`,
 * `size` and `count` are addressed as dense arrays.
 */
void THTensor_(grid)(THLongTensor *self, THTensor *pos, THTensor *size,
                     THLongTensor *count) {
  int64_t *out = THLongTensor_data(self);
  real *point = THTensor_(data)(pos);
  real *extent = THTensor_(data)(size);
  const int64_t rowStep = THTensor_(stride)(pos, 0);
  const int64_t colStep = THTensor_(stride)(pos, 1);
  int64_t *cells = THLongTensor_data(count);
  const int64_t numPoints = THTensor_(size)(pos, 0);
  const int64_t numDims = THTensor_(size)(pos, 1);
  ptrdiff_t p = 0;

  while (p < numPoints) {
    int64_t multiplier = 1;
    int64_t cell = 0;
    ptrdiff_t k;

    for (k = 0; k < numDims; ++k) {
      /* Truncating division picks the cell along dimension k. */
      cell += multiplier * (int64_t)(point[k * colStep] / extent[k]);
      multiplier *= cells[k];
    }

    out[p] = cell;
    point += rowStep; /* advance to the next row of pos */
    ++p;
  }
}
#endif // TH_GENERIC_FILE
#include "THCGraclus.cu"
#include "THCGrid.cu"
#ifndef THC_INC
#define THC_INC
#include "THCGraclus.h"
#include "THCGrid.h"
#endif // THC_INC
#ifndef THC_COLOR_INC
#define THC_COLOR_INC
#include "common.cuh"
#define BLUE_PROB 0.53406
// Device-resident flag: 1 means the last coloring pass found no node with a
// negative entry, 0 means at least one node was (re)colored.
__device__ int d_done;

// Resets the flag to 1; intended to be launched with a single thread before
// each coloring pass.
__global__ void initDoneKernel() {
  d_done = 1;
}
// Assigns a fresh random color to every node whose entry in `self` is negative.
// A Bernoulli sample of 1 yields -1 and a sample of 0 yields -2. Whenever a
// node is recolored the global `d_done` flag is cleared.
// Entries that are already >= 0 are left untouched.
__global__ void colorKernel(int64_t *self, uint8_t *bernoulli, ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    if (self[node] >= 0) { continue; }
    self[node] = bernoulli[node] - 2;
    d_done = 0;  // every writer stores the same value
  }
}
// Runs one coloring pass over `self` and reports whether the matching is
// complete.
//
// Every node with a negative entry is recolored at random: with probability
// BLUE_PROB it becomes -1, otherwise -2.
//
// Returns 1 when no node had a negative entry (nothing left to do) and 0 when
// at least one node was recolored. An empty tensor is reported as done without
// launching any kernel.
int THCudaLongTensor_color(THCState *state, THCudaLongTensor *self) {
  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
  // Nothing to color; also avoids a zero-block kernel launch below.
  if (nNodes == 0) { return 1; }

  // Reset the flag on the same stream as the coloring kernel so that the reset
  // is ordered before the kernel regardless of which stream is current.
  initDoneKernel<<<1, 1, 0, THCState_getCurrentStream(state)>>>();
  THCudaCheck(cudaGetLastError());

  THCudaByteTensor *bernoulli = THCudaByteTensor_newWithSize1d(state, nNodes);
  THCudaByteTensor_bernoulli(state, bernoulli, BLUE_PROB);
  int64_t *selfData = THCudaLongTensor_data(state, self);
  uint8_t *bernoulliData = THCudaByteTensor_data(state, bernoulli);
  KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData);
  THCudaByteTensor_free(state, bernoulli);

  // Make sure the coloring kernel has finished before the flag is read back.
  THCudaCheck(cudaStreamSynchronize(THCState_getCurrentStream(state)));
  int done = 0;
  THCudaCheck(cudaMemcpyFromSymbol(&done, d_done, sizeof(done), 0, cudaMemcpyDeviceToHost));
  return done;
}
#endif // THC_COLOR_INC
#ifndef THC_DEGREE_INC
#define THC_DEGREE_INC
#include "common.cuh"
#include "THCNumerics.cuh"
// Writes, for every node r that occurs in `index`, the position one past the
// last edge whose source is r (i.e. the inclusive running edge count up to r).
//
// An edge i is the last of its run when it is the final edge overall or when
// the next entry of `index` differs; only that thread stores a value.
// Entries of `self` for nodes that never occur in `index` are not written.
//
// NOTE(review): the run detection compares adjacent entries only, so equal
// values in `index` presumably have to be contiguous - confirm with callers.
template<typename T>
__global__ void cumDegreeKernel(T *self, int64_t *index, ptrdiff_t nEdges) {
  KERNEL_LOOP(i, nEdges) {
    int64_t r = index[i];
    if (i + 1 == nEdges || r != index[i + 1]) {
      // Convert from a 64-bit count so large edge lists are not truncated to int.
      self[r] = ScalarConvert<int64_t, T>::to(i + 1);
    }
  }
}
#include "generic/THCDegree.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_DEGREE_INC
#include "THCGraclus.h"
#include "common.cuh"
#include "THCDegree.cuh"
#include "THCColor.cuh"
#include "THCPropose.cuh"
#include "THCResponse.cuh"
/*
 * Set-up / tear-down scaffold shared by the CUDA graclus entry points.
 *
 * The expansion:
 *   - fills `self` with -1,
 *   - allocates `prop` (one entry per node, filled with -1),
 *   - allocates `degree` and `cumDegree` (one entry per node) and fills them
 *     from `row` via THCudaLongTensor_degree / THCudaLongTensor_cumDegree,
 *   - pastes CODE, which can refer to nNodes, prop, degree and cumDegree,
 *   - frees the three temporaries.
 *
 * NOTE(review): nNodes is declared as int although nElement returns a wider
 * type; very large tensors would be truncated.
 * NOTE(review): the temporaries are only released if CODE falls through to the
 * end of the block.
 */
#define THC_TENSOR_GRACLUS(state, self, row, CODE) { \
int nNodes = THCudaLongTensor_nElement(state, self); \
THCudaLongTensor_fill(state, self, -1); \
\
THCudaLongTensor *prop = THCudaLongTensor_newWithSize1d(state, nNodes); \
THCudaLongTensor_fill(state, prop, -1); \
\
THCudaLongTensor *degree = THCudaLongTensor_newWithSize1d(state, nNodes); \
THCudaLongTensor_degree(state, degree, row); \
\
THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes); \
THCudaLongTensor_cumDegree(state, cumDegree, row); \
\
CODE \
\
THCudaLongTensor_free(state, prop); \
THCudaLongTensor_free(state, degree); \
THCudaLongTensor_free(state, cumDegree); \
}
// Unweighted CUDA graclus matching.
//
// After checking that all tensors live on the same GPU, the routine alternates
// coloring, proposal and response rounds. It stops as soon as a coloring pass
// reports that no node with a negative entry is left in `self`.
// `prop`, `degree` and `cumDegree` are the temporaries provided by
// THC_TENSOR_GRACLUS.
void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
                       THCudaLongTensor *col) {
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 3, self, row, col));
  THC_TENSOR_GRACLUS(state, self, row,
    for (;;) {
      if (THCudaLongTensor_color(state, self)) { break; }
      THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
      THCTensor_response(state, self, prop, row, col, degree, cumDegree);
    }
  )
}
#include "generic/THCGraclus.cu"
#include "THC/THCGenerateAllTypes.h"
#ifndef THC_GRACLUS_INC
#define THC_GRACLUS_INC
#include <THC/THC.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// Unweighted CUDA graclus matching over the edge list (row, col); the result
// is written into `self`. All tensors are expected on the same GPU (the
// implementation asserts this).
void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col);
#include "generic/THCGraclus.h"
#include "THC/THCGenerateAllTypes.h"
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // THC_GRACLUS_INC
#include "THCGrid.h"
#include "common.cuh"
#include "THCNumerics.cuh"
// Maps every row of the position tensor to a single flattened grid-cell index.
//
// For node i and dimension d the coordinate is divided by size[d] and converted
// to int64_t; the per-dimension results are combined as a mixed-radix number
// whose bases are count[0], count[1], ... (dimension 0 is least significant).
//
// self    : output, one int64_t per node.
// posInfo : data pointer plus sizes/strides (in elements) of the 2-D position
//           tensor; rows are addressed via stride[0], columns via stride[1].
// size    : per-dimension cell extent, addressed densely by dimension.
// count   : per-dimension number of cells, addressed densely by dimension.
// nNodes  : number of rows to process.
template<typename T>
__global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
                           int64_t *count, ptrdiff_t nNodes) {
  KERNEL_LOOP(i, nNodes) {
    T *pos = posInfo.data + i * posInfo.stride[0];
    int64_t coef = 1, value = 0;
    // Iterate over logical dimensions; only the access into `pos` is strided.
    // `size` and `count` are indexed by the dimension itself.
    for (ptrdiff_t d = 0; d < posInfo.size[1]; d++) {
      value += coef * ScalarConvert<T, int64_t>::to(
          THCNumerics<T>::div(pos[d * posInfo.stride[1]], size[d]));
      coef *= count[d];
    }
    self[i] = value;
  }
}
#include "generic/THCGrid.cu"
#include "THC/THCGenerateAllTypes.h"
#ifndef THC_GRID_INC
#define THC_GRID_INC
#include <THC/THC.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#include "generic/THCGrid.h"
#include "THC/THCGenerateAllTypes.h"
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // THC_GRID_INC
#ifndef THC_NUMERICS_INC
#define THC_NUMERICS_INC
#include <THC/THCHalf.h>
#ifdef CUDA_HALF_TENSOR
#ifdef __CUDA_ARCH__
#define h2f(A) __half2float(A)
#define f2h(A) __float2half(A)
#else // CUDA_ARCH__
#define h2f(A) THC_half2float(A)
#define f2h(A) THC_float2half(A)
#endif // CUDA_ARCH__
#endif // CUDA_HALF_TENSOR
// Arithmetic helpers for element types that support the built-in operators.
template<typename T>
struct THCNumerics {
  // Returns lhs / rhs using T's own division.
  static inline __host__ __device__ T div(T lhs, T rhs) {
    return lhs / rhs;
  }

  // Returns true when lhs >= rhs.
  static inline __host__ __device__ bool gte(T lhs, T rhs) {
    return lhs >= rhs;
  }
};
#ifdef CUDA_HALF_TENSOR
// Half-precision specialisation: operands are widened to float, the operation
// is carried out in float, and div narrows the result back to half.
template<>
struct THCNumerics<half> {
  static inline __host__ __device__ half div(half a, half b) {
    const float numerator = h2f(a);
    const float denominator = h2f(b);
    return f2h(numerator / denominator);
  }

  static inline __host__ __device__ bool gte(half a, half b) {
    const float left = h2f(a);
    const float right = h2f(b);
    return left >= right;
  }
};
#endif // CUDA_HALF_TENSOR
// Converts a scalar of type In to type Out with a plain value cast.
template <typename In, typename Out>
struct ScalarConvert {
  static __host__ __device__ Out to(const In v) {
    const Out converted = static_cast<Out>(v);
    return converted;
  }
};
#ifdef CUDA_HALF_TENSOR
// Conversion from half: widen to float first, then cast to the target type.
template <typename Out>
struct ScalarConvert<half, Out> {
  static __host__ __device__ Out to(const half v) {
    const float widened = h2f(v);
    return static_cast<Out>(widened);
  }
};
// Conversion to half: cast the input to float first, then narrow to half.
template <typename In>
struct ScalarConvert<In, half> {
  static __host__ __device__ half to(const In v) {
    const float widened = static_cast<float>(v);
    return f2h(widened);
  }
};
#endif // CUDA_HALF_TENSOR
#endif // THC_NUMERICS_INC
#ifndef THC_PROPOSE_INC
#define THC_PROPOSE_INC
#include "common.cuh"
#include "THCNumerics.cuh"
// Proposal round of the unweighted matching.
//
// Only blue nodes (color == -1) act. A blue node scans its edges, which occupy
// positions [cumDegree - degree, cumDegree) of `col`, and proposes to the first
// red neighbour (color == -2) by storing it in prop[node]. If none of the
// scanned neighbours had a negative color, the node can never be matched and
// is marked dead by assigning it its own index.
// `row` is accepted for signature symmetry but not read.
__global__ void proposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
                              int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    if (color[node] == -1) {
      bool hasOpenNeighbor = false;
      ptrdiff_t edge = cumDegree[node] - degree[node];
      while (edge < cumDegree[node]) {
        const ptrdiff_t neighbor = col[edge];
        // Remember whether any neighbour is still unmatched.
        if (!hasOpenNeighbor && color[neighbor] < 0) { hasOpenNeighbor = true; }
        // Propose to the first red neighbour and stop scanning.
        if (color[neighbor] == -2) {
          prop[node] = neighbor;
          break;
        }
        ++edge;
      }
      if (!hasOpenNeighbor) { color[node] = node; }
    }
  }
}
// Proposal round of the weighted matching.
//
// Only blue nodes (color == -1) act. A blue node scans all of its edges,
// positions [cumDegree - degree, cumDegree) of `col` / `weight`, and proposes
// to the red neighbour (color == -2) with the largest weight that is >= 0;
// among equal weights the last one scanned wins. prop[node] is always written
// and is -1 when no such neighbour exists. A node none of whose neighbours has
// a negative color is marked dead by assigning it its own index.
// `row` is accepted for signature symmetry but not read.
template<typename T>
__global__ void weightedProposeKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
                                      T *weight, int64_t *degree, int64_t *cumDegree,
                                      ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    if (color[node] == -1) {
      bool hasOpenNeighbor = false;
      int64_t bestNeighbor = -1;
      T bestWeight = ScalarConvert<int, T>::to(0);
      ptrdiff_t edge = cumDegree[node] - degree[node];
      while (edge < cumDegree[node]) {
        const ptrdiff_t neighbor = col[edge];
        const T edgeWeight = weight[edge];
        // Remember whether any neighbour is still unmatched.
        if (!hasOpenNeighbor && color[neighbor] < 0) { hasOpenNeighbor = true; }
        // Track the heaviest red neighbour seen so far.
        if (color[neighbor] == -2 && THCNumerics<T>::gte(edgeWeight, bestWeight)) {
          bestNeighbor = neighbor;
          bestWeight = edgeWeight;
        }
        ++edge;
      }
      prop[node] = bestNeighbor;
      if (!hasOpenNeighbor) { color[node] = node; }
    }
  }
}
// Host wrapper: launches proposeKernel with one logical thread per entry of
// `color` on the current THC stream and checks the launch for errors.
void THCTensor_propose(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
                       THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
                       THCudaLongTensor *cumDegree) {
  int64_t *colorData = THCudaLongTensor_data(state, color);
  int64_t *propData = THCudaLongTensor_data(state, prop);
  int64_t *rowData = THCudaLongTensor_data(state, row);
  int64_t *colData = THCudaLongTensor_data(state, col);
  int64_t *degreeData = THCudaLongTensor_data(state, degree);
  int64_t *cumDegreeData = THCudaLongTensor_data(state, cumDegree);
  KERNEL_RUN(proposeKernel, THCudaLongTensor_nElement(state, color),
             colorData, propData, rowData, colData, degreeData, cumDegreeData);
}
#include "generic/THCPropose.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_PROPOSE_INC
#ifndef THC_RESPONSE_INC
#define THC_RESPONSE_INC
#include "common.cuh"
// Response round of the unweighted matching.
//
// Only red nodes (color == -2) act. A red node scans its edges, positions
// [cumDegree - degree, cumDegree) of `col`, and accepts the first blue
// neighbour (color == -1) whose proposal targets it. Both endpoints then
// receive the smaller of the two node indices. A red node none of whose
// scanned neighbours had a negative color is marked dead by assigning it its
// own index.
// `row` is accepted for signature symmetry but not read.
__global__ void responseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
                               int64_t *degree, int64_t *cumDegree, ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    if (color[node] == -2) {
      bool hasOpenNeighbor = false;
      ptrdiff_t edge = cumDegree[node] - degree[node];
      while (edge < cumDegree[node]) {
        const ptrdiff_t neighbor = col[edge];
        // Remember whether any neighbour is still unmatched.
        if (!hasOpenNeighbor && color[neighbor] < 0) { hasOpenNeighbor = true; }
        // Accept the first blue neighbour that proposed to this node.
        if (color[neighbor] == -1 && prop[neighbor] == node) {
          color[node] = min(node, neighbor);
          color[neighbor] = min(node, neighbor);
          break;
        }
        ++edge;
      }
      if (!hasOpenNeighbor) { color[node] = node; }
    }
  }
}
// Response round of the weighted matching.
//
// Only red nodes (color == -2) act. A red node scans all of its edges,
// positions [cumDegree - degree, cumDegree) of `col` / `weight`, and among the
// blue neighbours (color == -1) that proposed to it picks the one with the
// largest weight that is >= 0; among equal weights the last one scanned wins.
// If a partner was found both endpoints receive the smaller of the two node
// indices. A red node none of whose neighbours has a negative color is marked
// dead by assigning it its own index.
// `row` is accepted for signature symmetry but not read.
template<typename T>
__global__ void weightedResponseKernel(int64_t *color, int64_t *prop, int64_t *row, int64_t *col,
                                       T *weight, int64_t *degree, int64_t *cumDegree,
                                       ptrdiff_t nNodes) {
  KERNEL_LOOP(node, nNodes) {
    if (color[node] == -2) {
      bool hasOpenNeighbor = false;
      ptrdiff_t partner = -1;
      T bestWeight = ScalarConvert<int, T>::to(0);
      ptrdiff_t edge = cumDegree[node] - degree[node];
      while (edge < cumDegree[node]) {
        const ptrdiff_t neighbor = col[edge];
        const T edgeWeight = weight[edge];
        // Remember whether any neighbour is still unmatched.
        if (!hasOpenNeighbor && color[neighbor] < 0) { hasOpenNeighbor = true; }
        // Track the heaviest blue neighbour that proposed to this node.
        if (color[neighbor] == -1 && prop[neighbor] == node &&
            THCNumerics<T>::gte(edgeWeight, bestWeight)) {
          partner = neighbor;
          bestWeight = edgeWeight;
        }
        ++edge;
      }
      if (partner >= 0) {
        color[node] = min(node, partner);
        color[partner] = min(node, partner);
      }
      if (!hasOpenNeighbor) { color[node] = node; }
    }
  }
}
// Host wrapper: launches responseKernel with one logical thread per entry of
// `color` on the current THC stream and checks the launch for errors.
void THCTensor_response(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
                        THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree,
                        THCudaLongTensor *cumDegree) {
  int64_t *colorData = THCudaLongTensor_data(state, color);
  int64_t *propData = THCudaLongTensor_data(state, prop);
  int64_t *rowData = THCudaLongTensor_data(state, row);
  int64_t *colData = THCudaLongTensor_data(state, col);
  int64_t *degreeData = THCudaLongTensor_data(state, degree);
  int64_t *cumDegreeData = THCudaLongTensor_data(state, cumDegree);
  KERNEL_RUN(responseKernel, THCudaLongTensor_nElement(state, color),
             colorData, propData, rowData, colData, degreeData, cumDegreeData);
}
#include "generic/THCResponse.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_RESPONSE_INC
#ifndef THC_COMMON_INC
#define THC_COMMON_INC
// Grid-stride loop header for 1-D launches: declares the ptrdiff_t index I,
// starts it at this thread's global index and advances it by the total number
// of launched threads until it reaches N. Must be followed by the loop body.
// NOTE(review): N is pasted unparenthesised into `I < N`, so pass a simple
// expression.
#define KERNEL_LOOP(I, N) \
for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
// Capacity of the per-dimension arrays in TensorInfo.
const int MAX_DIMS = 25;
// Threads per block used by every kernel launch in this project.
const int NUM_THREADS = 1024;

// Returns the number of NUM_THREADS-sized blocks needed to cover N elements
// (ceiling division).
//
// For N <= 0 a single block is returned instead of 0: a zero-sized grid is an
// invalid launch configuration, whereas one block whose grid-stride loop does
// no iterations is harmless. The quotient/remainder form avoids the signed
// overflow of `N + NUM_THREADS - 1` for N close to INT_MAX.
inline int GET_BLOCKS(int N) {
  if (N <= 0) { return 1; }
  return N / NUM_THREADS + (N % NUM_THREADS != 0 ? 1 : 0);
}
// Launches kernel NAME over N elements on the current THC stream with
// NUM_THREADS threads per block, passing the variadic arguments followed by N
// as the last kernel argument, then checks the launch with THCudaCheck.
// Requires a `THCState *state` in the calling scope.
// NOTE(review): expands to several statements that declare `grid` and `stream`
// in the caller's scope, so it can appear only once per scope and must not be
// used as the body of an unbraced if/else. N is evaluated more than once.
#define KERNEL_RUN(NAME, N, ...) \
int grid = GET_BLOCKS(N); \
cudaStream_t stream = THCState_getCurrentStream(state); \
NAME<<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
THCudaCheck(cudaGetLastError())
// Same as KERNEL_RUN, but instantiates the kernel template NAME with the
// generic element type `real` of the enclosing generic file.
// Requires a `THCState *state` in the calling scope.
// NOTE(review): expands to several statements that declare `grid` and `stream`
// in the caller's scope, so it can appear only once per scope and must not be
// used as the body of an unbraced if/else. N is evaluated more than once.
#define KERNEL_REAL_RUN(NAME, N, ...) \
int grid = GET_BLOCKS(N); \
cudaStream_t stream = THCState_getCurrentStream(state); \
NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
THCudaCheck(cudaGetLastError())
// Plain-data description of a tensor that can be passed to a kernel by value.
template<typename T>
struct TensorInfo {
// Pointer to the first element.
T *data;
// Number of dimensions in use - presumably the valid prefix of size/stride; confirm against the code that fills this struct.
int dims;
// Extent of each dimension (capacity MAX_DIMS).
int size[MAX_DIMS];
// Stride of each dimension, in elements of T (gridKernel adds it to a T pointer).
int stride[MAX_DIMS];
};
#include "generic/common.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_COMMON_INC
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCDegree.cuh"
#else
// Counts how often every value occurs in `index` and stores the counts in
// `self`: `self` is zeroed and a tensor of ones, one per entry of `index`, is
// scatter-added into it along dimension 0.
//
// self  : output tensor; every value in `index` must be a valid position in it.
// index : 1-D list of node indices (one per edge).
void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
  // Keep the full-width element count; an int would truncate large edge lists.
  ptrdiff_t nEdges = THCudaLongTensor_nElement(state, index);
  THCTensor *one = THCTensor_(newWithSize1d)(state, nEdges);
  THCTensor_(fill)(state, one, ScalarConvert<int, real>::to(1));
  THCTensor_(fill)(state, self, ScalarConvert<int, real>::to(0));
  THCTensor_(scatterAdd)(state, self, 0, index, one);
  THCTensor_(free)(state, one);
}
// Host wrapper: launches cumDegreeKernel<real> with one logical thread per
// entry of `index`, writing the running edge counts into `self`.
// Entries of `self` for values that do not occur in `index` are left as they
// were.
void THCTensor_(cumDegree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
  int64_t *edgeSource = THCudaLongTensor_data(state, index);
  real *out = THCTensor_(data)(state, self);
  ptrdiff_t numEdges = THCudaLongTensor_nElement(state, index);
  KERNEL_REAL_RUN(cumDegreeKernel, numEdges, out, edgeSource);
}
#endif // THC_GENERIC_FILE
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment