Commit bd050f90 authored by rusty1s's avatar rusty1s
Browse files

smarter cumDegree

parent 69f59517
...@@ -3,30 +3,30 @@ ...@@ -3,30 +3,30 @@
#include "common.cuh" #include "common.cuh"
#include <curand.h>
#include <curand_kernel.h>
#define BLUE_PROBABILITY 0.53406 #define BLUE_PROBABILITY 0.53406
__global__ void colorKernel(int64_t *self, curandStateMtgp32 *state, uint8_t *done, __global__ void colorKernel(int64_t *self, int64_t *bernoulli, uint8_t *done, ptrdiff_t nNodes) {
ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
if (self[i] < 0) { if (self[i] < 0) {
self[i] = (curand_uniform(&state[0]) < BLUE_PROBABILITY) - 2; // blue = -1, red = -2 self[i] = bernoulli[i] - 2;
*done = 0; *done = 0;
} }
} }
} }
int THCTensor_color(THCState *state, THCudaLongTensor *self) { int THCTensor_color(THCState *state, THCudaLongTensor *self) {
ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
THCudaLongTensor *bernoulli = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_bernoulli(state, bernoulli, BLUE_PROBABILITY);
int64_t *selfData = THCudaLongTensor_data(state, self);
int64_t *bernoulliData = THCudaLongTensor_data(state, bernoulli);
uint8_t* d_done; uint8_t* d_done;
cudaMalloc(&d_done, sizeof(uint8_t)); cudaMalloc(&d_done, sizeof(uint8_t));
cudaMemset(d_done, 1, sizeof(uint8_t)); cudaMemset(d_done, 1, sizeof(uint8_t));
ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self); KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData, d_done);
int64_t *selfData = THCudaLongTensor_data(state, self);
KERNEL_RUN(colorKernel, nNodes, selfData, THCRandom_generatorStates(state), d_done);
uint8_t done; uint8_t done;
cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost); cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);
......
#ifndef THC_DEGREE_INC #ifndef THC_DEGREE_INC
#define THC_DEGREE_INC #define THC_DEGREE_INC
#include "common.cuh"
#include "THCNumerics.cuh" #include "THCNumerics.cuh"
// Writes, for every position where the value in `index` changes, the number of
// entries seen so far into the slot of the run that just ended.
//
// For each i visited by KERNEL_LOOP(i, nEdges), index[i] is compared with
// index[i + 1]; when they differ, self[index[i]] is set to i + 1.
//
// Preconditions visible from this code:
//   * index[i + 1] is read for every visited i, so `index` must hold at least
//     one element more than the count passed as `nEdges` (the launcher in
//     generic/THCDegree.cuh passes its element count minus one).
//   * every value in `index` must be a valid offset into `self`.
//
// The slot of the final run in `index` is never written by this kernel, since
// no change of value follows it.
// NOTE(review): presumably `index` is sorted ascending (row indices of an edge
// list), which makes the written values cumulative degrees -- confirm with callers.
template<typename T>
__global__ void cumDegreeKernel(T *self, int64_t *index, ptrdiff_t nEdges) {
  KERNEL_LOOP(i, nEdges) {
    const int64_t row = index[i];
    if (row != index[i + 1]) {
      // Convert from a 64-bit source type: going through `int` would truncate
      // the edge position once it no longer fits in 32 bits.
      self[row] = ScalarConvert<int64_t, T>::to(static_cast<int64_t>(i) + 1);
    }
  }
}
#include "generic/THCDegree.cuh" #include "generic/THCDegree.cuh"
#include "THC/THCGenerateAllTypes.h" #include "THC/THCGenerateAllTypes.h"
......
...@@ -20,12 +20,12 @@ void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor ...@@ -20,12 +20,12 @@ void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor
THCudaLongTensor_degree(state, degree, row); THCudaLongTensor_degree(state, degree, row);
THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes); THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_cumsum(state, cumDegree, degree, 0); THCudaLongTensor_cumDegree(state, cumDegree, row);
while(!THCTensor_color(state, self)) { while(!THCTensor_color(state, self)) {
THCTensor_propose(state, self, prop, row, col, degree, cumDegree); THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
THCTensor_response(state, self, prop, row, col, degree, cumDegree); THCTensor_response(state, self, prop, row, col, degree, cumDegree);
}; }
THCudaLongTensor_free(state, prop); THCudaLongTensor_free(state, prop);
THCudaLongTensor_free(state, degree); THCudaLongTensor_free(state, degree);
......
...@@ -14,4 +14,13 @@ void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *inde ...@@ -14,4 +14,13 @@ void THCTensor_(degree)(THCState *state, THCTensor *self, THCudaLongTensor *inde
THCTensor_(free)(state, one); THCTensor_(free)(state, one);
} }
// Fills `self` with end offsets derived from `index`: for every position i at
// which index[i] != index[i + 1], cumDegreeKernel stores i + 1 in
// self[index[i]]; the slot of the last entry of `index` receives the total
// element count.
//
//   state - THC state handle.
//   self  - output tensor, accessed through its raw data pointer; must have a
//           slot for every value that occurs in `index`.
//   index - int64 tensor, accessed through its raw data pointer.
//
// NOTE(review): presumably `index` is the sorted row vector of an edge list, so
// that the result matches cumsum(degree) for rows that have edges -- confirm.
// Slots of values that never occur in `index` are left untouched.
void THCTensor_(cumDegree)(THCState *state, THCTensor *self, THCudaLongTensor *index) {
  ptrdiff_t nEdges = THCudaLongTensor_nElement(state, index);
  // Nothing to write for an empty index; also avoids launching with a count of -1.
  if (nEdges <= 0) { return; }

  real *selfData = THCTensor_(data)(state, self);
  int64_t *indexData = THCudaLongTensor_data(state, index);

  // The kernel compares index[i] with index[i + 1], so it may only visit the
  // first nEdges - 1 positions. Skip the launch when that range is empty.
  if (nEdges > 1) {
    KERNEL_RUN(cumDegreeKernel, nEdges - 1, selfData, indexData);
  }

  // The final run in `index` has no following change of value, so the kernel
  // never writes its slot. Its end offset is the total element count.
  int64_t lastRow;
  THCudaCheck(cudaMemcpy(&lastRow, indexData + (nEdges - 1), sizeof(int64_t),
                         cudaMemcpyDeviceToHost));
  real total = ScalarConvert<int64_t, real>::to(static_cast<int64_t>(nEdges));
  THCudaCheck(cudaMemcpy(selfData + lastRow, &total, sizeof(real),
                         cudaMemcpyHostToDevice));
}
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
...@@ -5,10 +5,10 @@ from .utils.ffi import graclus ...@@ -5,10 +5,10 @@ from .utils.ffi import graclus
def graclus_cluster(row, col, weight=None, num_nodes=None): def graclus_cluster(row, col, weight=None, num_nodes=None):
num_nodes = row.max() + 1 if num_nodes is None else num_nodes num_nodes = row.max() + 1 if num_nodes is None else num_nodes
row, col = randperm(row, col)
if row.is_cuda: if row.is_cuda:
row, col = sort_row(row, col) row, col = sort_row(row, col)
else: else:
row, col = randperm(row, col)
row, col = randperm_sort_row(row, col, num_nodes) row, col = randperm_sort_row(row, col, num_nodes)
cluster = row.new(num_nodes) cluster = row.new(num_nodes)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment