Commit 4c4478a7 authored by rusty1s
Browse files

bugfixes

parent 16eb9e1d
#ifndef THC_COLOR_INC #ifndef THC_COLOR_INC
#define THC_COLOR_INC #define THC_COLOR_INC
#include <curand_kernel.h>
#include "common.cuh" #include "common.cuh"
// Probability argument passed to the *_bernoulli call in the color routine below.
// This line shows both diff columns: the macro is named BLUE_PROBABILITY on the
// left and BLUE_PROB on the right (presumably before/after the commit); the
// value 0.53406 is the same in both.
#define BLUE_PROBABILITY 0.53406 #define BLUE_PROB 0.53406
// Device-side "done" flag. It appears only in the right-hand diff column.
// initDoneKernel stores 1 into it, the right-hand colorKernel stores 0 into it
// when it finds an entry with self[i] < 0, and the host reads it back with
// cudaMemcpyFromSymbol in the color routine.
__device__ int d_done;
// Kernel whose only statement sets d_done to 1. The color routine launches it
// as <<<1, 1>>> before running colorKernel.
__global__ void initDoneKernel() { d_done = 1; }
// colorKernel (both diff columns are shown on each line).
//
// For every index i produced by KERNEL_LOOP(i, nNodes), an entry with
// self[i] < 0 is overwritten with bernoulli[i] - 2 and the "done" flag is
// cleared to 0. Entries with self[i] >= 0 are left untouched.
//
// Differences between the two columns:
//   - left:  bernoulli is int64_t*, and the flag is reached through the
//            `uint8_t *done` pointer argument (`*done = 0`).
//   - right: bernoulli is uint8_t*, the `done` argument is gone, and the flag
//            is the file-level __device__ variable d_done (`d_done = 0`).
//
// Every thread that takes the branch stores the same value (0) into the flag.
// NOTE(review): presumably bernoulli[i] is 0 or 1, making the result -2 or -1;
// this depends on the *_bernoulli call, which is not defined here — confirm.
__global__ void colorKernel(int64_t *self, int64_t *bernoulli, uint8_t *done, ptrdiff_t nNodes) { __global__ void colorKernel(int64_t *self, uint8_t *bernoulli, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
if (self[i] < 0) { if (self[i] < 0) {
self[i] = bernoulli[i] - 2; self[i] = bernoulli[i] - 2;
*done = 0; d_done = 0;
} }
} }
} }
// Host entry point for one coloring pass (named THCTensor_color in the left
// diff column and THCudaLongTensor_color in the right one).
//
// Both columns do the same thing overall:
//   1. initialise a device-side "done" flag to 1,
//   2. allocate a `bernoulli` tensor with nNodes elements and fill it through
//      the *_bernoulli call with the BLUE_PROBABILITY / BLUE_PROB constant,
//   3. run colorKernel over `self` and `bernoulli`,
//   4. copy the flag back to the host and return it.
// The returned value is therefore 1 when colorKernel found no entry with
// self[i] < 0, and 0 otherwise. The graclus hunk further down calls this in
// `while(!...color(state, self))`.
//
// The statements of the two columns are interleaved line by line because of
// diff alignment, so a single line may pair unrelated left/right statements.
//
// NOTE(review): no call that releases `bernoulli` is visible in either column,
// and this routine is invoked once per loop iteration by graclus — looks like
// a per-iteration leak; confirm against the tensor API.
// NOTE(review): none of the CUDA runtime calls or kernel launches here have
// their return status checked.
int THCTensor_color(THCState *state, THCudaLongTensor *self) { int THCudaLongTensor_color(THCState *state, THCudaLongTensor *self) {
// right column: set the __device__ flag d_done to 1 with a one-thread launch
ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self); initDoneKernel<<<1, 1>>>();
// left column only: bernoulli is a Long tensor here
THCudaLongTensor *bernoulli = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_bernoulli(state, bernoulli, BLUE_PROBABILITY);
int64_t *selfData = THCudaLongTensor_data(state, self); ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
int64_t *bernoulliData = THCudaLongTensor_data(state, bernoulli);
// left column: the flag is a one-byte cudaMalloc'd buffer set to 1 via cudaMemset;
// right column: bernoulli becomes a Byte tensor
uint8_t* d_done; THCudaByteTensor *bernoulli = THCudaByteTensor_newWithSize1d(state, nNodes);
cudaMalloc(&d_done, sizeof(uint8_t)); THCudaByteTensor_bernoulli(state, bernoulli, BLUE_PROB);
cudaMemset(d_done, 1, sizeof(uint8_t));
KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData, d_done); int64_t *selfData = THCudaLongTensor_data(state, self);
uint8_t *bernoulliData = THCudaByteTensor_data(state, bernoulli);
uint8_t done; KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData);
// left column: read the flag back with cudaMemcpy, then free the temporary buffer
cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);
cudaFree(d_done);
// right column: read the __device__ symbol d_done into a host int
int done; cudaMemcpyFromSymbol(&done, d_done, sizeof(done), 0, cudaMemcpyDeviceToHost);
return done; return done;
} }
......
...@@ -20,9 +20,9 @@ void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor ...@@ -20,9 +20,9 @@ void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor
THCudaLongTensor_degree(state, degree, row); THCudaLongTensor_degree(state, degree, row);
THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes); THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_cumDegree(state, self, row); THCudaLongTensor_cumDegree(state, cumDegree, row);
while(!THCTensor_color(state, self)) { while(!THCudaLongTensor_color(state, self)) {
THCTensor_propose(state, self, prop, row, col, degree, cumDegree); THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
THCTensor_response(state, self, prop, row, col, degree, cumDegree); THCTensor_response(state, self, prop, row, col, degree, cumDegree);
} }
......
...@@ -5,8 +5,7 @@ ...@@ -5,8 +5,7 @@
for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x) for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
const int MAX_DIMS = 25; const int MAX_DIMS = 25;
/* const int NUM_THREADS = 1024; */ const int NUM_THREADS = 1024;
const int NUM_THREADS = 256;
inline int GET_BLOCKS(int N) { inline int GET_BLOCKS(int N) {
return (N + NUM_THREADS - 1) / NUM_THREADS; return (N + NUM_THREADS - 1) / NUM_THREADS;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment