Commit 4c4478a7 authored by rusty1s
Browse files

bugfixes

parent 16eb9e1d
#ifndef THC_COLOR_INC
#define THC_COLOR_INC
#include <curand_kernel.h>
#include "common.cuh"
// Probability handed to the *_bernoulli call in the host coloring routine.
// NOTE(review): BLUE_PROBABILITY and BLUE_PROB hold the same value; they look like the
// before/after names of a single macro in a diff — confirm which one the repository keeps.
#define BLUE_PROBABILITY 0.53406
#define BLUE_PROB 0.53406
// Device-global flag: set to 1 by initDoneKernel, cleared to 0 by colorKernel, and copied
// back to the host with cudaMemcpyFromSymbol in the host coloring routine.
__device__ int d_done;
// Sets the device-global flag d_done to 1.
// The visible host code launches this with a single thread (<<<1, 1>>>) before
// running colorKernel, so the flag starts each pass in the "done" state.
__global__ void initDoneKernel() {
  d_done = 1;
}
// For every index i produced by KERNEL_LOOP whose self[i] is negative, stores
// bernoulli[i] - 2 into self[i] and clears a "done" indicator to 0.
//
// NOTE(review): two kernel signatures appear back to back and the braces do not balance;
// this looks like the removed and added sides of a diff rendered without +/- markers.
// The first signature takes the flag as a uint8_t* parameter, the second relies on the
// file-level __device__ variable d_done. Confirm against the repository before compiling.
__global__ void colorKernel(int64_t *self, int64_t *bernoulli, uint8_t *done, ptrdiff_t nNodes) {
__global__ void colorKernel(int64_t *self, uint8_t *bernoulli, ptrdiff_t nNodes) {
// KERNEL_LOOP is a macro whose definition is not visible here — presumably it iterates
// i over [0, nNodes) across the launched threads; TODO confirm in common.cuh.
KERNEL_LOOP(i, nNodes) {
// Only entries that are still negative are rewritten.
if (self[i] < 0) {
// Assuming bernoulli[i] is 0 or 1, the stored value is -2 or -1 — TODO confirm.
self[i] = bernoulli[i] - 2;
// Flag write that matches the first signature (pointer parameter `done`).
*done = 0;
// Flag write that matches the second signature (global d_done). Every thread that
// reaches this line stores the same value, 0.
d_done = 0;
}
}
}
// Host routine for one coloring pass over `self`: draws a bernoulli tensor with one element
// per element of `self`, launches colorKernel over nNodes elements, and returns the "done"
// flag read back from the device. The flag starts at 1 and the kernel writes 0 whenever it
// rewrites a negative entry, so a non-zero return means no entry was rewritten.
//
// NOTE(review): two function headers (THCTensor_color and THCudaLongTensor_color) and two
// sets of locals (nNodes, bernoulli, selfData, bernoulliData, done, d_done) are interleaved
// here; this looks like both sides of a diff rendered without +/- markers. Confirm which
// lines are live before compiling.
// NOTE(review): neither bernoulli tensor created below has a matching free in the visible
// lines — verify it is released, otherwise every call leaks device memory.
// NOTE(review): the return codes of cudaMalloc, cudaMemset, cudaMemcpy, cudaFree and
// cudaMemcpyFromSymbol are discarded, and no error query follows the kernel launches.
int THCTensor_color(THCState *state, THCudaLongTensor *self) {
ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
// Variant that draws the bernoulli values into an int64 (Long) tensor.
THCudaLongTensor *bernoulli = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_bernoulli(state, bernoulli, BLUE_PROBABILITY);
int THCudaLongTensor_color(THCState *state, THCudaLongTensor *self) {
// Single-thread launch that sets the global d_done flag to 1.
initDoneKernel<<<1, 1>>>();
int64_t *selfData = THCudaLongTensor_data(state, self);
int64_t *bernoulliData = THCudaLongTensor_data(state, bernoulli);
ptrdiff_t nNodes = THCudaLongTensor_nElement(state, self);
// Variant that keeps the flag in a one-byte device allocation; this local d_done
// shadows the file-level __device__ variable of the same name.
uint8_t* d_done;
cudaMalloc(&d_done, sizeof(uint8_t));
cudaMemset(d_done, 1, sizeof(uint8_t));
// Variant that draws the bernoulli values into a uint8 (Byte) tensor.
THCudaByteTensor *bernoulli = THCudaByteTensor_newWithSize1d(state, nNodes);
THCudaByteTensor_bernoulli(state, bernoulli, BLUE_PROB);
// KERNEL_RUN is a macro whose definition is not visible here — presumably it picks the
// launch configuration from nNodes; TODO confirm in common.cuh.
KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData, d_done);
int64_t *selfData = THCudaLongTensor_data(state, self);
uint8_t *bernoulliData = THCudaByteTensor_data(state, bernoulli);
// Read-back for the heap-allocated flag variant, followed by releasing that allocation.
uint8_t done;
cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost);
cudaFree(d_done);
KERNEL_RUN(colorKernel, nNodes, selfData, bernoulliData);
// Read-back for the global-symbol flag variant.
int done; cudaMemcpyFromSymbol(&done, d_done, sizeof(done), 0, cudaMemcpyDeviceToHost);
return done;
}
......
......@@ -20,9 +20,9 @@ void THCTensor_graclus(THCState *state, THCudaLongTensor *self, THCudaLongTensor
THCudaLongTensor_degree(state, degree, row);
THCudaLongTensor *cumDegree = THCudaLongTensor_newWithSize1d(state, nNodes);
THCudaLongTensor_cumDegree(state, self, row);
THCudaLongTensor_cumDegree(state, cumDegree, row);
while(!THCTensor_color(state, self)) {
while(!THCudaLongTensor_color(state, self)) {
THCTensor_propose(state, self, prop, row, col, degree, cumDegree);
THCTensor_response(state, self, prop, row, col, degree, cumDegree);
}
......
......@@ -5,8 +5,7 @@
for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
// NOTE(review): MAX_DIMS is not referenced in the visible lines; its purpose is unconfirmed.
const int MAX_DIMS = 25;
/* const int NUM_THREADS = 1024; */
// Divisor that GET_BLOCKS uses in its ceiling division.
// NOTE(review): two live definitions of NUM_THREADS follow (256 and 1024); in one translation
// unit that is a redefinition error. They look like the removed and added sides of a diff —
// confirm which value the repository keeps.
const int NUM_THREADS = 256;
const int NUM_THREADS = 1024;
inline int GET_BLOCKS(int N) {
return (N + NUM_THREADS - 1) / NUM_THREADS;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment