Commit ef9677c5 authored by rusty1s's avatar rusty1s
Browse files

assignColorKernel

parent 2f0248cf
#include "THCGreedy.cu"
#include "THCGrid.cu"
#ifndef THC_INC
#define THC_INC
#include "THCGreedy.h"
#include "THCGrid.h"
#endif
#include <curand.h>
#include <curand_kernel.h>
#include "common.cuh"
/**
 * Overwrites every negative entry of `color` with a random color.
 *
 * Iterates over the nodes with KERNEL_LOOP (a grid-stride loop, so any 1D
 * launch configuration covers all `nNodes` entries). For each entry with
 * `color[i] < 0` a uniform sample is drawn and the entry is set to
 * blue (-1) if the sample is below the threshold, otherwise red (-2).
 * Both colors are themselves negative, so an entry colored by a previous
 * call is re-drawn by the next call as long as it stays negative.
 *
 * @param color   Device array of `nNodes` color values, updated in place.
 * @param state   Device array of MTGP32 generator states; only `state[0]`
 *                is read here.
 * @param done    Device flag, set to 0 as soon as any negative entry is
 *                found. It is never set to 1 here; the caller initializes it.
 * @param nNodes  Number of entries in `color`.
 *
 * NOTE(review): every thread of every block samples from `state[0]`, and the
 * draw happens inside a divergent branch. The cuRAND device API documents
 * MTGP32 states as being meant for one block each (at most 256 threads) --
 * confirm whether this should index the state by block and draw uniformly
 * across the block.
 */
__global__ void assignColorKernel(int64_t *color, curandStateMtgp32 *state, uint8_t *done,
                                  ptrdiff_t nNodes) {
  KERNEL_LOOP(i, nNodes) {
    if (color[i] < 0) {
      // The comparison yields 1 or 0; subtracting 2 maps it to blue = -1 or
      // red = -2. The threshold is a float literal so the compare against the
      // float sample is not promoted to double precision.
      color[i] = (curand_uniform(&state[0]) < 0.53406f) - 2;
      // All writers store the same value, so concurrent stores are harmless.
      *done = 0;
    }
  }
}
/**
 * Runs one random coloring pass over `color` on the GPU.
 *
 * Launches assignColorKernel over all elements of `color`; every negative
 * entry is overwritten with blue (-1) or red (-2).
 *
 * @param state  THC state providing the current stream and generator states.
 * @param color  CUDA long tensor holding one color value per node; modified
 *               in place.
 * @return 1 if the kernel found no negative entry (nothing was changed),
 *         0 if at least one entry was (re)colored. An empty tensor yields 1.
 *
 * Every CUDA runtime call is checked with THCudaCheck so that an allocation
 * or copy failure is reported here instead of surfacing as a garbage `done`
 * value or as a sticky error in a later, unrelated call.
 */
int THCGreedy_assignColor(THCState *state, THCudaLongTensor *color) {
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 1, color));

  int64_t *colorData = THCudaLongTensor_data(state, color);
  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, color);

  // Nothing to color. Launching with zero blocks would be an invalid
  // configuration, so report "done" without touching the device.
  if (nNodes <= 0) {
    return 1;
  }

  // Single-byte device flag, preset to 1 ("done"); the kernel clears it when
  // it encounters a negative entry.
  uint8_t *d_done = NULL;
  THCudaCheck(cudaMalloc(&d_done, sizeof(uint8_t)));
  THCudaCheck(cudaMemset(d_done, 1, sizeof(uint8_t)));

  // KERNEL_RUN declares `grid` and `stream` in this scope and checks the
  // launch with cudaGetLastError().
  KERNEL_RUN(assignColorKernel, nNodes, colorData, THCRandom_generatorStates(state), d_done);

  // Blocking copy: also waits for the kernel before the flag is read.
  uint8_t done = 0;
  THCudaCheck(cudaMemcpy(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost));
  THCudaCheck(cudaFree(d_done));

  return done;
}
#include "THCGreedy.h" #include "THCGreedy.h"
#include "THCColor.cu"
void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row, void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
THCudaLongTensor *col, THCudaLongTensor *deg) { THCudaLongTensor *col, THCudaLongTensor *deg) {
printf("THCGreedy drin"); THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 4, cluster, row, col, deg));
THCudaLongTensor_fill(state, cluster, -1);
THCGreedy_assignColor(state, cluster);
/* while(!THCGreedy_assignColor(state, cluster)) { */
/* printf("DRIN"); */
/* }; */
// Fill cluster with -1 // Fill cluster with -1
// assign color to clusters < 0 (return done) // assign color to clusters < 0 (return done)
// Generate proposal vector with length of nodes (init?) // Generate proposal vector with length of nodes (init?)
......
#include "THCGrid.h" #include "THCGrid.h"
#include "common.h" #include "common.cuh"
#include "THCNumerics.cuh" #include "THCNumerics.cuh"
template<typename T> template<typename T>
__global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size, __global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size,
int64_t *count, const int nNodes) { int64_t *count, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
T *pos = posInfo.data + i * posInfo.stride[0]; T *pos = posInfo.data + i * posInfo.stride[0];
int64_t coef = 1, value = 0; int64_t coef = 1, value = 0;
......
...@@ -6,17 +6,21 @@ ...@@ -6,17 +6,21 @@
#define KERNEL_LOOP(I, N) \ #define KERNEL_LOOP(I, N) \
for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x) for (ptrdiff_t I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
#define THC_assertSameGPU(...) THAssertMsg(THCTensor_(checkGPU)(__VA_ARGS__), \
"Some of the input tensors are located on different GPUs. Please move them to a single one.")
const int MAX_DIMS = 25; const int MAX_DIMS = 25;
const int NUM_THREADS = 1024; /* const int NUM_THREADS = 1024; */
const int NUM_THREADS = 256;
inline int GET_BLOCKS(const int N) { inline int GET_BLOCKS(int N) {
return (N + NUM_THREADS - 1) / NUM_THREADS; return (N + NUM_THREADS - 1) / NUM_THREADS;
} }
#define KERNEL_RUN(NAME, N, ...) \ #define KERNEL_RUN(NAME, N, ...) \
int grid = GET_BLOCKS(N); \
cudaStream_t stream = THCState_getCurrentStream(state); \
NAME<<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
THCudaCheck(cudaGetLastError())
#define KERNEL_REAL_RUN(NAME, N, ...) \
int grid = GET_BLOCKS(N); \ int grid = GET_BLOCKS(N); \
cudaStream_t stream = THCState_getCurrentStream(state); \ cudaStream_t stream = THCState_getCurrentStream(state); \
NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \ NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
...@@ -30,7 +34,7 @@ struct TensorInfo { ...@@ -30,7 +34,7 @@ struct TensorInfo {
int stride[MAX_DIMS]; int stride[MAX_DIMS];
}; };
#include "generic/common.h" #include "generic/common.cuh"
#include "THC/THCGenerateAllTypes.h" #include "THC/THCGenerateAllTypes.h"
#endif // THC_COMMON_INC #endif // THC_COMMON_INC
...@@ -4,15 +4,15 @@ ...@@ -4,15 +4,15 @@
void THCGrid_(THCState *state, THCudaLongTensor *cluster, THCTensor *pos, THCTensor *size, void THCGrid_(THCState *state, THCudaLongTensor *cluster, THCTensor *pos, THCTensor *size,
THCudaLongTensor *count) { THCudaLongTensor *count) {
THC_assertSameGPU(state, 4, cluster, pos, size, count); THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, cluster, pos, size, count));
int64_t *clusterData = THCudaLongTensor_data(state, cluster); int64_t *clusterData = THCudaLongTensor_data(state, cluster);
TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos); TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos);
real *sizeData = THCTensor_(data)(state, size); real *sizeData = THCTensor_(data)(state, size);
int64_t *countData = THCudaLongTensor_data(state, count); int64_t *countData = THCudaLongTensor_data(state, count);
const int nNodes = THCudaLongTensor_nElement(state, cluster); ptrdiff_t nNodes = THCudaLongTensor_nElement(state, cluster);
KERNEL_RUN(gridKernel, nNodes, clusterData, posInfo, sizeData, countData); KERNEL_REAL_RUN(gridKernel, nNodes, clusterData, posInfo, sizeData, countData);
} }
#endif // THC_GENERIC_FILE #endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE #ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.h" #define THC_GENERIC_FILE "generic/common.cuh"
#else #else
TensorInfo<real> THCTensor_(getTensorInfo)(THCState *state, THCTensor *tensor) { TensorInfo<real> THCTensor_(getTensorInfo)(THCState *state, THCTensor *tensor) {
......
...@@ -24,7 +24,7 @@ if torch.cuda.is_available(): ...@@ -24,7 +24,7 @@ if torch.cuda.is_available():
sources += ['aten/THCC/THCC{}.c'.format(f) for f in files] sources += ['aten/THCC/THCC{}.c'.format(f) for f in files]
include_dirs += ['aten/THC', 'aten/THCC'] include_dirs += ['aten/THC', 'aten/THCC']
define_macros += [('WITH_CUDA', None)] define_macros += [('WITH_CUDA', None)]
extra_objects += ['aten/build/THC{}.so'.format(f) for f in files] extra_objects += ['aten/build/THC.so']
with_cuda = True with_cuda = True
ffi = create_extension( ffi = create_extension(
......
...@@ -7,6 +7,4 @@ SRC_DIR=aten/THC ...@@ -7,6 +7,4 @@ SRC_DIR=aten/THC
BUILD_DIR=aten/build BUILD_DIR=aten/build
mkdir -p $BUILD_DIR mkdir -p $BUILD_DIR
for i in THCGreedy THCGrid; do $(which nvcc) -c -o "$BUILD_DIR/THC.so" "$SRC_DIR/THC.cu" -arch=sm_52 -Xcompiler -fPIC -shared "-I$TORCH/lib/include/TH" "-I$TORCH/lib/include" "-I$SRC_DIR"
$(which nvcc) -c -o "$BUILD_DIR/$i.so" "$SRC_DIR/$i.cu" -arch=sm_52 -Xcompiler -fPIC -shared "-I$TORCH/lib/include/TH" "-I$TORCH/lib/include" "-I$SRC_DIR"
done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment