Commit ef9677c5 authored by rusty1s
Browse files

assignColorKernel

parent 2f0248cf
#include "THCGreedy.cu"
#include "THCGrid.cu"
#ifndef THC_INC
#define THC_INC
#include "THCGreedy.h"
#include "THCGrid.h"
#endif
#include <curand.h>
#include <curand_kernel.h>
#include "common.cuh"
/**
 * Randomly colors every node whose entry in `color` is still negative and
 * clears `*done` when at least one node was (re)colored.
 *
 * color  - per-node values; entries < 0 are overwritten with -1 (blue) or
 *          -2 (red), other entries are left untouched.
 * state  - MTGP32 generator states; block b draws from state[b], so the array
 *          must hold at least gridDim.x states.
 * done   - single device byte; written to 0 if any node was colored. The
 *          caller is expected to initialise it beforehand.
 * nNodes - number of entries in `color`.
 *
 * Launch requirements: 1-D grid, at most 256 threads per block. cuRAND
 * documents that an MTGP32 state may be used by up to 256 threads of a single
 * block and must not be shared between blocks.
 */
__global__ void assignColorKernel(int64_t *color, curandStateMtgp32 *state, uint8_t *done,
                                  ptrdiff_t nNodes) {
  const ptrdiff_t blockSize = blockDim.x;
  const ptrdiff_t stride = blockSize * gridDim.x;
  // Round the trip count up to a whole number of blocks so that every thread
  // of a block executes the same number of iterations: the MTGP32 draw is a
  // block-cooperative operation and must not sit in a divergent branch.
  const ptrdiff_t roundedNodes = (nNodes + blockSize - 1) / blockSize * blockSize;
  const float blueProbability = 0.53406f;

  for (ptrdiff_t i = blockSize * blockIdx.x + threadIdx.x; i < roundedNodes; i += stride) {
    // Drawn unconditionally by all threads of the block, from this block's own state.
    const float sample = curand_uniform(&state[blockIdx.x]);
    if (i < nNodes && color[i] < 0) {
      color[i] = (sample < blueProbability) - 2;  // blue = -1, red = -2
      *done = 0;  // Every writer stores the same value, so the race is benign.
    }
  }
}

/**
 * Assigns a random color (-1 or -2) to every negative entry of `color`.
 *
 * Returns 1 if no entry had to be colored (including an empty tensor) and 0
 * if at least one entry was colored.
 *
 * All device work is issued on THC's current stream and the function waits
 * for that stream before reading the flag back.
 */
int THCGreedy_assignColor(THCState *state, THCudaLongTensor *color) {
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 1, color));

  int64_t *colorData = THCudaLongTensor_data(state, color);
  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, color);
  if (nNodes <= 0) return 1;  // Nothing to color; a zero-block launch would be an error.

  // MTGP32 allows at most 256 threads per state.
  const int threads = 256;
  // NOTE(review): assumes THCRandom_generatorStates() provides at least 64
  // MTGP32 states - confirm against THC's generator setup.
  const int maxBlocks = 64;
  const ptrdiff_t wantedBlocks = (nNodes + threads - 1) / threads;
  const int blocks = wantedBlocks < maxBlocks ? (int)wantedBlocks : maxBlocks;

  cudaStream_t stream = THCState_getCurrentStream(state);

  uint8_t *d_done = NULL;
  THCudaCheck(cudaMalloc(&d_done, sizeof(uint8_t)));

  // Collect the first failure instead of raising immediately so that the
  // temporary flag is always released.
  uint8_t done = 1;
  cudaError_t status = cudaMemsetAsync(d_done, 1, sizeof(uint8_t), stream);
  if (status == cudaSuccess) {
    assignColorKernel<<<blocks, threads, 0, stream>>>(
        colorData, THCRandom_generatorStates(state), d_done, nNodes);
    status = cudaGetLastError();
  }
  if (status == cudaSuccess) {
    status = cudaMemcpyAsync(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost, stream);
  }
  if (status == cudaSuccess) {
    status = cudaStreamSynchronize(stream);
  }
  const cudaError_t freeStatus = cudaFree(d_done);

  THCudaCheck(status);
  THCudaCheck(freeStatus);
  return done;
}
#include "THCGreedy.h"
#include "THCColor.cu"
void THCGreedy(THCState *state, THCudaLongTensor *cluster, THCudaLongTensor *row,
THCudaLongTensor *col, THCudaLongTensor *deg) {
printf("THCGreedy drin");
THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 4, cluster, row, col, deg));
THCudaLongTensor_fill(state, cluster, -1);
THCGreedy_assignColor(state, cluster);
/* while(!THCGreedy_assignColor(state, cluster)) { */
/* printf("DRIN"); */
/* }; */
// Fill cluster with -1
// assign color to clusters < 0 (return done)
// Generate proposal vector with length of nodes (init?)
......
#include "THCGrid.h"
#include "common.h"
#include "common.cuh"
#include "THCNumerics.cuh"
template<typename T>
__global__ void gridKernel(int64_t *cluster, TensorInfo<T> posInfo, T *size,
int64_t *count, const int nNodes) {
int64_t *count, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) {
T *pos = posInfo.data + i * posInfo.stride[0];
int64_t coef = 1, value = 0;
......
......@@ -6,17 +6,21 @@
/* Grid-stride loop header for device code: declares a ptrdiff_t index I that
   starts at the thread's global 1-D index and advances by the total number of
   threads in the grid while I < N. The index and stride are widened to
   ptrdiff_t before multiplying, and N is parenthesized so that an expression
   argument is evaluated as a whole. */
#define KERNEL_LOOP(I, N)                                                          \
  for (ptrdiff_t I = (ptrdiff_t)blockIdx.x * blockDim.x + threadIdx.x; I < (N);   \
       I += (ptrdiff_t)blockDim.x * gridDim.x)
/* Forwards its arguments to THCTensor_(checkGPU) and passes the result to
   THAssertMsg together with a message saying that the input tensors live on
   different GPUs. */
#define THC_assertSameGPU(...) THAssertMsg(THCTensor_(checkGPU)(__VA_ARGS__), \
"Some of the input tensors are located on different GPUs. Please move them to a single one.")
/* Capacity of the fixed-size per-dimension arrays in TensorInfo. */
const int MAX_DIMS = 25;
/* Threads per block used by the KERNEL_*RUN launch macros. */
/* const int NUM_THREADS = 1024; */
const int NUM_THREADS = 256;

/* Returns the number of NUM_THREADS-sized blocks needed to cover N elements,
   i.e. ceil(N / NUM_THREADS). Non-positive N yields 0. Written without
   N + NUM_THREADS - 1 so that values close to INT_MAX do not overflow. */
inline int GET_BLOCKS(int N) {
  if (N <= 0) return 0;
  return N / NUM_THREADS + (N % NUM_THREADS != 0 ? 1 : 0);
}
/* Launches kernel NAME on THC's current stream with GET_BLOCKS(N) blocks of
   NUM_THREADS threads, passing the variadic arguments followed by N, then
   checks cudaGetLastError(). Expects a variable named `state` (THCState *) in
   the calling scope. The body is a single do/while(0) statement so the macro
   can be used several times in one scope and after an unbraced if/else; the
   launch is skipped when no blocks are required. */
#define KERNEL_RUN(NAME, N, ...)                                                          \
  do {                                                                                    \
    const int kernelRunGrid_ = GET_BLOCKS(N);                                             \
    if (kernelRunGrid_ > 0) {                                                             \
      cudaStream_t kernelRunStream_ = THCState_getCurrentStream(state);                   \
      NAME<<<kernelRunGrid_, NUM_THREADS, 0, kernelRunStream_>>>(__VA_ARGS__, N);         \
      THCudaCheck(cudaGetLastError());                                                    \
    }                                                                                     \
  } while (0)
#define KERNEL_REAL_RUN(NAME, N, ...) \
int grid = GET_BLOCKS(N); \
cudaStream_t stream = THCState_getCurrentStream(state); \
NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
......@@ -30,7 +34,7 @@ struct TensorInfo {
int stride[MAX_DIMS];
};
#include "generic/common.h"
#include "generic/common.cuh"
#include "THC/THCGenerateAllTypes.h"
#endif // THC_COMMON_INC
......@@ -4,15 +4,15 @@
/* Host entry point for the grid kernel (instantiated per `real` type).
   Checks that cluster, pos, size and count are on the same GPU, extracts
   their raw data / tensor info and launches gridKernel<real> with one
   logical work item per element of `cluster`. */
void THCGrid_(THCState *state, THCudaLongTensor *cluster, THCTensor *pos, THCTensor *size,
              THCudaLongTensor *count) {
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, cluster, pos, size, count));

  int64_t *clusterData = THCudaLongTensor_data(state, cluster);
  TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos);
  real *sizeData = THCTensor_(data)(state, size);
  int64_t *countData = THCudaLongTensor_data(state, count);

  // Element count kept as ptrdiff_t to match the kernel's nNodes parameter.
  ptrdiff_t nNodes = THCudaLongTensor_nElement(state, cluster);
  KERNEL_REAL_RUN(gridKernel, nNodes, clusterData, posInfo, sizeData, countData);
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.h"
#define THC_GENERIC_FILE "generic/common.cuh"
#else
TensorInfo<real> THCTensor_(getTensorInfo)(THCState *state, THCTensor *tensor) {
......
......@@ -24,7 +24,7 @@ if torch.cuda.is_available():
sources += ['aten/THCC/THCC{}.c'.format(f) for f in files]
include_dirs += ['aten/THC', 'aten/THCC']
define_macros += [('WITH_CUDA', None)]
extra_objects += ['aten/build/THC{}.so'.format(f) for f in files]
extra_objects += ['aten/build/THC.so']
with_cuda = True
ffi = create_extension(
......
......@@ -7,6 +7,4 @@ SRC_DIR=aten/THC
BUILD_DIR=aten/build
mkdir -p $BUILD_DIR
for i in THCGreedy THCGrid; do
$(which nvcc) -c -o "$BUILD_DIR/$i.so" "$SRC_DIR/$i.cu" -arch=sm_52 -Xcompiler -fPIC -shared "-I$TORCH/lib/include/TH" "-I$TORCH/lib/include" "-I$SRC_DIR"
done
$(which nvcc) -c -o "$BUILD_DIR/THC.so" "$SRC_DIR/THC.cu" -arch=sm_52 -Xcompiler -fPIC -shared "-I$TORCH/lib/include/TH" "-I$TORCH/lib/include" "-I$SRC_DIR"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment