Commit 0ad730f3 authored by rusty1s's avatar rusty1s
Browse files

first assign vertex try

parent 667614f6
import time
import torch
from torch_cluster.functions.utils.ffi import _get_func
def _report_gpu_time(run):
    """Call ``run()`` and print the elapsed wall-clock seconds.

    ``torch.cuda.synchronize()`` is invoked right before the timer starts and
    right before it stops, so the printed value brackets the GPU work issued
    by ``run`` rather than just the host-side launch.
    """
    torch.cuda.synchronize()
    started = time.perf_counter()
    run()
    torch.cuda.synchronize()
    print(time.perf_counter() - started)


# Reference measurement: torch.bernoulli over 500M floats filled with 0.5.
output = torch.cuda.FloatTensor(500000000).fill_(0.5)
_report_gpu_time(lambda: torch.bernoulli(output))

# Same element count as int64 filled with -1, passed as all four arguments
# of the 'serial' function looked up through the FFI helper.
output = output.long().fill_(-1)
func = _get_func('serial', output)
_report_gpu_time(lambda: func(output, output, output, output))
import pytest
import torch import torch
from torch_cluster.functions.utils.ffi import ffi_serial, ffi_grid from torch_cluster.functions.utils.ffi import ffi_serial, ffi_grid, _get_func
def test_serial_cpu(): def test_serial_cpu():
...@@ -23,3 +24,22 @@ def test_grid_cpu(): ...@@ -23,3 +24,22 @@ def test_grid_cpu():
cluster = ffi_grid(position, size, count) cluster = ffi_grid(position, size, count)
expected_cluster = [0, 5, 1, 0, 2] expected_cluster = [0, 5, 1, 0, 2]
assert cluster.tolist() == expected_cluster assert cluster.tolist() == expected_cluster
@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
def test_assign_color_gpu():
    """Smoke-run the CUDA 'serial' function on a tensor filled with -1.

    The same tensor is passed for all four arguments. Nothing is asserted:
    the test only prints the mean and the first ten entries of the tensor
    shifted by 2, for manual inspection.
    """
    color = torch.cuda.LongTensor(60000).fill_(-1)
    serial = _get_func('serial', color)
    serial(color, color, color, color)

    shifted = color + 2
    print(shifted.sum() / color.size(0))
    print(shifted[:10])

    # Scratch experiments kept from the original author:
    # print(torch.initial_seed())
    # torch.cuda.manual_seed(2)
    # bla = torch.bernoulli(torch.cuda.FloatTensor(10).fill_(0.2))
    # print(bla)
    # print(bla.sum() / bla.size(0))
    # func = ffi.()
    # # return getattr(ffi, 'cluster_{}{}'.format(name, cuda))
    # print('drin')
    # pass
#include <THC.h> #include <THC.h>
#include "THCTensorRandom.h"
#include "serial.h" #include "serial.h"
#include <curand.h>
#include <curand_kernel.h>
#include "common.cuh"
#define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _kernel_, Real) #define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real) #define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)
// Overwrites every negative entry of `color` with 0 and clears `*done`
// whenever at least one such entry was found.
//
//   state  RNG states; currently unused (its only use is in the
//          commented-out expression below).
//   color  array of n int64 entries, updated in place.
//   n      number of entries in `color`.
//   done   single flag; the kernel only ever writes 0 to it, so the caller
//          is expected to set it to a non-zero value before the launch.
//
// NOTE(review): KERNEL_LOOP comes from common.cuh; presumably it iterates
// i over the indices below n assigned to this thread -- confirm there.
__global__ void assignColorKernel(curandStateMtgp32 *state,
                                  int64_t *color,
                                  const int n,
                                  uint8_t *done) {
  KERNEL_LOOP(i, n) {
    const bool uncolored = color[i] < 0;
    if (uncolored) {
      // Every thread that finds work stores the same value, so concurrent
      // writes to the flag all agree.
      *done = 0;
      color[i] = 0; //(int64_t) (curand_uniform(&state[blockIdx.x]) <= 0.53406) - 2;
    }
  }
}
// Runs assignColorKernel over all elements of `color` on THC's current
// stream and reports whether the kernel found anything to do.
//
//   state  THC state; supplies the RNG generator states and the stream.
//   color  CUDA long tensor, modified in place by the kernel.
//
// Returns 1 if the kernel left the flag untouched (no negative entry was
// seen, or the tensor is empty) and 0 if at least one entry was overwritten.
// CUDA failures are raised through THCudaCheck instead of being ignored.
int assignColor(THCState *state, THCudaLongTensor *color) {
  curandStateMtgp32 *gen_states = THCRandom_generatorStates(state);
  int64_t *colorVec = THCudaLongTensor_data(state, color);
  // NOTE(review): nElement is narrowed to int because the kernel takes an
  // int count; tensors with more than INT_MAX elements are not supported.
  const int n = THCudaLongTensor_nElement(state, color);

  // Nothing to assign; also avoids a launch that may have zero blocks.
  if (n == 0) {
    return 1;
  }

  cudaStream_t stream = THCState_getCurrentStream(state);

  // Host copy starts at 1 so it is never read uninitialized.
  uint8_t done = 1;
  uint8_t *d_done = NULL;
  THCudaCheck(cudaMalloc(&d_done, sizeof(uint8_t)));

  // Initialise, launch and read back on the same stream so the three
  // operations are ordered even when the current stream does not
  // synchronise with the default stream.
  THCudaCheck(cudaMemsetAsync(d_done, 1, sizeof(uint8_t), stream));
  assignColorKernel<<<GET_BLOCKS(n), NUM_THREADS, 0, stream>>>(gen_states, colorVec, n, d_done);
  THCudaCheck(cudaGetLastError());
  THCudaCheck(cudaMemcpyAsync(&done, d_done, sizeof(uint8_t), cudaMemcpyDeviceToHost, stream));
  THCudaCheck(cudaStreamSynchronize(stream));

  THCudaCheck(cudaFree(d_done));
  return done;
}
/* GENERATE_KERNEL1(generate_bernoulli, double, double p, double, curand_uniform_double, x <= p) */
/* #define GENERATE_KERNEL1(NAME, T, ARG1, CURAND_T, CURAND_FUNC, TRANSFORM) \ */
/* __global__ void NAME(curandStateMtgp32 *state, int size, T *result, ARG1) \ */
/* { \ */
/* int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x; \ */
/* int rounded_size = THCCeilDiv(size, BLOCK_SIZE) * BLOCK_SIZE; \ */
/* for (int i = idx; i < rounded_size; i += BLOCK_SIZE * MAX_NUM_BLOCKS) { \ */
/* CURAND_T x = CURAND_FUNC(&state[blockIdx.x]); \ */
/* if (i < size) { \ */
/* T y = TRANSFORM; \ */
/* result[i] = y; \ */
/* } \ */
/* } \ */
/* } */
void cluster_serial_kernel(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree) { void cluster_serial_kernel(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree) {
} }
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
extern "C" { extern "C" {
#endif #endif
int assignColor(THCState *state, THCudaLongTensor *color);
void cluster_serial_kernel(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree); void cluster_serial_kernel(THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree);
void cluster_serial_kernel_Float (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaTensor *weight); void cluster_serial_kernel_Float (THCState *state, THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCudaTensor *weight);
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#else #else
void cluster_(serial)(THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCTensor *weight) { void cluster_(serial)(THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree, THCTensor *weight) {
cluster_kernel_(serial)(state, output, row, col, degree, weight); int bla = assignColor(state, output);
} }
#endif #endif
......
...@@ -8,7 +8,9 @@ ...@@ -8,7 +8,9 @@
extern THCState *state; extern THCState *state;
void cluster_serial_cuda(THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree) { void cluster_serial_cuda(THCudaLongTensor *output, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *degree) {
cluster_serial_kernel(state, output, row, col, degree); int bla = assignColor(state, output);
printf("RETURN TYPE IS %i \n", bla);
/* cluster_serial_kernel(state, output, row, col, degree); */
} }
#include "generic/serial_cuda.c" #include "generic/serial_cuda.c"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment