rw_cuda.cu
#include "rw_cuda.h"

#include <ATen/cuda/CUDAContext.h>

#include "utils.cuh"

#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS

// Samples one uniform random walk per thread: starting from start[thread_idx],
// each step moves to a neighbor chosen uniformly at random from the CSR
// adjacency given by (rowptr, col).
__global__ void uniform_random_walk_kernel(const int64_t *rowptr,
                                           const int64_t *col,
                                           const int64_t *start,
                                           const float *rand, int64_t *n_out,
                                           int64_t *e_out, int64_t walk_length,
                                           int64_t numel) {
  // Global thread index; this also selects which walk the thread samples.
  const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;

  // The last block may be only partially filled, so guard against overrun.
  if (thread_idx < numel) {
    // n_cur: current node; e_cur: edge taken in the current step (-1 at dead ends).
    int64_t n_cur = start[thread_idx], e_cur, row_start, row_end, rnd;

    // Step 0 of every walk is its start node.
    n_out[thread_idx] = n_cur;

    for (int64_t l = 0; l < walk_length; l++) {
      row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1];
      // Dead end (no outgoing edges): stay at the current node, record edge -1.
      if (row_end - row_start == 0) {
        e_cur = -1;
      } else {
        // Scale the pre-generated uniform sample in [0, 1) by the node's
        // degree to pick an outgoing edge uniformly at random.
        rnd = int64_t(rand[l * numel + thread_idx] * (row_end - row_start));
        e_cur = row_start + rnd;
        n_cur = col[e_cur];
      }
      n_out[(l + 1) * numel + thread_idx] = n_cur;
      e_out[l * numel + thread_idx] = e_cur;
    }
  }
}
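
// Layout note: the kernel writes its outputs step-major, i.e. n_out is laid
// out as [walk_length + 1, num_walks] so that consecutive threads write
// consecutive addresses at every step. The host function below transposes the
// result so that each row of the returned tensor is one complete walk.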

// Host entry point: validates the inputs, pre-generates the uniform samples,
// launches the kernel on the current CUDA stream, and returns the walks as a
// [num_walks, walk_length + 1] tensor of node indices. The parameters p and q
// are accepted but not used by this uniform kernel.
torch::Tensor random_walk_cuda(torch::Tensor rowptr, torch::Tensor col,
                               torch::Tensor start, int64_t walk_length,
                               double p, double q) {
  CHECK_CUDA(rowptr);
  CHECK_CUDA(col);
  CHECK_CUDA(start);
  cudaSetDevice(rowptr.get_device());

  CHECK_INPUT(rowptr.dim() == 1);
  CHECK_INPUT(col.dim() == 1);
  CHECK_INPUT(start.dim() == 1);

  auto rand = torch::rand({start.size(0), walk_length},
                          start.options().dtype(torch::kFloat));
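  // One uniform sample per (walk, step), generated up front on the same device
  // and consumed by the kernel to pick each step's neighbor.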

  auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options());
  auto e_out = torch::empty({walk_length, start.size(0)}, start.options());
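  // n_out holds walk_length + 1 nodes per walk (including the start node);
  // e_out holds the edge index taken at each step. Only n_out is returned by
  // this version; e_out is filled by the kernel but discarded here.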

  auto stream = at::cuda::getCurrentCUDAStream();
  uniform_random_walk_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
      rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
      start.data_ptr<int64_t>(), rand.data_ptr<float>(),
      n_out.data_ptr<int64_t>(), e_out.data_ptr<int64_t>(), walk_length,
      start.numel());

  return n_out.t().contiguous();
}
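
// Usage sketch (illustrative only, not part of the original file): assuming
// this translation unit is built into a libtorch extension together with
// rw_cuda.h and utils.cuh, random_walk_cuda can be called directly from C++
// with int64 CSR tensors that all live on the same CUDA device, e.g.:
//
//   // Cycle graph 0 -> 1 -> 2 -> 0 in CSR form.
//   auto opts = torch::TensorOptions().dtype(torch::kLong).device(torch::kCUDA);
//   auto rowptr = torch::tensor({0, 1, 2, 3}, opts);
//   auto col = torch::tensor({1, 2, 0}, opts);
//   auto start = torch::tensor({0, 1, 2}, opts);
//   auto walks = random_walk_cuda(rowptr, col, start, /*walk_length=*/4,
//                                 /*p=*/1.0, /*q=*/1.0);
//   // walks has shape [3, 5]: each row is a start node followed by 4 steps.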