utils.h 1.92 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
/**
 *  Copyright (c) 2023 by Contributors
 *
 * @file utils.h
 * @brief CUDA utilities.
 */

#ifndef GRAPHBOLT_CUDA_UTILS_H_
#define GRAPHBOLT_CUDA_UTILS_H_

// The cache line size of GPU.
12
constexpr int GPU_CACHE_LINE_SIZE = 128;
13
// The max number of threads per block.
14
15
16
17
constexpr int CUDA_MAX_NUM_THREADS = 1024;

namespace graphbolt {
namespace cuda {
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

/**
 * @brief Calculate the number of threads needed given the size of the dimension
 * to be processed.
 *
 * It finds the largest power of two that is less than or equal to the minimum
 * of size and CUDA_MAX_NUM_THREADS.
 */
inline int FindNumThreads(int size) {
  int ret = 1;
  while ((ret << 1) <= std::min(size, CUDA_MAX_NUM_THREADS)) {
    ret <<= 1;
  }
  return ret;
}

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/**
 * @brief Calculate the smallest number of bits needed to represent a given
 * range of integers [0, range).
 */
template <typename T>
int NumberOfBits(const T& range) {
  if (range <= 1) {
    // ranges of 0 or 1 require no bits to store
    return 0;
  }

  int bits = 1;
  const auto urange = static_cast<std::make_unsigned_t<T>>(range);
  while (bits < static_cast<int>(sizeof(T) * 8) && (1ull << bits) < urange) {
    ++bits;
  }

  return bits;
}

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/**
 * @brief Given a sorted array and a value this function returns the index
 * of the first element which compares greater than value.
 *
 * This function assumes 0-based index
 * @param A: ascending sorted array
 * @param n: size of the A
 * @param x: value to search in A
 * @return index, i, of the first element st. A[i]>x. If x>=A[n-1] returns n.
 * if x<A[0] then it returns 0.
 */
template <typename indptr_t, typename indices_t>
__device__ indices_t UpperBound(const indptr_t* A, indices_t n, indptr_t x) {
  indices_t l = 0, r = n;
  while (l < r) {
    const auto m = l + (r - l) / 2;
    if (x >= A[m]) {
      l = m + 1;
    } else {
      r = m;
    }
  }
  return l;
}

79
80
81
82
}  // namespace cuda
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_UTILS_H_