cuda_helpers.h 399 Bytes
Newer Older
1
2
#pragma once

3
4
5
namespace vision {
namespace ops {

6
7
8
// Expands to the header of a 1D grid-stride loop. It declares `i` with type
// `index_t`, starts it at this thread's flat x index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances it by the number of
// threads launched along x (blockDim.x * gridDim.x) for as long as `i < (n)`.
// Follow the macro with the loop body (a single statement or a braced block).
//
// The operands are cast to `index_t` before being multiplied, so with a
// 64-bit `index_t` (e.g. int64_t) neither the start index nor the stride is
// truncated to 32 bits. Use a 64-bit index whenever `n` can approach or exceed
// INT_MAX: with a 32-bit index the increment can wrap around instead of
// reaching `n`.
#define CUDA_1D_KERNEL_LOOP_T(i, n, index_t)                        \
  for (index_t i = static_cast<index_t>(blockIdx.x) * blockDim.x + \
           threadIdx.x;                                             \
       i < (n);                                                     \
       i += static_cast<index_t>(blockDim.x) * gridDim.x)

// Backward-compatible form of CUDA_1D_KERNEL_LOOP_T with an `int` loop index.
#define CUDA_1D_KERNEL_LOOP(i, n) CUDA_1D_KERNEL_LOOP_T(i, n, int)
9
10

// Integer ceiling division: returns the smallest integer that is not less
// than the exact quotient n / m.
//
// Requires m > 0 (m == 0 divides by zero). The result is derived from the
// truncated quotient and the remainder rather than from (n + m - 1) / m, so
// it does not overflow when n is close to the maximum value of `integer`.
template <typename integer>
constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) {
  // Truncating division already equals the ceiling unless a positive
  // remainder was discarded; only in that case round up by one.
  return n / m + static_cast<integer>(n % m > 0);
}
14
15
16

} // namespace ops
} // namespace vision