"megatron/vscode:/vscode.git/clone" did not exist on "31d39ec0f02783f2f036019a2ea9a1b7c548c7b0"
cuda_helpers.h 475 Bytes
Newer Older
1
2
#pragma once

3
4
5
namespace vision {
namespace ops {

6
7
// Grid-stride loop header over the 1-D index range [0, n).
//
// Expands to a `for` statement header; the statement or braced block that
// follows the macro invocation becomes the loop body. Each thread starts at
// its flat global index (blockIdx.x * blockDim.x + threadIdx.x) and advances
// by the total number of launched threads (blockDim.x * gridDim.x), so the
// whole range is visited for any 1-D launch configuration.
//
//   i       - name of the loop variable this macro declares
//   n       - exclusive upper bound of the range
//   index_t - integer type of the loop variable
//
// The first operand of each product is converted to index_t before the
// multiplication. The built-in index fields are 32-bit unsigned, so without
// the conversion a 64-bit index_t would receive a product that had already
// wrapped modulo 2^32. With index_t = int the arithmetic is unchanged
// (int * unsigned is still evaluated as unsigned).
#define CUDA_1D_KERNEL_LOOP_T(i, n, index_t)                       \
  for (index_t i = static_cast<index_t>(blockIdx.x) * blockDim.x + \
           threadIdx.x;                                            \
       i < (n); i += static_cast<index_t>(blockDim.x) * gridDim.x)
9

10
11
// Shorthand for CUDA_1D_KERNEL_LOOP_T with the loop variable's type fixed to
// `int`: `i` names the loop variable and `n` is the bound, both forwarded
// unchanged.
#define CUDA_1D_KERNEL_LOOP(i, n) \
  CUDA_1D_KERNEL_LOOP_T(i, n, int)

12
// Returns n / m rounded up to the next integer, e.g. the number of size-m
// chunks needed to cover n items.
//
//   n - dividend
//   m - divisor; must be positive (m == 0 divides by zero)
//
// Computed as the truncated quotient plus a remainder test instead of
// (n + m - 1) / m. That sum overflows (wrapping for unsigned types,
// undefined for signed ones) whenever n is within m - 1 of the type's
// maximum. For non-negative n both forms agree wherever the sum fits; for
// negative n this form yields the mathematical ceiling.
//
// Kept as a single return expression so the function remains a valid
// C++11 constexpr.
template <typename integer>
constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) {
  return n / m + (n % m > 0 ? 1 : 0);
}
16
17
18

} // namespace ops
} // namespace vision