utils.cuh 1.56 KB
Newer Older
limm's avatar
limm committed
1
2
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
quyuanhao123's avatar
quyuanhao123 committed
3
4
#pragma once

limm's avatar
limm committed
5
#include "../extensions.h"
quyuanhao123's avatar
quyuanhao123 committed
6
7
8
9
10
11
12
13

// Asserts that tensor expression `x` lives on a CUDA device. The failure
// message is the stringified argument followed by " must be CUDA tensor".
// The argument is parenthesized so that an expression argument is evaluated
// as a whole before `.device()` is applied to it.
#define CHECK_CUDA(x)                                                          \
  AT_ASSERTM((x).device().is_cuda(), #x " must be CUDA tensor")
// Asserts that condition `x` holds; fails with "Input mismatch" otherwise.
#define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch")

// at::Half overload of __shfl_up_sync.
//
// Converts `var` to __half, forwards to __shfl_up_sync with the same `mask`
// and `delta`, and returns the result as at::Half.
//
//   mask  - participant mask, forwarded unchanged
//   var   - value to shuffle
//   delta - offset argument, forwarded unchanged
__device__ __inline__ at::Half __shfl_up_sync(const unsigned mask,
                                              const at::Half var,
                                              const unsigned int delta) {
  // The conversion operator is named explicitly; presumably this keeps the
  // call bound to the __half overload instead of this at::Half wrapper.
  return __shfl_up_sync(mask, var.operator __half(), delta);
}

// at::Half overload of __shfl_down_sync.
//
// Converts `var` to __half, forwards to __shfl_down_sync with the same `mask`
// and `delta`, and returns the result as at::Half.
//
//   mask  - participant mask, forwarded unchanged
//   var   - value to shuffle
//   delta - offset argument, forwarded unchanged
__device__ __inline__ at::Half __shfl_down_sync(const unsigned mask,
                                                const at::Half var,
                                                const unsigned int delta) {
  // The conversion operator is named explicitly; presumably this keeps the
  // call bound to the __half overload instead of this at::Half wrapper.
  return __shfl_down_sync(mask, var.operator __half(), delta);
}
limm's avatar
limm committed
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

// at::Half overload of the mask-less __shfl_up: the value is converted to
// __half, shuffled, and handed back as at::Half.
// NOTE(review): mask-less shuffles are legacy on CUDA (removed for Volta+);
// presumably this overload is only meant for the USE_ROCM path, where
// SHFL_UP_SYNC expands to __shfl_up -- confirm.
__device__ __inline__ at::Half __shfl_up(const at::Half var,
                                         const unsigned int delta) {
  const __half raw = var.operator __half();
  return __shfl_up(raw, delta);
}

// at::Half overload of the mask-less __shfl_down: the value is converted to
// __half, shuffled, and handed back as at::Half.
// NOTE(review): mask-less shuffles are legacy on CUDA (removed for Volta+);
// presumably this overload is only meant for the USE_ROCM path, where
// SHFL_DOWN_SYNC expands to __shfl_down -- confirm.
__device__ __inline__ at::Half __shfl_down(const at::Half var,
                                           const unsigned int delta) {
  const __half raw = var.operator __half();
  return __shfl_down(raw, delta);
}

#ifdef USE_ROCM
// at::Half overload of __ldg (compiled only when USE_ROCM is defined).
// Reinterprets the pointer as `const __half*`, loads through the __half
// overload of __ldg, and returns the loaded value as at::Half.
__device__ __inline__ at::Half __ldg(const at::Half* ptr) {
  const __half* raw_ptr = reinterpret_cast<const __half*>(ptr);
  return __ldg(raw_ptr);
}
// USE_ROCM branch: the mask argument is dropped and the call maps to the
// mask-less __shfl_up / __shfl_down.
#define SHFL_UP_SYNC(mask, var, delta) __shfl_up(var, delta)
#define SHFL_DOWN_SYNC(mask, var, delta) __shfl_down(var, delta)
#else
// Non-ROCm branch: plain aliases for the *_sync intrinsics, so every
// argument (including the mask) is passed through as written.
#define SHFL_UP_SYNC __shfl_up_sync
#define SHFL_DOWN_SYNC __shfl_down_sync
#endif