"src/vscode:/vscode.git/clone" did not exist on "f20c8f5a1aba27f5972cad50516f18ba516e4d9e"
fp16.cuh 4.52 KB
Newer Older
sangwzh's avatar
sangwzh committed
1
// !!! This is a file automatically generated by hipify!!!
2
/**
3
4
5
6
7
8
9
10
11
12
13
14
15
16
 *  Copyright (c) 2020-2022 by Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
17
18
19
 * @file array/cuda/fp16.cuh
 * @brief float16 related functions.
 * @note this file is modified from TVM project:
20
21
 *       https://github.com/apache/tvm/blob/e561007f0c330e3d14c2bc8a3ef40fb741db9004/src/target/source/literal/cuda_half_t.h.
 */
22
23
#ifndef DGL_ARRAY_CUDA_FP16_CUH_
#define DGL_ARRAY_CUDA_FP16_CUH_
24

sangwzh's avatar
sangwzh committed
25
#include <hip/hip_fp16.h>
26

27
#include <algorithm>
28

29
static __device__ __forceinline__ half max(half a, half b) {
sangwzh's avatar
sangwzh committed
30
#if defined(__HIP_DEVICE_COMPILE__) 
31
32
  return __hgt(__half(a), __half(b)) ? a : b;
#else
33
  return __half(max(float(a), float(b)));  // NOLINT
34
35
36
#endif
}

37
static __device__ __forceinline__ half min(half a, half b) {
sangwzh's avatar
sangwzh committed
38
#if defined(__HIP_DEVICE_COMPILE__) 
39
40
  return __hlt(__half(a), __half(b)) ? a : b;
#else
41
  return __half(min(float(a), float(b)));  // NOLINT
42
43
#endif
}
sangwzh's avatar
sangwzh committed
44
45
#if 0
#ifdef __HIPCC__
46
// Arithmetic FP16 operations for architecture >= 5.3 are already defined in
sangwzh's avatar
sangwzh committed
47
48
// hip/hip_fp16.h
#if defined(__HIP_DEVICE_COMPILE__) 
49
// CUDA 12.2 adds "emulated" support for older architectures.
sangwzh's avatar
sangwzh committed
50
#if defined(DTKRT_VERSION) && (DTKRT_VERSION < 12020)
51
52
__device__ __forceinline__ __half
operator+(const __half& lh, const __half& rh) {
53
54
  return __half(float(lh) + float(rh));  // NOLINT
}
55
56
__device__ __forceinline__ __half
operator-(const __half& lh, const __half& rh) {
57
58
  return __half(float(lh) - float(rh));  // NOLINT
}
59
60
__device__ __forceinline__ __half
operator*(const __half& lh, const __half& rh) {
61
62
  return __half(float(lh) * float(rh));  // NOLINT
}
63
64
__device__ __forceinline__ __half
operator/(const __half& lh, const __half& rh) {
65
66
  return __half(float(lh) / float(rh));  // NOLINT
}
67

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
__device__ __forceinline__ __half& operator+=(
    __half& lh, const __half& rh) {    // NOLINT
  lh = __half(float(lh) + float(rh));  // NOLINT
  return lh;
}
__device__ __forceinline__ __half& operator-=(
    __half& lh, const __half& rh) {    // NOLINT
  lh = __half(float(lh) - float(rh));  // NOLINT
  return lh;
}
__device__ __forceinline__ __half& operator*=(
    __half& lh, const __half& rh) {    // NOLINT
  lh = __half(float(lh) * float(rh));  // NOLINT
  return lh;
}
__device__ __forceinline__ __half& operator/=(
    __half& lh, const __half& rh) {    // NOLINT
  lh = __half(float(lh) / float(rh));  // NOLINT
  return lh;
87
}
88

89
__device__ __forceinline__ __half& operator++(__half& h) {  // NOLINT
90
91
  h = __half(float(h) + 1.0f);                              // NOLINT
  return h;
92
93
}
__device__ __forceinline__ __half& operator--(__half& h) {  // NOLINT
94
95
  h = __half(float(h) - 1.0f);                              // NOLINT
  return h;
96
}
97
98
99
100
__device__ __forceinline__ __half operator++(__half& h, int) {  // NOLINT
  __half ret = h;
  h = __half(float(h) + 1.0f);  // NOLINT
  return ret;
101
}
102
103
104
105
__device__ __forceinline__ __half operator--(__half& h, int) {  // NOLINT
  __half ret = h;
  h = __half(float(h) - 1.0f);  // NOLINT
  return ret;
106
}
107
108

__device__ __forceinline__ __half operator+(const __half& h) { return h; }
109
110
111
__device__ __forceinline__ __half operator-(const __half& h) {
  return __half(-float(h));  // NOLINT
}
112

113
114
115
116
117
118
__device__ __forceinline__ bool operator==(const __half& lh, const __half& rh) {
  return float(lh) == float(rh);  // NOLINT
}
__device__ __forceinline__ bool operator!=(const __half& lh, const __half& rh) {
  return float(lh) != float(rh);  // NOLINT
}
119
120
__device__ __forceinline__ bool operator>(const __half& lh, const __half& rh) {
  return float(lh) > float(rh);  // NOLINT
121
}
122
123
__device__ __forceinline__ bool operator<(const __half& lh, const __half& rh) {
  return float(lh) < float(rh);  // NOLINT
124
125
126
127
128
129
130
}
__device__ __forceinline__ bool operator>=(const __half& lh, const __half& rh) {
  return float(lh) >= float(rh);  // NOLINT
}
__device__ __forceinline__ bool operator<=(const __half& lh, const __half& rh) {
  return float(lh) <= float(rh);  // NOLINT
}
sangwzh's avatar
sangwzh committed
131
132
133
134
#endif  // defined(DTKRT_VERSION) && (DTKRT_VERSION < 12020)
#endif  // defined(__HIP_DEVICE_COMPILE__) 
#endif  // __HIPCC__
#endif
135
#endif  // DGL_ARRAY_CUDA_FP16_CUH_