rocm_bf16.patch 546 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
--- amd_hip_bf16.h	2024-02-06 18:28:58.268699142 +0000
+++ amd_hip_bf16.h.new	2024-02-06 18:28:31.988647133 +0000
@@ -90,10 +90,10 @@
 #include "math_fwd.h"              // ocml device functions
 
 #if defined(__HIPCC_RTC__)
-#define __HOST_DEVICE__ __device__
+#define __HOST_DEVICE__ __device__ static
 #else
 #include <climits>
-#define __HOST_DEVICE__ __host__ __device__
+#define __HOST_DEVICE__ __host__ __device__ static inline
 #endif
 
 // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on