"vllm/vscode:/vscode.git/clone" did not exist on "ab9e8488d57687d1144e7b4802d195f35882ecdc"
Unverified commit dbaa02d0, authored by Przemyslaw Tredak, committed by GitHub
Browse files

Fix the sm120 compilation with CUDA 12 (#2482)


Signed-off-by: Przemek Tredak <ptredak@nvidia.com>
parent e05f87e1
...@@ -867,10 +867,10 @@ __device__ __forceinline__ void fma_f32_bf16(float &out, uint16_t const &a, uint ...@@ -867,10 +867,10 @@ __device__ __forceinline__ void fma_f32_bf16(float &out, uint16_t const &a, uint
} }
__device__ __forceinline__ void reduce_sync_max_abs_f32(float &out, float const &in) { __device__ __forceinline__ void reduce_sync_max_abs_f32(float &out, float const &in) {
#if ((__CUDA_ARCH_HAS_FEATURE__(SM100_ALL)) || (__CUDA_ARCH_HAS_FEATURE__(SM101_ALL)) || \ constexpr bool is_sm_100f = NVTE_CUDA_ARCH_MATCHES(ptx::FamilySpecific<100>);
(__CUDA_ARCH_HAS_FEATURE__(SM120_ALL))) if constexpr (is_sm_100f) {
asm volatile("redux.sync.max.abs.f32 %0, %1, 0xFFFFFFFF;" : "=f"(out) : "f"(in)); asm volatile("redux.sync.max.abs.f32 %0, %1, 0xFFFFFFFF;" : "=f"(out) : "f"(in));
#else } else {
asm volatile( asm volatile(
"{\n\t" "{\n\t"
".reg.b32 val;\n" ".reg.b32 val;\n"
...@@ -879,7 +879,7 @@ __device__ __forceinline__ void reduce_sync_max_abs_f32(float &out, float const ...@@ -879,7 +879,7 @@ __device__ __forceinline__ void reduce_sync_max_abs_f32(float &out, float const
"}\n\t" "}\n\t"
: "=r"(reinterpret_cast<uint32_t &>(out)) : "=r"(reinterpret_cast<uint32_t &>(out))
: "f"(in)); : "f"(in));
#endif }
} }
__device__ __forceinline__ bf16 get_amax(bf16 a, bf16 b) { __device__ __forceinline__ bf16 get_amax(bf16 a, bf16 b) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment