Commit 24cf35e3 authored by sangwzh
Browse files

update file name in extension after hipify

parent ff295599
......@@ -21,7 +21,7 @@ template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) {
for (int mask = (WARP_REDUCE_SIZE >> 1); mask > 0; mask >>= 1)
#ifdef COLOSSAL_HIP
val += __shfl_xor_sync(val, mask, WARP_REDUCE_SIZE);
val += __shfl_xor(val, mask, WARP_REDUCE_SIZE);
#else
val += __shfl_xor_sync(WARP_REDUCE_MASK, val, mask, WARP_REDUCE_SIZE);
#endif
......
......@@ -57,7 +57,7 @@ struct Max {
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR_NATIVE(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff)
{
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000&&!defined(COLOSSAL_HIP)
return __shfl_xor_sync(mask, value, laneMask, width);
#else
return __shfl_xor(value, laneMask, width);
......
......@@ -72,7 +72,7 @@ struct Max {
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR_NATIVE(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff)
{
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000&&!defined(COLOSSAL_HIP)
return __shfl_xor_sync(mask, value, laneMask, width);
#else
return __shfl_xor(value, laneMask, width);
......
......@@ -247,20 +247,20 @@ if build_hip_ext:
ext_modules.append(
cuda_ext_helper('colossalai._C.scaled_upper_triang_masked_softmax',
['scaled_upper_triang_masked_softmax.cpp', 'scaled_upper_triang_masked_softmax_hip.hip'],
['scaled_upper_triang_masked_softmax.cpp', 'scaled_upper_triang_masked_softmax_cuda.hip'],
extra_cuda_flags + cc_flag))
ext_modules.append(
cuda_ext_helper('colossalai._C.scaled_masked_softmax',
['scaled_masked_softmax.cpp', 'scaled_masked_softmax_hip.hip'], extra_cuda_flags + cc_flag))
['scaled_masked_softmax.cpp', 'scaled_masked_softmax_cuda.hip'], extra_cuda_flags + cc_flag))
ext_modules.append(
cuda_ext_helper('colossalai._C.moe', ['moe_hip.cpp', 'moe_hip_kernel.hip'], extra_cuda_flags + cc_flag))
cuda_ext_helper('colossalai._C.moe', ['moe_cuda.cpp', 'moe_hip_kernel.hip'], extra_cuda_flags + cc_flag))
extra_cuda_flags = []
ext_modules.append(
cuda_ext_helper('colossalai._C.layer_norm', ['layer_norm_hip.cpp', 'layer_norm_hip_kernel.hip'],
cuda_ext_helper('colossalai._C.layer_norm', ['layer_norm_cuda.cpp', 'layer_norm_hip_kernel.hip'],
extra_cuda_flags + cc_flag))
extra_cuda_flags = [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment