Commit 24cf35e3 authored by sangwzh
Browse files

Update file names in extension after hipify

parent ff295599
...@@ -21,7 +21,7 @@ template <typename T> ...@@ -21,7 +21,7 @@ template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) { __forceinline__ __device__ T warpReduceSum(T val) {
for (int mask = (WARP_REDUCE_SIZE >> 1); mask > 0; mask >>= 1) for (int mask = (WARP_REDUCE_SIZE >> 1); mask > 0; mask >>= 1)
#ifdef COLOSSAL_HIP #ifdef COLOSSAL_HIP
val += __shfl_xor_sync(val, mask, WARP_REDUCE_SIZE); val += __shfl_xor(val, mask, WARP_REDUCE_SIZE);
#else #else
val += __shfl_xor_sync(WARP_REDUCE_MASK, val, mask, WARP_REDUCE_SIZE); val += __shfl_xor_sync(WARP_REDUCE_MASK, val, mask, WARP_REDUCE_SIZE);
#endif #endif
......
...@@ -57,7 +57,7 @@ struct Max { ...@@ -57,7 +57,7 @@ struct Max {
template <typename T> template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR_NATIVE(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff) __device__ __forceinline__ T WARP_SHFL_XOR_NATIVE(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff)
{ {
#if CUDA_VERSION >= 9000 #if CUDA_VERSION >= 9000&&!defined(COLOSSAL_HIP)
return __shfl_xor_sync(mask, value, laneMask, width); return __shfl_xor_sync(mask, value, laneMask, width);
#else #else
return __shfl_xor(value, laneMask, width); return __shfl_xor(value, laneMask, width);
......
...@@ -72,7 +72,7 @@ struct Max { ...@@ -72,7 +72,7 @@ struct Max {
template <typename T> template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR_NATIVE(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff) __device__ __forceinline__ T WARP_SHFL_XOR_NATIVE(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff)
{ {
#if CUDA_VERSION >= 9000 #if CUDA_VERSION >= 9000&&!defined(COLOSSAL_HIP)
return __shfl_xor_sync(mask, value, laneMask, width); return __shfl_xor_sync(mask, value, laneMask, width);
#else #else
return __shfl_xor(value, laneMask, width); return __shfl_xor(value, laneMask, width);
......
...@@ -247,20 +247,20 @@ if build_hip_ext: ...@@ -247,20 +247,20 @@ if build_hip_ext:
ext_modules.append( ext_modules.append(
cuda_ext_helper('colossalai._C.scaled_upper_triang_masked_softmax', cuda_ext_helper('colossalai._C.scaled_upper_triang_masked_softmax',
['scaled_upper_triang_masked_softmax.cpp', 'scaled_upper_triang_masked_softmax_hip.hip'], ['scaled_upper_triang_masked_softmax.cpp', 'scaled_upper_triang_masked_softmax_cuda.hip'],
extra_cuda_flags + cc_flag)) extra_cuda_flags + cc_flag))
ext_modules.append( ext_modules.append(
cuda_ext_helper('colossalai._C.scaled_masked_softmax', cuda_ext_helper('colossalai._C.scaled_masked_softmax',
['scaled_masked_softmax.cpp', 'scaled_masked_softmax_hip.hip'], extra_cuda_flags + cc_flag)) ['scaled_masked_softmax.cpp', 'scaled_masked_softmax_cuda.hip'], extra_cuda_flags + cc_flag))
ext_modules.append( ext_modules.append(
cuda_ext_helper('colossalai._C.moe', ['moe_hip.cpp', 'moe_hip_kernel.hip'], extra_cuda_flags + cc_flag)) cuda_ext_helper('colossalai._C.moe', ['moe_cuda.cpp', 'moe_hip_kernel.hip'], extra_cuda_flags + cc_flag))
extra_cuda_flags = [] extra_cuda_flags = []
ext_modules.append( ext_modules.append(
cuda_ext_helper('colossalai._C.layer_norm', ['layer_norm_hip.cpp', 'layer_norm_hip_kernel.hip'], cuda_ext_helper('colossalai._C.layer_norm', ['layer_norm_cuda.cpp', 'layer_norm_hip_kernel.hip'],
extra_cuda_flags + cc_flag)) extra_cuda_flags + cc_flag))
extra_cuda_flags = [ extra_cuda_flags = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment