Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
24cf35e3
Commit
24cf35e3
authored
Jun 11, 2024
by
sangwzh
Browse files
update file name in extension after hipify
parent
ff295599
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
7 deletions
+7
-7
colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h
...ai/kernel/cuda_native/csrc/kernels/include/block_reduce.h
+1
-1
colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.h
colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.h
+1
-1
colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.h
...nel/cuda_native/csrc/scaled_upper_triang_masked_softmax.h
+1
-1
setup.py
setup.py
+4
-4
No files found.
colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h
View file @
24cf35e3
...
...
@@ -21,7 +21,7 @@ template <typename T>
__forceinline__
__device__
T
warpReduceSum
(
T
val
)
{
for
(
int
mask
=
(
WARP_REDUCE_SIZE
>>
1
);
mask
>
0
;
mask
>>=
1
)
#ifdef COLOSSAL_HIP
val
+=
__shfl_xor
_sync
(
val
,
mask
,
WARP_REDUCE_SIZE
);
val
+=
__shfl_xor
(
val
,
mask
,
WARP_REDUCE_SIZE
);
#else
val
+=
__shfl_xor_sync
(
WARP_REDUCE_MASK
,
val
,
mask
,
WARP_REDUCE_SIZE
);
#endif
...
...
colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.h
View file @
24cf35e3
...
...
@@ -57,7 +57,7 @@ struct Max {
template
<
typename
T
>
__device__
__forceinline__
T
WARP_SHFL_XOR_NATIVE
(
T
value
,
int
laneMask
,
int
width
=
warpSize
,
unsigned
int
mask
=
0xffffffff
)
{
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000
&&!defined(COLOSSAL_HIP)
return
__shfl_xor_sync
(
mask
,
value
,
laneMask
,
width
);
#else
return
__shfl_xor
(
value
,
laneMask
,
width
);
...
...
colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.h
View file @
24cf35e3
...
...
@@ -72,7 +72,7 @@ struct Max {
template
<
typename
T
>
__device__
__forceinline__
T
WARP_SHFL_XOR_NATIVE
(
T
value
,
int
laneMask
,
int
width
=
warpSize
,
unsigned
int
mask
=
0xffffffff
)
{
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000
&&!defined(COLOSSAL_HIP)
return
__shfl_xor_sync
(
mask
,
value
,
laneMask
,
width
);
#else
return
__shfl_xor
(
value
,
laneMask
,
width
);
...
...
setup.py
View file @
24cf35e3
...
...
@@ -247,20 +247,20 @@ if build_hip_ext:
ext_modules
.
append
(
cuda_ext_helper
(
'colossalai._C.scaled_upper_triang_masked_softmax'
,
[
'scaled_upper_triang_masked_softmax.cpp'
,
'scaled_upper_triang_masked_softmax_
hip
.hip'
],
[
'scaled_upper_triang_masked_softmax.cpp'
,
'scaled_upper_triang_masked_softmax_
cuda
.hip'
],
extra_cuda_flags
+
cc_flag
))
ext_modules
.
append
(
cuda_ext_helper
(
'colossalai._C.scaled_masked_softmax'
,
[
'scaled_masked_softmax.cpp'
,
'scaled_masked_softmax_
hip
.hip'
],
extra_cuda_flags
+
cc_flag
))
[
'scaled_masked_softmax.cpp'
,
'scaled_masked_softmax_
cuda
.hip'
],
extra_cuda_flags
+
cc_flag
))
ext_modules
.
append
(
cuda_ext_helper
(
'colossalai._C.moe'
,
[
'moe_
hip
.cpp'
,
'moe_hip_kernel.hip'
],
extra_cuda_flags
+
cc_flag
))
cuda_ext_helper
(
'colossalai._C.moe'
,
[
'moe_
cuda
.cpp'
,
'moe_hip_kernel.hip'
],
extra_cuda_flags
+
cc_flag
))
extra_cuda_flags
=
[]
ext_modules
.
append
(
cuda_ext_helper
(
'colossalai._C.layer_norm'
,
[
'layer_norm_
hip
.cpp'
,
'layer_norm_hip_kernel.hip'
],
cuda_ext_helper
(
'colossalai._C.layer_norm'
,
[
'layer_norm_
cuda
.cpp'
,
'layer_norm_hip_kernel.hip'
],
extra_cuda_flags
+
cc_flag
))
extra_cuda_flags
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment