Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
d66da6be
Commit
d66da6be
authored
Feb 14, 2024
by
illsilin
Browse files
initial enablement of gfx950
parent
29dcb956
Changes
45
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
27 additions
and
28 deletions
+27
-28
example/52_im2col_col2im/CMakeLists.txt
example/52_im2col_col2im/CMakeLists.txt
+1
-1
example/60_gemm_multi_ABD/CMakeLists.txt
example/60_gemm_multi_ABD/CMakeLists.txt
+1
-1
example/61_contraction_multi_ABD/CMakeLists.txt
example/61_contraction_multi_ABD/CMakeLists.txt
+1
-1
example/62_conv_fwd_activ/CMakeLists.txt
example/62_conv_fwd_activ/CMakeLists.txt
+1
-1
include/ck/ck.hpp
include/ck/ck.hpp
+1
-1
include/ck/host_utility/device_prop.hpp
include/ck/host_utility/device_prop.hpp
+3
-2
include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp
...evice/impl/device_contraction_multiple_d_xdl_cshuffle.hpp
+1
-3
include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp
..._operation/gpu/device/impl/device_elementwise_3d_impl.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp
.../gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp
+1
-1
include/ck/utility/amd_xdlops.hpp
include/ck/utility/amd_xdlops.hpp
+1
-1
include/ck/utility/type_convert.hpp
include/ck/utility/type_convert.hpp
+1
-1
test/batched_gemm/CMakeLists.txt
test/batched_gemm/CMakeLists.txt
+2
-2
test/batched_gemm_gemm/CMakeLists.txt
test/batched_gemm_gemm/CMakeLists.txt
+2
-2
test/batched_gemm_reduce/CMakeLists.txt
test/batched_gemm_reduce/CMakeLists.txt
+1
-1
test/batched_gemm_softmax_gemm/CMakeLists.txt
test/batched_gemm_softmax_gemm/CMakeLists.txt
+2
-2
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
+2
-2
test/contraction/CMakeLists.txt
test/contraction/CMakeLists.txt
+1
-1
test/convnd_bwd_data/CMakeLists.txt
test/convnd_bwd_data/CMakeLists.txt
+2
-2
test/convnd_fwd/CMakeLists.txt
test/convnd_fwd/CMakeLists.txt
+1
-1
test/gemm_layernorm/CMakeLists.txt
test/gemm_layernorm/CMakeLists.txt
+1
-1
No files found.
example/52_im2col_col2im/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/60_gemm_multi_ABD/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list2 AND target EQUAL 0
)
...
...
example/61_contraction_multi_ABD/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list2 AND target EQUAL 0
)
...
...
example/62_conv_fwd_activ/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
include/ck/ck.hpp
View file @
d66da6be
...
...
@@ -45,7 +45,7 @@
#endif
// define general macros for various architectures
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__)
#define __gfx94__
#endif
#if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__)
...
...
include/ck/host_utility/device_prop.hpp
View file @
d66da6be
...
...
@@ -55,14 +55,15 @@ inline bool is_xdl_supported()
{
return
ck
::
get_device_name
()
==
"gfx908"
||
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
inline
bool
is_lds_direct_load_supported
()
{
// Check if direct loads from global memory to LDS are supported.
return
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
inline
bool
is_navi1_supported
()
...
...
include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp
View file @
d66da6be
...
...
@@ -602,9 +602,7 @@ struct DeviceContractionMultipleD_Xdl_CShuffle
return
false
;
}
if
(
ck
::
get_device_name
()
!=
"gfx90a"
&&
ck
::
get_device_name
()
!=
"gfx940"
&&
ck
::
get_device_name
()
!=
"gfx941"
&&
ck
::
get_device_name
()
!=
"gfx942"
&&
std
::
is_same
<
ADataType
,
double
>::
value
)
if
(
!
ck
::
is_lds_direct_load_supported
()
&&
std
::
is_same
<
ADataType
,
double
>::
value
)
{
return
false
;
}
...
...
include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp
View file @
d66da6be
...
...
@@ -294,7 +294,7 @@ struct DeviceElementwise3dImpl : public DeviceElementwise<InDataTypeTuple,
bool
IsSupportedArgument
(
const
BaseArgument
*
p_arg
)
override
{
if
((
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
))
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
))
{
return
false
;
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp
View file @
d66da6be
...
...
@@ -39,7 +39,7 @@ __global__ void
const
CElementwiseOperation
c_element_op
)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
defined(__gfx94__))
constexpr
index_t
shared_size
=
GridwiseGemm
::
GetSharedMemoryNumberOfByte
();
__shared__
uint8_t
p_shared
[
shared_size
];
...
...
include/ck/utility/amd_xdlops.hpp
View file @
d66da6be
...
...
@@ -5,7 +5,7 @@
namespace
ck
{
// Define the common macro for MI300 models
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__)
#define __gfx94__
#endif
...
...
include/ck/utility/type_convert.hpp
View file @
d66da6be
...
...
@@ -9,7 +9,7 @@
namespace
ck
{
// Define the common macro for MI300 models
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__)
#define __gfx94__
#endif
...
...
test/batched_gemm/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/batched_gemm_gemm/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/batched_gemm_reduce/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/batched_gemm_softmax_gemm/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/contraction/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/convnd_bwd_data/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/convnd_fwd/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/gemm_layernorm/CMakeLists.txt
View file @
d66da6be
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment