Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
e60c5aea
Unverified
Commit
e60c5aea
authored
Feb 15, 2024
by
Illia Silin
Committed by
GitHub
Feb 15, 2024
Browse files
Merge pull request #36 from ROCm/lwpck-1299
Initial MI350 enablement.
parents
29dcb956
63df00cd
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
27 additions
and
28 deletions
+27
-28
example/52_im2col_col2im/CMakeLists.txt
example/52_im2col_col2im/CMakeLists.txt
+1
-1
example/60_gemm_multi_ABD/CMakeLists.txt
example/60_gemm_multi_ABD/CMakeLists.txt
+1
-1
example/61_contraction_multi_ABD/CMakeLists.txt
example/61_contraction_multi_ABD/CMakeLists.txt
+1
-1
example/62_conv_fwd_activ/CMakeLists.txt
example/62_conv_fwd_activ/CMakeLists.txt
+1
-1
include/ck/ck.hpp
include/ck/ck.hpp
+1
-1
include/ck/host_utility/device_prop.hpp
include/ck/host_utility/device_prop.hpp
+3
-2
include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp
...evice/impl/device_contraction_multiple_d_xdl_cshuffle.hpp
+1
-3
include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp
..._operation/gpu/device/impl/device_elementwise_3d_impl.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp
.../gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp
+1
-1
include/ck/utility/amd_xdlops.hpp
include/ck/utility/amd_xdlops.hpp
+1
-1
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+1
-1
include/ck/utility/type_convert.hpp
include/ck/utility/type_convert.hpp
+1
-1
test/batched_gemm/CMakeLists.txt
test/batched_gemm/CMakeLists.txt
+2
-2
test/batched_gemm_gemm/CMakeLists.txt
test/batched_gemm_gemm/CMakeLists.txt
+2
-2
test/batched_gemm_reduce/CMakeLists.txt
test/batched_gemm_reduce/CMakeLists.txt
+1
-1
test/batched_gemm_softmax_gemm/CMakeLists.txt
test/batched_gemm_softmax_gemm/CMakeLists.txt
+2
-2
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
+2
-2
test/contraction/CMakeLists.txt
test/contraction/CMakeLists.txt
+1
-1
test/convnd_bwd_data/CMakeLists.txt
test/convnd_bwd_data/CMakeLists.txt
+2
-2
test/convnd_fwd/CMakeLists.txt
test/convnd_fwd/CMakeLists.txt
+1
-1
No files found.
example/52_im2col_col2im/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/60_gemm_multi_ABD/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list2 AND target EQUAL 0
)
...
...
example/61_contraction_multi_ABD/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list2 AND target EQUAL 0
)
...
...
example/62_conv_fwd_activ/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
include/ck/ck.hpp
View file @
e60c5aea
...
...
@@ -45,7 +45,7 @@
#endif
// define general macros for various architectures
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
|| defined(__gfx950__)
#define __gfx94__
#endif
#if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__)
...
...
include/ck/host_utility/device_prop.hpp
View file @
e60c5aea
...
...
@@ -55,14 +55,15 @@ inline bool is_xdl_supported()
{
return
ck
::
get_device_name
()
==
"gfx908"
||
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
inline
bool
is_lds_direct_load_supported
()
{
// Check if direct loads from global memory to LDS are supported.
return
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
inline
bool
is_navi1_supported
()
...
...
include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp
View file @
e60c5aea
...
...
@@ -602,9 +602,7 @@ struct DeviceContractionMultipleD_Xdl_CShuffle
return
false
;
}
if
(
ck
::
get_device_name
()
!=
"gfx90a"
&&
ck
::
get_device_name
()
!=
"gfx940"
&&
ck
::
get_device_name
()
!=
"gfx941"
&&
ck
::
get_device_name
()
!=
"gfx942"
&&
std
::
is_same
<
ADataType
,
double
>::
value
)
if
(
!
ck
::
is_lds_direct_load_supported
()
&&
std
::
is_same
<
ADataType
,
double
>::
value
)
{
return
false
;
}
...
...
include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp
View file @
e60c5aea
...
...
@@ -294,7 +294,7 @@ struct DeviceElementwise3dImpl : public DeviceElementwise<InDataTypeTuple,
bool
IsSupportedArgument
(
const
BaseArgument
*
p_arg
)
override
{
if
((
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
))
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
))
{
return
false
;
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp
View file @
e60c5aea
...
...
@@ -39,7 +39,7 @@ __global__ void
const
CElementwiseOperation
c_element_op
)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
defined(__gfx94
0__) || defined(__gfx941__) || defined(__gfx942
__))
defined(__gfx94__))
constexpr
index_t
shared_size
=
GridwiseGemm
::
GetSharedMemoryNumberOfByte
();
__shared__
uint8_t
p_shared
[
shared_size
];
...
...
include/ck/utility/amd_xdlops.hpp
View file @
e60c5aea
...
...
@@ -5,7 +5,7 @@
namespace
ck
{
// Define the common macro for MI300 models
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
|| defined(__gfx950__)
#define __gfx94__
#endif
...
...
include/ck/utility/data_type.hpp
View file @
e60c5aea
...
...
@@ -189,7 +189,7 @@ struct vector_type<T, 1>
}
};
int
static
err
=
0
;
__device__
int
static
err
=
0
;
template
<
typename
T
>
struct
vector_type
<
T
,
2
>
{
...
...
include/ck/utility/type_convert.hpp
View file @
e60c5aea
...
...
@@ -9,7 +9,7 @@
namespace
ck
{
// Define the common macro for MI300 models
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
|| defined(__gfx950__)
#define __gfx94__
#endif
...
...
test/batched_gemm/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
@@ -6,4 +6,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
target_link_libraries
(
test_batched_gemm PRIVATE utility device_batched_gemm_instance
)
set
(
target 1
)
endif
()
endforeach
()
\ No newline at end of file
endforeach
()
test/batched_gemm_gemm/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
@@ -10,4 +10,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
set
(
target 1
)
endif
()
endif
()
endforeach
()
\ No newline at end of file
endforeach
()
test/batched_gemm_reduce/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/batched_gemm_softmax_gemm/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
@@ -10,4 +10,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
set
(
target 1
)
endif
()
endif
()
endforeach
()
\ No newline at end of file
endforeach
()
test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
@@ -26,4 +26,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
endif
()
set
(
target 1
)
endif
()
endforeach
()
\ No newline at end of file
endforeach
()
test/contraction/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
test/convnd_bwd_data/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
@@ -6,4 +6,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
target_link_libraries
(
test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance
)
set
(
target 1
)
endif
()
endforeach
()
\ No newline at end of file
endforeach
()
test/convnd_fwd/CMakeLists.txt
View file @
e60c5aea
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment