Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
3b854040
You need to sign in or sign up before continuing.
Commit
3b854040
authored
Dec 11, 2024
by
illsilin
Browse files
merge from public repo
parent
ef5e60f6
Changes
57
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
27 additions
and
29 deletions
+27
-29
CMakeLists.txt
CMakeLists.txt
+3
-3
client_example/CMakeLists.txt
client_example/CMakeLists.txt
+1
-1
example/01_gemm/CMakeLists.txt
example/01_gemm/CMakeLists.txt
+1
-1
example/04_gemm_add_add_fastgelu/CMakeLists.txt
example/04_gemm_add_add_fastgelu/CMakeLists.txt
+1
-1
example/18_batched_gemm_reduce/CMakeLists.txt
example/18_batched_gemm_reduce/CMakeLists.txt
+1
-1
example/62_convnd_activ/binary/CMakeLists.txt
example/62_convnd_activ/binary/CMakeLists.txt
+1
-1
example/62_convnd_activ/convinvscale/CMakeLists.txt
example/62_convnd_activ/convinvscale/CMakeLists.txt
+1
-1
example/62_convnd_activ/convscale/CMakeLists.txt
example/62_convnd_activ/convscale/CMakeLists.txt
+1
-1
example/62_convnd_activ/convscale_add/CMakeLists.txt
example/62_convnd_activ/convscale_add/CMakeLists.txt
+1
-1
example/62_convnd_activ/convscale_reduce/CMakeLists.txt
example/62_convnd_activ/convscale_reduce/CMakeLists.txt
+1
-1
example/62_convnd_activ/convscale_relu/CMakeLists.txt
example/62_convnd_activ/convscale_relu/CMakeLists.txt
+1
-1
example/62_convnd_activ/dynamic_unary/CMakeLists.txt
example/62_convnd_activ/dynamic_unary/CMakeLists.txt
+1
-1
example/62_convnd_activ/multi_AB/CMakeLists.txt
example/62_convnd_activ/multi_AB/CMakeLists.txt
+1
-1
example/62_convnd_activ/unary/CMakeLists.txt
example/62_convnd_activ/unary/CMakeLists.txt
+1
-1
example/CMakeLists.txt
example/CMakeLists.txt
+2
-2
include/ck/ck.hpp
include/ck/ck.hpp
+2
-2
include/ck/host_utility/device_prop.hpp
include/ck/host_utility/device_prop.hpp
+4
-3
include/ck/tensor_operation/gpu/device/impl/codegen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
...gen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
+1
-2
include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp
...pl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp
+1
-2
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp
...ion/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp
+1
-2
No files found.
CMakeLists.txt
View file @
3b854040
...
@@ -152,9 +152,9 @@ message("checking which targets are supported")
...
@@ -152,9 +152,9 @@ message("checking which targets are supported")
if
(
NOT ENABLE_ASAN_PACKAGING
)
if
(
NOT ENABLE_ASAN_PACKAGING
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
LESS 600300000
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
LESS 600300000
)
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
set
(
CK_GPU_TARGETS
"gfx90
8;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102
"
)
set
(
CK_GPU_TARGETS
"gfx9
5
0"
)
else
()
else
()
set
(
CK_GPU_TARGETS
"gfx90
8;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
"
)
set
(
CK_GPU_TARGETS
"gfx9
5
0"
)
endif
()
endif
()
else
()
else
()
#build CK only for xnack-supported targets when using ASAN
#build CK only for xnack-supported targets when using ASAN
...
@@ -195,7 +195,7 @@ if (SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1
...
@@ -195,7 +195,7 @@ if (SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1
message
(
"Enabling WMMA instances"
)
message
(
"Enabling WMMA instances"
)
add_definitions
(
-DCK_USE_WMMA
)
add_definitions
(
-DCK_USE_WMMA
)
endif
()
endif
()
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx12"
)
if
(
SUPPORTED_GPU_TARGETS MATCHES
"gfx12"
OR SUPPORTED_GPU_TARGETS MATCHES
"gfx950"
)
add_definitions
(
-DCK_USE_OCP_FP8
)
add_definitions
(
-DCK_USE_OCP_FP8
)
set
(
CK_USE_OCP_FP8
"ON"
)
set
(
CK_USE_OCP_FP8
"ON"
)
endif
()
endif
()
...
...
client_example/CMakeLists.txt
View file @
3b854040
...
@@ -56,7 +56,7 @@ if (GPU_TARGETS)
...
@@ -56,7 +56,7 @@ if (GPU_TARGETS)
add_definitions
(
-DCK_USE_WMMA
)
add_definitions
(
-DCK_USE_WMMA
)
set
(
CK_USE_WMMA
"ON"
)
set
(
CK_USE_WMMA
"ON"
)
endif
()
endif
()
if
(
GPU_TARGETS MATCHES
"gfx12"
)
if
(
GPU_TARGETS MATCHES
"gfx12"
OR GPU_TARGETS MATCHES
"gfx950"
)
add_definitions
(
-DCK_USE_OCP_FP8
)
add_definitions
(
-DCK_USE_OCP_FP8
)
set
(
CK_USE_OCP_FP8
"ON"
)
set
(
CK_USE_OCP_FP8
"ON"
)
endif
()
endif
()
...
...
example/01_gemm/CMakeLists.txt
View file @
3b854040
...
@@ -58,7 +58,7 @@ add_example_dependencies(example_gemm_xdl example_gemm_xdl_fp64)
...
@@ -58,7 +58,7 @@ add_example_dependencies(example_gemm_xdl example_gemm_xdl_fp64)
add_example_executable
(
example_gemm_xdl_streamk gemm_xdl_streamk.cpp
)
add_example_executable
(
example_gemm_xdl_streamk gemm_xdl_streamk.cpp
)
list
(
APPEND gpu_list gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/04_gemm_add_add_fastgelu/CMakeLists.txt
View file @
3b854040
...
@@ -16,7 +16,7 @@ if(USE_BITINT_EXTENSION_INT4)
...
@@ -16,7 +16,7 @@ if(USE_BITINT_EXTENSION_INT4)
add_example_dependencies
(
example_gemm_add_add_fastgelu_xdl example_gemm_add_add_fastgelu_xdl_int4
)
add_example_dependencies
(
example_gemm_add_add_fastgelu_xdl example_gemm_add_add_fastgelu_xdl_int4
)
endif
(
USE_BITINT_EXTENSION_INT4
)
endif
(
USE_BITINT_EXTENSION_INT4
)
list
(
APPEND gpu_list gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/18_batched_gemm_reduce/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/binary/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/convinvscale/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/convscale/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/convscale_add/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/convscale_reduce/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/convscale_relu/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/dynamic_unary/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/multi_AB/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/62_convnd_activ/unary/CMakeLists.txt
View file @
3b854040
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
)
list
(
APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942
gfx950
)
set
(
target 0
)
set
(
target 0
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
foreach
(
gpu IN LISTS GPU_TARGETS
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
if
(
gpu IN_LIST gpu_list AND target EQUAL 0
)
...
...
example/CMakeLists.txt
View file @
3b854040
...
@@ -94,7 +94,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
...
@@ -94,7 +94,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
if
(
FILE_NAME MATCHES
"_xdl"
)
if
(
FILE_NAME MATCHES
"_xdl"
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201 gfx10.3-generic gfx11-generic gfx12-generic
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201 gfx10.3-generic gfx11-generic gfx12-generic
)
elseif
(
FILE_NAME MATCHES
"_wmma"
)
elseif
(
FILE_NAME MATCHES
"_wmma"
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
gfx950
)
endif
()
endif
()
set_source_files_properties
(
${
FILE_NAME
}
PROPERTIES LANGUAGE HIP
)
set_source_files_properties
(
${
FILE_NAME
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
...
@@ -178,7 +178,7 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
...
@@ -178,7 +178,7 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
if
(
FILE_NAME MATCHES
"_xdl"
)
if
(
FILE_NAME MATCHES
"_xdl"
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201 gfx10.3-generic gfx11-generic gfx12-generic
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201 gfx10.3-generic gfx11-generic gfx12-generic
)
elseif
(
FILE_NAME MATCHES
"_wmma"
)
elseif
(
FILE_NAME MATCHES
"_wmma"
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
list
(
REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
gfx950
)
endif
()
endif
()
set_source_files_properties
(
${
FILE_NAME
}
PROPERTIES LANGUAGE HIP
)
set_source_files_properties
(
${
FILE_NAME
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
add_executable
(
${
EXAMPLE_NAME
}
${
FILE_NAME
}
)
...
...
include/ck/ck.hpp
View file @
3b854040
...
@@ -53,10 +53,10 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
...
@@ -53,10 +53,10 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
// define general macros for various architectures
// define general macros for various architectures
#if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \
#if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \
defined(__gfx942__)
defined(__gfx942__)
|| defined(__gfx950__)
#define __gfx9__
#define __gfx9__
#endif
#endif
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
|| defined(__gfx950__)
#define __gfx94__
#define __gfx94__
#endif
#endif
#if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__)
#if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__)
...
...
include/ck/host_utility/device_prop.hpp
View file @
3b854040
...
@@ -55,20 +55,21 @@ inline bool is_xdl_supported()
...
@@ -55,20 +55,21 @@ inline bool is_xdl_supported()
{
{
return
ck
::
get_device_name
()
==
"gfx908"
||
ck
::
get_device_name
()
==
"gfx90a"
||
return
ck
::
get_device_name
()
==
"gfx908"
||
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
}
inline
bool
is_lds_direct_load_supported
()
inline
bool
is_lds_direct_load_supported
()
{
{
// Check if direct loads from global memory to LDS are supported.
// Check if direct loads from global memory to LDS are supported.
return
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
return
ck
::
get_device_name
()
==
"gfx90a"
||
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
}
inline
bool
is_bf16_atomic_supported
()
inline
bool
is_bf16_atomic_supported
()
{
{
return
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
return
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
;
ck
::
get_device_name
()
==
"gfx942"
||
ck
::
get_device_name
()
==
"gfx950"
;
}
}
inline
bool
is_gfx101_supported
()
inline
bool
is_gfx101_supported
()
...
...
include/ck/tensor_operation/gpu/device/impl/codegen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp
View file @
3b854040
...
@@ -91,8 +91,7 @@ __device__ void device_grouped_conv_fwd_multiple_abd_xdl_cshuffle(
...
@@ -91,8 +91,7 @@ __device__ void device_grouped_conv_fwd_multiple_abd_xdl_cshuffle(
const
Block2ETileMap
block_2_ctile_map
,
const
Block2ETileMap
block_2_ctile_map
,
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
)
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
)
{
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
defined(__gfx94__))
// offset base pointer for each work-group
// offset base pointer for each work-group
const
index_t
num_blocks_per_batch
=
const
index_t
num_blocks_per_batch
=
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
batch_count
);
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
batch_count
);
...
...
include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp
View file @
3b854040
...
@@ -56,8 +56,7 @@ __global__ void
...
@@ -56,8 +56,7 @@ __global__ void
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
Block2ETileMap
block_2_etile_map
)
const
Block2ETileMap
block_2_etile_map
)
{
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
defined(__gfx94__))
__shared__
char
p_shared
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
__shared__
char
p_shared
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
const
index_t
num_blocks_per_batch
=
const
index_t
num_blocks_per_batch
=
...
...
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp
View file @
3b854040
...
@@ -74,8 +74,7 @@ __global__ void
...
@@ -74,8 +74,7 @@ __global__ void
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
Block2ETileMap
block_2_etile_map
)
const
Block2ETileMap
block_2_etile_map
)
{
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
defined(__gfx94__))
const
index_t
num_blocks_per_batch
=
const
index_t
num_blocks_per_batch
=
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
batch_count
);
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
batch_count
);
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
get_block_1d_id
()
/
num_blocks_per_batch
);
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
get_block_1d_id
()
/
num_blocks_per_batch
);
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment