Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f4055dc7
Commit
f4055dc7
authored
Aug 25, 2022
by
Jing Zhang
Browse files
fixed splitK_gemm_fp32
parent
74604d74
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
52 additions
and
46 deletions
+52
-46
include/ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp
...ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp
+2
-2
profiler/CMakeLists.txt
profiler/CMakeLists.txt
+39
-39
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+6
-0
script/cmake-rocm.sh
script/cmake-rocm.sh
+1
-1
script/run_full_performance_tests.sh
script/run_full_performance_tests.sh
+4
-4
No files found.
include/ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp
View file @
f4055dc7
...
@@ -93,9 +93,9 @@ struct DeviceGemmXdlSplitK : public DeviceGemmSplitK<ALayout,
...
@@ -93,9 +93,9 @@ struct DeviceGemmXdlSplitK : public DeviceGemmSplitK<ALayout,
const
auto
a_grid_desc_m_kpad
=
transform_tensor_descriptor
(
const
auto
a_grid_desc_m_kpad
=
transform_tensor_descriptor
(
a_grid_desc_m_k
,
a_grid_desc_m_k
,
make_tuple
(
make_
right_pad_transform
(
K
,
KPad
-
K
),
make_pass_through_transform
(
M
)),
make_tuple
(
make_
pass_through_transform
(
M
),
make_right_pad_transform
(
K
,
KPad
-
K
)),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
make_tuple
(
Sequence
<
1
>
{},
Sequence
<
0
>
{}));
if
constexpr
(
GemmSpec
==
GemmSpecialization
::
MNPadding
)
if
constexpr
(
GemmSpec
==
GemmSpecialization
::
MNPadding
)
{
{
...
...
profiler/CMakeLists.txt
View file @
f4055dc7
...
@@ -5,49 +5,49 @@ include_directories(BEFORE
...
@@ -5,49 +5,49 @@ include_directories(BEFORE
# ck_profiler
# ck_profiler
set
(
PROFILER_SOURCE
set
(
PROFILER_SOURCE
src/profiler.cpp
src/profiler.cpp
src/profile_gemm.cpp
#
src/profile_gemm.cpp
src/profile_gemm_splitk.cpp
src/profile_gemm_splitk.cpp
src/profile_gemm_bilinear.cpp
#
src/profile_gemm_bilinear.cpp
src/profile_gemm_bias_add_reduce.cpp
#
src/profile_gemm_bias_add_reduce.cpp
src/profile_gemm_add_add_fastgelu.cpp
#
src/profile_gemm_add_add_fastgelu.cpp
src/profile_gemm_reduce.cpp
#
src/profile_gemm_reduce.cpp
src/profile_batched_gemm.cpp
#
src/profile_batched_gemm.cpp
src/profile_batched_gemm_reduce.cpp
#
src/profile_batched_gemm_reduce.cpp
src/profile_grouped_gemm.cpp
#
src/profile_grouped_gemm.cpp
src/profile_conv_fwd.cpp
#
src/profile_conv_fwd.cpp
src/profile_conv_fwd_bias_relu.cpp
#
src/profile_conv_fwd_bias_relu.cpp
src/profile_conv_fwd_bias_relu_add.cpp
#
src/profile_conv_fwd_bias_relu_add.cpp
src/profile_conv_bwd_data.cpp
#
src/profile_conv_bwd_data.cpp
src/profile_conv_bwd_weight.cpp
#
src/profile_conv_bwd_weight.cpp
src/profile_grouped_conv_fwd.cpp
#
src/profile_grouped_conv_fwd.cpp
src/profile_reduce.cpp
#
src/profile_reduce.cpp
src/profile_layernorm.cpp
#
src/profile_layernorm.cpp
src/profile_normalization.cpp
#
src/profile_normalization.cpp
)
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
target_link_libraries
(
ckProfiler PRIVATE utility
)
target_link_libraries
(
ckProfiler PRIVATE utility
)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_splitk_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_splitk_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bilinear_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bilinear_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_add_add_fastgelu_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_add_add_fastgelu_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_reduce_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_reduce_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias_add_reduce_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_add_reduce_instance)
target_link_libraries
(
ckProfiler PRIVATE device_batched_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_batched_gemm_instance)
target_link_libraries
(
ckProfiler PRIVATE device_batched_gemm_reduce_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_batched_gemm_reduce_instance)
target_link_libraries
(
ckProfiler PRIVATE device_grouped_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_grouped_gemm_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance)
target_link_libraries
(
ckProfiler PRIVATE device_grouped_conv1d_fwd_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_grouped_conv1d_fwd_instance)
target_link_libraries
(
ckProfiler PRIVATE device_grouped_conv2d_fwd_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_grouped_conv2d_fwd_instance)
target_link_libraries
(
ckProfiler PRIVATE device_grouped_conv3d_fwd_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_grouped_conv3d_fwd_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv1d_bwd_data_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv1d_bwd_data_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_bwd_data_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv3d_bwd_data_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv3d_bwd_data_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv1d_bwd_weight_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv1d_bwd_weight_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_bwd_weight_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_weight_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv3d_bwd_weight_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv3d_bwd_weight_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_normalization_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_normalization_instance)
target_link_libraries
(
ckProfiler PRIVATE device_reduce_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_reduce_instance)
profiler/src/profiler.cpp
View file @
f4055dc7
...
@@ -46,6 +46,7 @@ static void print_helper_message()
...
@@ -46,6 +46,7 @@ static void print_helper_message()
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
#if 0
if(argc == 1)
if(argc == 1)
{
{
print_helper_message();
print_helper_message();
...
@@ -124,6 +125,11 @@ int main(int argc, char* argv[])
...
@@ -124,6 +125,11 @@ int main(int argc, char* argv[])
{
{
return profile_layernorm(argc, argv);
return profile_layernorm(argc, argv);
}
}
#endif
if
(
strcmp
(
argv
[
1
],
"gemm_splitk"
)
==
0
)
{
return
profile_gemm_splitk
(
argc
,
argv
);
}
else
else
{
{
print_helper_message
();
print_helper_message
();
...
...
script/cmake-rocm.sh
View file @
f4055dc7
...
@@ -10,7 +10,7 @@ cmake
...
@@ -10,7 +10,7 @@ cmake
-D
CMAKE_INSTALL_PREFIX
=
${
MY_PROJECT_INSTALL
}
\
-D
CMAKE_INSTALL_PREFIX
=
${
MY_PROJECT_INSTALL
}
\
-D
BUILD_DEV
=
OFF
\
-D
BUILD_DEV
=
OFF
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
CMAKE_CXX_FLAGS
=
" -
O3 -ftemplate-backtrace-limit
=0 -gline-tables-only -save-temps=
$PWD
"
\
-D
CMAKE_CXX_FLAGS
=
" -
-offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr
=0 -gline-tables-only -save-temps=
$PWD
"
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_PREFIX_PATH
=
/opt/rocm
\
-D
CMAKE_PREFIX_PATH
=
/opt/rocm
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
...
...
script/run_full_performance_tests.sh
View file @
f4055dc7
...
@@ -127,10 +127,10 @@ print_log_header $reduction_log $env_type $branch $host_name
...
@@ -127,10 +127,10 @@ print_log_header $reduction_log $env_type $branch $host_name
export
splitK_gemm_log
=
"perf_splitK_gemm_
${
gpu_arch
}
.log"
export
splitK_gemm_log
=
"perf_splitK_gemm_
${
gpu_arch
}
.log"
print_log_header
$splitK_gemm_log
$env_type
$branch
$host_name
print_log_header
$splitK_gemm_log
$env_type
$branch
$host_name
#
../script/profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 0 0
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
#
../script/profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 0 1
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
#
../script/profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 0 2
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
#
../script/profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 0 3
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 0
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 0
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 1
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 1
$verify
1 0 1 4 |
tee
-a
$splitK_gemm_log
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment