Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
43adf1fa
Commit
43adf1fa
authored
Dec 13, 2023
by
Harisankar Sadasivan
Browse files
clang format
parent
ab3d3b4a
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1154 additions
and
1014 deletions
+1154
-1014
example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc
...y_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc
+0
-0
include/ck/host_utility/kernel_launch.hpp
include/ck/host_utility/kernel_launch.hpp
+15
-16
include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp
...on/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp
+438
-337
include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp
...eration/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp
+701
-661
No files found.
example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc
100644 → 100755
View file @
43adf1fa
File mode changed from 100644 to 100755
include/ck/host_utility/kernel_launch.hpp
View file @
43adf1fa
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
#ifndef KERNARG_PRELOAD
#ifndef KERNARG_PRELOAD
template
<
typename
...
Args
,
typename
F
>
template
<
typename
...
Args
,
typename
F
>
float
launch_and_time_kernel
(
const
StreamConfig
&
stream_config
,
float
launch_and_time_kernel
(
const
StreamConfig
&
stream_config
,
F
kernel
,
F
kernel
,
dim3
grid_dim
,
dim3
grid_dim
,
dim3
block_dim
,
dim3
block_dim
,
...
@@ -19,7 +19,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -19,7 +19,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
Args
...
args
)
Args
...
args
)
{
{
#if CK_TIME_KERNEL
#if CK_TIME_KERNEL
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
#if DEBUG_LOG
#if DEBUG_LOG
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
...
@@ -49,7 +49,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -49,7 +49,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
hip_check_error
(
hipDeviceSynchronize
());
hip_check_error
(
hipDeviceSynchronize
());
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
{
{
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
hip_check_error
(
hipGetLastError
());
hip_check_error
(
hipGetLastError
());
...
@@ -81,7 +81,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -81,7 +81,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
#else
#else
template
<
typename
...
Args
,
typename
F
>
template
<
typename
...
Args
,
typename
F
>
float
launch_and_time_kernel
(
const
StreamConfig
&
stream_config
,
float
launch_and_time_kernel
(
const
StreamConfig
&
stream_config
,
F
kernel
,
F
kernel
,
dim3
grid_dim
,
dim3
grid_dim
,
dim3
block_dim
,
dim3
block_dim
,
...
@@ -92,7 +92,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -92,7 +92,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
// hipGetErrorString(hipMalloc(&args1, sizeof(Args)));
// hipGetErrorString(hipMalloc(&args1, sizeof(Args)));
// hip_check_error(hipMemcpy(args1, &args, sizeof(Args), hipMemcpyHostToDevice));
// hip_check_error(hipMemcpy(args1, &args, sizeof(Args), hipMemcpyHostToDevice));
#if CK_TIME_KERNEL
#if CK_TIME_KERNEL
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
#if DEBUG_LOG
#if DEBUG_LOG
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
...
@@ -109,9 +109,9 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -109,9 +109,9 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
//
//
// warm up
// warm up
const
int
nrepeat
=
1000
;
const
int
nrepeat
=
1000
;
for
(
auto
i
=
0
;
i
<
nrepeat
;
i
++
)
for
(
auto
i
=
0
;
i
<
nrepeat
;
i
++
)
hipLaunchKernelGGL
(
kernel
,
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
,
hipLaunchKernelGGL
(
args
...);
kernel
,
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
,
args
...);
hip_check_error
(
hipGetLastError
());
hip_check_error
(
hipGetLastError
());
#if DEBUG_LOG
#if DEBUG_LOG
...
@@ -127,9 +127,9 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -127,9 +127,9 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
hipLaunchKernelGGL
(
kernel
,
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
,
hipLaunchKernelGGL
(
args
...);
kernel
,
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
,
args
...);
// hip_check_error(hipGetLastError());
// hip_check_error(hipGetLastError());
hip_check_error
(
hipEventRecord
(
stop
,
stream_config
.
stream_id_
));
hip_check_error
(
hipEventRecord
(
stop
,
stream_config
.
stream_id_
));
...
@@ -140,8 +140,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -140,8 +140,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
}
}
else
else
{
{
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
args
...);
hip_check_error
(
hipGetLastError
());
hip_check_error
(
hipGetLastError
());
return
0
;
return
0
;
...
@@ -155,7 +154,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
...
@@ -155,7 +154,7 @@ float launch_and_time_kernel(const StreamConfig &stream_config,
}
}
#endif
#endif
template
<
typename
...
Args
,
typename
F
,
typename
PreProcessFunc
>
template
<
typename
...
Args
,
typename
F
,
typename
PreProcessFunc
>
float
launch_and_time_kernel_with_preprocess
(
const
StreamConfig
&
stream_config
,
float
launch_and_time_kernel_with_preprocess
(
const
StreamConfig
&
stream_config
,
PreProcessFunc
preprocess
,
PreProcessFunc
preprocess
,
F
kernel
,
F
kernel
,
dim3
grid_dim
,
dim3
grid_dim
,
...
@@ -164,7 +163,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config,
...
@@ -164,7 +163,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config,
Args
...
args
)
Args
...
args
)
{
{
#if CK_TIME_KERNEL
#if CK_TIME_KERNEL
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
#if DEBUG_LOG
#if DEBUG_LOG
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
...
@@ -195,7 +194,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config,
...
@@ -195,7 +194,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config,
hip_check_error
(
hipDeviceSynchronize
());
hip_check_error
(
hipDeviceSynchronize
());
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
for
(
int
i
=
0
;
i
<
nrepeat
;
++
i
)
{
{
preprocess
();
preprocess
();
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
...
...
include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp
View file @
43adf1fa
This diff is collapsed.
Click to expand it.
include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp
View file @
43adf1fa
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment