Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
804e6803
Commit
804e6803
authored
Apr 15, 2024
by
Harisankar Sadasivan
Browse files
files modified for 1s cold and warm runs
parent
87efbb63
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
442 additions
and
431 deletions
+442
-431
include/ck/host_utility/kernel_launch.hpp
include/ck/host_utility/kernel_launch.hpp
+27
-0
include/ck/stream_config.hpp
include/ck/stream_config.hpp
+2
-2
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
...tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
+413
-429
No files found.
include/ck/host_utility/kernel_launch.hpp
View file @
804e6803
...
...
@@ -20,6 +20,33 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
#if CK_TIME_KERNEL
if
(
stream_config
.
time_kernel_
)
{
if
(
ck
::
get_device_name
()
==
"gfx940"
||
ck
::
get_device_name
()
==
"gfx941"
||
ck
::
get_device_name
()
==
"gfx942"
)
{
hipEvent_t
start
,
stop
;
hip_check_error
(
hipEventCreate
(
&
start
));
hip_check_error
(
hipEventCreate
(
&
stop
));
hip_check_error
(
hipDeviceSynchronize
());
hip_check_error
(
hipEventRecord
(
start
,
stream_config
.
stream_id_
));
for
(
int
i
=
0
;
i
<
stream_config
.
nrepeat_
;
++
i
)
{
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
hip_check_error
(
hipGetLastError
());
}
hip_check_error
(
hipEventRecord
(
stop
,
stream_config
.
stream_id_
));
hip_check_error
(
hipEventSynchronize
(
stop
));
float
total_time
=
0
;
hip_check_error
(
hipEventElapsedTime
(
&
total_time
,
start
,
stop
));
total_time
/=
10
;
stream_config
.
cold_niters_
=
(
1000.0
/
total_time
);
// We need a longer runtime to ramp up the clock on MI300s
stream_config
.
nrepeat_
=
stream_config
.
cold_niters_
;
}
#if DEBUG_LOG
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
__func__
,
...
...
include/ck/stream_config.hpp
View file @
804e6803
...
...
@@ -11,6 +11,6 @@ struct StreamConfig
hipStream_t
stream_id_
=
nullptr
;
bool
time_kernel_
=
false
;
int
log_level_
=
0
;
int
cold_niters_
=
5
;
int
nrepeat_
=
50
;
mutable
int
cold_niters_
=
5
;
mutable
int
nrepeat_
=
50
;
};
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
View file @
804e6803
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment