Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
2944825c
"vscode:/vscode.git/clone" did not exist on "6e221334cdec1ad4642252e87e869c452981726d"
Commit
2944825c
authored
Sep 06, 2024
by
Emin Ozturk
Browse files
rocm-smi and heuristic -beginning
parent
83c0e377
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
0 deletions
+13
-0
include/ck/host_utility/kernel_launch.hpp
include/ck/host_utility/kernel_launch.hpp
+13
-0
No files found.
include/ck/host_utility/kernel_launch.hpp
View file @
2944825c
...
@@ -10,6 +10,8 @@
...
@@ -10,6 +10,8 @@
#include "ck/stream_config.hpp"
#include "ck/stream_config.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "rocm_smi/rocm_smi.h"
template
<
typename
...
Args
,
typename
F
>
template
<
typename
...
Args
,
typename
F
>
float
launch_and_time_kernel
(
const
StreamConfig
&
stream_config
,
float
launch_and_time_kernel
(
const
StreamConfig
&
stream_config
,
F
kernel
,
F
kernel
,
...
@@ -19,6 +21,14 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
...
@@ -19,6 +21,14 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
Args
...
args
)
Args
...
args
)
{
{
#if CK_TIME_KERNEL
#if CK_TIME_KERNEL
rsmi_status_t
ret
;
uint32_t
num_devices
;
uint16_t
dev_id
;
ret
=
rsmi_init
(
0
);
ret
=
rsmi_num_monitor_devices
(
&
num_devices
);
if
(
stream_config
.
time_kernel_
)
if
(
stream_config
.
time_kernel_
)
{
{
...
@@ -46,6 +56,9 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
...
@@ -46,6 +56,9 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
hip_check_error
(
hipEventElapsedTime
(
&
total_time
,
start
,
stop
));
hip_check_error
(
hipEventElapsedTime
(
&
total_time
,
start
,
stop
));
total_time
/=
10
;
total_time
/=
10
;
stream_config
.
cold_niters_
=
(
1000.0
/
total_time
);
//we need longer runtime to ramp up the clk on MI300s
stream_config
.
cold_niters_
=
(
1000.0
/
total_time
);
//we need longer runtime to ramp up the clk on MI300s
// TODO: find a heuristic that dynamically chooses the number of cold iterations based on the GPU clock frequency.
// #Emin #lookAt1
stream_config
.
nrepeat_
=
stream_config
.
cold_niters_
;
stream_config
.
nrepeat_
=
stream_config
.
cold_niters_
;
}
}
#if DEBUG_LOG
#if DEBUG_LOG
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment