Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
5898ba83
"vscode:/vscode.git/clone" did not exist on "892a8d769d95cf85ce5e5cab3432ddb000826588"
Commit
5898ba83
authored
Apr 17, 2024
by
Harisankar Sadasivan
Browse files
Fixed the average-time calculation to divide by the configured repeat count, and made the 1000 ms time limit a StreamConfig parameter
parent
4396a224
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
9 deletions
+12
-9
include/ck/host_utility/kernel_launch.hpp
include/ck/host_utility/kernel_launch.hpp
+6
-4
include/ck/stream_config.hpp
include/ck/stream_config.hpp
+6
-5
No files found.
include/ck/host_utility/kernel_launch.hpp
View file @
5898ba83
...
...
@@ -45,9 +45,10 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
float
total_time
=
0
;
hip_check_error
(
hipEventElapsedTime
(
&
total_time
,
start
,
stop
));
total_time
/=
10
;
total_time
/=
stream_config
.
nrepeat_
;
stream_config
.
cold_niters_
=
(
1000.0
/
total_time
);
// we need longer runtime to ramp up the clk on MI300s
(
stream_config
.
time_limit_ms
/
total_time
);
// we need longer runtime to ramp up the clk on MI300s
stream_config
.
nrepeat_
=
stream_config
.
cold_niters_
;
}
#endif
...
...
@@ -148,9 +149,10 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
float
total_time
=
0
;
hip_check_error
(
hipEventElapsedTime
(
&
total_time
,
start
,
stop
));
total_time
/=
10
;
total_time
/=
stream_config
.
nrepeat_
;
stream_config
.
cold_niters_
=
(
1000.0
/
total_time
);
// we need longer runtime to ramp up the clk on MI300s
(
stream_config
.
nrepeat_
/
total_time
);
// we need longer runtime to ramp up the clk on MI300s
stream_config
.
nrepeat_
=
stream_config
.
cold_niters_
;
}
#endif
...
...
include/ck/stream_config.hpp
View file @
5898ba83
...
...
@@ -8,9 +8,10 @@
struct
StreamConfig
{
hipStream_t
stream_id_
=
nullptr
;
bool
time_kernel_
=
false
;
int
log_level_
=
0
;
mutable
int
cold_niters_
=
5
;
mutable
int
nrepeat_
=
50
;
hipStream_t
stream_id_
=
nullptr
;
bool
time_kernel_
=
false
;
int
log_level_
=
0
;
mutable
int
cold_niters_
=
5
;
mutable
int
nrepeat_
=
50
;
mutable
int
time_limit_ms
=
1000
;
// for timing MI300 runs
};
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment