Commit 04afd30e authored by Harisankar Sadasivan
Browse files

1s cold and 1s warm kernel run for mi300 perf measurement

parent dd34ab6e
...@@ -20,6 +20,34 @@ float launch_and_time_kernel(const StreamConfig& stream_config, ...@@ -20,6 +20,34 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
#if CK_TIME_KERNEL #if CK_TIME_KERNEL
if(stream_config.time_kernel_) if(stream_config.time_kernel_)
{ {
if(ck::get_device_name() == "gfx940" || ck::get_device_name() == "gfx941" ||
ck::get_device_name() == "gfx942")
{
hipEvent_t start, stop;
hip_check_error(hipEventCreate(&start));
hip_check_error(hipEventCreate(&stop));
hip_check_error(hipDeviceSynchronize());
hip_check_error(hipEventRecord(start, stream_config.stream_id_));
for(int i = 0; i < stream_config.nrepeat_; ++i)
{
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
}
hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
hip_check_error(hipEventSynchronize(stop));
float total_time = 0;
hip_check_error(hipEventElapsedTime(&total_time, start, stop));
total_time /= 10;
stream_config.cold_niters_ =
(1000.0 / total_time); // we need longer runtime to ramp up the clk on MI300s
stream_config.nrepeat_ = stream_config.cold_niters_;
}
#if DEBUG_LOG #if DEBUG_LOG
printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n", printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
__func__, __func__,
......
...@@ -8,9 +8,9 @@ ...@@ -8,9 +8,9 @@
struct StreamConfig struct StreamConfig
{ {
hipStream_t stream_id_ = nullptr; hipStream_t stream_id_ = nullptr;
bool time_kernel_ = false; bool time_kernel_ = false;
int log_level_ = 0; int log_level_ = 0;
int cold_niters_ = 5; mutable int cold_niters_ = 5;
int nrepeat_ = 50; mutable int nrepeat_ = 50;
}; };
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment