Commit 5898ba83 authored by Harisankar Sadasivan
Browse files

Corrected the average-time computation to divide by the number of repeats, and replaced the hard-coded 1000 ms time limit with a config parameter

parent 4396a224
......@@ -45,9 +45,10 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
float total_time = 0;
hip_check_error(hipEventElapsedTime(&total_time, start, stop));
total_time /= 10;
total_time /= stream_config.nrepeat_;
stream_config.cold_niters_ =
(1000.0 / total_time); // we need longer runtime to ramp up the clk on MI300s
(stream_config.time_limit_ms /
total_time); // we need longer runtime to ramp up the clk on MI300s
stream_config.nrepeat_ = stream_config.cold_niters_;
}
#endif
......@@ -148,9 +149,10 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
float total_time = 0;
hip_check_error(hipEventElapsedTime(&total_time, start, stop));
total_time /= 10;
total_time /= stream_config.nrepeat_;
stream_config.cold_niters_ =
(1000.0 / total_time); // we need longer runtime to ramp up the clk on MI300s
(stream_config.nrepeat_ /
total_time); // we need longer runtime to ramp up the clk on MI300s
stream_config.nrepeat_ = stream_config.cold_niters_;
}
#endif
......
......@@ -13,4 +13,5 @@ struct StreamConfig
int log_level_ = 0;
mutable int cold_niters_ = 5;
mutable int nrepeat_ = 50;
mutable int time_limit_ms = 1000; // for timing MI300 runs
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment