Commit 5a2cfafd authored by Paul
Browse files

Fix up timing of kernel

parent 94bba2c1
...@@ -447,22 +447,15 @@ struct gemm_impl ...@@ -447,22 +447,15 @@ struct gemm_impl
rocblas_int best_sol = 0; rocblas_int best_sol = 0;
for(auto sol : solution_indices) for(auto sol : solution_indices)
{ {
// Define the function to be timed
auto run_func = [&]() {
run(ctx, input_args, sol);
ctx.finish();
};
// Warmup: the first call to an op. may not be representative since there is // Warmup: the first call to an op. may not be representative since there is
// more time taken initializing caches, etc. so we won't time it. // more time taken initializing caches, etc. so we won't time it.
run_func(); run(ctx, input_args, sol);
double host_time = 0.0; double host_time = time<milliseconds>([&] {
for([[maybe_unused]] int hc:range(hot_calls))
for(int hc = 0; hc < hot_calls; ++hc) run(ctx, input_args, sol);
{
ctx.finish(); ctx.finish();
host_time += time<microseconds>(run_func); });
}
// todo: Measured time dropped from 20 us to about 6.7 us when I raised hot_calls from // todo: Measured time dropped from 20 us to about 6.7 us when I raised hot_calls from
// 1 to 11. The higher the hot_calls value, the faster per-call time up to at least 25, // 1 to 11. The higher the hot_calls value, the faster per-call time up to at least 25,
// and increasing cold_calls makes little or no difference. Why? // and increasing cold_calls makes little or no difference. Why?
...@@ -479,8 +472,8 @@ struct gemm_impl ...@@ -479,8 +472,8 @@ struct gemm_impl
best_time = host_time; best_time = host_time;
} }
} }
std::cout << "Winning GEMM solution: " << best_sol << " in " << best_time << " us, beats " std::cout << "Winning GEMM solution: " << best_sol << " in " << best_time << " ms, beats "
<< first_time << std::endl; << first_time << "ms" << std::endl;
return best_sol; return best_sol;
} }
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment