"runner/vscode:/vscode.git/clone" did not exist on "bb71654ebe846d97df306b163c086167239431e5"
Unverified Commit 34b68ee4 authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Update time op to more accurately get device time (#2104)

parent bc7db104
......@@ -185,8 +185,7 @@ struct compile_plan
results.begin(), results.end(), std::back_inserter(times), [&](const auto& cr) {
if(not cr.has_value())
return std::numeric_limits<double>::max();
return time_op(*ctx, cr->replace.code_object, to_shapes(cr->ins->inputs()), 20)
.first;
return time_op(*ctx, cr->replace.code_object, to_shapes(cr->ins->inputs()), 20);
});
auto i = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
std::cout << "Fastest solution: " << config->solutions.at(i) << std::endl;
......
......@@ -38,10 +38,8 @@ struct compile_op : action<compile_op>
context ctx;
auto inputs = p.parse_shapes(v.at("inputs"));
auto op = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v);
auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << host_time << "ms";
if(device_time > 0)
std::cout << ", " << device_time << "ms";
auto t = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms";
std::cout << std::endl;
}
};
......
......@@ -43,8 +43,8 @@ struct run_op : action<run_op>
auto op = make_op(name);
if(v.contains("fields"))
op.from_value(v.at("fields"));
auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << host_time << "ms" << std::endl;
auto t = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms" << std::endl;
}
};
......
......@@ -299,23 +299,6 @@ struct context
any_ptr get_queue() { return get_stream().get(); }
void enable_perf_measurement(bool b = true)
{
if(b)
{
start_event = create_event_for_timing();
stop_event = create_event_for_timing();
get_stream().record(start_event.get());
get_stream().record(stop_event.get());
}
else
{
start_event = nullptr;
stop_event = nullptr;
}
measure_perf = b;
}
std::pair<hipEvent_t, hipEvent_t> get_perf_events() const
{
if(measure_perf)
......@@ -323,12 +306,12 @@ struct context
return std::make_pair(nullptr, nullptr);
}
float get_elapsed_ms() const
static float get_elapsed_ms(hipEvent_t start, hipEvent_t stop)
{
float result = 0;
if(start_event != nullptr and stop_event != nullptr)
if(start != nullptr and stop != nullptr)
{
auto status = hipEventElapsedTime(&result, start_event.get(), stop_event.get());
auto status = hipEventElapsedTime(&result, start, stop);
if(status != hipSuccess)
MIGRAPHX_THROW("Failed hipEventElapsedTime: " + hip_error(status));
}
......
......@@ -32,7 +32,7 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_GPU_EXPORT std::pair<double, double>
MIGRAPHX_GPU_EXPORT double
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace gpu
......
......@@ -41,8 +41,7 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
}
using milliseconds = std::chrono::duration<double, std::milli>;
std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
double time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
{
// TODO: Use std::ref
......@@ -51,21 +50,19 @@ time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
auto output = op.compute_shape(inputs);
op.finalize(ctx, output, inputs);
auto args = generate_arguments(inputs);
auto run = [&] {
op.compute(ctx, output, args);
ctx.finish();
};
gctx.enable_perf_measurement();
auto start = context::create_event_for_timing();
auto stop = context::create_event_for_timing();
auto run = [&] { op.compute(ctx, output, args); };
run();
double host_time = 0.0;
double device_time = 0.0;
gctx.get_stream().record(start.get());
for(auto i : range(n))
{
(void)i;
host_time += time<milliseconds>(run);
device_time += gctx.get_elapsed_ms();
run();
}
return std::make_pair(host_time / n, device_time / n);
gctx.get_stream().record(stop.get());
gctx.finish();
return context::get_elapsed_ms(start.get(), stop.get()) / n;
}
} // namespace gpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment