Commit bb263e51 authored by Paul's avatar Paul
Browse files

Merge branch 'perk-kernel' into jit-layernorm2

parents 6ac586a9 6ee87f92
...@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& ...@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
std::vector<void*> kargs(args.size()); std::vector<void*> kargs(args.size());
std::transform( std::transform(
args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); }); args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); });
k.launch(ctx.get_stream().get(), global, local, std::move(kargs)); auto [start, stop] = ctx.get_perf_events();
k.launch(ctx.get_stream().get(), global, local, std::move(kargs), start, stop);
return args[get_output_arg(args.size())]; return args[get_output_arg(args.size())];
} }
void code_object_op::finalize(context&, const shape&, const std::vector<shape>&) void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
......
...@@ -38,8 +38,11 @@ struct compile_op : action<compile_op> ...@@ -38,8 +38,11 @@ struct compile_op : action<compile_op>
context ctx; context ctx;
auto inputs = p.parse_shapes(v.at("inputs")); auto inputs = p.parse_shapes(v.at("inputs"));
auto op = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v); auto op = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v);
double t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms" << std::endl; std::cout << op << ": " << host_time << "ms";
if(device_time > 0)
std::cout << ", " << device_time << "ms";
std::cout << std::endl;
} }
}; };
......
...@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
namespace driver { namespace driver {
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n = 100); std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
} // namespace driver } // namespace driver
} // namespace gpu } // namespace gpu
......
...@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig ...@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
} }
using milliseconds = std::chrono::duration<double, std::milli>; using milliseconds = std::chrono::duration<double, std::milli>;
double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n) std::pair<double, double>
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
{ {
// TODO: Use std::ref // TODO: Use std::ref
migraphx::context gctx = ctx; migraphx::context ctx = ictx;
auto output = op.compute_shape(inputs); auto& gctx = any_cast<migraphx::gpu::context>(ctx);
op.finalize(gctx, output, inputs); auto output = op.compute_shape(inputs);
op.finalize(ctx, output, inputs);
auto args = generate_arguments(inputs); auto args = generate_arguments(inputs);
auto run = [&] { auto run = [&] {
op.compute(gctx, output, args); op.compute(ctx, output, args);
gctx.finish(); ctx.finish();
}; };
gctx.enable_perf_measurement();
run(); run();
auto r = range(n); double host_time = 0.0;
double t = std::accumulate( double device_time = 0.0;
r.begin(), r.end(), double{0.0}, [&](auto x, auto) { return x + time<milliseconds>(run); }); for(auto i : range(n))
return t / n; {
(void)i;
host_time += time<milliseconds>(run);
device_time += gctx.get_elapsed_ms();
}
return std::make_pair(host_time / n, device_time / n);
} }
} // namespace driver } // namespace driver
......
...@@ -43,8 +43,8 @@ struct run_op : action<run_op> ...@@ -43,8 +43,8 @@ struct run_op : action<run_op>
auto op = make_op(name); auto op = make_op(name);
if(v.contains("fields")) if(v.contains("fields"))
op.from_value(v.at("fields")); op.from_value(v.at("fields"));
double t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
std::cout << op << ": " << t << "ms" << std::endl; std::cout << op << ": " << host_time << "ms" << std::endl;
} }
}; };
......
...@@ -244,6 +244,15 @@ struct context ...@@ -244,6 +244,15 @@ struct context
return hip_event_ptr{event}; return hip_event_ptr{event};
} }
/// Creates a HIP event intended for elapsed-time measurement.
///
/// The event is created with plain hipEventCreate (no flags argument), and
/// ownership is handed to the returned hip_event_ptr.
/// NOTE(review): per the HIP docs default-flag events keep timing enabled —
/// confirm against the HIP version in use.
///
/// @return the owning handle of the newly created event.
/// @throws via MIGRAPHX_THROW when hipEventCreate does not return hipSuccess.
static hip_event_ptr create_event_for_timing()
{
    hipEvent_t timing_event;
    if(hipEventCreate(&timing_event) != hipSuccess)
        MIGRAPHX_THROW("Failed to create event");
    return hip_event_ptr{timing_event};
}
value to_value() const value to_value() const
{ {
value result; value result;
...@@ -267,10 +276,49 @@ struct context ...@@ -267,10 +276,49 @@ struct context
any_ptr get_queue() { return get_stream().get(); } any_ptr get_queue() { return get_stream().get(); }
/// Turns device-side performance measurement on or off for this context.
///
/// Enabling creates a fresh start/stop event pair and records both on the
/// stream returned by get_stream(); disabling releases the pair. The
/// measure_perf flag is updated last, so it only changes once the events are
/// in their final state.
///
/// @param b true to enable measurement (default), false to disable it.
void enable_perf_measurement(bool b = true)
{
    if(not b)
    {
        // Drop both events; get_elapsed_ms() then reports 0.
        start_event  = nullptr;
        stop_event   = nullptr;
        measure_perf = false;
        return;
    }

    start_event = create_event_for_timing();
    stop_event  = create_event_for_timing();
    // Record both events right away.
    // NOTE(review): presumably so an elapsed-time query is valid before the
    // first timed launch — confirm.
    get_stream().record(start_event.get());
    get_stream().record(stop_event.get());
    measure_perf = true;
}
std::pair<hipEvent_t, hipEvent_t> get_perf_events() const
{
if(measure_perf)
return std::make_pair(start_event.get(), stop_event.get());
return std::make_pair(nullptr, nullptr);
}
/// Reports the time between the start and stop events, as returned by
/// hipEventElapsedTime.
///
/// @return the elapsed time, or 0 when either event is missing (for example
///         when measurement was never enabled or has been disabled).
/// @throws via MIGRAPHX_THROW when hipEventElapsedTime does not return
///         hipSuccess.
float get_elapsed_ms() const
{
    // Nothing to measure without both events.
    if(start_event == nullptr or stop_event == nullptr)
        return 0;

    float elapsed = 0;
    auto status   = hipEventElapsedTime(&elapsed, start_event.get(), stop_event.get());
    if(status != hipSuccess)
        MIGRAPHX_THROW("Failed hipEventElapsedTime: " + hip_error(status));
    return elapsed;
}
private: private:
// TODO: Make this a vector to support multiple devices // TODO: Make this a vector to support multiple devices
std::shared_ptr<hip_device> current_device; std::shared_ptr<hip_device> current_device;
std::vector<shared<hip_event_ptr>> events; std::vector<shared<hip_event_ptr>> events;
bool measure_perf = false;
shared<hip_event_ptr> start_event = nullptr;
shared<hip_event_ptr> stop_event = nullptr;
}; };
inline void migraphx_to_value(value& v, const context& ctx) { v = ctx.to_value(); } inline void migraphx_to_value(value& v, const context& ctx) { v = ctx.to_value(); }
......
...@@ -37,6 +37,8 @@ namespace gpu { ...@@ -37,6 +37,8 @@ namespace gpu {
struct context; struct context;
std::string hip_error(int error);
argument allocate_gpu(const shape& s, bool host = false); argument allocate_gpu(const shape& s, bool host = false);
argument register_on_gpu(const argument& arg); argument register_on_gpu(const argument& arg);
......
...@@ -50,17 +50,22 @@ struct kernel ...@@ -50,17 +50,22 @@ struct kernel
void launch(hipStream_t stream, void launch(hipStream_t stream,
std::size_t global, std::size_t global,
std::size_t local, std::size_t local,
const std::vector<kernel_argument>& args) const; const std::vector<kernel_argument>& args,
hipEvent_t start = nullptr,
hipEvent_t stop = nullptr) const;
void launch(hipStream_t stream, void launch(hipStream_t stream,
std::size_t global, std::size_t global,
std::size_t local, std::size_t local,
std::vector<void*> args) const; std::vector<void*> args,
hipEvent_t start = nullptr,
hipEvent_t stop = nullptr) const;
auto launch(hipStream_t stream, std::size_t global, std::size_t local) const template <class... Ts>
auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const
{ {
return [=](auto&&... xs) { return [=](auto&&... xs) {
launch(stream, global, local, std::vector<kernel_argument>{xs...}); launch(stream, global, local, std::vector<kernel_argument>{xs...}, zs...);
}; };
} }
......
...@@ -80,7 +80,9 @@ void launch_kernel(hipFunction_t fun, ...@@ -80,7 +80,9 @@ void launch_kernel(hipFunction_t fun,
std::size_t global, std::size_t global,
std::size_t local, std::size_t local,
void* kernargs, void* kernargs,
std::size_t size) std::size_t size,
hipEvent_t start,
hipEvent_t stop)
{ {
assert(global > 0); assert(global > 0);
assert(local > 0); assert(local > 0);
...@@ -97,34 +99,55 @@ void launch_kernel(hipFunction_t fun, ...@@ -97,34 +99,55 @@ void launch_kernel(hipFunction_t fun,
#endif #endif
}; };
auto status = hipExtModuleLaunchKernel( auto status = hipExtModuleLaunchKernel(fun,
fun, global, 1, 1, local, 1, 1, 0, stream, nullptr, reinterpret_cast<void**>(&config)); global,
1,
1,
local,
1,
1,
0,
stream,
nullptr,
reinterpret_cast<void**>(&config),
start,
stop);
if(status != hipSuccess) if(status != hipSuccess)
MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status)); MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status));
if(stop)
{
status = hipEventSynchronize(stop);
if(status != hipSuccess)
MIGRAPHX_THROW("Failed to sync event: " + hip_error(status));
}
} }
void kernel::launch(hipStream_t stream, void kernel::launch(hipStream_t stream,
std::size_t global, std::size_t global,
std::size_t local, std::size_t local,
std::vector<void*> args) const std::vector<void*> args,
hipEvent_t start,
hipEvent_t stop) const
{ {
assert(impl != nullptr); assert(impl != nullptr);
void* kernargs = args.data(); void* kernargs = args.data();
std::size_t size = args.size() * sizeof(void*); std::size_t size = args.size() * sizeof(void*);
launch_kernel(impl->fun, stream, global, local, kernargs, size); launch_kernel(impl->fun, stream, global, local, kernargs, size, start, stop);
} }
void kernel::launch(hipStream_t stream, void kernel::launch(hipStream_t stream,
std::size_t global, std::size_t global,
std::size_t local, std::size_t local,
const std::vector<kernel_argument>& args) const const std::vector<kernel_argument>& args,
hipEvent_t start,
hipEvent_t stop) const
{ {
assert(impl != nullptr); assert(impl != nullptr);
std::vector<char> kernargs = pack_args(args); std::vector<char> kernargs = pack_args(args);
std::size_t size = kernargs.size(); std::size_t size = kernargs.size();
launch_kernel(impl->fun, stream, global, local, kernargs.data(), size); launch_kernel(impl->fun, stream, global, local, kernargs.data(), size, start, stop);
} }
} // namespace gpu } // namespace gpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment