Commit 6df03163 authored by Paul
Browse files

Add stream and device management

parent ceaf5ee0
...@@ -34,7 +34,7 @@ argument miopen_add::compute(context& ctx, ...@@ -34,7 +34,7 @@ argument miopen_add::compute(context& ctx,
auto a_desc = make_tensor(args[0].get_shape()); auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape()); auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape); auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.handle.get(), miopenOpTensor(ctx.get_stream().get_miopen(),
miopenTensorOpAdd, miopenTensorOpAdd,
&alpha, &alpha,
a_desc.get(), a_desc.get(),
......
...@@ -23,7 +23,7 @@ argument miopen_batch_norm_inference::compute(context& ctx, ...@@ -23,7 +23,7 @@ argument miopen_batch_norm_inference::compute(context& ctx,
float alpha = 1.0, beta = 0.0f; float alpha = 1.0, beta = 0.0f;
miopenBatchNormalizationForwardInference(ctx.handle.get(), miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
miopenBatchNormMode_t(op.bn_mode), miopenBatchNormMode_t(op.bn_mode),
&alpha, &alpha,
&beta, &beta,
......
...@@ -21,7 +21,7 @@ argument miopen_convolution::compute(context& ctx, ...@@ -21,7 +21,7 @@ argument miopen_convolution::compute(context& ctx,
auto y_desc = make_tensor(output_shape); auto y_desc = make_tensor(output_shape);
float alpha = 1, beta = 0; float alpha = 1, beta = 0;
miopenConvolutionForward(ctx.handle.get(), miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha, &alpha,
x_desc.get(), x_desc.get(),
args[0].implicit(), args[0].implicit(),
...@@ -48,7 +48,7 @@ shape miopen_convolution::compile(context& ctx, ...@@ -48,7 +48,7 @@ shape miopen_convolution::compile(context& ctx,
std::size_t workspace_size = 0; std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize( miopenConvolutionForwardGetWorkSpaceSize(
ctx.handle.get(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size); ctx.get_stream().get_miopen(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}}; workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]->get_shape())); auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
...@@ -58,7 +58,7 @@ shape miopen_convolution::compile(context& ctx, ...@@ -58,7 +58,7 @@ shape miopen_convolution::compile(context& ctx,
int algo_count = 1; int algo_count = 1;
miopenConvAlgoPerf_t perf; miopenConvAlgoPerf_t perf;
miopenFindConvolutionForwardAlgorithm(ctx.handle.get(), miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(), x_desc.get(),
x.implicit(), x.implicit(),
w_desc.get(), w_desc.get(),
......
...@@ -82,13 +82,13 @@ struct fusion ...@@ -82,13 +82,13 @@ struct fusion
// int algo_count = 1; // int algo_count = 1;
// miopenConvFwdAlgorithm_t algo; // miopenConvFwdAlgorithm_t algo;
// miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, &algo_count, &algo); // miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, &algo_count, &algo);
// miopenFusionPlanGetWorkSpaceSize(ctx.handle.get(), fp.get(), &ws_size, algo); // miopenFusionPlanGetWorkSpaceSize(ctx.get_stream().get_miopen(), fp.get(), &ws_size, algo);
return shape{shape::int8_type, {ws_size}}; return shape{shape::int8_type, {ws_size}};
} }
void compile(context& ctx) void compile(context& ctx)
{ {
auto status = miopenCompileFusionPlan(ctx.handle.get(), fp.get()); auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get());
if(status != miopenStatusSuccess) if(status != miopenStatusSuccess)
MIGRAPH_THROW("Compiling fusion plan failed"); MIGRAPH_THROW("Compiling fusion plan failed");
} }
...@@ -100,7 +100,7 @@ struct fusion ...@@ -100,7 +100,7 @@ struct fusion
{ {
auto x_td = make_tensor(x.get_shape()); auto x_td = make_tensor(x.get_shape());
auto y_td = make_tensor(y.get_shape()); auto y_td = make_tensor(y.get_shape());
auto status = miopenExecuteFusionPlan(ctx.handle.get(), auto status = miopenExecuteFusionPlan(ctx.get_stream().get_miopen(),
fp.get(), fp.get(),
x_td.get(), x_td.get(),
x.implicit(), x.implicit(),
......
...@@ -26,7 +26,7 @@ argument miopen_gemm::compute(context& ctx, ...@@ -26,7 +26,7 @@ argument miopen_gemm::compute(context& ctx,
rocblas_int m = output_shape.lens()[0]; rocblas_int m = output_shape.lens()[0];
rocblas_int n = output_shape.lens()[1]; rocblas_int n = output_shape.lens()[1];
rocblas_int k = args[0].get_shape().lens()[1]; rocblas_int k = args[0].get_shape().lens()[1];
rocblas_sgemm(ctx.rbhandle.get(), rocblas_sgemm(ctx.get_stream().get_rocblas(),
transb ? rocblas_operation_transpose : rocblas_operation_none, transb ? rocblas_operation_transpose : rocblas_operation_none,
transa ? rocblas_operation_transpose : rocblas_operation_none, transa ? rocblas_operation_transpose : rocblas_operation_none,
n, n,
......
...@@ -88,6 +88,13 @@ argument from_gpu(argument arg) ...@@ -88,6 +88,13 @@ argument from_gpu(argument arg)
return result; return result;
} }
// Makes the HIP device with the given ordinal current for the calling thread.
// @param id  zero-based device ordinal as reported by the HIP runtime
// Throws (via MIGRAPH_THROW) when the runtime rejects the device id.
void set_device(std::size_t id)
{
    // hipSetDevice takes an int; cast explicitly instead of narrowing silently.
    auto status = hipSetDevice(static_cast<int>(id));
    if(status != hipSuccess)
        MIGRAPH_THROW("Error setting device");
}
void gpu_sync() { hipDeviceSynchronize(); } void gpu_sync() { hipDeviceSynchronize(); }
void copy_to_gpu(argument src, argument dst) void copy_to_gpu(argument src, argument dst)
......
...@@ -8,13 +8,116 @@ ...@@ -8,13 +8,116 @@
namespace migraph { namespace migraph {
namespace gpu { namespace gpu {
// Per-device HIP state: the set of streams on one device, plus the MIOpen and
// rocBLAS handles bound to each stream. Streams and handles are created
// lazily, so constructing a hip_device is cheap.
struct hip_device
{
    // Device 0 by default; delegate so stream setup lives in one place.
    hip_device() : hip_device(0) {}

    // @param id  zero-based HIP device ordinal this object manages
    explicit hip_device(std::size_t id) : device_id(id) { add_stream(); }

    // One HIP stream and the library handles created on it. Each accessor
    // first makes the owning device current on the calling thread.
    struct stream
    {
        using hip_stream_ptr = MIGRAPH_MANAGE_PTR(hipStream_t, hipStreamDestroy);

        stream() = default;

        // @param device_number  device ordinal the stream belongs to
        explicit stream(std::size_t device_number) : id(device_number) {}

        // Creates a fresh HIP stream; throws if the runtime fails.
        static hip_stream_ptr create_stream()
        {
            hipStream_t result = nullptr;
            auto status        = hipStreamCreate(&result);
            if(status != hipSuccess)
                MIGRAPH_THROW("Failed to allocate stream");
            return hip_stream_ptr{result};
        }

        // Returns the raw HIP stream, creating it on first use.
        auto get()
        {
            set_device(id);
            if(s == nullptr)
                s = create_stream();
            assert(s.get() != nullptr);
            return s.get();
        }

        // Returns a MIOpen handle bound to this stream, creating it on first use.
        auto get_miopen()
        {
            set_device(id);
            if(mihandle == nullptr)
                mihandle = make_obj<miopen_handle>(&miopenCreateWithStream, get());
            assert(mihandle.get() != nullptr);
            return mihandle.get();
        }

        // Returns a rocBLAS handle bound to this stream, creating it on first use.
        auto get_rocblas()
        {
            set_device(id);
            if(rbhandle == nullptr)
                rbhandle = create_rocblas_handle_ptr(get());
            assert(rbhandle.get() != nullptr);
            return rbhandle.get();
        }

        private:
        std::size_t id = 0;
        // shared<> so stream objects stay copyable while handles are ref-counted.
        shared<hip_stream_ptr> s            = nullptr;
        shared<miopen_handle> mihandle      = nullptr;
        shared<rocblas_handle_ptr> rbhandle = nullptr;
    };

    // Appends another (lazily initialized) stream on this device.
    void add_stream() { streams.emplace_back(device_id); }

    // Returns the currently selected stream.
    stream& get_stream() { return streams.at(current_stream); }

    // Selects which stream subsequent get_stream() calls return.
    // @param n  index of a stream previously added with add_stream()
    void set_stream(std::size_t n)
    {
        // Catch bad indices at the point of selection, not on the next get_stream().
        assert(n < streams.size());
        current_stream = n;
    }

    private:
    std::size_t device_id      = 0;
    std::size_t current_stream = 0;
    std::vector<stream> streams;
};
struct context struct context
{ {
shared<miopen_handle> handle; context(std::size_t n=0)
shared<rocblas_handle_ptr> rbhandle; : current_device(std::make_shared<hip_device>(n))
argument scratch; {}
hip_device& get_current_device()
{
assert(current_device != nullptr);
return *current_device;
}
hip_device::stream& get_stream()
{
return get_current_device().get_stream();
}
std::vector<argument> literals{}; std::vector<argument> literals{};
void finish() const { gpu_sync(); } void finish() const { gpu_sync(); }
private:
// TODO: Make this a vector to support multiple devices
std::shared_ptr<hip_device> current_device;
}; };
} // namespace gpu } // namespace gpu
} // namespace migraph } // namespace migraph
......
...@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false); ...@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false);
migraph::argument from_gpu(migraph::argument arg); migraph::argument from_gpu(migraph::argument arg);
void set_device(std::size_t id);
void gpu_sync(); void gpu_sync();
void copy_to_gpu(argument src, argument dst); void copy_to_gpu(argument src, argument dst);
......
...@@ -11,6 +11,7 @@ namespace gpu { ...@@ -11,6 +11,7 @@ namespace gpu {
using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle); using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
rocblas_handle_ptr create_rocblas_handle_ptr(); rocblas_handle_ptr create_rocblas_handle_ptr();
rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
} // namespace gpu } // namespace gpu
......
...@@ -20,7 +20,7 @@ argument miopen_leaky_relu::compute(context& ctx, ...@@ -20,7 +20,7 @@ argument miopen_leaky_relu::compute(context& ctx,
float alpha = 1, beta = 0; float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[0].get_shape()); auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape); auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.handle.get(), miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(), ad.get(),
&alpha, &alpha,
x_desc.get(), x_desc.get(),
......
...@@ -21,7 +21,7 @@ argument miopen_pooling::compute(context& ctx, ...@@ -21,7 +21,7 @@ argument miopen_pooling::compute(context& ctx,
float alpha = 1, beta = 0; float alpha = 1, beta = 0;
miopenPoolingForward(ctx.handle.get(), miopenPoolingForward(ctx.get_stream().get_miopen(),
pd.get(), pd.get(),
&alpha, &alpha,
x_desc.get(), x_desc.get(),
......
...@@ -20,7 +20,7 @@ argument miopen_relu::compute(context& ctx, ...@@ -20,7 +20,7 @@ argument miopen_relu::compute(context& ctx,
float alpha = 1, beta = 0; float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[0].get_shape()); auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape); auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.handle.get(), miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(), ad.get(),
&alpha, &alpha,
x_desc.get(), x_desc.get(),
......
...@@ -10,6 +10,13 @@ rocblas_handle_ptr create_rocblas_handle_ptr() ...@@ -10,6 +10,13 @@ rocblas_handle_ptr create_rocblas_handle_ptr()
return rocblas_handle_ptr{handle}; return rocblas_handle_ptr{handle};
} }
// Builds a rocBLAS handle and binds it to the given HIP stream, so every
// rocBLAS call issued through the handle executes on that stream.
rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
{
    auto handle = create_rocblas_handle_ptr();
    rocblas_set_stream(handle.get(), s);
    return handle;
}
} // namespace gpu } // namespace gpu
} // namespace migraph } // namespace migraph
...@@ -20,7 +20,7 @@ argument miopen_softmax::compute(context& ctx, ...@@ -20,7 +20,7 @@ argument miopen_softmax::compute(context& ctx,
float alpha = 1, beta = 0; float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[0].get_shape()); auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape); auto y_desc = make_tensor(output_shape);
miopenSoftmaxForward(ctx.handle.get(), miopenSoftmaxForward(ctx.get_stream().get_miopen(),
&alpha, &alpha,
x_desc.get(), x_desc.get(),
args[0].implicit(), args[0].implicit(),
......
...@@ -56,8 +56,7 @@ std::string target::name() const { return "miopen"; } ...@@ -56,8 +56,7 @@ std::string target::name() const { return "miopen"; }
migraph::context target::get_context() const migraph::context target::get_context() const
{ {
return context{ return context{};
share(make_obj<miopen_handle>(&miopenCreate)), share(create_rocblas_handle_ptr()), {}};
} }
} // namespace gpu } // namespace gpu
} // namespace migraph } // namespace migraph
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment