"src/targets/ref/lowering.cpp" did not exist on "9174b783e42056c84690da54beb9eab94cf7f163"
Commit 6df03163 authored by Paul's avatar Paul
Browse files

Add stream and device management

parent ceaf5ee0
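
Every call-site change below follows the same pattern: operator compute/compile code stops reading the MIOpen and rocBLAS handles straight off the context (ctx.handle, ctx.rbhandle) and instead asks the context for the current stream, which owns lazily created handles bound to that stream. A minimal sketch of the new access path, assuming the gpu headers from this commit are included (the helper function itself is illustrative, not part of the commit):

    // Illustrative only: shows what ctx.get_stream() hands back to the operators.
    void use_stream_handles(migraph::gpu::context& ctx)
    {
        hipStream_t    hs = ctx.get_stream().get();          // raw HIP stream, created on first use
        miopenHandle_t mh = ctx.get_stream().get_miopen();   // MIOpen handle bound to that stream
        rocblas_handle rb = ctx.get_stream().get_rocblas();  // rocBLAS handle bound to that stream
        (void)hs; (void)mh; (void)rb;                        // all three are cached inside the stream
    }
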
......@@ -34,7 +34,7 @@ argument miopen_add::compute(context& ctx,
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
-miopenOpTensor(ctx.handle.get(),
+miopenOpTensor(ctx.get_stream().get_miopen(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
......
......@@ -23,7 +23,7 @@ argument miopen_batch_norm_inference::compute(context& ctx,
float alpha = 1.0, beta = 0.0f;
-miopenBatchNormalizationForwardInference(ctx.handle.get(),
+miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
miopenBatchNormMode_t(op.bn_mode),
&alpha,
&beta,
......
......@@ -21,7 +21,7 @@ argument miopen_convolution::compute(context& ctx,
auto y_desc = make_tensor(output_shape);
float alpha = 1, beta = 0;
-miopenConvolutionForward(ctx.handle.get(),
+miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
args[0].implicit(),
......@@ -48,7 +48,7 @@ shape miopen_convolution::compile(context& ctx,
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(
-ctx.handle.get(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
+ctx.get_stream().get_miopen(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
......@@ -58,7 +58,7 @@ shape miopen_convolution::compile(context& ctx,
int algo_count = 1;
miopenConvAlgoPerf_t perf;
-miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
+miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
......
......@@ -82,13 +82,13 @@ struct fusion
// int algo_count = 1;
// miopenConvFwdAlgorithm_t algo;
// miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, &algo_count, &algo);
-// miopenFusionPlanGetWorkSpaceSize(ctx.handle.get(), fp.get(), &ws_size, algo);
+// miopenFusionPlanGetWorkSpaceSize(ctx.get_stream().get_miopen(), fp.get(), &ws_size, algo);
return shape{shape::int8_type, {ws_size}};
}
void compile(context& ctx)
{
-auto status = miopenCompileFusionPlan(ctx.handle.get(), fp.get());
+auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get());
if(status != miopenStatusSuccess)
MIGRAPH_THROW("Compiling fusion plan failed");
}
......@@ -100,7 +100,7 @@ struct fusion
{
auto x_td = make_tensor(x.get_shape());
auto y_td = make_tensor(y.get_shape());
-auto status = miopenExecuteFusionPlan(ctx.handle.get(),
+auto status = miopenExecuteFusionPlan(ctx.get_stream().get_miopen(),
fp.get(),
x_td.get(),
x.implicit(),
......
......@@ -26,7 +26,7 @@ argument miopen_gemm::compute(context& ctx,
rocblas_int m = output_shape.lens()[0];
rocblas_int n = output_shape.lens()[1];
rocblas_int k = args[0].get_shape().lens()[1];
-rocblas_sgemm(ctx.rbhandle.get(),
+rocblas_sgemm(ctx.get_stream().get_rocblas(),
transb ? rocblas_operation_transpose : rocblas_operation_none,
transa ? rocblas_operation_transpose : rocblas_operation_none,
n,
......
......@@ -88,6 +88,13 @@ argument from_gpu(argument arg)
return result;
}
void set_device(std::size_t id)
{
auto status = hipSetDevice(id);
if(status != hipSuccess)
MIGRAPH_THROW("Error setting device");
}
void gpu_sync() { hipDeviceSynchronize(); }
void copy_to_gpu(argument src, argument dst)
......
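
The free functions added here are reused by the stream machinery in context.hpp below: set_device pins the HIP device before any lazy creation, and gpu_sync is what context::finish() calls. A trivial, illustrative caller-side sketch (the device id 1 is only an example):

    migraph::gpu::set_device(1);   // throws "Error setting device" if hipSetDevice fails
    // ... enqueue work on device 1 ...
    migraph::gpu::gpu_sync();      // hipDeviceSynchronize(): wait for all queued GPU work
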
......@@ -8,13 +8,116 @@
namespace migraph {
namespace gpu {
struct hip_device
{
hip_device()
{
add_stream();
}
hip_device(std::size_t id)
: device_id(id)
{
add_stream();
}
struct stream
{
using hip_stream_ptr = MIGRAPH_MANAGE_PTR(hipStream_t, hipStreamDestroy);
stream()
{}
stream(std::size_t device_number)
: id(device_number)
{}
static hip_stream_ptr create_stream()
{
hipStream_t result = nullptr;
auto status = hipStreamCreate(&result);
if(status != hipSuccess)
MIGRAPH_THROW("Failed to allocate stream");
return hip_stream_ptr{result};
}
auto get()
{
set_device(id);
if(s == nullptr)
s = create_stream();
assert(s.get() != nullptr);
return s.get();
}
auto get_miopen()
{
set_device(id);
if(mihandle == nullptr)
mihandle = make_obj<miopen_handle>(&miopenCreateWithStream, get());
assert(mihandle.get() != nullptr);
return mihandle.get();
}
auto get_rocblas()
{
set_device(id);
if(rbhandle == nullptr)
rbhandle = create_rocblas_handle_ptr(get());
assert(rbhandle.get() != nullptr);
return rbhandle.get();
}
private:
std::size_t id = 0;
shared<hip_stream_ptr> s = nullptr;
shared<miopen_handle> mihandle = nullptr;
shared<rocblas_handle_ptr> rbhandle = nullptr;
};
void add_stream()
{
streams.emplace_back(device_id);
}
stream& get_stream()
{
return streams.at(current_stream);
}
void set_stream(std::size_t n)
{
current_stream = n;
}
private:
std::size_t device_id = 0;
std::size_t current_stream = 0;
std::vector<stream> streams;
};
struct context
{
shared<miopen_handle> handle;
shared<rocblas_handle_ptr> rbhandle;
argument scratch;
context(std::size_t n=0)
: current_device(std::make_shared<hip_device>(n))
{}
hip_device& get_current_device()
{
assert(current_device != nullptr);
return *current_device;
}
hip_device::stream& get_stream()
{
return get_current_device().get_stream();
}
std::vector<argument> literals{};
void finish() const { gpu_sync(); }
private:
// TODO: Make this a vector to support multiple devices
std::shared_ptr<hip_device> current_device;
};
} // namespace gpu
} // namespace migraph
......
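
hip_device::stream creates its HIP stream and both library handles on first use, each access preceded by set_device(id) so the objects land on the stream's device; hip_device starts with one stream and can add more, and get_stream() returns whichever one set_stream() selected. A hedged usage sketch of the API added above (the two-stream scenario is illustrative; headers assumed to be included):

    using namespace migraph::gpu;

    context ctx{0};                        // hip_device for device 0, one stream added by its constructor
    hip_device& dev = ctx.get_current_device();

    dev.add_stream();                      // second stream on the same device
    dev.set_stream(1);                     // make it the current one

    // created lazily, bound to stream 1, cached for later calls
    miopenHandle_t mh = ctx.get_stream().get_miopen();
    rocblas_handle rb = ctx.get_stream().get_rocblas();

    ctx.finish();                          // gpu_sync() -> hipDeviceSynchronize()
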
......@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false);
migraph::argument from_gpu(migraph::argument arg);
void set_device(std::size_t id);
void gpu_sync();
void copy_to_gpu(argument src, argument dst);
......
......@@ -11,6 +11,7 @@ namespace gpu {
using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
rocblas_handle_ptr create_rocblas_handle_ptr();
rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
} // namespace gpu
......
......@@ -20,7 +20,7 @@ argument miopen_leaky_relu::compute(context& ctx,
float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
-miopenActivationForward(ctx.handle.get(),
+miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
......
......@@ -21,7 +21,7 @@ argument miopen_pooling::compute(context& ctx,
float alpha = 1, beta = 0;
-miopenPoolingForward(ctx.handle.get(),
+miopenPoolingForward(ctx.get_stream().get_miopen(),
pd.get(),
&alpha,
x_desc.get(),
......
......@@ -20,7 +20,7 @@ argument miopen_relu::compute(context& ctx,
float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
-miopenActivationForward(ctx.handle.get(),
+miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
......
......@@ -10,6 +10,13 @@ rocblas_handle_ptr create_rocblas_handle_ptr()
return rocblas_handle_ptr{handle};
}
rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
{
rocblas_handle_ptr rb = create_rocblas_handle_ptr();
rocblas_set_stream(rb.get(), s);
return rb;
}
} // namespace gpu
} // namespace migraph
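
This overload is what stream::get_rocblas() relies on: create a handle, then rocblas_set_stream binds it to the given HIP stream so later rocBLAS calls (such as the rocblas_sgemm in miopen_gemm::compute) are enqueued there instead of on the default stream. A minimal, illustrative sketch (the helper name is hypothetical):

    // Hypothetical helper: a rocBLAS handle whose work runs on the context's current stream.
    migraph::gpu::rocblas_handle_ptr make_stream_bound_handle(migraph::gpu::context& ctx)
    {
        hipStream_t s = ctx.get_stream().get();              // stream owned by the current device
        return migraph::gpu::create_rocblas_handle_ptr(s);   // calls rocblas_set_stream(handle, s)
    }
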
......@@ -20,7 +20,7 @@ argument miopen_softmax::compute(context& ctx,
float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
-miopenSoftmaxForward(ctx.handle.get(),
+miopenSoftmaxForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
args[0].implicit(),
......
......@@ -56,8 +56,7 @@ std::string target::name() const { return "miopen"; }
migraph::context target::get_context() const
{
-return context{
-share(make_obj<miopen_handle>(&miopenCreate)), share(create_rocblas_handle_ptr()), {}};
+return context{};
}
} // namespace gpu
} // namespace migraph