Commit fa52b516 authored by Paul's avatar Paul
Browse files

Allocate workspace

parent ad21f52f
......@@ -11,11 +11,17 @@ namespace gpu {
using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
std::string hip_error(int error)
{
return hipGetErrorString(static_cast<hipError_t>(error));
}
// Allocate `sz` bytes of device memory, throwing on failure.
// The returned hip_ptr releases the buffer via hipFree (RAII).
hip_ptr allocate_gpu(std::size_t sz)
{
    void* result = nullptr;
    // Check the status instead of ignoring it; a failed allocation must not
    // silently hand back an invalid pointer.
    auto status = hipMalloc(&result, sz);
    if(status != hipSuccess)
        MIGRAPH_THROW("Gpu allocation failed: " + hip_error(status));
    return hip_ptr{result};
}
......@@ -31,34 +37,36 @@ template <class T>
// Copy `sz` elements of type T from device pointer `x` back into a host vector.
// Throws if the device-to-host hipMemcpy fails.
template <class T>
std::vector<T> read_from_gpu(const void* x, std::size_t sz)
{
    std::vector<T> result(sz);
    auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
    if(status != hipSuccess)
        MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status));
    return result;
}
// Copy `sz` bytes from host pointer `x` into a freshly allocated device buffer.
// Throws if the host-to-device hipMemcpy fails; allocation failure throws
// inside allocate_gpu.
hip_ptr write_to_gpu(const void* x, std::size_t sz)
{
    auto result = allocate_gpu(sz);
    auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
    if(status != hipSuccess)
        MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
    return result;
}
migraph::argument allocate_gpu(migraph::shape s)
argument allocate_gpu(shape s)
{
auto p = share(allocate_gpu(s.bytes()));
auto p = share(allocate_gpu(s.bytes()+1));
return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}
migraph::argument to_gpu(migraph::argument arg)
argument to_gpu(argument arg)
{
auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes()));
return {arg.get_shape(), [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}
migraph::argument from_gpu(migraph::argument arg)
argument from_gpu(argument arg)
{
migraph::argument result;
argument result;
arg.visit([&](auto x) {
using type = typename decltype(x)::value_type;
auto v = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
......
......@@ -9,7 +9,7 @@ namespace gpu {
// The gpu compilation target: names itself, supplies the lowering pass
// pipeline, and creates the device context the passes run against.
struct target
{
    std::string name() const;
    std::vector<pass> get_passes(migraph::context& gctx) const;
    migraph::context get_context() const;
};
......
......@@ -68,7 +68,7 @@ struct miopen_convolution
// Operator name reported for printing and instruction lookup.
std::string name() const { return "gpu::convolution"; }
// Validate the instruction's inputs and derive the output shape.
// Expects 4 standard-layout inputs — x, weights, workspace, output — but only
// x and weights feed the underlying convolution shape computation.
shape compute_shape(std::vector<shape> inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();
    return op.compute_shape({inputs.at(0), inputs.at(1)});
}
argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
......@@ -88,21 +88,27 @@ struct miopen_convolution
algo,
&beta,
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
nullptr,
0);
return args[2];
args[2].get_shape().bytes());
return args[3];
}
void compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
shape compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0]->get_shape());
auto w_desc = make_tensor(inputs[1]->get_shape());
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.handle.get(), x_desc.get(), w_desc.get(), cd.get(), y_desc.get(), &workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
auto y = to_gpu(generate_argument(output_shape));
auto workspace = allocate_gpu(workspace_shape);
int algo_count;
miopenConvAlgoPerf_t perf;
......@@ -117,10 +123,11 @@ struct miopen_convolution
1,
&algo_count,
&perf,
nullptr,
0,
workspace.implicit(),
workspace_size,
false);
algo = perf.fwd_algo;
return workspace_shape;
}
};
......@@ -346,11 +353,14 @@ struct miopen_apply
// Lower a generic convolution instruction to the gpu implementation.
// compile() selects the MIOpen algorithm and reports the required workspace
// shape; we then allocate both the workspace and the output buffer and rewire
// the instruction to gpu::convolution(x, w, workspace, output).
void apply_convolution(instruction_ref ins)
{
    auto&& op = any_cast<convolution>(ins->op);
    auto conv = miopen_convolution{op, make_conv(op)};
    auto ws = conv.compile(ctx, ins->result, ins->arguments);
    auto workspace = insert_allocation(ins, ws);
    auto output = insert_allocation(ins, ins->result);
    prog->replace_instruction(
        ins, conv, ins->arguments.at(0), ins->arguments.at(1), workspace, output);
}
void apply_pooling(instruction_ref ins)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment