"mmdet3d/ops/vscode:/vscode.git/clone" did not exist on "e6d0e214c3d2fe7db4db33b54e3f8de89e29b480"
Commit fa52b516 authored by Paul

Allocate workspace

parent ad21f52f
@@ -11,11 +11,17 @@ namespace gpu {
 
 using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
 
+std::string hip_error(int error)
+{
+    return hipGetErrorString(static_cast<hipError_t>(error));
+}
+
 hip_ptr allocate_gpu(std::size_t sz)
 {
     void* result;
-    // TODO: Check status
-    hipMalloc(&result, sz);
+    auto status = hipMalloc(&result, sz);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Gpu allocation failed: " + hip_error(status));
     return hip_ptr{result};
 }
 
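For readers skimming the diff: the change above replaces the unchecked hipMalloc with a call whose status is inspected and turned into an exception. A minimal self-contained sketch of the same pattern, using std::runtime_error and a plain std::unique_ptr deleter in place of the repo's MIGRAPH_THROW and MIGRAPH_MANAGE_PTR macros (that substitution is an assumption made only so the example stands alone):

    // Sketch only: mirrors the checked-allocation pattern introduced above,
    // with standard-library stand-ins for the MIGraph macros.
    #include <hip/hip_runtime.h>
    #include <cstddef>
    #include <memory>
    #include <stdexcept>
    #include <string>

    struct hip_deleter
    {
        void operator()(void* p) const { hipFree(p); } // release device memory
    };
    using hip_ptr = std::unique_ptr<void, hip_deleter>;

    std::string hip_error(int error)
    {
        return hipGetErrorString(static_cast<hipError_t>(error));
    }

    hip_ptr allocate_gpu(std::size_t sz)
    {
        void* result = nullptr;
        auto status  = hipMalloc(&result, sz);
        if(status != hipSuccess)
            throw std::runtime_error("Gpu allocation failed: " + hip_error(status));
        return hip_ptr{result};
    }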
@@ -31,34 +37,36 @@ template <class T>
 std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 {
     std::vector<T> result(sz);
-    // TODO: Check status
-    hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
+    auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status));
     return result;
 }
 
 hip_ptr write_to_gpu(const void* x, std::size_t sz)
 {
     auto result = allocate_gpu(sz);
-    // TODO: Check status
-    hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
+    auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
     return result;
 }
 
-migraph::argument allocate_gpu(migraph::shape s)
+argument allocate_gpu(shape s)
 {
-    auto p = share(allocate_gpu(s.bytes()));
+    auto p = share(allocate_gpu(s.bytes()+1));
     return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }
 
-migraph::argument to_gpu(migraph::argument arg)
+argument to_gpu(argument arg)
 {
     auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes()));
     return {arg.get_shape(), [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }
 
-migraph::argument from_gpu(migraph::argument arg)
+argument from_gpu(argument arg)
 {
-    migraph::argument result;
+    argument result;
     arg.visit([&](auto x) {
         using type = typename decltype(x)::value_type;
         auto v = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
...
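As a usage note rather than part of the commit: with the helpers above, a host buffer round-trips through device memory in one call each way, and a failed hipMemcpy now surfaces as an exception instead of silently corrupting results. A hedged sketch, assuming migraph::argument can be constructed from a shape plus a host pointer as it is elsewhere in the code base:

    // Illustrative round trip through to_gpu/from_gpu; the argument{shape, ptr}
    // construction is an assumption about the migraph API.
    std::vector<float> host(16, 1.0f);
    migraph::shape s{migraph::shape::float_type, {16}};
    migraph::argument device_arg = to_gpu({s, host.data()}); // hipMemcpy host -> device, throws on failure
    migraph::argument host_back  = from_gpu(device_arg);     // hipMemcpy device -> host, throws on failure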
@@ -9,7 +9,7 @@ namespace gpu {
 
 struct target
 {
     std::string name() const;
-    std::vector<pass> get_passes(migraph::context& ctx) const;
+    std::vector<pass> get_passes(migraph::context& gctx) const;
     migraph::context get_context() const;
 };
...
@@ -68,7 +68,7 @@ struct miopen_convolution
     std::string name() const { return "gpu::convolution"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(4).standard();
         return op.compute_shape({inputs.at(0), inputs.at(1)});
     }
     argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
@@ -88,21 +88,27 @@ struct miopen_convolution
             algo,
             &beta,
             y_desc.get(),
+            args[3].implicit(),
             args[2].implicit(),
-            nullptr,
-            0);
-        return args[2];
+            args[2].get_shape().bytes());
+        return args[3];
     }
 
-    void compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
+    shape compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
     {
+        shape workspace_shape{};
         auto x_desc = make_tensor(inputs[0]->get_shape());
         auto w_desc = make_tensor(inputs[1]->get_shape());
         auto y_desc = make_tensor(output_shape);
 
+        std::size_t workspace_size = 0;
+        miopenConvolutionForwardGetWorkSpaceSize(ctx.handle.get(), x_desc.get(), w_desc.get(), cd.get(), y_desc.get(), &workspace_size);
+        workspace_shape = shape{shape::int8_type, {workspace_size}};
+
         auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
         auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
         auto y = to_gpu(generate_argument(output_shape));
+        auto workspace = allocate_gpu(workspace_shape);
 
         int algo_count;
         miopenConvAlgoPerf_t perf;
@@ -117,10 +123,11 @@ struct miopen_convolution
             1,
             &algo_count,
             &perf,
-            nullptr,
-            0,
+            workspace.implicit(),
+            workspace_size,
             false);
 
         algo = perf.fwd_algo;
+        return workspace_shape;
     }
 };
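Stripped of the MIOpen specifics, compile() now follows a standard scratch-memory pattern: ask the library how much workspace the chosen configuration needs, allocate that many bytes on the device, and pass the pointer plus byte count where the old code passed nullptr and 0. A self-contained sketch of just that pattern (query_workspace_size is a hypothetical stand-in for miopenConvolutionForwardGetWorkSpaceSize, and the size it returns is made up):

    // Generic shape of the workspace change above, independent of MIOpen.
    #include <hip/hip_runtime.h>
    #include <cstddef>
    #include <stdexcept>

    // Hypothetical stand-in for the real size query.
    std::size_t query_workspace_size() { return std::size_t{1} << 20; }

    void run_with_workspace()
    {
        std::size_t workspace_size = query_workspace_size();

        void* workspace = nullptr;
        if(hipMalloc(&workspace, workspace_size) != hipSuccess)
            throw std::runtime_error("workspace allocation failed");

        // ... hand `workspace` and `workspace_size` to the find-algorithm and
        // forward calls, exactly where the old code passed nullptr and 0 ...

        hipFree(workspace);
    }

Describing the buffer as shape{shape::int8_type, {workspace_size}}, as the diff does, lets the existing allocation pass own the memory instead of compile() allocating it ad hoc.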
@@ -346,11 +353,14 @@ struct miopen_apply
     void apply_convolution(instruction_ref ins)
     {
         auto&& op = any_cast<convolution>(ins->op);
         auto conv = miopen_convolution{op, make_conv(op)};
-        conv.compile(ctx, ins->result, ins->arguments);
+        auto ws = conv.compile(ctx, ins->result, ins->arguments);
+        auto workspace = insert_allocation(ins, ws);
         auto output = insert_allocation(ins, ins->result);
-        prog->replace_instruction(ins, conv, ins->arguments.at(0), ins->arguments.at(1), output);
+        prog->replace_instruction(ins, conv, ins->arguments.at(0), ins->arguments.at(1), workspace, output);
     }
 
     void apply_pooling(instruction_ref ins)
...
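One consequence worth spelling out: replace_instruction now threads a workspace argument in before the output, which is why compute_shape checks has(4) instead of has(3) and why compute writes into args[3] while treating args[2] as scratch. The runtime argument layout, inferred from the replace_instruction call above:

    // Argument layout seen by miopen_convolution::compute after this commit:
    //   args[0] = x          (convolution input)
    //   args[1] = w          (weights)
    //   args[2] = workspace  (scratch buffer, int8_type, workspace_size bytes)
    //   args[3] = output     (the argument compute() returns)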