Unverified Commit e19f78ae authored by Umang Yadav, committed by GitHub

Use find_2.0 API for the convolution (#1346)

Improvements/additions still to be made:

- changes for quant_convolution
- changes for deconvolution
- macros for MIOpen status checks
parent c2842c1e
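For orientation, below is a condensed sketch of the Find 2.0 call sequence that the convolution changes in this commit adopt: describe the convolution as a miopenProblem_t, ask miopenFindSolutions for a solution, then bind the tensor arguments and run it with miopenRunSolution. This is only an illustration distilled from the diffs that follow; the helper name find_and_run_conv, the parameter list, and the omission of status checks, descriptor creation, and RAII wrappers are mine, not part of the change.

// A condensed sketch of the Find 2.0 flow (illustrative only; the real code in
// this commit wraps every call in status checks and RAII handles).
#include <miopen/miopen.h>
#include <cstddef>

miopenSolution_t find_and_run_conv(miopenHandle_t handle,
                                   miopenConvolutionDescriptor_t conv_desc,
                                   miopenTensorDescriptor_t x_desc,
                                   miopenTensorDescriptor_t w_desc,
                                   miopenTensorDescriptor_t y_desc,
                                   void* x,
                                   void* w,
                                   void* y,
                                   void* workspace,
                                   std::size_t workspace_size)
{
    // 1. Describe the forward convolution as a "problem".
    miopenProblem_t problem = nullptr;
    miopenCreateConvProblem(&problem, conv_desc, miopenProblemDirectionForward);
    miopenSetProblemTensorDescriptor(problem, miopenTensorConvolutionX, x_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorConvolutionW, w_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorConvolutionY, y_desc);

    // 2. Ask MIOpen for at most one solution (no find options).
    miopenSolution_t solution = nullptr;
    std::size_t found = 0;
    miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);
    miopenDestroyProblem(problem);

    // 3. Bind the tensor arguments and run the chosen solution.
    const miopenTensorArgument_t tensor_args[3] = {
        {miopenTensorConvolutionX, nullptr, x},
        {miopenTensorConvolutionW, nullptr, w},
        {miopenTensorConvolutionY, nullptr, y},
    };
    miopenRunSolution(handle, solution, 3, tensor_args, workspace, workspace_size);

    // The solution can also be serialized with miopenSaveSolution and later
    // restored with miopenLoadSolution, which is how this commit caches it.
    return solution; // the caller releases it with miopenDestroySolution
}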
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <migraphx/rank.hpp>
 #include <migraphx/config.hpp>
+#include <vector>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -59,28 +60,35 @@ inline stream_range_container<Range> stream_range(const Range& r)
 namespace detail {
-inline void stream_write_value_impl(rank<2>, std::ostream& os, const std::string& x) { os << x; }
-template <class Range>
-auto stream_write_value_impl(rank<1>, std::ostream& os, const Range& r)
-    -> decltype(r.begin(), r.end(), void())
+template <class T>
+auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
+{
+    os << x;
+}
+template <class T>
+void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
 {
     os << "{";
     os << stream_range(r);
     os << "}";
 }
-template <class T>
-void stream_write_value_impl(rank<0>, std::ostream& os, const T& x)
+template <class Range>
+auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
+    -> decltype(r.begin(), r.end(), void())
 {
-    os << x;
+    os << "{";
+    os << stream_range(r);
+    os << "}";
 }
 } // namespace detail
 template <class T>
 void stream_write_value(std::ostream& os, const T& x)
 {
-    detail::stream_write_value_impl(rank<2>{}, os, x);
+    detail::stream_write_value_impl(rank<1>{}, os, x);
 }
 } // namespace MIGRAPHX_INLINE_NS
...
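The rewrite above drops the rank<2> special case for std::string and instead lets an "is streamable" overload at rank<1> take precedence over the generic range overload at rank<0>. Below is a self-contained illustration of that rank-based tag-dispatch pattern; the rank definition is a generic stand-in rather than MIGraphX's actual rank.hpp, and the names write/write_impl are illustrative.

// Rank-based tag dispatch: rank<N> derives from rank<N - 1>, so passing
// rank<1>{} prefers the rank<1> overload and falls back to rank<0> when
// SFINAE rejects it.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

template <std::size_t N>
struct rank : rank<N - 1>
{
};
template <>
struct rank<0>
{
};

// Preferred overload: anything that can be streamed directly.
template <class T>
auto write_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
{
    os << x;
}

// Fallback overload: any range, printed element by element (simplified
// compared to stream_range; the trailing separator is ignored here).
template <class Range>
auto write_impl(rank<0>, std::ostream& os, const Range& r)
    -> decltype(r.begin(), r.end(), void())
{
    os << "{";
    for(const auto& e : r)
        os << e << ", ";
    os << "}";
}

template <class T>
void write(std::ostream& os, const T& x)
{
    write_impl(rank<1>{}, os, x); // start dispatch at the highest rank
}

int main()
{
    write(std::cout, 42);                        // streamable -> rank<1>
    std::cout << "\n";
    write(std::cout, std::vector<int>{1, 2, 3}); // range only -> rank<0>
    std::cout << "\n";
    write(std::cout, std::string("hi"));         // streamable wins over range
    std::cout << "\n";
}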
@@ -184,6 +184,12 @@ struct value
     {
     }
     explicit binary(std::size_t s) : base(s) {}
+    friend std::ostream& operator<<(std::ostream& os, const binary& obj)
+    {
+        os << "{binary_object: " << obj.size() << "}";
+        return os;
+    }
 };
 value() = default;
...
@@ -239,9 +239,18 @@ endif()
 include(CheckLibraryExists)
 get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
 check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
+check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
+if(HAS_FIND_2_API)
+    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
+else()
+    message(STATUS "MIOpen does not have Find-2.0 API")
+endif()
 if(HAS_FIND_MODE_API)
     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
-    message(STATUS "MIOpen has find mode api")
+    message(STATUS "MIGraphx is using Find Mode API of MIOpen")
 else()
     message(STATUS "MIOpen does not have find mode api")
 endif()
...
@@ -131,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
     std::size_t bytes = 0;
     for(auto i : preloaded)
     {
-        auto input = inputs[i];
+        const auto& input = inputs[i];
         bytes += input.bytes();
         if(bytes > max_lds_bytes)
             break;
...
@@ -24,6 +24,7 @@
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/generate.hpp>
+#include <miopen/miopen.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -55,14 +56,40 @@ argument miopen_convolution::compute(context& ctx,
                                      const shape& output_shape,
                                      const std::vector<argument>& args) const
 {
     auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
     auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
+    auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+    auto workspace_size = args[2].get_shape().bytes();
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        const miopenTensorArgument_t tensor_args[3] = {
+            {miopenTensorConvolutionX, nullptr, args[0].implicit()},
+            {miopenTensorConvolutionW, nullptr, args[1].implicit()},
+            {miopenTensorConvolutionY, nullptr, args[3].implicit()},
+        };
+        if(solution_ptr.get() == nullptr)
+            MIGRAPHX_THROW("MIOpen Convolution : Load MIOpen Solution before running it");
+        auto status = miopenRunSolution(miopen_stream_handle,
+                                        solution_ptr.get(),
+                                        3,
+                                        tensor_args,
+                                        args[2].implicit(),
+                                        workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: running convolution using find_2.0 failed");
+        return args[3];
+    }
+#else
+    // else use immediate mode
     if(solution_id == 0)
         MIGRAPHX_THROW("MIOpen Convolution: invalid solution ID");
-    auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
+    auto status = miopenConvolutionForwardImmediate(miopen_stream_handle,
                                                     w_desc.get(),
                                                     args[1].implicit(),
                                                     x_desc.get(),
@@ -71,29 +98,66 @@ argument miopen_convolution::compute(context& ctx,
                                                     y_desc.get(),
                                                     args[3].implicit(),
                                                     args[2].implicit(),
-                                                    args[2].get_shape().bytes(),
+                                                    workspace_size,
                                                     solution_id);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: running convolution failed");
     return args[3];
+#endif
 }
 shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
 {
     shape workspace_shape{};
     auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
     auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
     std::size_t workspace_size = 0;
-    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                             w_desc.get(),
-                                             x_desc.get(),
-                                             cd.get(),
-                                             y_desc.get(),
-                                             &workspace_size);
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        auto conv_problem = make_obj<miopen_problem>(
+            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
+        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
+        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
+        auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution : failed to get solution's workspace size");
+        std::size_t solution_size;
+        status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: Failed to fetch solution size");
+        auto solution_binary = std::vector<char>{};
+        solution_binary.resize(solution_size);
+        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: Saving solution failed");
+        solution_object = value::binary{solution_binary.data(), solution_size};
+        return shape{shape::int8_type, {workspace_size}};
+    }
+#else
+    // else use immediate find mode
+    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
+                                                           w_desc.get(),
+                                                           x_desc.get(),
+                                                           cd.get(),
+                                                           y_desc.get(),
+                                                           &workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Convolution: Failed to get forward workspace size");
     workspace_shape = shape{shape::int8_type, {workspace_size}};
     auto x = to_gpu(generate_argument(inputs[0]));
@@ -103,20 +167,20 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
     int algo_count = 1;
     miopenConvAlgoPerf_t perf;
-    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
+    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                    x_desc.get(),
                                                    x.implicit(),
                                                    w_desc.get(),
                                                    w.implicit(),
                                                    cd.get(),
                                                    y_desc.get(),
                                                    y.implicit(),
                                                    1,
                                                    &algo_count,
                                                    &perf,
                                                    workspace.implicit(),
                                                    workspace_size,
                                                    false);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: find convolution failed");
     algo = perf.fwd_algo;
@@ -148,35 +212,58 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
     solution_id = solutions.front().solution_id;
     return shape{shape::int8_type, {perf.memory}};
+#endif
 }
 void miopen_convolution::finalize(context& ctx,
                                   const shape& output_shape,
-                                  std::vector<shape> inputs)
+                                  const std::vector<shape>& inputs)
 {
-    if(cd == nullptr)
-        cd = make_conv(op);
-    if(solution_id == 0)
-    {
-        // Check that workspace hasn't changed
-        auto size = inputs.at(2).bytes();
-        auto ws = find(ctx, output_shape, inputs);
-        if(ws.bytes() > size)
-            MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
-    }
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        (void)(ctx); // avoid warnings
+        (void)(output_shape);
+        (void)(inputs);
+        // load solution
+        if(solution_ptr == nullptr)
+        {
+            miopenSolution_t ptr;
+            auto status = miopenLoadSolution(&ptr,
+                                             reinterpret_cast<const char*>(solution_object.data()),
+                                             solution_object.size());
+            solution_ptr = miopen_solution{ptr};
+            if(status != miopenStatusSuccess)
+                MIGRAPHX_THROW("MIOpen Convolution: loading convolution solution failed");
+        }
+    }
+#else
+    // Use immediate mode API
+    {
+    if(cd == nullptr)
+        cd = make_conv(op);
+    if(solution_id == 0)
+    {
+        // Check that workspace hasn't changed
+        auto size = inputs.at(2).bytes();
+        auto ws = find(ctx, output_shape, inputs);
+        if(ws.bytes() > size)
+            MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
+    }
     auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
     auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
     auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           solution_id);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
+    }
+#endif
 }
 } // namespace gpu
...
@@ -39,6 +39,10 @@ struct miopen_convolution
     op::convolution op;
     shared<convolution_descriptor> cd = nullptr;
    miopenConvFwdAlgorithm_t algo{};
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    value::binary solution_object{};
+    shared<miopen_solution> solution_ptr = nullptr;
+#endif
     uint64_t solution_id = 0;
     template <class Self, class F>
@@ -49,6 +53,9 @@ struct miopen_convolution
                  f(self.op.dilation, "dilation"),
                  f(self.op.group, "group"),
                  f(self.op.padding_mode, "padding_mode"),
+#ifdef MIGRAPHX_HAS_FIND_2_API
+                 f(self.solution_object, "solution_object"),
+#endif
                  f(self.solution_id, "solution_id"));
     }
@@ -57,7 +64,7 @@ struct miopen_convolution
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
     shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
-    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs);
     std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
     {
         return shapes.size() - 1;
...
@@ -70,6 +70,34 @@ Result make_obj(F f, Ts... xs)
     return r;
 }
+#ifdef MIGRAPHX_HAS_FIND_2_API
+using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestroyFindOptions);
+using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
+using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
+inline miopen_solution find_solution(miopenHandle_t handle, miopenProblem_t problem)
+{
+    miopenSolution_t solution;
+    size_t found = 0;
+    auto status = miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);
+    auto result = miopen_solution{solution};
+    if(status != miopenStatusSuccess or found == 0)
+        MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
+    return result;
+}
+inline void set_tensor_descriptor(miopenTensorArgumentId_t name,
+                                  tensor_descriptor& desc,
+                                  miopen_problem& problem_ptr)
+{
+    auto status = miopenSetProblemTensorDescriptor(problem_ptr.get(), name, desc.get());
+    if(status != miopenStatusSuccess)
+    {
+        MIGRAPHX_THROW("setting problem tensor description failed");
+    }
+}
+#endif
 inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = false)
 {
     auto s = os.normalize_standard();
...
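The new miopen_problem/miopen_solution aliases above rely on MIGRAPHX_MANAGE_PTR so the C handles are released automatically. Below is a rough stand-in for what such an alias provides, under the assumption that it boils down to a std::unique_ptr with a custom deleter; this is an assumption about intent, not the macro's literal expansion, and the names are illustrative.

// Assumed equivalent of a MANAGE_PTR-style alias: a unique_ptr whose deleter
// calls the matching miopenDestroy* function (requires an MIOpen build that
// provides the Find 2.0 API).
#include <miopen/miopen.h>
#include <memory>
#include <type_traits>

struct miopen_solution_deleter
{
    void operator()(miopenSolution_t s) const
    {
        if(s != nullptr)
            miopenDestroySolution(s);
    }
};

using managed_miopen_solution =
    std::unique_ptr<std::remove_pointer_t<miopenSolution_t>, miopen_solution_deleter>;

// Usage mirroring finalize(): adopt a raw handle returned through an out-parameter.
//   miopenSolution_t raw;
//   miopenLoadSolution(&raw, blob_data, blob_size);
//   managed_miopen_solution solution{raw}; // destroyed when it goes out of scope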
@@ -65,7 +65,7 @@ struct gathernd_compiler : compiler<gathernd_compiler>
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         hip_compile_options options;
-        auto out_s = inputs.back();
+        const auto& out_s = inputs.back();
         options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
         options.inputs = inputs;
         options.output = out_s;
...
@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
     os << "}";
 }
-void print_value(std::ostream& os, const value::binary& x)
-{
-    // Convert binary to integers
-    std::vector<int> v(x.begin(), x.end());
-    os << "{";
-    os << to_string_range(v);
-    os << "}";
-}
+void print_value(std::ostream& os, const value::binary& x) { os << x; }
 std::ostream& operator<<(std::ostream& os, const value& d)
 {
...
@@ -37,10 +37,6 @@
 #include <migraphx/functional.hpp>
 #include <test.hpp>
-using migraphx::trim;
-// m test_gpu_mlir && ./bin/test_gpu_mlir
 struct mlir_gpu_target : migraphx::gpu::target
 {
     std::string name() const { return "mlir"; }
...