Unverified Commit e19f78ae authored by Umang Yadav's avatar Umang Yadav Committed by GitHub
Browse files

Use find_2.0 API for the convolution (#1346)

Improvements/Additions to be made:

changes for the quant_convolution,
changes for the deconvolution,
Macros for MIOpen status checks
parent c2842c1e
......@@ -28,6 +28,7 @@
#include <algorithm>
#include <migraphx/rank.hpp>
#include <migraphx/config.hpp>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -59,28 +60,35 @@ inline stream_range_container<Range> stream_range(const Range& r)
namespace detail {
inline void stream_write_value_impl(rank<2>, std::ostream& os, const std::string& x) { os << x; }
template <class T>
auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
{
os << x;
}
template <class Range>
auto stream_write_value_impl(rank<1>, std::ostream& os, const Range& r)
-> decltype(r.begin(), r.end(), void())
template <class T>
void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
{
os << "{";
os << stream_range(r);
os << "}";
}
template <class T>
void stream_write_value_impl(rank<0>, std::ostream& os, const T& x)
template <class Range>
auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
-> decltype(r.begin(), r.end(), void())
{
os << x;
os << "{";
os << stream_range(r);
os << "}";
}
} // namespace detail
template <class T>
void stream_write_value(std::ostream& os, const T& x)
{
detail::stream_write_value_impl(rank<2>{}, os, x);
detail::stream_write_value_impl(rank<1>{}, os, x);
}
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -184,6 +184,12 @@ struct value
{
}
explicit binary(std::size_t s) : base(s) {}
friend std::ostream& operator<<(std::ostream& os, const binary& obj)
{
os << "{binary_object: " << obj.size() << "}";
return os;
}
};
value() = default;
......
......@@ -239,9 +239,18 @@ endif()
include(CheckLibraryExists)
get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
if(HAS_FIND_2_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
else()
message(STATUS "MIOpen does not have Find-2.0 API")
endif()
if(HAS_FIND_MODE_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
message(STATUS "MIOpen has find mode api")
message(STATUS "MIGraphx is using Find Mode API of MIOpen")
else()
message(STATUS "MIOpen does not have find mode api")
endif()
......
......@@ -131,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
std::size_t bytes = 0;
for(auto i : preloaded)
{
auto input = inputs[i];
const auto& input = inputs[i];
bytes += input.bytes();
if(bytes > max_lds_bytes)
break;
......
......@@ -24,6 +24,7 @@
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
#include <miopen/miopen.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -55,14 +56,40 @@ argument miopen_convolution::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
auto workspace_size = args[2].get_shape().bytes();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const miopenTensorArgument_t tensor_args[3] = {
{miopenTensorConvolutionX, nullptr, args[0].implicit()},
{miopenTensorConvolutionW, nullptr, args[1].implicit()},
{miopenTensorConvolutionY, nullptr, args[3].implicit()},
};
if(solution_ptr.get() == nullptr)
MIGRAPHX_THROW("MIOpen Convolution : Load MIOpen Solution before running it");
auto status = miopenRunSolution(miopen_stream_handle,
solution_ptr.get(),
3,
tensor_args,
args[2].implicit(),
workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: running convolution using find_2.0 failed");
return args[3];
}
#else
// else use immediate mode
if(solution_id == 0)
MIGRAPHX_THROW("MIOpen Convolution: invalid solution ID");
auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
auto status = miopenConvolutionForwardImmediate(miopen_stream_handle,
w_desc.get(),
args[1].implicit(),
x_desc.get(),
......@@ -71,29 +98,66 @@ argument miopen_convolution::compute(context& ctx,
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
args[2].get_shape().bytes(),
workspace_size,
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: running convolution failed");
return args[3];
#endif
}
shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
std::size_t workspace_size = 0;
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
#ifdef MIGRAPHX_HAS_FIND_2_API
{
auto conv_problem = make_obj<miopen_problem>(
&miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution : failed to get solution's workspace size");
std::size_t solution_size;
status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: Failed to fetch solution size");
auto solution_binary = std::vector<char>{};
solution_binary.resize(solution_size);
status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: Saving solution failed");
solution_object = value::binary{solution_binary.data(), solution_size};
return shape{shape::int8_type, {workspace_size}};
}
#else
// else use immediate find mode
auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: Failed to get forward workspace size");
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
......@@ -103,20 +167,20 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vec
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: find convolution failed");
algo = perf.fwd_algo;
......@@ -148,35 +212,58 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vec
solution_id = solutions.front().solution_id;
return shape{shape::int8_type, {perf.memory}};
#endif
}
void miopen_convolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
const std::vector<shape>& inputs)
{
if(cd == nullptr)
cd = make_conv(op);
if(solution_id == 0)
#ifdef MIGRAPHX_HAS_FIND_2_API
{
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
(void)(ctx); // avoid warnings
(void)(output_shape);
(void)(inputs);
// load solution
if(solution_ptr == nullptr)
{
miopenSolution_t ptr;
auto status = miopenLoadSolution(&ptr,
reinterpret_cast<const char*>(solution_object.data()),
solution_object.size());
solution_ptr = miopen_solution{ptr};
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: loading convolution solution failed");
}
}
#else
// Use immediate mode API
{
if(cd == nullptr)
cd = make_conv(op);
if(solution_id == 0)
{
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
}
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
}
#endif
}
} // namespace gpu
......
......@@ -39,6 +39,10 @@ struct miopen_convolution
op::convolution op;
shared<convolution_descriptor> cd = nullptr;
miopenConvFwdAlgorithm_t algo{};
#ifdef MIGRAPHX_HAS_FIND_2_API
value::binary solution_object{};
shared<miopen_solution> solution_ptr = nullptr;
#endif
uint64_t solution_id = 0;
template <class Self, class F>
......@@ -49,6 +53,9 @@ struct miopen_convolution
f(self.op.dilation, "dilation"),
f(self.op.group, "group"),
f(self.op.padding_mode, "padding_mode"),
#ifdef MIGRAPHX_HAS_FIND_2_API
f(self.solution_object, "solution_object"),
#endif
f(self.solution_id, "solution_id"));
}
......@@ -57,7 +64,7 @@ struct miopen_convolution
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
......
......@@ -70,6 +70,34 @@ Result make_obj(F f, Ts... xs)
return r;
}
#ifdef MIGRAPHX_HAS_FIND_2_API
using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestroyFindOptions);
using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
inline miopen_solution find_solution(miopenHandle_t handle, miopenProblem_t problem)
{
miopenSolution_t solution;
size_t found = 0;
auto status = miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);
auto result = miopen_solution{solution};
if(status != miopenStatusSuccess or found == 0)
MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
return result;
}
inline void set_tensor_descriptor(miopenTensorArgumentId_t name,
tensor_descriptor& desc,
miopen_problem& problem_ptr)
{
auto status = miopenSetProblemTensorDescriptor(problem_ptr.get(), name, desc.get());
if(status != miopenStatusSuccess)
{
MIGRAPHX_THROW("setting problem tensor description failed");
}
}
#endif
inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = false)
{
auto s = os.normalize_standard();
......
......@@ -65,7 +65,7 @@ struct gathernd_compiler : compiler<gathernd_compiler>
operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
{
hip_compile_options options;
auto out_s = inputs.back();
const auto& out_s = inputs.back();
options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
options.inputs = inputs;
options.output = out_s;
......
......@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
os << "}";
}
void print_value(std::ostream& os, const value::binary& x)
{
// Convert binary to integers
std::vector<int> v(x.begin(), x.end());
os << "{";
os << to_string_range(v);
os << "}";
}
void print_value(std::ostream& os, const value::binary& x) { os << x; }
std::ostream& operator<<(std::ostream& os, const value& d)
{
......
......@@ -37,10 +37,6 @@
#include <migraphx/functional.hpp>
#include <test.hpp>
using migraphx::trim;
// m test_gpu_mlir && ./bin/test_gpu_mlir
struct mlir_gpu_target : migraphx::gpu::target
{
std::string name() const { return "mlir"; }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment