"src/targets/gpu/vscode:/vscode.git/clone" did not exist on "dbb87db1e1c87ed4da36a768c2e7d07e58536bc8"
Unverified Commit 881a4bd4 authored by Umang Yadav's avatar Umang Yadav Committed by GitHub
Browse files

Merge branch 'develop' into fix_parse_if

parents a2d710e3 4b1c1c41
...@@ -63,8 +63,8 @@ struct quant_convolution ...@@ -63,8 +63,8 @@ struct quant_convolution
void check_attribute_size() const void check_attribute_size() const
{ {
if(not((padding.size() == stride.size() or (padding.size() / 2) == stride.size()) and if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
stride.size() == dilation.size())) stride.size() != dilation.size())
{ {
MIGRAPHX_THROW("QUANT_CONVOLUTION: inconsistent attribute sizes"); MIGRAPHX_THROW("QUANT_CONVOLUTION: inconsistent attribute sizes");
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/value.hpp> #include <migraphx/value.hpp>
#include <migraphx/dyn_output.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -62,9 +63,9 @@ struct unary : op_name<Derived> ...@@ -62,9 +63,9 @@ struct unary : op_name<Derived>
value attributes() const { return base_attributes(); } value attributes() const { return base_attributes(); }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, static_cast<const Derived&>(*this)}.has(1); check_shapes{inputs, static_cast<const Derived&>(*this), true}.has(1);
auto s = inputs.at(0); auto s = inputs.at(0);
if(s.scalar()) if(s.dynamic() or s.scalar())
{ {
return s; return s;
} }
...@@ -78,9 +79,9 @@ struct unary : op_name<Derived> ...@@ -78,9 +79,9 @@ struct unary : op_name<Derived>
} }
} }
argument compute(const shape& output_shape, std::vector<argument> args) const argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{ {
argument result{output_shape}; argument result{dyn_out.computed_shape};
result.visit([&](auto output) { result.visit([&](auto output) {
args[0].visit([&](auto input) { args[0].visit([&](auto input) {
std::transform(input.begin(), std::transform(input.begin(),
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#include <utility> #include <utility>
#include <unordered_map> #include <unordered_map>
#include <migraphx/reflect.hpp> #include <migraphx/reflect.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/streamutils.hpp> #include <migraphx/streamutils.hpp>
#include <migraphx/normalize_attributes.hpp> #include <migraphx/normalize_attributes.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
...@@ -199,9 +201,12 @@ auto compute_op(rank<1>, ...@@ -199,9 +201,12 @@ auto compute_op(rank<1>,
context& ctx, context& ctx,
const shape& output_shape, const shape& output_shape,
const std::vector<argument>& input) const std::vector<argument>& input)
-> decltype(x.compute(auto_any_cast(ctx), output_shape, input)) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output_shape, input)),
input))
{ {
return x.compute(auto_any_cast(ctx), output_shape, input); return x.compute(
auto_any_cast(ctx), make_compute_output_shape(pack(x, output_shape, input)), input);
} }
template <class T> template <class T>
...@@ -220,9 +225,9 @@ compute_op(const T& x, context& ctx, const shape& output_shape, const std::vecto ...@@ -220,9 +225,9 @@ compute_op(const T& x, context& ctx, const shape& output_shape, const std::vecto
template <class T> template <class T>
auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input) auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input)) -> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input))
{ {
return x.compute(output_shape, input); return x.compute(make_compute_output_shape(pack(x, output_shape, input)), input);
} }
template <class T> template <class T>
...@@ -244,9 +249,11 @@ auto compute_op(rank<1>, ...@@ -244,9 +249,11 @@ auto compute_op(rank<1>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args, const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(output, inputs, module_args, f)) F f)
-> decltype(
x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
{ {
return x.compute(output, inputs, module_args, f); return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
} }
template <class T, class F> template <class T, class F>
...@@ -278,9 +285,17 @@ auto compute_op(rank<4>, ...@@ -278,9 +285,17 @@ auto compute_op(rank<4>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args, const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(auto_any_cast(ctx), output, inputs, module_args, f)) F f) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs,
module_args,
f))
{ {
return x.compute(auto_any_cast(ctx), output, inputs, module_args, f); return x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs,
module_args,
f);
} }
template <class T, class F> template <class T, class F>
...@@ -290,9 +305,11 @@ auto compute_op(rank<3>, ...@@ -290,9 +305,11 @@ auto compute_op(rank<3>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>& module_args, const std::vector<module_ref>& module_args,
F f) -> decltype(x.compute(output, inputs, module_args, f)) F f)
-> decltype(
x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
{ {
return x.compute(output, inputs, module_args, f); return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
} }
template <class T, class F> template <class T, class F>
...@@ -302,9 +319,10 @@ auto compute_op(rank<2>, ...@@ -302,9 +319,10 @@ auto compute_op(rank<2>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>&, const std::vector<module_ref>&,
F) -> decltype(x.compute(output, inputs)) F)
-> decltype(x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs))
{ {
return x.compute(output, inputs); return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs);
} }
template <class T, class F> template <class T, class F>
...@@ -314,9 +332,12 @@ auto compute_op(rank<1>, ...@@ -314,9 +332,12 @@ auto compute_op(rank<1>,
const shape& output, const shape& output,
const std::vector<argument>& inputs, const std::vector<argument>& inputs,
const std::vector<module_ref>&, const std::vector<module_ref>&,
F) -> decltype(x.compute(auto_any_cast(ctx), output, inputs)) F) -> decltype(x.compute(auto_any_cast(ctx),
make_compute_output_shape(pack(x, output, inputs)),
inputs))
{ {
return x.compute(auto_any_cast(ctx), output, inputs); return x.compute(
auto_any_cast(ctx), make_compute_output_shape(pack(x, output, inputs)), inputs);
} }
template <class T, class F> template <class T, class F>
...@@ -348,7 +369,8 @@ auto is_context_free_op(rank<1>, ...@@ -348,7 +369,8 @@ auto is_context_free_op(rank<1>,
const T& x, const T& x,
const shape& output_shape, const shape& output_shape,
const std::vector<argument>& input) const std::vector<argument>& input)
-> decltype(x.compute(output_shape, input), std::true_type{}); -> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input),
std::true_type{});
template <class T> template <class T>
auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&) auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&)
......
...@@ -56,11 +56,11 @@ auto reflect_impl(rank<0>, T&, Selector) ...@@ -56,11 +56,11 @@ auto reflect_impl(rank<0>, T&, Selector)
} }
template <class T> template <class T>
auto reflectable_impl(rank<1>, T&& x) auto reflectable_impl(rank<1>, const T& x)
-> decltype(T::reflect(x, reflect_placeholder{}), std::true_type{}); -> decltype(T::reflect(x, reflect_placeholder{}), std::true_type{});
template <class T> template <class T>
auto reflectable_impl(rank<0>, T &&) -> decltype(std::false_type{}); auto reflectable_impl(rank<0>, const T&) -> decltype(std::false_type{});
template <class T> template <class T>
struct remove_rvalue_reference struct remove_rvalue_reference
...@@ -111,8 +111,18 @@ auto reflect(T& x, Selector f) ...@@ -111,8 +111,18 @@ auto reflect(T& x, Selector f)
template <class T> template <class T>
auto reflect_tie(T& x) auto reflect_tie(T& x)
{ {
return reflect(x, [](auto&& y, auto&&...) { return detail::wrap<decltype(y)>(y); })( return reflect(x, [](auto&& y, auto&&...) {
[](auto&&... xs) { return detail::auto_tuple(xs.get()...); }); // cppcheck-suppress UnnecessaryElseStatement
if constexpr(is_reflectable<decltype(y)>{})
{
auto t = reflect_tie(y);
return detail::wrap<decltype(t)>(t);
}
else
{
return detail::wrap<decltype(y)>(y);
}
})([](auto&&... xs) { return detail::auto_tuple(xs.get()...); });
} }
template <class T, class F> template <class T, class F>
......
...@@ -26,7 +26,9 @@ ...@@ -26,7 +26,9 @@
#include <ostream> #include <ostream>
#include <algorithm> #include <algorithm>
#include <migraphx/reflect.hpp>
#include <migraphx/rank.hpp> #include <migraphx/rank.hpp>
#include <migraphx/requires.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <vector> #include <vector>
...@@ -83,6 +85,20 @@ auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r) ...@@ -83,6 +85,20 @@ auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
os << "}"; os << "}";
} }
template <class T, MIGRAPHX_REQUIRES(is_reflectable<T>{})>
void stream_write_value_impl(rank<0>, std::ostream& os, const T& x)
{
char delim = '{';
reflect_each(x, [&](auto&& y, auto name) {
os << delim;
os << name << "=";
stream_write_value_impl(rank<2>{}, os, y);
delim = ',';
});
if(delim == ',')
os << "}";
}
} // namespace detail } // namespace detail
template <class T> template <class T>
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <migraphx/file_buffer.hpp> #include <migraphx/file_buffer.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
#include <migraphx/msgpack.hpp> #include <migraphx/msgpack.hpp>
#include <migraphx/file_buffer.hpp>
#include <fstream> #include <fstream>
namespace migraphx { namespace migraphx {
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include <migraphx/pass_manager.hpp> #include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/register_target.hpp> #include <migraphx/register_target.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
......
...@@ -30,7 +30,7 @@ namespace onnx { ...@@ -30,7 +30,7 @@ namespace onnx {
void recalc_conv_attributes(value& v, size_t kdims) void recalc_conv_attributes(value& v, size_t kdims)
{ {
if(not(v["padding"].size() == kdims or v["padding"].size() == kdims * 2)) if(v["padding"].size() != kdims and v["padding"].size() != kdims * 2)
{ {
v["padding"].resize(kdims); v["padding"].resize(kdims);
std::fill_n(v["padding"].begin(), kdims, 0); std::fill_n(v["padding"].begin(), kdims, 0);
......
...@@ -46,9 +46,6 @@ ...@@ -46,9 +46,6 @@
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/dfor.hpp> #include <migraphx/dfor.hpp>
#include <migraphx/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/rnn_variable_seq_lens.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
......
...@@ -827,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins) ...@@ -827,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins)
}; };
auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot")); auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot"));
auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution")); auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution"));
return not(dots < 2 and convs < 2); return (dots >= 2 or convs >= 2);
} }
struct find_conv_dot_horiz_fusion struct find_conv_dot_horiz_fusion
......
...@@ -215,55 +215,6 @@ struct cpu_pad ...@@ -215,55 +215,6 @@ struct cpu_pad
}; };
MIGRAPHX_REGISTER_OP(cpu_pad) MIGRAPHX_REGISTER_OP(cpu_pad)
struct leaky_relu_op
{
op::leaky_relu op;
std::string name() const { return "cpu::leaky_relu"; }
auto fcn() const
{
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : x * a; };
}
};
template <typename Op>
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
{
cpu_unary2() = default;
template <class T>
cpu_unary2(T pop) : op(Op{std::move(pop)})
{
}
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op.op, f);
}
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
assert(input.get_shape().standard());
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
});
return result;
}
};
template struct cpu_unary2<leaky_relu_op>;
struct cpu_rnn_var_sl_last_output struct cpu_rnn_var_sl_last_output
{ {
op::rnn_var_sl_last_output op; op::rnn_var_sl_last_output op;
......
...@@ -41,7 +41,6 @@ ...@@ -41,7 +41,6 @@
#include <migraphx/rewrite_quantization.hpp> #include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp> #include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp> #include <migraphx/schedule.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/simplify_algebra.hpp> #include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_qdq.hpp> #include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp> #include <migraphx/simplify_reshapes.hpp>
......
...@@ -84,10 +84,7 @@ add_library(migraphx_gpu ...@@ -84,10 +84,7 @@ add_library(migraphx_gpu
compile_hip.cpp compile_hip.cpp
compile_hip_code_object.cpp compile_hip_code_object.cpp
compiler.cpp compiler.cpp
convolution.cpp
deconvolution.cpp
device_name.cpp device_name.cpp
elu.cpp
fuse_mlir.cpp fuse_mlir.cpp
fuse_ops.cpp fuse_ops.cpp
gather.cpp gather.cpp
...@@ -100,7 +97,6 @@ add_library(migraphx_gpu ...@@ -100,7 +97,6 @@ add_library(migraphx_gpu
logsoftmax.cpp logsoftmax.cpp
loop.cpp loop.cpp
lrn.cpp lrn.cpp
leaky_relu.cpp
mlir.cpp mlir.cpp
multinomial.cpp multinomial.cpp
nonzero.cpp nonzero.cpp
...@@ -110,7 +106,6 @@ add_library(migraphx_gpu ...@@ -110,7 +106,6 @@ add_library(migraphx_gpu
pad.cpp pad.cpp
perfdb.cpp perfdb.cpp
pooling.cpp pooling.cpp
quant_convolution.cpp
reverse.cpp reverse.cpp
rnn_variable_seq_lens.cpp rnn_variable_seq_lens.cpp
rocblas.cpp rocblas.cpp
...@@ -146,14 +141,9 @@ register_migraphx_gpu_ops(hip_ ...@@ -146,14 +141,9 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops(miopen_ register_migraphx_gpu_ops(miopen_
abs abs
contiguous contiguous
convolution
deconvolution
elu
int8_conv_pack int8_conv_pack
leaky_relu
lrn lrn
pooling pooling
quant_convolution
) )
register_op(migraphx_gpu register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
...@@ -167,6 +157,9 @@ register_op(migraphx_gpu ...@@ -167,6 +157,9 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot> OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp) INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp)
rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION}) rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_gpu) rocm_clang_tidy_check(migraphx_gpu)
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
#include <miopen/miopen.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4).standard();
std::vector<shape> conv_inputs(inputs.begin(), inputs.begin() + 2);
check_shapes{conv_inputs, *this}.max_ndims(5);
return op.normalize_compute_shape(conv_inputs);
}
inline shape reshape_if_1d(const shape& input)
{
shape new_shape{input};
auto dims = new_shape.lens();
if(dims.size() == 3)
{
std::vector<size_t> new_dims = dims;
new_dims.insert(new_dims.begin() + 2, 1);
new_shape = shape{input.type(), new_dims};
}
return new_shape;
}
argument miopen_convolution::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
auto workspace_size = args[2].get_shape().bytes();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const miopenTensorArgument_t tensor_args[3] = {
{miopenTensorConvolutionX, nullptr, args[0].implicit()},
{miopenTensorConvolutionW, nullptr, args[1].implicit()},
{miopenTensorConvolutionY, nullptr, args[3].implicit()},
};
if(solution_ptr.get() == nullptr)
MIGRAPHX_THROW("MIOpen Convolution : Load MIOpen Solution before running it");
auto status = miopenRunSolution(miopen_stream_handle,
solution_ptr.get(),
3,
tensor_args,
args[2].implicit(),
workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: running convolution using find_2.0 failed");
return args[3];
}
#else
// else use immediate mode
if(solution_id == 0)
MIGRAPHX_THROW("MIOpen Convolution: invalid solution ID");
auto status = miopenConvolutionForwardImmediate(miopen_stream_handle,
w_desc.get(),
args[1].implicit(),
x_desc.get(),
args[0].implicit(),
cd.get(),
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
workspace_size,
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: running convolution failed");
return args[3];
#endif
}
shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
std::size_t workspace_size = 0;
#ifdef MIGRAPHX_HAS_FIND_2_API
{
auto conv_problem = make_obj<miopen_problem>(
&miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution : failed to get solution's workspace size");
std::size_t solution_size;
status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: Failed to fetch solution size");
auto solution_binary = std::vector<char>{};
solution_binary.resize(solution_size);
status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: Saving solution failed");
solution_object = value::binary{solution_binary.data(), solution_size};
return shape{shape::int8_type, {workspace_size}};
}
#else
// else use immediate find mode
auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: Failed to get forward workspace size");
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: find convolution failed");
algo = perf.fwd_algo;
size_t solution_count;
status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&solution_count);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: get solution count failed");
std::vector<miopenConvSolution_t> solutions(solution_count);
status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_count,
&solution_count,
solutions.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: get solution failed");
solution_id = solutions.front().solution_id;
return shape{shape::int8_type, {perf.memory}};
#endif
}
void miopen_convolution::finalize(context& ctx,
const shape& output_shape,
const std::vector<shape>& inputs)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
{
(void)(ctx); // avoid warnings
(void)(output_shape);
(void)(inputs);
// load solution
if(solution_ptr == nullptr)
{
miopenSolution_t ptr;
auto status = miopenLoadSolution(&ptr,
reinterpret_cast<const char*>(solution_object.data()),
solution_object.size());
solution_ptr = miopen_solution{ptr};
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: loading convolution solution failed");
}
}
#else
// Use immediate mode API
{
if(cd == nullptr)
cd = make_conv(op);
if(solution_id == 0)
{
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
}
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
}
#endif
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_deconvolution::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4).standard();
std::vector<shape> conv_inputs(inputs.begin(), inputs.begin() + 2);
check_shapes{conv_inputs, *this}.max_ndims(5);
return op.compute_shape(conv_inputs);
}
inline shape reshape_if_1d(const shape& input)
{
shape new_shape{input};
auto dims = new_shape.lens();
if(dims.size() == 3)
{
std::vector<size_t> new_dims = dims;
new_dims.insert(new_dims.begin() + 2, 1);
new_shape = shape{input.type(), new_dims};
}
return new_shape;
}
argument miopen_deconvolution::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
if(solution_id == 0)
MIGRAPHX_THROW("MIOpen Deconvolution: invalid solution ID");
auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
w_desc.get(),
args[1].implicit(),
x_desc.get(),
args[0].implicit(),
cd.get(),
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
args[2].get_shape().bytes(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: running convolution failed");
return args[3];
}
shape miopen_deconvolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: find convolution failed");
algo = perf.fwd_algo;
size_t solution_count;
status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&solution_count);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: get solution count failed");
std::vector<miopenConvSolution_t> solutions(solution_count);
status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_count,
&solution_count,
solutions.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: get solution failed");
solution_id = solutions.front().solution_id;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_deconvolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
if(cd == nullptr)
cd = make_deconv(op);
if(solution_id == 0)
{
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen Deconvolution: workspace has changed during finalization.");
}
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Deconvolution: compile solution failed");
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/elu.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_elu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
void miopen_elu::finalize(context&, const shape&, const std::vector<shape>&)
{
ad = make_elu(op.alpha);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -49,7 +49,7 @@ struct mlir_conv ...@@ -49,7 +49,7 @@ struct mlir_conv
std::string name() const { return "gpu::mlir_conv"; } std::string name() const { return "gpu::mlir_conv"; }
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{ {
check_shapes{inputs, *this}.standard(); check_shapes{inputs, *this}.packed_or_broadcasted();
if(mods.size() != 1) if(mods.size() != 1)
MIGRAPHX_THROW("should have one submodule."); MIGRAPHX_THROW("should have one submodule.");
if(inputs.size() < 2) if(inputs.size() < 2)
...@@ -70,6 +70,9 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins) ...@@ -70,6 +70,9 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
auto group = v.at("group").to<int>(); auto group = v.at("group").to<int>();
if(group != 1) if(group != 1)
return false; return false;
// Avoid MLIR assertion: Index < Length && "Invalid index!"
if(ins->get_shape().lens().size() != 4)
return false;
return true; return true;
} }
...@@ -96,9 +99,10 @@ struct find_conv_pointwise ...@@ -96,9 +99,10 @@ struct find_conv_pointwise
i.name()); i.name());
})) }))
return; return;
// Only fuse with fp32 for now // Only fuse with fp32/fp16
if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) { if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
return i->get_shape().type() != shape::type_t::float_type; return not contains({shape::type_t::float_type, shape::type_t::half_type},
i->get_shape().type());
})) }))
return; return;
std::sort(names.begin(), names.end()); std::sort(names.begin(), names.end());
......
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
#include <migraphx/gpu/fuse_ops.hpp> #include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/matcher.hpp> #include <migraphx/matcher.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device_name.hpp> #include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/oper.hpp> #include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/gemm.hpp> #include <migraphx/gpu/gemm.hpp>
...@@ -190,10 +189,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins) ...@@ -190,10 +189,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
return false; return false;
auto wei = ins->inputs().at(1)->get_shape(); auto wei = ins->inputs().at(1)->get_shape();
assert(wei.lens().size() == 4); assert(wei.lens().size() == 4);
auto conv = any_cast<miopen_convolution>(ins->get_operator()); auto miopen_conv_op = ins->get_operator().to_value();
if(conv.op.group > 1) auto algo = miopen_conv_op.at("algo").to<miopenConvFwdAlgorithm_t>();
auto conv_op = from_value<op::convolution>(miopen_conv_op["op"]);
if(conv_op.group > 1)
return false; return false;
if(wei.lens()[1] > 512 and conv.algo != miopenConvolutionFwdAlgoWinograd) if(wei.lens()[1] > 512 and algo != miopenConvolutionFwdAlgoWinograd)
return false; return false;
// Do not fuse non-symmetric input // Do not fuse non-symmetric input
...@@ -201,13 +202,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins) ...@@ -201,13 +202,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
if(input_lens[2] != input_lens[3] or wei.lens()[2] != wei.lens()[3]) if(input_lens[2] != input_lens[3] or wei.lens()[2] != wei.lens()[3])
return false; return false;
auto op = conv.op;
// Dont fuse winograd for non-3x3s since there is no fused windograd for those configs // Dont fuse winograd for non-3x3s since there is no fused windograd for those configs
if(conv.algo == miopenConvolutionFwdAlgoWinograd and wei.lens()[2] != 3 and if(algo == miopenConvolutionFwdAlgoWinograd and wei.lens()[2] != 3 and wei.lens()[3] != 3 and
wei.lens()[3] != 3 and contains({{1, 1}}, op.stride)) contains({{1, 1}}, conv_op.stride))
return false; return false;
return contains({{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 2, 2, 2}}, op.padding) and return contains({{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 2, 2, 2}}, conv_op.padding) and
contains({{0, 0}, {1, 1}}, op.stride) and contains({{1, 1}}, op.dilation); contains({{0, 0}, {1, 1}}, conv_op.stride) and contains({{1, 1}}, conv_op.dilation);
} }
void move_broadcasted_back(std::vector<instruction_ref>& args) void move_broadcasted_back(std::vector<instruction_ref>& args)
...@@ -462,7 +462,7 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r) ...@@ -462,7 +462,7 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
auto ins = r.result; auto ins = r.result;
auto input_ins = conv_ins->inputs().at(0); auto input_ins = conv_ins->inputs().at(0);
auto weights_ins = conv_ins->inputs().at(1); auto weights_ins = conv_ins->inputs().at(1);
auto conv_op = any_cast<miopen_convolution>(conv_ins->get_operator()).op; auto conv_op = from_value<op::convolution>((conv_ins->get_operator()).to_value()["op"]);
auto alloc_ins = ins->inputs().back(); auto alloc_ins = ins->inputs().back();
auto old_ws_ins = conv_ins->inputs().at(2); auto old_ws_ins = conv_ins->inputs().at(2);
...@@ -528,7 +528,7 @@ struct find_conv_pointwise ...@@ -528,7 +528,7 @@ struct find_conv_pointwise
auto ins = r.result; auto ins = r.result;
auto input_ins = conv_ins->inputs().at(0); auto input_ins = conv_ins->inputs().at(0);
auto weights_ins = conv_ins->inputs().at(1); auto weights_ins = conv_ins->inputs().at(1);
auto conv_op = any_cast<miopen_convolution>(conv_ins->get_operator()).op; auto conv_op = from_value<op::convolution>(conv_ins->get_operator().to_value()["op"]);
auto alloc_ins = ins->inputs().back(); auto alloc_ins = ins->inputs().back();
module_ref pm = ins->module_inputs().front(); module_ref pm = ins->module_inputs().front();
......
...@@ -183,8 +183,8 @@ argument register_on_gpu(const argument& arg) ...@@ -183,8 +183,8 @@ argument register_on_gpu(const argument& arg)
{ {
auto arg_shared = arg.share(); auto arg_shared = arg.share();
auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes()); auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes());
return {arg_shared.get_shape(), auto s = arg_shared.get_shape();
[p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }}; return {s, [p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
} }
argument to_gpu(const argument& arg, bool host) argument to_gpu(const argument& arg, bool host)
......
...@@ -25,18 +25,40 @@ ...@@ -25,18 +25,40 @@
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP #define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/shape.hpp> #include <migraphx/shape.hpp>
#include <migraphx/op/convolution.hpp> #include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace gpu {
struct context; inline shape reshape_if_1d(const shape& input)
{
shape new_shape{input};
auto dims = new_shape.lens();
if(dims.size() == 3)
{
std::vector<size_t> new_dims = dims;
new_dims.insert(new_dims.begin() + 2, 1);
new_shape = shape{input.type(), new_dims};
}
return new_shape;
}
template <class Op>
struct miopen_convolution struct miopen_convolution
{ {
op::convolution op; Op op;
bool int8_x4_format = false;
shared<convolution_descriptor> cd = nullptr; shared<convolution_descriptor> cd = nullptr;
miopenConvFwdAlgorithm_t algo{}; miopenConvFwdAlgorithm_t algo{};
#ifdef MIGRAPHX_HAS_FIND_2_API #ifdef MIGRAPHX_HAS_FIND_2_API
...@@ -48,29 +70,276 @@ struct miopen_convolution ...@@ -48,29 +70,276 @@ struct miopen_convolution
template <class Self, class F> template <class Self, class F>
static auto reflect(Self& self, F f) static auto reflect(Self& self, F f)
{ {
return pack(f(self.op.padding, "padding"), return pack(f(self.op, "op"),
f(self.op.stride, "stride"),
f(self.op.dilation, "dilation"),
f(self.op.group, "group"),
f(self.op.padding_mode, "padding_mode"),
#ifdef MIGRAPHX_HAS_FIND_2_API #ifdef MIGRAPHX_HAS_FIND_2_API
f(self.solution_object, "solution_object"), f(self.solution_object, "solution_object"),
#endif #endif
f(self.algo, "algo"),
f(self.int8_x4_format, "int8_x4_format"),
f(self.solution_id, "solution_id")); f(self.solution_id, "solution_id"));
} }
std::string name() const { return "gpu::convolution"; } std::string name() const { return "gpu::" + op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const;
inline shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, op}.has(4).standard();
std::vector<shape> conv_inputs(inputs.begin(), inputs.begin() + 2);
check_shapes{conv_inputs, op}.max_ndims(5);
return migraphx::compute_shape<Op>(op, conv_inputs);
}
argument argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs); {
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs); auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()), int8_x4_format);
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
auto workspace_size = args[2].get_shape().bytes();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const miopenTensorArgument_t tensor_args[3] = {
{miopenTensorConvolutionX, nullptr, args[0].implicit()},
{miopenTensorConvolutionW, nullptr, args[1].implicit()},
{miopenTensorConvolutionY, nullptr, args[3].implicit()},
};
if(solution_ptr.get() == nullptr)
MIGRAPHX_THROW("MIOpen " + op.name() + " : Load MIOpen Solution before running it");
auto status = miopenRunSolution(miopen_stream_handle,
solution_ptr.get(),
3,
tensor_args,
args[2].implicit(),
workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() +
" : running convolution using find_2.0 failed");
return args[3];
}
#else
// else use immediate mode
if(solution_id == 0)
MIGRAPHX_THROW("MIOpen " + op.name() + " : invalid solution ID");
auto status = miopenConvolutionForwardImmediate(miopen_stream_handle,
w_desc.get(),
args[1].implicit(),
x_desc.get(),
args[0].implicit(),
cd.get(),
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
workspace_size,
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() + ": running convolution failed");
return args[3];
#endif
}
inline void set_conv_descriptor()
{
if(cd == nullptr)
{
cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
}
}
value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
{
set_conv_descriptor();
auto ws = find(any_cast<migraphx::gpu::context>(ctx), output, input);
return {{"workspace", ws.bytes()}};
}
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
std::size_t workspace_size = 0;
#ifdef MIGRAPHX_HAS_FIND_2_API
{
auto conv_problem = make_obj<miopen_problem>(
&miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");
std::size_t solution_size;
status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + ": Failed to fetch solution size");
auto solution_binary = std::vector<char>{};
solution_binary.resize(solution_size);
status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + ": Saving solution failed");
solution_object = value::binary{solution_binary.data(), solution_size};
return shape{shape::int8_type, {workspace_size}};
}
#else
auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
auto x = to_gpu(generate_argument(x_shape));
auto w = to_gpu(generate_argument(w_shape));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() + " : find convolution failed");
algo = perf.fwd_algo;
size_t solution_count;
status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&solution_count);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution count failed");
std::vector<miopenConvSolution_t> solutions(solution_count);
status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_count,
&solution_count,
solutions.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution failed");
solution_id = solutions.front().solution_id;
return shape{shape::int8_type, {perf.memory}};
#endif
}
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
{
(void)(ctx); // avoid warnings
(void)(output_shape);
(void)(inputs);
// load solution
if(solution_ptr == nullptr)
{
miopenSolution_t ptr;
auto status =
miopenLoadSolution(&ptr,
reinterpret_cast<const char*>(solution_object.data()),
solution_object.size());
solution_ptr = miopen_solution{ptr};
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen " + op.name() + ": loading convolution solution failed");
}
}
#else
// Use immediate mode API
{
set_conv_descriptor();
if(solution_id == 0)
{
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen " + op.name() +
": workspace has changed during finalization.");
}
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
}
#endif
}
inline std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{ {
return shapes.size() - 1; return shapes.size() - 1;
} }
};
inline shape pack_int8_shape(const shape& s) const
{
if(s.type() != shape::int8_type)
{
return s;
}
auto lens = s.lens();
auto strides = s.strides();
lens[1] = (lens[1] + 3) / 4 * 4;
strides[0] = strides[1] * lens[1];
return {s.type(), lens, strides};
}
};
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment