Commit 9db8a28d authored by Paul's avatar Paul
Browse files

Merge

parents 1f8aa24f 4b1c1c41
...@@ -21,53 +21,43 @@ ...@@ -21,53 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP #ifndef MIGRAPHX_GUARD_KERNELS_PAD_HPP
#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP #define MIGRAPHX_GUARD_KERNELS_PAD_HPP
#include <migraphx/shape.hpp> #include <migraphx/kernels/shape.hpp>
#include <migraphx/reflect.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/op/quant_convolution.hpp> #include <migraphx/kernels/algorithm.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/kernels/ranges.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context; template <class Offsets, class Input, class Output, class PadVal>
__device__ void pad(const index& idx,
struct miopen_quant_convolution const Offsets& offsets,
const Input& input,
Output& output,
const PadVal& pad_val)
{ {
op::quant_convolution op; auto output_shape = output.get_shape();
bool int8_x4_format = false; idx.global_stride(output_shape.elements(), [&](auto i) {
shared<convolution_descriptor> cd; // 1. get current multi-index for output
miopenConvFwdAlgorithm_t algo{}; // 2. get the size of the input to determine input boundaries
uint64_t solution_id = 0; // 3. compute the corresponding multi-index for input by accounting for offsets
// 4. if current multi-index is within offsets or input's new multi-index is out of bounds,
template <class Self, class F> // use pad value instead of input's value
static auto reflect(Self& self, F f) auto multi = output_shape.multi(i);
{ auto input_bounds = input.get_shape().lens;
// TODO: Add algo auto input_idx = multi - offsets;
return pack_join(migraphx::reflect(self.op, f), auto range_multi = range(multi.size());
pack(f(self.int8_x4_format, "int8_x4_format")));
} if(any_of(range_multi.begin(), range_multi.end(), [&](auto j) {
return multi[j] < offsets[j] or input_idx[j] >= input_bounds[j];
std::string name() const { return "gpu::quant_convolution"; } }))
shape compute_shape(const std::vector<shape>& inputs) const; output[multi] = pad_val;
argument else
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const; output[multi] = input[input_idx];
shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs); });
void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs); }
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
private:
shape pack_int8_shape(const shape& s) const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif #endif
...@@ -21,28 +21,29 @@ ...@@ -21,28 +21,29 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_RTGLIB_FWD_CONV_BATCHNORM_REWRITE_HPP #ifndef MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#define MIGRAPHX_GUARD_RTGLIB_FWD_CONV_BATCHNORM_REWRITE_HPP #define MIGRAPHX_GUARD_KERNELS_RANGES_HPP
#include <string> #include <migraphx/kernels/iota_iterator.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/config.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module; template <class Iterator>
struct iterator_range
/**
* Rewrite batchnorm to a multiply and add.
*/
struct rewrite_batchnorm
{ {
std::string name() const { return "rewrite_batchnorm"; } Iterator start;
void apply(module& m) const; Iterator last;
constexpr Iterator begin() const { return start; }
constexpr Iterator end() const { return last; }
}; };
} // namespace MIGRAPHX_INLINE_NS constexpr iterator_range<iota_iterator> range(diff_int start, diff_int last)
} // namespace migraphx {
return {{start, {}}, {last, {}}};
}
constexpr iterator_range<iota_iterator> range(diff_int last) { return range(0, last); }
#endif } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_leaky_relu::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_leaky_relu::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
void miopen_leaky_relu::finalize(context&, const shape&, const std::vector<shape>&)
{
ad = make_leaky_relu(op.alpha);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -37,15 +37,12 @@ ...@@ -37,15 +37,12 @@
#include <migraphx/op/quant_convolution.hpp> #include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp> #include <migraphx/op/quant_dot.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp> #include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/device_name.hpp> #include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp> #include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp> #include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/miopen.hpp> #include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp> #include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp> #include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
...@@ -98,14 +95,11 @@ struct miopen_apply ...@@ -98,14 +95,11 @@ struct miopen_apply
add_extend_op("argmax"); add_extend_op("argmax");
add_extend_op("argmin"); add_extend_op("argmin");
add_extend_op("elu");
add_extend_op("gather"); add_extend_op("gather");
add_extend_op("leaky_relu");
add_extend_op("logsoftmax"); add_extend_op("logsoftmax");
add_extend_op("lrn"); add_extend_op("lrn");
add_extend_op("multinomial"); add_extend_op("multinomial");
add_extend_op("nonzero"); add_extend_op("nonzero");
add_extend_op("pad");
add_extend_op("pooling"); add_extend_op("pooling");
add_extend_op("prefix_scan_sum"); add_extend_op("prefix_scan_sum");
add_extend_op("reverse"); add_extend_op("reverse");
...@@ -115,16 +109,16 @@ struct miopen_apply ...@@ -115,16 +109,16 @@ struct miopen_apply
add_extend_op("scatter_none"); add_extend_op("scatter_none");
add_extend_op("topk"); add_extend_op("topk");
add_batch_norm_inference_op(); add_convolution_op("convolution");
add_convolution_op(); add_convolution_op("convolution");
add_deconvolution_op(); add_convolution_op("deconvolution");
add_convolution_op("quant_convolution");
add_gemm_op<op::dot>("dot"); add_gemm_op<op::dot>("dot");
add_gemm_op<op::quant_dot>("quant_dot"); add_gemm_op<op::quant_dot>("quant_dot");
add_if_op(); add_if_op();
add_loop_op(); add_loop_op();
add_neg_op(); add_neg_op();
add_nms_op(); add_nms_op();
add_quant_convolution_op();
} }
void copy_params() const void copy_params() const
...@@ -232,13 +226,26 @@ struct miopen_apply ...@@ -232,13 +226,26 @@ struct miopen_apply
return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}})); return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
} }
void add_convolution_op() template <typename Op>
void add_gemm_op(const std::string& name)
{
apply_map.emplace(name, [=](instruction_ref ins) {
std::vector<instruction_ref> refs = ins->inputs();
assert(refs.size() == 2);
auto output = insert_allocation(ins, ins->get_shape());
refs.push_back(output);
return mod->replace_instruction(
ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
});
}
void add_convolution_op(const std::string& name)
{ {
apply_map.emplace("convolution", [=](instruction_ref ins) { apply_map.emplace(name, [=](instruction_ref ins) {
auto&& op = any_cast<op::convolution>(ins->get_operator()); // auto&& op = any_cast<op::convolution>(ins->get_operator());
// TODO: Use make_op // TODO: Use make_op
operation conv = miopen_convolution{op}; operation conv = make_op("gpu::" + name, {{"op", ins->get_operator().to_value()}, {"int8_x4_format", int8_x4_format}});
auto output = insert_allocation(ins, ins->get_shape()); auto output = insert_allocation(ins, ins->get_shape());
return mod->replace_instruction(ins, return mod->replace_instruction(ins,
...@@ -249,60 +256,33 @@ struct miopen_apply ...@@ -249,60 +256,33 @@ struct miopen_apply
}); });
} }
void add_deconvolution_op()
{
apply_map.emplace("deconvolution", [=](instruction_ref ins) {
auto&& op = any_cast<op::deconvolution>(ins->get_operator());
auto conv = miopen_deconvolution{op, make_deconv(op)};
auto ws = conv.find(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
auto workspace = insert_allocation(ins, ws);
auto output = insert_allocation(ins, ins->get_shape());
return mod->replace_instruction(
ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
});
}
template <typename Op> template <typename Op>
void add_gemm_op(const std::string& name) void add_convolution_op(const std::string& name)
{ {
apply_map.emplace(name, [=](instruction_ref ins) { apply_map.emplace(name, [=](instruction_ref ins) {
std::vector<instruction_ref> refs = ins->inputs(); operation conv =
assert(refs.size() == 2); miopen_convolution<Op>{any_cast<Op>(ins->get_operator()), int8_x4_format};
auto output = insert_allocation(ins, ins->get_shape()); migraphx::context ctx = get_context();
refs.push_back(output); size_t ws_bytes = 0;
return mod->replace_instruction( auto compile_conv_with_format = [&](bool format) {
ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs); conv = miopen_convolution<Op>{any_cast<Op>(ins->get_operator()), format};
}); auto ws = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
} ws_bytes = ws.get("workspace", 0);
void add_quant_convolution_op()
{
apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
shape ws;
miopen_quant_convolution conv;
auto compile_quant_conv_with_format = [&](bool format) {
conv = miopen_quant_convolution{op, format, make_conv(op)};
ws = conv.find(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
}; };
try try
{ { // for the regular convolution and deconvolution, this try would always succeed
compile_quant_conv_with_format(int8_x4_format); compile_conv_with_format(int8_x4_format);
} }
catch(migraphx::exception&) catch(migraphx::exception&)
{ {
// In case no solver supports the default format, retry using the other format. // In case no solver supports the default format, retry using the other format.
compile_quant_conv_with_format(not int8_x4_format); compile_conv_with_format(not int8_x4_format);
} }
auto args = ins->inputs(); auto args = ins->inputs();
auto workspace = insert_allocation(ins, ws);
auto output = insert_allocation(ins, ins->get_shape()); auto output = insert_allocation(ins, ins->get_shape());
auto workspace = insert_allocation(ins, shape{shape::int8_type, {ws_bytes}});
return mod->replace_instruction(ins, conv, args[0], args[1], workspace, output); return mod->replace_instruction(ins, conv, args[0], args[1], workspace, output);
}); });
} }
...@@ -337,43 +317,6 @@ struct miopen_apply ...@@ -337,43 +317,6 @@ struct miopen_apply
}); });
} }
void add_batch_norm_inference_op()
{
apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
auto&& op = any_cast<op::batch_norm_inference>(ins->get_operator());
auto output = insert_allocation(ins, ins->get_shape());
shape old_shape = ins->inputs().at(1)->get_shape();
auto input = ins->inputs()[0];
auto input_lens = input->get_shape().lens();
std::vector<int64_t> rsp_lens(input_lens.size(), 1);
// for per_activation case, also need to reshape input
if(op.bn_mode == op::batch_norm_inference::per_activation)
{
std::copy(input_lens.begin() + 1, input_lens.end(), rsp_lens.begin() + 1);
}
else
{
rsp_lens[1] = static_cast<int64_t>(old_shape.elements());
}
auto reshape_op = op::reshape{rsp_lens};
std::vector<instruction_ref> reshapes;
std::transform(ins->inputs().begin() + 1,
ins->inputs().end(),
std::back_inserter(reshapes),
[&](auto i) { return mod->insert_instruction(ins, reshape_op, i); });
return mod->replace_instruction(ins,
miopen_batch_norm_inference{op},
input,
reshapes[0],
reshapes[1],
reshapes[2],
reshapes[3],
output);
});
}
// use 0 - input to represent neg // use 0 - input to represent neg
void add_neg_op() void add_neg_op()
{ {
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include "migraphx/make_op.hpp"
#include <migraphx/gpu/mlir.hpp> #include <migraphx/gpu/mlir.hpp>
#ifdef MIGRAPHX_MLIR #ifdef MIGRAPHX_MLIR
...@@ -43,8 +44,9 @@ ...@@ -43,8 +44,9 @@
#include <migraphx/gpu/code_object_op.hpp> #include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp> #include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp> #include <migraphx/gpu/device_name.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/perfdb.hpp> #include <migraphx/gpu/perfdb.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/permutation.hpp>
#include <deque> #include <deque>
#include <variant> #include <variant>
...@@ -370,7 +372,11 @@ struct mlir_program ...@@ -370,7 +372,11 @@ struct mlir_program
mlir_operation_state& add_results(const std::vector<shape>& outputs) mlir_operation_state& add_results(const std::vector<shape>& outputs)
{ {
auto x = prog->make_tensors(outputs); std::vector<shape> reshaped(outputs.size());
std::transform(outputs.begin(), outputs.end(), reshaped.begin(), [](const shape& r) {
return shape{r.type(), r.lens()};
});
auto x = prog->make_tensors(reshaped);
mlirOperationStateAddResults(&op_state, x.size(), x.data()); mlirOperationStateAddResults(&op_state, x.size(), x.data());
return *this; return *this;
} }
...@@ -502,11 +508,12 @@ struct mlir_program ...@@ -502,11 +508,12 @@ struct mlir_program
{ {
pp = pp =
problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()}; problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
std::string tuned = get_tune_params(); // check if HW supports xdlops
bool xdlops = contains(get_xdlops_archs(), target_name);
std::string tuned = get_tune_params(xdlops);
if(not tuned.empty()) if(not tuned.empty())
ops.add_attributes({{"perf_config", tuned}}); ops.add_attributes({{"perf_config", tuned}});
// check if HW supports xdlops if(xdlops)
if(contains(get_xdlops_archs(), target_name))
ops.add_attributes({{"xdlopsV2", true}}); ops.add_attributes({{"xdlopsV2", true}});
} }
...@@ -571,7 +578,7 @@ struct mlir_program ...@@ -571,7 +578,7 @@ struct mlir_program
MIGRAPHX_THROW("Failed to compile mlir program"); MIGRAPHX_THROW("Failed to compile mlir program");
} }
std::string get_tune_params() { return get_mlir_perf_for_conv(pp); } std::string get_tune_params(bool xdlops) { return get_mlir_perf_for_conv(pp, xdlops); }
mlir_context ctx; mlir_context ctx;
MlirLocation location; MlirLocation location;
...@@ -589,8 +596,54 @@ std::string dump_mlir(const module& m) ...@@ -589,8 +596,54 @@ std::string dump_mlir(const module& m)
return mlir_print(&mlirOperationPrint, mod_op); return mlir_print(&mlirOperationPrint, mod_op);
} }
code_object_op compile_mlir(const context&, const module& m) void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs)
{ {
auto names = m.get_parameter_names();
std::sort(names.begin(), names.end());
for(auto i : range(names.size()))
{
const auto& name = names[i];
const auto& input = inputs[i]->get_shape();
auto param = m.get_parameter(name);
if(input.standard())
continue;
auto lens = input.lens();
auto strides = input.strides();
std::vector<operation> ops;
if(input.transposed())
{
auto perm = find_permutation(input);
auto iperm = invert_permutation(perm);
lens = reorder_dims(lens, iperm);
strides = reorder_dims(strides, iperm);
ops.push_back(make_op("transpose", {{"permutation", perm}}));
}
if(input.broadcasted())
{
std::transform(lens.begin(),
lens.end(),
strides.begin(),
lens.begin(),
[](auto len, auto stride) -> std::size_t {
if(stride == 0)
return 1;
return len;
});
ops.push_back(make_op("multibroadcast", {{"out_lens", input.lens()}}));
}
auto new_param =
std::accumulate(ops.begin(),
ops.end(),
m.add_parameter(name + ".0", shape{input.type(), lens}),
[&](auto x, auto op) { return m.insert_instruction(param, op, x); });
m.replace_instruction(param, new_param);
m.remove_instruction(param);
}
}
code_object_op compile_mlir(const context&, module m, const std::vector<instruction_ref>& inputs)
{
adjust_param_shapes(m, inputs);
const bool trace = enabled(MIGRAPHX_TRACE_MLIR{}); const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
if(trace) if(trace)
std::cout << m << std::endl; std::cout << m << std::endl;
...@@ -662,13 +715,19 @@ instruction_ref insert_mlir(module& m, ...@@ -662,13 +715,19 @@ instruction_ref insert_mlir(module& m,
std::string dump_mlir(const module&) { return {}; } std::string dump_mlir(const module&) { return {}; }
code_object_op compile_mlir(const context&, const module&) { return {}; }
template <class T> template <class T>
void use(T&) void use(T&)
{ {
} }
// Disabling clang-tidy warning on non-real useage.
// NOLINTBEGIN(performance-unnecessary-value-param)
code_object_op compile_mlir(const context&, module, const std::vector<instruction_ref>&)
{
return {};
}
// NOLINTEND(performance-unnecessary-value-param)
instruction_ref instruction_ref
// cppcheck-suppress funcArgNamesDifferent // cppcheck-suppress funcArgNamesDifferent
insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector<instruction_ref>&) insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector<instruction_ref>&)
......
...@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query) ...@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query)
} // namespace } // namespace
std::string get_mlir_perf_for_conv(const problem_params& pp) std::string get_mlir_perf_for_conv(const problem_params& pp, bool xdlops)
{ {
std::string query = "select P.* \ std::string solver = xdlops ? "ConvMlirIgemmFwdXdlops" : "ConvMlirIgemmFwd";
std::string query = "select P.* \
from perf_db P, config C \ from perf_db P, config C \
where P.config = C.id AND \ where P.config = C.id AND \
P.solver = 'ConvMlirIgemmFwdXdlops' AND \ P.solver = '${solver}' AND \
${config}"; ${config}";
auto results = auto results = query_miopen_db(
query_miopen_db(interpolate_string(query, {{"config", generate_miopen_config(pp)}})); interpolate_string(query, {{"config", generate_miopen_config(pp)}, {"solver", solver}}));
if(results.empty()) if(results.empty())
return ""; return "";
return results.front().at("params"); return results.front().at("params");
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(4).standard();
return op.normalize_compute_shape({inputs.at(0), inputs.at(1)});
}
argument miopen_quant_convolution::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
auto x_desc = make_tensor(args[0].get_shape(), int8_x4_format);
auto w_desc = make_tensor(args[1].get_shape(), int8_x4_format);
auto y_desc = make_tensor(output_shape);
float alpha = 1;
float beta = 0;
auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
args[0].implicit(),
w_desc.get(),
args[1].implicit(),
cd.get(),
algo,
&beta,
y_desc.get(),
args[3].implicit(),
args[2].implicit(),
args[2].get_shape().bytes());
if(status != miopenStatusSuccess)
{
MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
}
return args[3];
}
shape miopen_quant_convolution::find(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0], int8_x4_format);
auto w_desc = make_tensor(inputs[1], int8_x4_format);
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
auto x = to_gpu(generate_argument(x_shape));
auto w = to_gpu(generate_argument(w_shape));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Quant Convolution: find convolution failed");
algo = perf.fwd_algo;
size_t solution_count;
status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&solution_count);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Quant Convolution: get solution count failed");
std::vector<miopenConvSolution_t> solutions(solution_count);
status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_count,
&solution_count,
solutions.data());
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Quant Convolution: get solution failed");
solution_id = solutions.front().solution_id;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_quant_convolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
if(cd == nullptr)
cd = make_conv(op);
if(solution_id == 0)
{
// Check that workspace hasn't changed
auto size = inputs.at(2).bytes();
auto ws = find(ctx, output_shape, inputs);
if(ws.bytes() > size)
MIGRAPHX_THROW("MIOpen Quant Convolution: workspace has changed during finalization.");
}
auto x_desc = make_tensor(inputs[0], int8_x4_format);
auto w_desc = make_tensor(inputs[1], int8_x4_format);
auto y_desc = make_tensor(output_shape);
auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
solution_id);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen Quant Convolution: compile solution failed");
}
shape miopen_quant_convolution::pack_int8_shape(const shape& s) const
{
if(s.type() != shape::int8_type)
{
MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
}
auto lens = s.lens();
auto strides = s.strides();
lens[1] = (lens[1] + 3) / 4 * 4;
strides[0] = strides[1] * lens[1];
return {s.type(), lens, strides};
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include <migraphx/propagate_constant.hpp> #include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp> #include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp> #include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_gelu.hpp> #include <migraphx/rewrite_gelu.hpp>
#include <migraphx/rewrite_pooling.hpp> #include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp> #include <migraphx/rewrite_quantization.hpp>
...@@ -113,8 +112,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti ...@@ -113,8 +112,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{}, dead_code_elimination{},
insert_pad{}, insert_pad{},
dead_code_elimination{}, dead_code_elimination{},
rewrite_batchnorm{},
dead_code_elimination{},
rewrite_rnn{}, rewrite_rnn{},
dead_code_elimination{}, dead_code_elimination{},
inline_module{}, inline_module{},
...@@ -149,12 +146,14 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti ...@@ -149,12 +146,14 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{}, dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}}, adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{}, dead_code_elimination{},
compile_miopen{}, compile_miopen{&gctx},
dead_code_elimination{}, dead_code_elimination{},
fuse_ops{&ctx, options.fast_math}, fuse_ops{&ctx, options.fast_math},
dead_code_elimination{}, dead_code_elimination{},
replace_allocate{gpu_allocation_model{}, options.offload_copy}, replace_allocate{gpu_allocation_model{}, options.offload_copy},
dead_code_elimination{}, dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{},
compile_ops{&ctx}, compile_ops{&ctx},
dead_code_elimination{}, dead_code_elimination{},
write_literals{&ctx}, write_literals{&ctx},
......
...@@ -26,15 +26,12 @@ ...@@ -26,15 +26,12 @@
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp> #include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp> #include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp> #include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp> #include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp> #include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp> #include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp> #include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp> #include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp> #include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/loop.hpp> #include <migraphx/op/loop.hpp>
#include <migraphx/op/lrn.hpp> #include <migraphx/op/lrn.hpp>
...@@ -75,84 +72,6 @@ typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::ena ...@@ -75,84 +72,6 @@ typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::ena
return x; return x;
} }
//
// ref implemenataion of batch norm for inference
//
// inputs are:
// args[0] -> input data buffer
// args[1] -> mini batch mean
// args[2] -> mini batch variance
// args[3] -> gamma
// args[4] -> bias
//
// The equation to compute batch norm for inference is:
//
// output[i] = bias + gamma * (input[i] + mean) / sqrt(variance + epsilon)
//
// the input data format should be nchw
//
struct ref_batch_norm_inference
{
op::batch_norm_inference op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "ref::batch_norm_inference"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument output{output_shape};
double epsilon = op.epsilon;
auto input = args[0];
auto arg_gamma = args[1];
auto arg_bias = args[2];
auto mini_batch_mean = args[3];
auto mini_batch_variance = args[4];
if(op.bn_mode == op::batch_norm_inference::spatial)
{
visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
[&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
par_for(output_shape.elements(), [&](auto i) {
auto idx = output_shape.multi(i);
auto c = idx[1];
assert((variance[c] + epsilon) > 0);
result[i] =
gamma[c] * (buffer[i] - mean[c]) / std::sqrt(variance[c] + epsilon) +
bias[c];
});
});
}
if(op.bn_mode == op::batch_norm_inference::per_activation)
{
visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
[&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
par_for(output_shape.elements(), [&](auto i) {
auto idx = output_shape.multi(i);
idx[0] = 0;
auto index = output_shape.index(idx);
assert((variance[index] + epsilon) > 0);
result[i] = gamma[index] * (buffer[i] - mean[index]) /
std::sqrt(variance[index] + epsilon) +
bias[index];
});
});
}
return output;
}
};
MIGRAPHX_REGISTER_OP(ref_batch_norm_inference)
struct ref_lrn struct ref_lrn
{ {
op::lrn op; op::lrn op;
...@@ -237,15 +156,16 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>> ...@@ -237,15 +156,16 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
argument compute(context&, shape output_shape, std::vector<argument> args) const argument compute(context&, shape output_shape, std::vector<argument> args) const
{ {
std::vector<std::size_t> padding; std::vector<std::size_t> padding;
if(op.use_dynamic_same_auto_pad) if(op.padding_mode != op::padding_mode_t::default_)
{ {
auto input_lens = args[0].get_shape().lens(); auto input_lens = args[0].get_shape().lens();
std::vector<std::size_t> img_lens{input_lens.begin() + 2, input_lens.end()};
auto weights_lens = args[1].get_shape().lens(); auto weights_lens = args[1].get_shape().lens();
std::vector<std::size_t> k_lens{weights_lens.begin() + 2, weights_lens.end()}; padding =
padding = calc_dyn_auto_pad(img_lens, k_lens, op.stride, op.dilation); op.padding_mode == op::same_upper
output_shape = ? calc_dyn_auto_pad(input_lens, weights_lens, op.stride, op.dilation, true)
compute_padded_shape({args.at(0).get_shape(), args.at(1).get_shape()}, padding); : calc_dyn_auto_pad(input_lens, weights_lens, op.stride, op.dilation, false);
output_shape = compute_padded_shape(
args[0].get_shape(), args[1].get_shape(), padding, op.stride, op.dilation);
} }
else else
{ {
...@@ -313,34 +233,6 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>> ...@@ -313,34 +233,6 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
}); });
return result; return result;
} }
private:
/*!
* Used for dynamic auto padding since padding needs to be computed at evaulation time.
* \param inputs two fixed shape inputs [input_tensor, weights]
* \param padding from auto_pad calculation
*/
shape compute_padded_shape(const std::vector<shape>& inputs,
const std::vector<std::size_t>& padding) const
{
const shape& input = inputs.at(0);
const shape& weights = inputs.at(1);
const size_t num_spatial_dims = input.lens().size() - 2;
std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};
// calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
for(size_t i = 0; i < num_spatial_dims; i++)
{
auto padding_factor = padding[i] + padding[i + num_spatial_dims];
output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[i + 2] - (1 + op.dilation[i] * (weights.lens()[i + 2] - 1)) +
padding_factor) /
op.stride[i] +
1)));
}
return inputs[0].with_lens(output_lens);
}
}; };
struct ref_im2col struct ref_im2col
...@@ -537,65 +429,6 @@ struct ref_quant_gemm ...@@ -537,65 +429,6 @@ struct ref_quant_gemm
}; };
MIGRAPHX_REGISTER_OP(ref_gemm) MIGRAPHX_REGISTER_OP(ref_gemm)
struct leaky_relu_op
{
op::leaky_relu op;
std::string name() const { return "ref::leaky_relu"; }
auto fcn() const
{
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : x * a; };
}
};
struct elu_op
{
op::elu op;
std::string name() const { return "ref::elu"; }
auto fcn() const
{
auto a = op.alpha;
return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
}
};
template <typename Op>
struct ref_unary : auto_register_op<ref_unary<Op>>
{
ref_unary() = default;
template <class T>
ref_unary(T pop) : op(Op{std::move(pop)})
{
}
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op.op, f);
}
std::string name() const { return op.name(); }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
assert(input.get_shape().standard());
std::transform(input.begin(), input.end(), output.begin(), op.fcn());
});
return result;
}
};
template <class Op> template <class Op>
struct ref_softmax : auto_register_op<ref_softmax<Op>> struct ref_softmax : auto_register_op<ref_softmax<Op>>
{ {
...@@ -731,16 +564,12 @@ struct ref_apply ...@@ -731,16 +564,12 @@ struct ref_apply
void init() void init()
{ {
apply_map["batch_norm_inference"] =
extend_op<ref_batch_norm_inference, op::batch_norm_inference>();
apply_map["convolution"] = extend_op<ref_convolution<op::convolution>, op::convolution>(); apply_map["convolution"] = extend_op<ref_convolution<op::convolution>, op::convolution>();
apply_map["dot"] = extend_op<ref_gemm, op::dot>(); apply_map["dot"] = extend_op<ref_gemm, op::dot>();
apply_map["quant_dot"] = extend_op<ref_quant_gemm, op::quant_dot>(); apply_map["quant_dot"] = extend_op<ref_quant_gemm, op::quant_dot>();
apply_map["quant_convolution"] = apply_map["quant_convolution"] =
extend_op<ref_convolution<op::quant_convolution>, op::quant_convolution>(); extend_op<ref_convolution<op::quant_convolution>, op::quant_convolution>();
apply_map["elu"] = extend_op<ref_unary<elu_op>, op::elu>();
apply_map["im2col"] = extend_op<ref_im2col, op::im2col>(); apply_map["im2col"] = extend_op<ref_im2col, op::im2col>();
apply_map["leaky_relu"] = extend_op<ref_unary<leaky_relu_op>, op::leaky_relu>();
apply_map["logsoftmax"] = extend_op<ref_softmax<op::logsoftmax>, op::logsoftmax>(); apply_map["logsoftmax"] = extend_op<ref_softmax<op::logsoftmax>, op::logsoftmax>();
apply_map["lrn"] = extend_op<ref_lrn, op::lrn>(); apply_map["lrn"] = extend_op<ref_lrn, op::lrn>();
apply_map["pad"] = extend_op<ref_pad, op::pad>(); apply_map["pad"] = extend_op<ref_pad, op::pad>();
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
*/ */
#include <migraphx/tf/op_parser.hpp> #include <migraphx/tf/op_parser.hpp>
#include <migraphx/tf/tf_parser.hpp> #include <migraphx/tf/tf_parser.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp> #include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
...@@ -38,16 +39,37 @@ struct parse_batchnorm : op_parser<parse_batchnorm> ...@@ -38,16 +39,37 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
instruction_ref parse(const op_desc& /*opd*/, instruction_ref parse(const op_desc& /*opd*/,
const tf_parser& /*parser*/, const tf_parser& /*parser*/,
tf_parser::node_info info, tf_parser::node_info info,
const std::vector<instruction_ref>& args) const std::vector<instruction_ref> args) const
{ {
float epsilon = 1e-5f; // different default epsilon than from ONNX
float momentum = 0.9f; float epsilon = 1e-4f;
if(contains(info.attributes, "epsilon")) if(contains(info.attributes, "epsilon"))
{ {
epsilon = info.attributes.at("epsilon").f(); epsilon = info.attributes.at("epsilon").f();
} }
auto op = make_op("batch_norm_inference", {{"epsilon", epsilon}, {"momentum", momentum}});
return info.add_instruction(op, args); auto x_lens = args[0]->get_shape().lens();
auto x_type = args[0]->get_shape().type();
// unsqueeze tensors of shape (C) to broadcast correctly
auto rt = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
auto scale_unsqueeze =
info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[1]);
auto bias_unsqueeze =
info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[2]);
auto mean_unsqueeze =
info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[3]);
auto var_unsqueeze =
info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[4]);
auto numer = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
auto denom = info.add_broadcastable_binary_op("pow", var_eps, rt);
auto div0 = info.add_broadcastable_binary_op("div", numer, denom);
auto r0 = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
} }
}; };
......
...@@ -75,7 +75,6 @@ struct parse_conv : op_parser<parse_conv> ...@@ -75,7 +75,6 @@ struct parse_conv : op_parser<parse_conv>
const std::string& pad_mode = info.attributes.at("padding").s(); const std::string& pad_mode = info.attributes.at("padding").s();
if(pad_mode.find("SAME") != std::string::npos) if(pad_mode.find("SAME") != std::string::npos)
{ {
op.padding_mode = op::padding_mode_t::same;
std::vector<size_t> weight_dims = weights->get_shape().lens(); std::vector<size_t> weight_dims = weights->get_shape().lens();
size_t weight_h = weight_dims[2]; size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3]; size_t weight_w = weight_dims[3];
...@@ -87,10 +86,6 @@ struct parse_conv : op_parser<parse_conv> ...@@ -87,10 +86,6 @@ struct parse_conv : op_parser<parse_conv>
op.padding = std::vector<size_t>(pads.begin(), pads.end()); op.padding = std::vector<size_t>(pads.begin(), pads.end());
} }
else if(pad_mode.find("VALID") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::valid;
}
else if(pad_mode.find("EXPLICIT") != std::string::npos) else if(pad_mode.find("EXPLICIT") != std::string::npos)
{ {
std::vector<size_t> padding; std::vector<size_t> padding;
......
...@@ -80,7 +80,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv> ...@@ -80,7 +80,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
if(pad_mode.find("SAME") != std::string::npos) if(pad_mode.find("SAME") != std::string::npos)
{ {
op.padding_mode = op::padding_mode_t::same;
std::vector<size_t> weight_dims = weights->get_shape().lens(); std::vector<size_t> weight_dims = weights->get_shape().lens();
size_t weight_h = weight_dims[2]; size_t weight_h = weight_dims[2];
size_t weight_w = weight_dims[3]; size_t weight_w = weight_dims[3];
...@@ -101,10 +100,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv> ...@@ -101,10 +100,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
op.padding[1] = pads[1]; op.padding[1] = pads[1];
} }
} }
else if(pad_mode.find("VALID") != std::string::npos)
{
op.padding_mode = op::padding_mode_t::valid;
}
} }
std::vector<int64_t> new_weights_shape; std::vector<int64_t> new_weights_shape;
......
...@@ -55,7 +55,8 @@ struct half_copy_host final : migraphx::experimental_custom_op_base ...@@ -55,7 +55,8 @@ struct half_copy_host final : migraphx::experimental_custom_op_base
hipMemcpyHostToHost, hipMemcpyHostToHost,
ctx.get_queue<hipStream_t>())); ctx.get_queue<hipStream_t>()));
MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize()); MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
MIGRAPHX_HIP_ASSERT(hipMemset(output_buffer_ptr, 0, copy_bytes)); MIGRAPHX_HIP_ASSERT(
hipMemsetAsync(output_buffer_ptr, 0, copy_bytes, ctx.get_queue<hipStream_t>()));
MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize()); MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
return inputs[1]; return inputs[1];
} }
...@@ -97,7 +98,8 @@ struct half_copy_device final : migraphx::experimental_custom_op_base ...@@ -97,7 +98,8 @@ struct half_copy_device final : migraphx::experimental_custom_op_base
hipMemcpyDeviceToDevice, hipMemcpyDeviceToDevice,
ctx.get_queue<hipStream_t>())); ctx.get_queue<hipStream_t>()));
MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize()); MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
MIGRAPHX_HIP_ASSERT(hipMemset(output_buffer_ptr, 0, copy_bytes)); MIGRAPHX_HIP_ASSERT(
hipMemsetAsync(output_buffer_ptr, 0, copy_bytes, ctx.get_queue<hipStream_t>()));
MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize()); MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
return inputs[1]; return inputs[1];
} }
...@@ -124,7 +126,7 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas ...@@ -124,7 +126,7 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
virtual bool runs_on_offload_target() const override { return true; } virtual bool runs_on_offload_target() const override { return true; }
virtual migraphx::argument virtual migraphx::argument
compute(migraphx::context, migraphx::shape, migraphx::arguments inputs) const override compute(migraphx::context ctx, migraphx::shape, migraphx::arguments inputs) const override
{ {
// This custom op simply sets first half size_bytes of the input 0, and rest of the half // This custom op simply sets first half size_bytes of the input 0, and rest of the half
// bytes are copied. for this custom_op, it does its computation on the "device". Therefore, // bytes are copied. for this custom_op, it does its computation on the "device". Therefore,
...@@ -133,7 +135,8 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas ...@@ -133,7 +135,8 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
auto input_bytes = inputs[0].get_shape().bytes(); auto input_bytes = inputs[0].get_shape().bytes();
auto copy_bytes = input_bytes / 2; auto copy_bytes = input_bytes / 2;
MIGRAPHX_HIP_ASSERT(hipSetDevice(0)); MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
MIGRAPHX_HIP_ASSERT(hipMemset(buffer_ptr, 0, copy_bytes)); MIGRAPHX_HIP_ASSERT(
hipMemsetAsync(buffer_ptr, 0, copy_bytes, ctx.get_queue<hipStream_t>()));
MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize()); MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
return inputs[0]; return inputs[0];
} }
......
...@@ -48,4 +48,4 @@ void gpu_literal_test() ...@@ -48,4 +48,4 @@ void gpu_literal_test()
} }
} }
int main() { gpu_literal_test(); } int main() { gpu_literal_test(); } // NOLINT (bugprone-exception-escape)
...@@ -84,7 +84,7 @@ migraphx::program create_program_from_mlir(const migraphx::module& mmlir) ...@@ -84,7 +84,7 @@ migraphx::program create_program_from_mlir(const migraphx::module& mmlir)
inputs.push_back(mm->add_parameter("output", mmlir.get_output_shapes().front())); inputs.push_back(mm->add_parameter("output", mmlir.get_output_shapes().front()));
migraphx::gpu::context ctx; migraphx::gpu::context ctx;
migraphx::gpu::insert_mlir(*mm, mm->end(), compile_mlir(ctx, mmlir), inputs); migraphx::gpu::insert_mlir(*mm, mm->end(), compile_mlir(ctx, mmlir, inputs), inputs);
return p; return p;
} }
...@@ -141,7 +141,7 @@ TEST_CASE(conv) ...@@ -141,7 +141,7 @@ TEST_CASE(conv)
const std::string mlir_output = R"__migraphx__( const std::string mlir_output = R"__migraphx__(
module { module {
func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} { func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
%0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32> %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
return %0 : tensor<1x2x2x2xf32> return %0 : tensor<1x2x2x2xf32>
} }
} }
...@@ -164,7 +164,7 @@ TEST_CASE(conv_add_relu) ...@@ -164,7 +164,7 @@ TEST_CASE(conv_add_relu)
const std::string mlir_output = R"__migraphx__( const std::string mlir_output = R"__migraphx__(
module { module {
func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} { func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
%0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32> %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
%1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32> %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
%2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32> %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
return %2 : tensor<1x2x2x2xf32> return %2 : tensor<1x2x2x2xf32>
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
#include <migraphx/ref/target.hpp> #include <migraphx/ref/target.hpp>
#include <migraphx/gpu/target.hpp> #include <migraphx/gpu/target.hpp>
#include <migraphx/verify.hpp> #include <migraphx/verify.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/dead_code_elimination.hpp> #include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp> #include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp> #include <migraphx/pass_manager.hpp>
......
batch_norm_invalid_rank_test: batch_norm_rank_2_test:
7 J
x x
scale scale
bias bias
mean mean
variancey"BatchNormalizationbatch_norm_invalid_rank_testZ variancey"BatchNormalization*
epsilon75batch_norm_rank_2_testZ
x x
 
 
Z Z
scale scale
 
Z Z
bias bias
 
Z Z
mean mean
 
Z Z
variance variance
 
b b
y y
 
 
B B
\ No newline at end of file \ No newline at end of file
...@@ -331,6 +331,24 @@ def batch_norm_flat_test(): ...@@ -331,6 +331,24 @@ def batch_norm_flat_test():
return ([node], [x, scale, bias, mean, var], [out]) return ([node], [x, scale, bias, mean, var], [out])
@onnx_test
def batch_norm_rank_2_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
scale = helper.make_tensor_value_info('scale', TensorProto.FLOAT, [5])
bias = helper.make_tensor_value_info('bias', TensorProto.FLOAT, [5])
mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [5])
var = helper.make_tensor_value_info('variance', TensorProto.FLOAT, [5])
out = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
node = onnx.helper.make_node(
'BatchNormalization',
inputs=['x', 'scale', 'bias', 'mean', 'variance'],
outputs=['y'],
epsilon=1e-6)
return ([node], [x, scale, bias, mean, var], [out])
@onnx_test @onnx_test
def batch_norm_1d_test(): def batch_norm_1d_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [2, 3, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [2, 3, 4])
...@@ -385,23 +403,6 @@ def batch_norm_3d_test(): ...@@ -385,23 +403,6 @@ def batch_norm_3d_test():
return ([node], [x, scale, bias, mean, var], [out]) return ([node], [x, scale, bias, mean, var], [out])
@onnx_test
def batch_norm_invalid_rank_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [8, 8])
scale = helper.make_tensor_value_info('scale', TensorProto.FLOAT, [8])
bias = helper.make_tensor_value_info('bias', TensorProto.FLOAT, [8])
mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [8])
var = helper.make_tensor_value_info('variance', TensorProto.FLOAT, [8])
out = helper.make_tensor_value_info('y', TensorProto.FLOAT, [8, 8])
node = onnx.helper.make_node(
'BatchNormalization',
inputs=['x', 'scale', 'bias', 'mean', 'variance'],
outputs=['y'])
return ([node], [x, scale, bias, mean, var], [out])
@onnx_test @onnx_test
def batch_norm_invalid_bias_rank_test(): def batch_norm_invalid_bias_rank_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3, 4, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3, 4, 4])
...@@ -3647,6 +3648,16 @@ def neg_test(): ...@@ -3647,6 +3648,16 @@ def neg_test():
return ([node], [x], [y]) return ([node], [x], [y])
@onnx_test
def neg_dynamic_test():
x = helper.make_tensor_value_info('0', TensorProto.INT64, [None, 3])
y = helper.make_tensor_value_info('1', TensorProto.INT64, [None, 3])
node = onnx.helper.make_node('Neg', inputs=['0'], outputs=['1'])
return ([node], [x], [y])
@onnx_test @onnx_test
def nms_test(): def nms_test():
b = helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [1, 6, 4]) b = helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [1, 6, 4])
...@@ -5280,6 +5291,20 @@ def sinh_test(): ...@@ -5280,6 +5291,20 @@ def sinh_test():
return ([node], [x], [y]) return ([node], [x], [y])
@onnx_test
def sinh_dynamic_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [None])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [None])
node = onnx.helper.make_node(
'Sinh',
inputs=['x'],
outputs=['y'],
)
return ([node], [x], [y])
@onnx_test @onnx_test
def size_float_test(): def size_float_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3, 4]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 3, 4])
......
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include <migraphx/op/lrn.hpp> #include <migraphx/op/lrn.hpp>
#include <migraphx/op/reshape.hpp> #include <migraphx/op/reshape.hpp>
#include <migraphx/op/unknown.hpp> #include <migraphx/op/unknown.hpp>
#include <random>
#include <migraphx/serialize.hpp> #include <migraphx/serialize.hpp>
...@@ -394,6 +393,31 @@ TEST_CASE(batch_norm_flat_test) ...@@ -394,6 +393,31 @@ TEST_CASE(batch_norm_flat_test)
EXPECT(p == prog); EXPECT(p == prog);
} }
TEST_CASE(batch_norm_rank_2_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("x", {migraphx::shape::float_type, {2, 5}});
auto scale = mm->add_parameter("scale", {migraphx::shape::float_type, {5}});
auto bias = mm->add_parameter("bias", {migraphx::shape::float_type, {5}});
auto mean = mm->add_parameter("mean", {migraphx::shape::float_type, {5}});
auto var = mm->add_parameter("variance", {migraphx::shape::float_type, {5}});
auto rt = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.5}});
auto eps = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {1e-6f}});
auto numer = add_common_op(*mm, migraphx::make_op("sub"), {x, mean});
auto var_eps = add_common_op(*mm, migraphx::make_op("add"), {var, eps});
auto denom = add_common_op(*mm, migraphx::make_op("pow"), {var_eps, rt});
auto div0 = add_common_op(*mm, migraphx::make_op("div"), {numer, denom});
auto r0 = add_common_op(*mm, migraphx::make_op("mul"), {div0, scale});
add_common_op(*mm, migraphx::make_op("add"), {r0, bias});
auto prog = optimize_onnx("batch_norm_rank_2_test.onnx");
EXPECT(p == prog);
}
TEST_CASE(batch_norm_1d_test) TEST_CASE(batch_norm_1d_test)
{ {
migraphx::program p; migraphx::program p;
...@@ -856,8 +880,7 @@ TEST_CASE(conv_autopad_same_test) ...@@ -856,8 +880,7 @@ TEST_CASE(conv_autopad_same_test)
auto l0 = mm->add_parameter("0", {migraphx::shape::float_type, {1, 3, 32, 32}}); auto l0 = mm->add_parameter("0", {migraphx::shape::float_type, {1, 3, 32, 32}});
auto l1 = mm->add_parameter("1", {migraphx::shape::float_type, {1, 3, 3, 3}}); auto l1 = mm->add_parameter("1", {migraphx::shape::float_type, {1, 3, 3, 3}});
migraphx::op::convolution op; migraphx::op::convolution op;
op.padding = {1, 1, 1, 1}; op.padding = {1, 1, 1, 1};
op.padding_mode = migraphx::op::padding_mode_t::same;
mm->add_instruction(op, l0, l1); mm->add_instruction(op, l0, l1);
auto prog = optimize_onnx("conv_autopad_same_test.onnx"); auto prog = optimize_onnx("conv_autopad_same_test.onnx");
...@@ -1034,15 +1057,11 @@ TEST_CASE(conv_dynamic_batch_same_upper) ...@@ -1034,15 +1057,11 @@ TEST_CASE(conv_dynamic_batch_same_upper)
auto l0 = mm->add_parameter( auto l0 = mm->add_parameter(
"0", {migraphx::shape::float_type, {{1, 10, 0}, {3, 3, 0}, {5, 5, 0}, {5, 5, 0}}}); "0", {migraphx::shape::float_type, {{1, 10, 0}, {3, 3, 0}, {5, 5, 0}, {5, 5, 0}}});
auto l1 = mm->add_parameter("1", {migraphx::shape::float_type, {1, 3, 3, 3}}); auto l1 = mm->add_parameter("1", {migraphx::shape::float_type, {1, 3, 3, 3}});
auto c0 = auto c0 = mm->add_instruction(
mm->add_instruction(migraphx::make_op("convolution", migraphx::make_op("convolution",
{{"padding", {1, 1, 1, 1}}, {{"padding", {1, 1, 1, 1}}, {"stride", {1, 1}}, {"dilation", {1, 1}}}),
{"stride", {1, 1}}, l0,
{"dilation", {1, 1}}, l1);
{"padding_mode", migraphx::op::padding_mode_t::same},
{"use_dynamic_same_auto_pad", false}}),
l0,
l1);
mm->add_return({c0}); mm->add_return({c0});
migraphx::onnx_options options; migraphx::onnx_options options;
...@@ -1064,8 +1083,7 @@ TEST_CASE(conv_dynamic_img_same_upper) ...@@ -1064,8 +1083,7 @@ TEST_CASE(conv_dynamic_img_same_upper)
{{"padding", {0, 0}}, {{"padding", {0, 0}},
{"stride", {1, 1}}, {"stride", {1, 1}},
{"dilation", {1, 1}}, {"dilation", {1, 1}},
{"padding_mode", migraphx::op::padding_mode_t::same_upper}, {"padding_mode", migraphx::op::padding_mode_t::same_upper}}),
{"use_dynamic_same_auto_pad", true}}),
l0, l0,
l1); l1);
mm->add_return({c0}); mm->add_return({c0});
...@@ -1089,8 +1107,7 @@ TEST_CASE(conv_dynamic_kernel_same_lower) ...@@ -1089,8 +1107,7 @@ TEST_CASE(conv_dynamic_kernel_same_lower)
{{"padding", {0, 0}}, {{"padding", {0, 0}},
{"stride", {1, 1}}, {"stride", {1, 1}},
{"dilation", {1, 1}}, {"dilation", {1, 1}},
{"padding_mode", migraphx::op::padding_mode_t::same_lower}, {"padding_mode", migraphx::op::padding_mode_t::same_lower}}),
{"use_dynamic_same_auto_pad", true}}),
l0, l0,
l1); l1);
mm->add_return({c0}); mm->add_return({c0});
...@@ -3483,6 +3500,21 @@ TEST_CASE(neg_test) ...@@ -3483,6 +3500,21 @@ TEST_CASE(neg_test)
EXPECT(p == prog); EXPECT(p == prog);
} }
TEST_CASE(neg_dynamic_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape s{migraphx::shape::int64_type, {{1, 10, 0}, {3, 3, 0}}};
auto input = mm->add_parameter("0", s);
auto ret = mm->add_instruction(migraphx::make_op("neg"), input);
mm->add_return({ret});
migraphx::onnx_options options;
options.default_dyn_dim_value = {1, 10, 0};
auto prog = migraphx::parse_onnx("neg_dynamic_test.onnx", options);
EXPECT(p == prog);
}
TEST_CASE(nms_test) TEST_CASE(nms_test)
{ {
migraphx::program p; migraphx::program p;
...@@ -5206,6 +5238,24 @@ TEST_CASE(sinh_test) ...@@ -5206,6 +5238,24 @@ TEST_CASE(sinh_test)
EXPECT(p == prog); EXPECT(p == prog);
} }
TEST_CASE(sinh_dynamic_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape::dynamic_dimension dd{1, 10, 0};
std::vector<migraphx::shape::dynamic_dimension> dyn_dims;
dyn_dims.push_back(dd);
auto input = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, dyn_dims});
auto ret = mm->add_instruction(migraphx::make_op("sinh"), input);
mm->add_return({ret});
migraphx::onnx_options options;
options.default_dyn_dim_value = dd;
auto prog = parse_onnx("sinh_dynamic_test.onnx", options);
EXPECT(p == prog);
}
TEST_CASE(size_float_test) TEST_CASE(size_float_test)
{ {
migraphx::program p; migraphx::program p;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment