Commit b076d0f4 authored by Paul

Merge branch 'jit-layernorm-merge' into bert-opt-layernorm

parents 03c6967e d705e483
......@@ -90,7 +90,6 @@ add_library(migraphx
shape.cpp
simplify_algebra.cpp
simplify_reshapes.cpp
target_assignments.cpp
tmp_dir.cpp
value.cpp
verify_args.cpp
......
......@@ -35,17 +35,13 @@ struct onnx_options
{
/// Old way to set default fixed dimension size
std::size_t default_dim_value = 0;
/*!
* Default dynamic dimension size (if both default_dim_value and default_dyn_dim_value
* set parser throws)
*/
/// Default dynamic dimension size (if both default_dim_value and default_dyn_dim_value are
/// set, the parser throws)
shape::dynamic_dimension default_dyn_dim_value = {1, 1, 0};
/// Explicitly specify the dims of an input
std::unordered_map<std::string, std::vector<std::size_t>> map_input_dims = {};
/*!
* Explicitly specify dynamic dims of an input (if both map_input_dims and
* map_dyn_input_dims set parser throws)
*/
/// Explicitly specify dynamic dims of an input (if both map_input_dims and map_dyn_input_dims
/// are set, the parser throws)
std::unordered_map<std::string, std::vector<shape::dynamic_dimension>> map_dyn_input_dims = {};
/// Continue parsing onnx file if an unknown operator is found
bool skip_unknown_operators = false;
......@@ -53,6 +49,8 @@ struct onnx_options
bool print_program_on_error = false;
/// Max iter num for the loop operator
int64_t max_loop_iterations = 10;
/// Use dynamic output for operators when available
bool use_dyn_output = false;
};
/// Create a program from an onnx file
......
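For context, a minimal usage sketch of the new option; parse_onnx is the public entry point declared in migraphx/onnx.hpp, and the model path here is illustrative:

```cpp
#include <migraphx/onnx.hpp>

int main()
{
    migraphx::onnx_options options;
    options.use_dyn_output        = true;      // NMS may now return {num_selected, 3}
    options.default_dyn_dim_value = {1, 4, 0}; // unspecified dims range over [1, 4]
    // Note: setting both default_dim_value and default_dyn_dim_value throws.
    auto prog = migraphx::parse_onnx("model.onnx", options);
    (void)prog;
}
```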
......@@ -45,7 +45,15 @@ struct convert : unary<convert>
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
return {target_type, inputs.at(0).lens(), inputs.at(0).strides()};
auto input = inputs.at(0);
if(input.dynamic())
{
return {target_type, input.dyn_dims()};
}
else
{
return {target_type, input.lens(), input.strides()};
}
}
std::string point_op() const
......
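The two branches correspond to the two shape constructors used throughout this change; a small sketch of what each produces (shapes illustrative):

```cpp
// Static input: only the element type changes; lens and strides carry over.
migraphx::shape fixed{migraphx::shape::float_type, {2, 3}};
// -> convert yields {int32_type, lens {2, 3}, strides {3, 1}}

// Dynamic input: the dynamic_dimension list carries over unchanged.
migraphx::shape dyn{migraphx::shape::float_type, {{1, 4, 0}, {3, 3, 0}}};
// -> convert yields {int32_type, dyn_dims {{1, 4, 0}, {3, 3, 0}}}
```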
......@@ -45,11 +45,13 @@ namespace op {
struct nonmaxsuppression
{
bool center_point_box = false;
bool use_dyn_output = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.center_point_box, "center_point_box"));
return pack(f(self.center_point_box, "center_point_box"),
f(self.use_dyn_output, "use_dyn_output"));
}
std::string name() const { return "nonmaxsuppression"; }
......@@ -57,27 +59,81 @@ struct nonmaxsuppression
shape compute_shape(std::vector<shape> inputs) const
{
// requires at least 2 inputs
check_shapes{{inputs.at(0), inputs.at(1)}, *this}.only_dims(3);
auto lens = inputs.front().lens();
check_shapes{{inputs.at(0), inputs.at(1)}, *this, true}.only_dims(3).same_ndims();
auto boxes_max_lens = inputs.at(0).max_lens();
// num batches * num boxes
const auto max_num_boxes = boxes_max_lens.at(0) * boxes_max_lens.at(1);
// check input shape
if(lens[1] != inputs.at(1).lens()[2])
auto fixed_shape_error_check = [&]() {
auto lens = inputs.front().lens();
if(lens[1] != inputs.at(1).lens()[2])
{
MIGRAPHX_THROW(
"NonMaxSuppression: spatial dimension mismatch between boxes and scores input");
}
if(lens[0] != inputs.at(1).lens()[0])
{
MIGRAPHX_THROW(
"NonMaxSuppression: number of batches mismatch between boxes and scores input");
}
};
if(use_dyn_output)
{
MIGRAPHX_THROW(
"NonMaxSuppression: spatial dimension mismatch between boxes and scores input");
if(inputs.at(0).dynamic())
{
// both boxes and scores should be dynamic
// check dynamic dimensions are consistent
const auto boxes_dims = inputs.at(0).dyn_dims();
const auto scores_dims = inputs.at(1).dyn_dims();
if(boxes_dims.at(1) != scores_dims.at(2))
{
MIGRAPHX_THROW("NonMaxSuppression: dynamic spatial dimension mismatch between "
"boxes and scores input");
}
if(boxes_dims.at(0) != scores_dims.at(0))
{
MIGRAPHX_THROW("NonMaxSuppression: dynamic number of batches mismatch between "
"boxes and scores input");
}
}
else if(inputs.at(1).dynamic())
{
// scores has dynamic shape, boxes fixed shape
// check that it is only a dynamic number of classes
const auto scores_dims = inputs.at(1).dyn_dims();
const auto boxes_lens = inputs.at(0).lens();
if(not scores_dims.at(0).is_fixed() or scores_dims.at(0).max != boxes_lens.at(0))
{
MIGRAPHX_THROW("NonMaxSuppression: scores dynamic num_classes; num_batches not "
"fixed or mismatched");
}
if(not scores_dims.at(2).is_fixed() or scores_dims.at(2).max != boxes_lens.at(1))
{
MIGRAPHX_THROW("NonMaxSuppression: scores dynamic num_classes; "
"spatial_dimension not fixed or mismatches");
}
}
else
{
fixed_shape_error_check();
}
std::vector<shape::dynamic_dimension> out_lens = {};
out_lens.push_back({0, max_num_boxes, 0});
out_lens.push_back({3, 3, 0});
return {shape::int64_type, out_lens};
}
// check batch sizes
if(lens[0] != inputs.at(1).lens()[0])
else
{
MIGRAPHX_THROW(
"NonMaxSuppression: number of batches mismatch between boxes and scores input");
if(inputs.at(0).dynamic() or inputs.at(1).dynamic())
{
MIGRAPHX_THROW(
"NonMaxSuppression: dynamic input shape with use_dyn_output set to false");
}
fixed_shape_error_check();
std::vector<std::size_t> out_lens = {max_num_boxes, 3};
return {shape::int64_type, out_lens};
}
std::vector<int64_t> out_lens(2);
out_lens.at(0) = lens.at(1);
out_lens.at(1) = 3;
return {shape::int64_type, out_lens};
}
struct box
......@@ -181,13 +237,13 @@ struct nonmaxsuppression
}
template <class Output, class Boxes, class Scores>
void compute_nms(Output output,
Boxes boxes,
Scores scores,
const shape& output_shape,
std::size_t max_output_boxes_per_class,
double iou_threshold,
double score_threshold) const
std::size_t compute_nms(Output output,
Boxes boxes,
Scores scores,
const shape& max_output_shape,
std::size_t max_output_boxes_per_class,
double iou_threshold,
double score_threshold) const
{
std::fill(output.begin(), output.end(), 0);
const auto& lens = scores.get_shape().lens();
......@@ -197,7 +253,7 @@ struct nonmaxsuppression
// boxes of a class with NMS applied [score, index]
std::vector<std::pair<double, int64_t>> selected_boxes_inside_class;
std::vector<int64_t> selected_indices;
selected_boxes_inside_class.reserve(output_shape.elements());
selected_boxes_inside_class.reserve(max_output_shape.elements());
// iterate over batches and classes
shape comp_s{shape::double_type, {num_batches, num_classes}};
shape_for_each(comp_s, [&](auto idx) {
......@@ -237,11 +293,14 @@ struct nonmaxsuppression
}
});
std::copy(selected_indices.begin(), selected_indices.end(), output.begin());
return selected_indices.size() / 3;
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
// make buffer of maximum size
shape max_output_shape = {output_shape.type(), output_shape.max_lens()};
argument result{max_output_shape};
std::size_t max_output_boxes_per_class =
(args.size() > 2) ? (args.at(2).at<std::size_t>()) : 0;
......@@ -249,22 +308,29 @@ struct nonmaxsuppression
{
return result;
}
double iou_threshold = (args.size() > 3) ? (args.at(3).at<double>()) : 0.0f;
double score_threshold = (args.size() > 4) ? (args.at(4).at<double>()) : 0.0f;
double iou_threshold = (args.size() > 3) ? (args.at(3).at<double>()) : 0.0f;
double score_threshold = (args.size() > 4) ? (args.at(4).at<double>()) : 0.0f;
std::size_t num_selected = 0;
result.visit([&](auto output) {
visit_all(args[0], args[1])([&](auto boxes, auto scores) {
compute_nms(output,
boxes,
scores,
output_shape,
max_output_boxes_per_class,
iou_threshold,
score_threshold);
num_selected = compute_nms(output,
boxes,
scores,
max_output_shape,
max_output_boxes_per_class,
iou_threshold,
score_threshold);
});
});
return result;
if(use_dyn_output)
{
return result.reshape({output_shape.type(), {num_selected, 3}});
}
else
{
return result;
}
}
};
......
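A worked example of the new shape contract (numbers illustrative): compute_shape folds the maximum batch and box counts into one dynamic bound, and compute shrinks the max-size buffer down to the actual selection count.

```cpp
#include <migraphx/op/nonmaxsuppression.hpp>

migraphx::op::nonmaxsuppression nms;
nms.use_dyn_output = true;

// boxes: batch in [1, 2], 6 boxes, 4 coordinates; scores shaped to match.
migraphx::shape boxes{migraphx::shape::float_type, {{1, 2, 0}, {6, 6, 0}, {4, 4, 0}}};
migraphx::shape scores{migraphx::shape::float_type, {{1, 2, 0}, {1, 1, 0}, {6, 6, 0}}};

auto out = nms.compute_shape({boxes, scores});
// max_num_boxes = 2 * 6 = 12, so out = {int64_type, {{0, 12, 0}, {3, 3, 0}}}.
// At run time compute() fills a {12, 3} buffer and, because use_dyn_output
// is set, reshapes the result to {num_selected, 3} (e.g. {5, 3}).
```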
......@@ -21,16 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_MIGRAPHX_SUPPORTED_SEGMENTS_HPP
#define MIGRAPHX_GUARD_MIGRAPHX_SUPPORTED_SEGMENTS_HPP
#include <migraphx/target_assignments.hpp>
#include <unordered_set>
#include <migraphx/instruction_ref.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void target_assignments::add_assignment(instruction_ref ins, const std::string& target)
struct supported_segment
{
assignments.emplace(ins, target);
}
std::unordered_set<instruction_ref> instructions;
float metric;
};
using supported_segments = std::vector<supported_segment>;
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_MIGRAPHX_SUPPORTED_SEGMENTS_HPP
......@@ -37,8 +37,10 @@
#include <migraphx/compile_options.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/rank.hpp>
#include <migraphx/module_ref.hpp>
#include <migraphx/support_metric.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/supported_segments.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -64,12 +66,12 @@ struct target
*/
context get_context() const;
/**
* @brief Check how well an instruction is supported on a target with the given metric
* @param ins Instruction to check if it's supported
* @param metric Used to define how the return value should be interpreted
* @return The value based on the chosen metric. Negative numbers mean unsupported
* @brief Get the ranges of instructions that are supported on a target
* @param module Module to check for supported instructions
* @param metric Used to define how the quality of the support should be measured
* @return the supported segments of the graph
*/
float is_supported(T&, instruction_ref ins, support_metric m) const;
supported_segments target_is_supported(T&, const_module_ref mod, support_metric metric) const;
/**
* @brief copy an argument to the current target.
*
......@@ -115,9 +117,9 @@ argument copy_from_target(T&, const argument& arg)
}
template <class T>
float target_is_supported(T&, instruction_ref, support_metric)
supported_segments target_find_supported(T&, const_module_ref, support_metric)
{
return 0;
return {};
}
#ifdef TYPE_ERASED_DECLARATION
......@@ -132,7 +134,7 @@ struct target
//
context get_context() const;
// (optional)
float is_supported(instruction_ref ins, support_metric m) const;
supported_segments find_supported(const_module_ref mod, support_metric m) const;
// (optional)
argument copy_to(const argument& input) const;
// (optional)
......@@ -224,10 +226,10 @@ struct target
return (*this).private_detail_te_get_handle().get_context();
}
float is_supported(instruction_ref ins, support_metric m) const
supported_segments find_supported(const_module_ref mod, support_metric m) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().is_supported(ins, m);
return (*this).private_detail_te_get_handle().find_supported(mod, m);
}
argument copy_to(const argument& input) const
......@@ -261,33 +263,33 @@ struct target
virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
virtual const std::type_info& type() const = 0;
virtual std::string name() const = 0;
virtual std::string name() const = 0;
virtual std::vector<pass> get_passes(context& ctx,
const compile_options& options) const = 0;
virtual context get_context() const = 0;
virtual float is_supported(instruction_ref ins, support_metric m) const = 0;
virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0;
const compile_options& options) const = 0;
virtual context get_context() const = 0;
virtual supported_segments find_supported(const_module_ref mod, support_metric m) const = 0;
virtual argument copy_to(const argument& input) const = 0;
virtual argument copy_from(const argument& input) const = 0;
virtual argument allocate(const shape& s) const = 0;
};
template <class T>
static auto private_detail_te_default_is_supported(char,
T&& private_detail_te_self,
instruction_ref ins,
support_metric m)
-> decltype(private_detail_te_self.is_supported(ins, m))
static auto private_detail_te_default_find_supported(char,
T&& private_detail_te_self,
const_module_ref mod,
support_metric m)
-> decltype(private_detail_te_self.find_supported(mod, m))
{
return private_detail_te_self.is_supported(ins, m);
return private_detail_te_self.find_supported(mod, m);
}
template <class T>
static float private_detail_te_default_is_supported(float,
T&& private_detail_te_self,
instruction_ref ins,
support_metric m)
static supported_segments private_detail_te_default_find_supported(float,
T&& private_detail_te_self,
const_module_ref mod,
support_metric m)
{
return target_is_supported(private_detail_te_self, ins, m);
return target_find_supported(private_detail_te_self, mod, m);
}
template <class T>
......@@ -372,10 +374,11 @@ struct target
context get_context() const override { return private_detail_te_value.get_context(); }
float is_supported(instruction_ref ins, support_metric m) const override
supported_segments find_supported(const_module_ref mod, support_metric m) const override
{
return private_detail_te_default_is_supported(char(0), private_detail_te_value, ins, m);
return private_detail_te_default_find_supported(
char(0), private_detail_te_value, mod, m);
}
argument copy_to(const argument& input) const override
......
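The char/float overload pair above is the usual two-level dispatch idiom: passing char(0) prefers the SFINAE-constrained overload when T provides its own find_supported, and otherwise falls through the implicit char-to-float conversion to the free-function default. A stripped-down, self-contained illustration of the pattern (names hypothetical, independent of migraphx):

```cpp
#include <iostream>

// Overload taken when T has a member find_supported(); exact match on char.
template <class T>
auto call_find(char, T& t) -> decltype(t.find_supported())
{
    return t.find_supported();
}

// Fallback taken otherwise, reached via the char -> float conversion.
template <class T>
int call_find(float, T&)
{
    return 0;
}

struct with_member { int find_supported() { return 42; } };
struct without_member {};

int main()
{
    with_member a;
    without_member b;
    std::cout << call_find(char(0), a) << '\n'; // 42: member overload wins
    std::cout << call_find(char(0), b) << '\n'; // 0: falls back to default
}
```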
......@@ -33,10 +33,20 @@ inline namespace MIGRAPHX_INLINE_NS {
struct target_assignments
{
void add_assignment(instruction_ref ins, const std::string& target);
using iterator = std::unordered_map<instruction_ref, std::string>::const_iterator;
using value_type = std::pair<instruction_ref, std::string>;
auto begin() const { return assignments.cbegin(); }
auto end() const { return assignments.cend(); }
auto size() const { return assignments.size(); }
auto& at(instruction_ref ins) const { return assignments.at(ins); }
auto insert(iterator it, const std::pair<instruction_ref, std::string>& assignment)
{
return assignments.insert(it, assignment);
}
auto find(instruction_ref ins) const { return assignments.find(ins); }
auto begin() const { return assignments.begin(); }
auto end() const { return assignments.end(); }
private:
std::unordered_map<instruction_ref, std::string> assignments;
......
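The added members give target_assignments just enough of the unordered_map interface for standard algorithms to compose with it; a short sketch (p, some_ins, and instrs are placeholders, not names from this change):

```cpp
// Iteration and lookup (sketch; `p` is a target_assignments,
// `some_ins` an instruction_ref -- both hypothetical here).
for(const auto& [ins, target_name] : p)
    std::cout << target_name << '\n';

if(auto it = p.find(some_ins); it != p.end())
    std::cout << "assigned to " << it->second << '\n';

// std::inserter(p, p.end()) works because insert(iterator, value_type)
// is exposed, which is what the updated program.cpp code relies on.
```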
......@@ -97,6 +97,7 @@ struct onnx_parser
shape::dynamic_dimension default_dyn_dim_value = {1, 1, 0};
std::unordered_map<std::string, std::vector<std::size_t>> map_input_dims;
std::unordered_map<std::string, std::vector<shape::dynamic_dimension>> map_dyn_input_dims;
bool use_dyn_output = false;
bool skip_unknown_operators = false;
int64_t max_loop_iterations = 10;
int64_t opset_version = 13;
......
......@@ -60,8 +60,14 @@ program parse_onnx_from(const onnx_options& options, Ts&&... xs)
{
parser.default_dyn_dim_value = options.default_dyn_dim_value;
}
if(not options.map_input_dims.empty() and not options.map_dyn_input_dims.empty())
{
MIGRAPHX_THROW("PARSE_ONNX_FROM: both map_input_dims and map_dyn_input_dims non-empty, only"
"one should be used");
}
parser.skip_unknown_operators = options.skip_unknown_operators;
parser.max_loop_iterations = options.max_loop_iterations;
parser.use_dyn_output = options.use_dyn_output;
if(options.print_program_on_error)
{
......@@ -80,6 +86,7 @@ program parse_onnx_from(const onnx_options& options, Ts&&... xs)
{
parser.parse_from(std::forward<Ts>(xs)...);
}
return std::move(parser.prog);
}
......
......@@ -256,11 +256,6 @@ int64_t onnx_parser::get_opset_version(const onnx::ModelProto& model)
void onnx_parser::parse_graph(module* mod, const onnx::GraphProto& graph)
{
if(not map_input_dims.empty() and not map_dyn_input_dims.empty())
{
MIGRAPHX_THROW("PARSE_GRAPH: both map_input_dims and map_dyn_input_dims non-empty, only"
"one should be used");
}
std::unordered_map<std::string, instruction_ref> mod_insts;
for(auto&& f : graph.initializer())
{
......
......@@ -58,7 +58,6 @@ struct parse_generic_op : op_parser<parse_generic_op>
{"Log", "log"},
{"LRN", "lrn"},
{"Neg", "neg"},
{"NonMaxSuppression", "nonmaxsuppression"},
{"Reciprocal", "recip"},
{"Relu", "relu"},
{"Round", "round"},
......@@ -75,7 +74,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
bool needs_contiguous(const std::string& op_name) const
{
return contains({"flatten", "gather", "nonmaxsuppression", "scatter"}, op_name);
return contains({"flatten", "gather", "scatter"}, op_name);
}
instruction_ref parse(const op_desc& opd,
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_nonmaxsuppression : op_parser<parse_nonmaxsuppression>
{
std::vector<op_desc> operators() const { return {{"NonMaxSuppression", "nonmaxsuppression"}}; }
instruction_ref parse(const op_desc& opd,
const onnx_parser& parser,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
auto op = parser.load(opd.op_name, info);
op.from_value({{"use_dyn_output", parser.use_dyn_output}});
return info.add_instruction(op, args);
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
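For reference, the same operator can also be built directly through the operator registry; a one-line sketch using make_op (attribute values illustrative):

```cpp
auto op = migraphx::make_op("nonmaxsuppression",
                            {{"center_point_box", true}, {"use_dyn_output", true}});
```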
......@@ -37,6 +37,7 @@
#include <migraphx/output_iterator.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/marker.hpp>
#include <migraphx/supported_segments.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
......@@ -167,13 +168,37 @@ target_assignments program::get_target_assignments(const std::vector<target>& ta
target_assignments p;
const auto* mod = get_main_module();
for(auto it : iterator_for(*mod))
std::vector<std::pair<target, supported_segments>> target_subgraphs;
target_subgraphs.reserve(targets.size());
std::transform(targets.begin(),
targets.end(),
std::back_inserter(target_subgraphs),
[&](const auto& t) { return std::make_pair(t, t.find_supported(mod, m)); });
for(const auto ins : iterator_for(*mod))
{
auto t = std::max_element(
targets.begin(), targets.end(), [it, m](const target& lhs, const target& rhs) {
return lhs.is_supported(it, m) < rhs.is_supported(it, m);
});
p.add_assignment(it, t->name());
if(contains(p, ins))
{
continue;
}
for(const auto& [target, subgraph] : target_subgraphs)
{
// can't pass a structured binding into a lambda in C++17, so create a variable for it
const auto& t = target;
for(const auto& segment : subgraph)
{
const auto& instructions = segment.instructions;
if(not contains(instructions, ins))
{
continue;
}
std::transform(instructions.begin(),
instructions.end(),
std::inserter(p, p.end()),
[&](auto instr) { return std::make_pair(instr, t.name()); });
}
}
}
return p;
}
......
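One subtlety in the loop above: earlier entries in targets win, because contains(p, ins) skips already-claimed instructions and unordered_map::insert ignores keys that are already present. A self-contained illustration of the latter:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

int main()
{
    std::unordered_map<int, std::string> m;
    m.insert(m.end(), {1, "fpga"});
    m.insert(m.end(), {1, "ref"}); // no-op: key 1 is already present
    std::cout << m.at(1) << '\n';  // prints "fpga"
}
```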
......@@ -30,6 +30,7 @@
#include <migraphx/compile_options.hpp>
#include <migraphx/fpga/context.hpp>
#include <migraphx/config.hpp>
#include <migraphx/supported_segments.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -41,7 +42,7 @@ struct target
std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; }
float is_supported(instruction_ref ins, support_metric m);
supported_segments find_supported(const_module_ref mod, support_metric m) const;
argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; }
......
......@@ -34,6 +34,7 @@
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/normalize_ops.hpp>
#include <migraphx/iterator_for.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -62,12 +63,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
float is_supported(instruction_ref ins, support_metric m)
supported_segments target::find_supported(const_module_ref mod, support_metric m) const
{
// for now, not using the ins and metric to return a value
(void)ins;
(void)m;
return 1.0;
supported_segment instrs;
for(const auto ins : iterator_for(*mod))
{
instrs.instructions.insert(ins);
}
instrs.metric = 1; // arbitrary value
return {instrs};
}
MIGRAPHX_REGISTER_TARGET(target);
......
......@@ -33,41 +33,43 @@
namespace migraphx {
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op) \
template <class U> \
constexpr array& operator op(const array<U, N>& x) \
{ \
array_for_each(*this, x)([](auto& sy, auto sx) { sy op sx; }); \
return *this; \
} \
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
constexpr array& operator op(const U& x) \
{ \
array_for_each (*this)([&](auto& sy) { sy op x; }); \
return *this; \
} \
template <class U> \
friend constexpr auto operator binary_op(const array& x, const array<U, N>& y) \
{ \
array<decltype(T {} binary_op U{}), N> z{}; \
array_for_each(z, x, y)([&](auto& sz, auto sx, auto sy) { sz = sx binary_op sy; }); \
return z; \
} \
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
friend constexpr auto operator binary_op(const array& x, const U& y) \
{ \
array<decltype(T {} binary_op U{}), N> z{}; \
array_for_each(z, x)([&](auto& sz, auto sx) { sz = sx binary_op y; }); \
return z; \
} \
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
friend constexpr auto operator binary_op(const U& x, const array& y) \
{ \
array<decltype(T {} binary_op U{}), N> z{}; \
array_for_each(z, y)([&](auto& sz, auto sy) { sz = x binary_op sy; }); \
return z; \
#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op) \
template <class U> \
constexpr array& operator op(const array<U, N>& x) \
{ \
array_detail::array_for_each(*this, x)([](auto& sy, auto sx) { sy op sx; }); \
return *this; \
} \
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
constexpr array& operator op(const U& x) \
{ \
array_detail::array_for_each (*this)([&](auto& sy) { sy op x; }); \
return *this; \
} \
template <class U> \
friend constexpr auto operator binary_op(const array& x, const array<U, N>& y) \
{ \
array<decltype(T {} binary_op U{}), N> z{}; \
array_detail::array_for_each(z, x, y)( \
[&](auto& sz, auto sx, auto sy) { sz = sx binary_op sy; }); \
return z; \
} \
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
friend constexpr auto operator binary_op(const array& x, const U& y) \
{ \
array<decltype(T {} binary_op U{}), N> z{}; \
array_detail::array_for_each(z, x)([&](auto& sz, auto sx) { sz = sx binary_op y; }); \
return z; \
} \
template <class U, MIGRAPHX_REQUIRES(is_convertible<U, T>{})> \
friend constexpr auto operator binary_op(const U& x, const array& y) \
{ \
array<decltype(T {} binary_op U{}), N> z{}; \
array_detail::array_for_each(z, y)([&](auto& sz, auto sy) { sz = x binary_op sy; }); \
return z; \
}
namespace array_detail {
template <class T>
constexpr auto is_vectorizable()
{
......@@ -75,20 +77,15 @@ constexpr auto is_vectorizable()
}
template <class T>
constexpr auto array2vec(T x)
__device__ auto& array2vec(T& x)
{
using value_type = typename T::value_type;
constexpr auto size = decltype(x.size()){};
using type = vec<value_type, size>;
static_assert(size != 3, "Wrong size");
return __builtin_bit_cast(type, x);
}
template <class T, class U, index_int N>
constexpr void vec2array(T& x, vec<U, N> v)
{
if constexpr(not is_const<T>{})
x = __builtin_bit_cast(T, v);
if constexpr(is_const<T>{})
return reinterpret_cast<const type&>(x);
else
return reinterpret_cast<type&>(x);
}
template <class T, class... Ts>
......@@ -101,11 +98,16 @@ constexpr auto array_for_each(T& x, Ts&... xs)
(is_vectorizable<typename Ts::value_type>() or ...)) and
size <= 8 and size > 1 and (size % 2 == 0))
{
[&](auto v, auto... vs) {
f(v, vs...);
vec2array(x, v);
swallow{(vec2array(xs, vs), 0)...};
}(array2vec(x), array2vec(xs)...);
if(__builtin_is_constant_evaluated())
{
for(index_int i = 0; i < size; i++)
f(x[i], xs[i]...);
}
else
{
using vec_type = std::remove_reference_t<decltype(array2vec(x))>;
f(array2vec(x), __builtin_convertvector(array2vec(xs), vec_type)...);
}
}
else
{
......@@ -114,6 +116,7 @@ constexpr auto array_for_each(T& x, Ts&... xs)
}
};
}
} // namespace array_detail
template <class T, index_int N>
struct array
......@@ -151,18 +154,13 @@ struct array
constexpr T dot(const array& x) const
{
T result = 0;
for(index_int i = 0; i < N; i++)
result += x[i] * d[i];
return result;
auto r = x * (*this);
return r.reduce([](auto a, auto b) { return a + b; }, 0);
}
constexpr T product() const
{
T result = 1;
for(index_int i = 0; i < N; i++)
result *= d[i];
return result;
return reduce([](auto x, auto y) { return x * y; }, 1);
}
constexpr T single(index_int width = 100) const
......@@ -186,6 +184,15 @@ struct array
return result;
}
template <class F>
constexpr auto reduce(F f, T init) const
{
T result = init;
for(index_int i = 0; i < N; i++)
result = f(result, d[i]);
return result;
}
MIGRAPHX_DEVICE_ARRAY_OP(+=, +)
MIGRAPHX_DEVICE_ARRAY_OP(-=, -)
MIGRAPHX_DEVICE_ARRAY_OP(*=, *)
......
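dot and product now route through the new reduce, a left fold over the N elements; a quick sketch of the semantics (this is device-side kernel code, values illustrative):

```cpp
// reduce(f, init) == f(... f(f(init, d[0]), d[1]) ..., d[N-1])
migraphx::array<int, 4> a = {1, 2, 3, 4};
auto sum  = a.reduce([](auto x, auto y) { return x + y; }, 0); // 10
auto prod = a.product(); // 24: the same fold with init 1 and operator*
auto dot  = a.dot(a);    // 30 = 1*1 + 2*2 + 3*3 + 4*4
```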
......@@ -55,14 +55,15 @@ __device__ void generic_binary_layernorm(
return make_array(x, x * x) * vec_type<value_type>{1.0 / relements};
})(input1, input2);
auto mean_x = means[0];
auto mean_x2 = means[1];
auto mean_x = means[0];
auto mean_x2 = means[1];
auto variance = mean_x2 - (mean_x * mean_x);
r.inner([&](auto& y, auto x1, auto x2, auto... xs) {
auto x = op(x1, x2);
auto m = x - mean_x;
// m * rsqrt(mean(m ^ 2) + 1e-12)
y = compute(m * rsqrt(mean_x2 - mean_x + value_type{1e-12}), xs...);
y = compute(m * rsqrt(variance + value_type{1e-12}), xs...);
})(output, input1, input2, inputs...);
});
}
......
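The substantive change in this hunk is a numerical fix: the old expression rsqrt(mean_x2 - mean_x + 1e-12) dropped the square on the mean. The new variance variable applies the standard identity

$$\operatorname{Var}(x) = \mathbb{E}[x^2] - \mathbb{E}[x]^2 = \texttt{mean\_x2} - \texttt{mean\_x}^2,$$

so the kernel now computes $y = (x - \mathbb{E}[x]) \cdot \operatorname{rsqrt}(\operatorname{Var}(x) + 10^{-12})$, matching the layer-norm formula the inline comment describes.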
......@@ -94,8 +94,8 @@ MIGRAPHX_DPP_REDUCE(op::max, v_max)
MIGRAPHX_DPP_REDUCE(op::min, v_min)
MIGRAPHX_DPP_REDUCE(op::product, v_mul)
template <class Op, class T, class F>
__device__ auto block_reduce(index idx, Op op, T init, index_int n, F f)
template <class Op, class T, class Index, class F>
__device__ auto block_reduce(index idx, Op op, T init, Index n, F f)
{
#if __AMDGCN_WAVEFRONT_SIZE == 32
constexpr index_int lanes_per_thread = 16;
......@@ -123,8 +123,8 @@ __device__ auto block_reduce(index idx, Op op, T init, index_int n, F f)
return y;
}
#else
template <class Op, class T, class F>
__device__ auto block_reduce(index idx, Op op, T init, index_int n, F f)
template <class Op, class T, class Index, class F>
__device__ auto block_reduce(index idx, Op op, T init, Index n, F f)
{
using type = decltype(f(0));
......
......@@ -26,8 +26,9 @@
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/ref/target.hpp>
#include <migraphx/fpga/target.hpp>
#include <migraphx/target_assignments.hpp>
#include <migraphx/iterator_for.hpp>
migraphx::program create_program()
{
......@@ -37,8 +38,8 @@ migraphx::program create_program()
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", s);
auto z = mm->add_parameter("z", s);
auto diff = mm->add_instruction(migraphx::make_op("div"), x, y);
mm->add_instruction(migraphx::make_op("div"), diff, z);
auto diff = mm->add_instruction(migraphx::make_op("add"), x, y);
mm->add_instruction(migraphx::make_op("add"), diff, z);
return p;
}
......@@ -47,14 +48,16 @@ TEST_CASE(is_supported)
auto p = create_program();
auto targets = migraphx::get_targets();
EXPECT(!targets.empty());
auto first_target = targets[0];
auto t = migraphx::make_target(first_target);
auto t = migraphx::make_target("fpga");
const auto assignments = p.get_target_assignments({t});
for(const auto& [ins, target] : assignments)
const auto* mod = p.get_main_module();
EXPECT(mod->size() == assignments.size());
for(const auto ins : iterator_for(*mod))
{
(void)ins;
EXPECT(target == first_target);
const auto& target = assignments.at(ins);
EXPECT(target == "fpga");
}
}
......
......@@ -3589,7 +3589,7 @@ def nms_test():
st = helper.make_tensor_value_info('score_threshold', TensorProto.FLOAT,
[1])
out = helper.make_tensor_value_info('selected_indices', TensorProto.INT64,
[6, 3])
[None, 3])
node = onnx.helper.make_node('NonMaxSuppression',
inputs=[
......@@ -3603,6 +3603,108 @@ def nms_test():
return ([node], [b, s, mo, iou, st], [out])
@onnx_test
def nms_use_dyn_output_false_test():
b = helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [1, 6, 4])
s = helper.make_tensor_value_info('scores', TensorProto.FLOAT, [1, 1, 6])
mo = helper.make_tensor_value_info('max_output_boxes_per_class',
TensorProto.INT64, [1])
iou = helper.make_tensor_value_info('iou_threshold', TensorProto.FLOAT,
[1])
st = helper.make_tensor_value_info('score_threshold', TensorProto.FLOAT,
[1])
out = helper.make_tensor_value_info('selected_indices', TensorProto.INT64,
[None, 3])
node = onnx.helper.make_node('NonMaxSuppression',
inputs=[
'boxes', 'scores',
'max_output_boxes_per_class',
'iou_threshold', 'score_threshold'
],
outputs=['selected_indices'],
use_dyn_output=0)
return ([node], [b, s, mo, iou, st], [out])
@onnx_test
def nms_dynamic_batch_test():
b = helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [None, 6, 4])
s = helper.make_tensor_value_info('scores', TensorProto.FLOAT,
[None, 1, 6])
mo = helper.make_tensor_value_info('max_output_boxes_per_class',
TensorProto.INT64, [1])
iou = helper.make_tensor_value_info('iou_threshold', TensorProto.FLOAT,
[1])
st = helper.make_tensor_value_info('score_threshold', TensorProto.FLOAT,
[1])
out = helper.make_tensor_value_info('selected_indices', TensorProto.INT64,
[None, 3])
node = onnx.helper.make_node('NonMaxSuppression',
inputs=[
'boxes', 'scores',
'max_output_boxes_per_class',
'iou_threshold', 'score_threshold'
],
outputs=['selected_indices'],
center_point_box=1,
use_dyn_output=1)
return ([node], [b, s, mo, iou, st], [out])
@onnx_test
def nms_dynamic_boxes_test():
b = helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [1, None, 4])
s = helper.make_tensor_value_info('scores', TensorProto.FLOAT,
[1, 1, None])
mo = helper.make_tensor_value_info('max_output_boxes_per_class',
TensorProto.INT64, [1])
iou = helper.make_tensor_value_info('iou_threshold', TensorProto.FLOAT,
[1])
st = helper.make_tensor_value_info('score_threshold', TensorProto.FLOAT,
[1])
out = helper.make_tensor_value_info('selected_indices', TensorProto.INT64,
[None, 3])
node = onnx.helper.make_node('NonMaxSuppression',
inputs=[
'boxes', 'scores',
'max_output_boxes_per_class',
'iou_threshold', 'score_threshold'
],
outputs=['selected_indices'])
return ([node], [b, s, mo, iou, st], [out])
@onnx_test
def nms_dynamic_classes_test():
b = helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [1, 6, 4])
s = helper.make_tensor_value_info('scores', TensorProto.FLOAT,
[1, None, 6])
mo = helper.make_tensor_value_info('max_output_boxes_per_class',
TensorProto.INT64, [1])
iou = helper.make_tensor_value_info('iou_threshold', TensorProto.FLOAT,
[1])
st = helper.make_tensor_value_info('score_threshold', TensorProto.FLOAT,
[1])
out = helper.make_tensor_value_info('selected_indices', TensorProto.INT64,
[None, 3])
node = onnx.helper.make_node('NonMaxSuppression',
inputs=[
'boxes', 'scores',
'max_output_boxes_per_class',
'iou_threshold', 'score_threshold'
],
outputs=['selected_indices'])
return ([node], [b, s, mo, iou, st], [out])
@onnx_test
def not_test():
x = helper.make_tensor_value_info('0', TensorProto.INT32, [4])
......