Commit df032e06 authored by Paul

Merge branch 'develop' into mlir-c

parents cf4642cd 19f65e7e
......@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386
# Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/4.2/ xenial main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/4.5/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
......@@ -32,6 +32,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
software-properties-common \
wget \
rocm-device-libs \
hip-base \
libnuma-dev \
miopen-hip \
rocblas \
zlib1g-dev && \
......
......@@ -20,7 +20,7 @@ def rocmtestnode(Map conf) {
rm -rf build
mkdir build
cd build
CXX=${compiler} CXXFLAGS='-Werror -Wno-fallback' cmake -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ${flags} ..
CXX=${compiler} CXXFLAGS='-Werror' cmake -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ${flags} ..
make -j\$(nproc) generate all doc package check VERBOSE=1
"""
echo cmd
......@@ -75,6 +75,8 @@ def rocmnodename(name) {
node_name = "${rocmtest_name} && fiji";
} else if(name == "vega") {
node_name = "${rocmtest_name} && vega";
} else if(name == "navi21") {
node_name = "${rocmtest_name} && navi21";
} else if(name == "nogpu") {
return rocmtest_name;
}
......@@ -110,6 +112,10 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build ->
def debug_flags = "-g -O2 -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'")
}
}, clang_release_navi: rocmnode('navi21') { cmake_build ->
stage('HIP Clang Release Navi') {
cmake_build("/opt/rocm/llvm/bin/clang++", "-DCMAKE_BUILD_TYPE=release")
}
}
def onnxnode(name, body) {
......
pfultz2/rocm-recipes
facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
ccache@v4.1
pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
danmar/cppcheck@2.6 -DHAVE_RULES=1
RadeonOpenCompute/rocm-cmake@ececd2eccae4d01e7ec154efe90ac43ebf4df317 --build
-f requirements.txt
sphinx==2.2.2
breathe==4.13.1
docutils==0.17.1
sphinx==4.2.0
breathe==4.31.0
sphinx_rtd_theme==1.0.0
# git+https://github.com/arximboldi/breathe@fix-node-parent
......@@ -18,6 +18,8 @@
#
# import os
# import sys
from datetime import date
import re
# sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------
......@@ -29,7 +31,9 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode']
extensions = [
'breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme'
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
......@@ -45,7 +49,7 @@ master_doc = 'index'
# General information about the project.
project = u'MIGraphX'
copyright = u'2018, AMD'
copyright = u'2018-{}, AMD'.format(date.today().year)
author = u'AMD'
# The version info for the project you're documenting, acts as replacement for
......@@ -53,9 +57,12 @@ author = u'AMD'
# built documents.
#
# The short X.Y version.
version = u'0.1'
with open('../../CMakeLists.txt') as file:
    version = next((re.findall('[0-9.]+', line)[0]
                    for line in file.readlines()
                    if 'rocm_setup_version' in line))
# The full version, including alpha/beta/rc tags.
release = u'0.1'
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......@@ -82,7 +89,7 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
......
......@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386
# Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/4.2/ xenial main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/4.5/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
......@@ -29,6 +29,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
software-properties-common \
wget \
rocm-device-libs \
hip-base \
libnuma-dev \
miopen-hip \
rocblas \
zlib1g-dev && \
......
......@@ -131,6 +131,7 @@ register_migraphx_ops(
multibroadcast
multinomial
neg
nonmaxsuppression
nonzero
outline
pad
......@@ -155,6 +156,7 @@ register_migraphx_ops(
rnn_last_cell_output
rnn_last_hs_output
rnn_var_sl_last_output
roialign
round
rsqrt
scalar
......
......@@ -252,7 +252,7 @@ struct shape : MIGRAPHX_CONST_HANDLE_BASE(shape)
const size_t* pout;
size_t pout_size;
call(&migraphx_shape_lengths, &pout, &pout_size, this->get_handle_ptr());
return std::vector<size_t>(pout, pout + pout_size);
return {pout, pout + pout_size};
}
std::vector<size_t> strides() const
......@@ -260,7 +260,7 @@ struct shape : MIGRAPHX_CONST_HANDLE_BASE(shape)
const size_t* pout;
size_t pout_size;
call(&migraphx_shape_strides, &pout, &pout_size, this->get_handle_ptr());
return std::vector<size_t>(pout, pout + pout_size);
return {pout, pout + pout_size};
}
migraphx_shape_datatype_t type() const
......@@ -312,7 +312,7 @@ struct argument : MIGRAPHX_CONST_HANDLE_BASE(argument)
{
const_migraphx_shape_t pout;
call(&migraphx_argument_shape, &pout, this->get_handle_ptr());
return shape(pout);
return {pout};
}
char* data() const
......@@ -325,9 +325,8 @@ struct argument : MIGRAPHX_CONST_HANDLE_BASE(argument)
/// Generate an argument using random data
static argument generate(shape ps, size_t pseed = 0)
{
return argument(
make<migraphx_argument>(&migraphx_argument_generate, ps.get_handle_ptr(), pseed),
own{});
return {make<migraphx_argument>(&migraphx_argument_generate, ps.get_handle_ptr(), pseed),
own{}};
}
friend bool operator==(const argument& px, const argument& py)
......@@ -378,7 +377,7 @@ struct program_parameter_shapes : MIGRAPHX_HANDLE_BASE(program_parameter_shapes)
{
const_migraphx_shape_t pout;
call(&migraphx_program_parameter_shapes_get, &pout, this->get_handle_ptr(), pname);
return shape(pout);
return {pout};
}
std::vector<const char*> names() const
......@@ -438,7 +437,7 @@ struct arguments : MIGRAPHX_HANDLE_BASE(arguments), array_base<arguments>
{
const_migraphx_argument_t pout;
call(&migraphx_arguments_get, &pout, this->get_handle_ptr(), pidx);
return argument(pout);
return {pout};
}
struct iterator_read
......@@ -449,7 +448,7 @@ struct arguments : MIGRAPHX_HANDLE_BASE(arguments), array_base<arguments>
const_migraphx_argument_t pout;
call(&migraphx_arguments_get, &pout, self, pidx);
return argument(pout);
return {pout};
}
};
};
......@@ -471,7 +470,7 @@ struct shapes : MIGRAPHX_HANDLE_BASE(shapes), array_base<shapes>
{
const_migraphx_shape_t pout;
call(&migraphx_shapes_get, &pout, this->get_handle_ptr(), pidx);
return shape(pout);
return {pout};
}
struct iterator_read
......@@ -481,7 +480,7 @@ struct shapes : MIGRAPHX_HANDLE_BASE(shapes), array_base<shapes>
{
const_migraphx_shape_t pout;
call(&migraphx_shapes_get, &pout, self, pidx);
return shape(pout);
return {pout};
}
};
};
......@@ -609,7 +608,7 @@ struct operation : MIGRAPHX_HANDLE_BASE(operation)
{
std::array<char, 1024> out_name;
call(&migraphx_operation_name, out_name.data(), 1024, this->get_handle_ptr());
return std::string(out_name.data());
return {out_name.data()};
}
};
......
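The wrapper changes in this file all apply the same C++ idiom: because the function signature already names the return type, return {args...}; list-initializes that type directly, so repeating the type name at the return site is redundant. A minimal standalone sketch of the idiom, with illustrative names that are not part of this commit:

#include <cstddef>
#include <string>
#include <vector>

// The braced return list picks the matching constructor of the declared return type.
std::vector<std::size_t> lengths_from(const std::size_t* pout, std::size_t pout_size)
{
    return {pout, pout + pout_size}; // iterator-range constructor, as in shape::lengths()
}

std::string name_from(const char* buffer)
{
    return {buffer}; // const char* constructor, as in operation::name()
}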
......@@ -26,16 +26,18 @@ cpp_generator::function::set_body(const module& m, const cpp_generator::generate
{
names[ins] =
migraphx::any_cast<migraphx::builtin::param>(ins->get_operator()).parameter;
continue;
}
if(ins->name() == "@return")
else if(ins->name() == "@return")
{
assert(ins->inputs().size() == 1);
return_ins = ins->inputs().front();
}
std::string n = "z" + std::to_string(names.size());
names[ins] = n;
ss << "auto " << n << " = " << g(ins, names) << ";\n";
else
{
std::string n = "z" + std::to_string(names.size());
names[ins] = n;
ss << "auto " << n << " = " << g(ins, names) << ";\n";
}
}
ss << "return " << names.at(return_ins) << ";\n";
body = ss.str();
......@@ -84,8 +86,11 @@ void cpp_generator::fmap(const std::function<std::string(std::string)>& f) { imp
std::string cpp_generator::generate_point_op(const operation& op,
const std::vector<std::string>& args)
{
auto v = op.to_value();
return interpolate_string(op.attributes()["point_op"].to<std::string>(),
auto v = op.to_value();
auto attributes = op.attributes();
if(not attributes.contains("point_op"))
MIGRAPHX_THROW("op is missing point_op attribute: " + op.name());
return interpolate_string(attributes["point_op"].to<std::string>(),
[&](auto start, auto last) -> std::string {
auto key = trim({start, last});
if(key.empty())
......@@ -120,7 +125,12 @@ std::string cpp_generator::str() const { return impl->fs.str(); }
cpp_generator::function cpp_generator::generate_module(const module& m)
{
function f;
f.set_name(m.name()).set_types(m).set_body(
auto name = transform_string(m.name(), [](char c) {
if(with_char(::isalnum)(c) or c == '_')
return c;
return '_';
});
f.set_name(name).set_types(m).set_body(
m, [&](instruction_ref ins, const auto& names) -> std::string {
if(ins->name() == "@literal")
return shape::cpp_type(ins->get_shape().type()) + "(" +
......@@ -130,7 +140,6 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
ins->inputs().end(),
std::back_inserter(args),
[&](auto i) { return names.at(i); });
auto s = this->generate_point_op(ins->get_operator(), args);
return this->generate_point_op(ins->get_operator(), args);
});
return f;
......
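The generate_module change above sanitizes the module name before emitting it as a C++ function name; together with the fuse_pointwise change below, which names submodules like main:pointwise0, the colon would otherwise produce an invalid identifier. A minimal standalone sketch of that transformation using only the standard library (the helper name is illustrative, not part of this commit):

#include <algorithm>
#include <cctype>
#include <string>

// Replace every character that is not alphanumeric or '_' with '_'.
std::string sanitize_identifier(std::string name)
{
    std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) {
        return (std::isalnum(c) != 0 or c == '_') ? static_cast<char>(c) : '_';
    });
    return name; // "main:pointwise0" -> "main_pointwise0"
}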
......@@ -480,7 +480,7 @@ struct perf : command<perf>
std::cout << "Allocating params ... " << std::endl;
auto m = c.params(p);
std::cout << "Running performance report ... " << std::endl;
p.perf_report(std::cout, n, m);
p.perf_report(std::cout, n, m, c.l.batch);
}
};
......
......@@ -11,7 +11,7 @@ inline namespace MIGRAPHX_INLINE_NS {
void eliminate_data_type::apply(module& m) const
{
static const std::vector<std::string> skip_op_names = {
"convert", "get_tuple_elem", "if", "loop"};
"convert", "get_tuple_elem", "if", "loop", "roialign"};
for(auto ins : iterator_for(m))
{
if(ins->name()[0] == '@')
......
......@@ -13,6 +13,8 @@ inline namespace MIGRAPHX_INLINE_NS {
static literal get_scalar(instruction_ref ins)
{
if(ins->name() == "contiguous")
return get_scalar(ins->inputs().front());
const auto& s = ins->get_shape();
if(not(s.elements() == 1 or s.scalar()))
return {};
......@@ -31,11 +33,16 @@ static void create_pointwise_modules(module_pass_manager& mpm)
{
if(not ins->get_operator().attributes().get("pointwise", false))
continue;
auto* pm = mpm.create_module("pointwise" + std::to_string(n++));
// Skip convert op for now
if(ins->name() == "convert")
continue;
assert(ins->get_operator().attributes().contains("point_op"));
auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
pm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> param_map;
std::vector<instruction_ref> pointwise_inputs;
std::size_t i = 0;
for(auto input : ins->inputs())
{
if(contains(param_map, input))
......@@ -44,8 +51,9 @@ static void create_pointwise_modules(module_pass_manager& mpm)
if(scalar.empty())
{
pointwise_inputs.push_back(input);
param_map[input] = pm->add_parameter("x" + std::to_string(param_map.size()),
shape{input->get_shape().type()});
param_map[input] =
pm->add_parameter("x" + std::to_string(i), shape{input->get_shape().type()});
i++;
}
else
{
......@@ -68,6 +76,7 @@ static void create_pointwise_modules(module_pass_manager& mpm)
static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
instruction_ref output)
{
assert(contains(output->inputs(), ins));
module_ref pm = ins->module_inputs().at(0);
module_ref xm = output->module_inputs().at(0);
......@@ -75,14 +84,18 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
assert(last->name() == "@return");
assert(last->inputs().size() == 1);
assert(pm->get_parameter_names().size() == ins->inputs().size());
assert(xm->get_parameter_names().size() == output->inputs().size());
std::vector<instruction_ref> inputs = ins->inputs();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
std::unordered_map<instruction_ref, instruction_ref> input_map;
// Copy inputs to input_map
for(auto i : range(inputs.size()))
{
auto input = inputs[i];
auto param = pm->get_parameter("x" + std::to_string(i));
auto input = inputs[i];
auto param = pm->get_parameter("x" + std::to_string(i));
assert(param != pm->end());
input_map[input] = param;
}
// Add the new parameter and additional inputs
......@@ -90,6 +103,7 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
{
auto input = output->inputs()[i];
auto param = xm->get_parameter("x" + std::to_string(i));
assert(param != xm->end());
if(input == ins)
{
map_ins[param] = last->inputs().front();
......
......@@ -35,7 +35,7 @@ struct if_op
MIGRAPHX_THROW("IF: output shapes of submodules must be the same.");
}
return shape(out_shapes0);
return {out_shapes0};
}
argument compute(const shape&,
......
......@@ -54,7 +54,7 @@ struct loop
ins_out_shapes.push_back({out_s.type(), lens});
}
return shape(ins_out_shapes);
return {ins_out_shapes};
}
struct ref_loop
......
#ifndef MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
#include <cmath>
#include <queue>
#include <cstdint>
#include <iterator>
#include <migraphx/config.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/algorithm.hpp>
#include <migraphx/tensor_view.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/output_iterator.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct nonmaxsuppression
{
bool center_point_box = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.center_point_box, "center_point_box"));
}
std::string name() const { return "nonmaxsuppression"; }
shape compute_shape(std::vector<shape> inputs) const
{
// requires at least 2 inputs
check_shapes{inputs, *this}.standard();
check_shapes{{inputs.at(0), inputs.at(1)}, *this}.only_dims(3);
auto lens = inputs.front().lens();
// check input shape
if(lens[1] != inputs.at(1).lens()[2])
{
MIGRAPHX_THROW("NonMaxSuppression: dimension mismatch between first and second input!");
}
std::vector<int64_t> out_lens(2);
out_lens.at(0) = lens.at(1);
out_lens.at(1) = 3;
return {shape::int64_type, out_lens};
}
struct box
{
std::array<float, 2> x;
std::array<float, 2> y;
void sort()
{
std::sort(x.begin(), x.end());
std::sort(y.begin(), y.end());
}
std::array<float, 2>& operator[](std::size_t i) { return i == 0 ? x : y; }
float area() const
{
assert(std::is_sorted(x.begin(), x.end()));
assert(std::is_sorted(y.begin(), y.end()));
return (x[1] - x[0]) * (y[1] - y[0]);
}
};
template <class T>
box batch_box(const T* boxes, std::size_t bidx) const
{
box result{};
const T* start = boxes + 4 * bidx;
if(center_point_box)
{
float half_width = start[2] / 2.0f;
float half_height = start[3] / 2.0f;
float x_center = start[0];
float y_center = start[1];
result.x = {x_center - half_width, x_center + half_width};
result.y = {y_center - half_height, y_center + half_height};
}
else
{
result.x = {start[1], start[3]};
result.y = {start[0], start[2]};
}
return result;
}
inline bool suppress_by_iou(box b1, box b2, float iou_threshold) const
{
b1.sort();
b2.sort();
box intersection{};
for(auto i : range(2))
{
intersection[i][0] = std::max(b1[i][0], b2[i][0]);
intersection[i][1] = std::min(b1[i][1], b2[i][1]);
}
std::vector<std::array<float, 2>> bbox = {intersection.x, intersection.y};
if(std::any_of(bbox.begin(), bbox.end(), [](auto bx) {
return not std::is_sorted(bx.begin(), bx.end());
}))
{
return false;
}
const float area1 = b1.area();
const float area2 = b2.area();
const float intersection_area = intersection.area();
const float union_area = area1 + area2 - intersection_area;
if(area1 <= .0f or area2 <= .0f or union_area <= .0f)
{
return false;
}
const float intersection_over_union = intersection_area / union_area;
return intersection_over_union > iou_threshold;
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto out) { std::fill(out.begin(), out.end(), 0); });
std::size_t max_output_boxes_per_class = 0;
float iou_threshold = 0.0f;
float score_threshold = 0.0f;
if(args.size() > 2)
{
max_output_boxes_per_class = args.at(2).at<std::size_t>();
}
// if max_output_boxes_per_class is 0, there is no output
if(max_output_boxes_per_class == 0)
{
return result;
}
if(args.size() > 3)
{
iou_threshold = args.at(3).at<float>();
}
if(args.size() > 4)
{
score_threshold = args.at(4).at<float>();
}
const auto& lens = args.at(1).get_shape().lens();
auto batch_num = lens[0];
auto class_num = lens[1];
auto box_num = args.at(0).get_shape().lens()[1];
std::vector<std::pair<float, int64_t>> selected_boxes_inside_class;
std::vector<int64_t> selected_indices;
selected_boxes_inside_class.reserve(output_shape.elements());
auto scores = make_view<float>(args.at(1).get_shape(), args.at(1).cast<float>());
const float* boxes = args.at(0).cast<float>();
shape comp_s{shape::float_type, {batch_num, class_num}};
shape_for_each(comp_s, [&](auto idx) {
auto bidx = idx[0];
auto cidx = idx[1];
std::size_t score_offset = (bidx * class_num + cidx) * box_num;
const float* batch_boxes = boxes + bidx * box_num * 4;
std::priority_queue<std::pair<float, int64_t>> sorted_boxes;
auto insert_to_sorted_boxes =
make_function_output_iterator([&](const auto& x) { sorted_boxes.push(x); });
int64_t box_idx = 0;
transform_if(scores.begin() + score_offset,
scores.begin() + score_offset + box_num,
insert_to_sorted_boxes,
[&](auto sc) {
box_idx++;
return sc >= score_threshold;
},
[&](auto sc) { return std::make_pair(sc, box_idx - 1); });
selected_boxes_inside_class.clear();
// Get the next box with top score, filter by iou_threshold
while(!sorted_boxes.empty() &&
selected_boxes_inside_class.size() < max_output_boxes_per_class)
{
const std::pair<float, int64_t>& next_top_score = sorted_boxes.top();
// Check against the boxes already selected for this class; suppress the candidate if its
// IOU (Intersection Over Union) with any of them exceeds the threshold
bool not_selected = std::any_of(
selected_boxes_inside_class.begin(),
selected_boxes_inside_class.end(),
[&](auto selected_index) {
return this->suppress_by_iou(batch_box(batch_boxes, next_top_score.second),
batch_box(batch_boxes, selected_index.second),
iou_threshold);
});
if(not not_selected)
{
selected_boxes_inside_class.push_back(next_top_score);
selected_indices.push_back(bidx);
selected_indices.push_back(cidx);
selected_indices.push_back(next_top_score.second);
}
sorted_boxes.pop();
}
});
result.visit([&](auto out) {
std::copy(selected_indices.begin(), selected_indices.end(), out.begin());
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
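The core of the new operator is the suppress_by_iou test: two boxes are compared by their intersection over union, and a candidate is dropped when that ratio exceeds iou_threshold. A minimal standalone sketch of the same test for corner-format boxes whose coordinates are already sorted (the struct and function names are illustrative, not part of this commit):

#include <algorithm>
#include <array>

struct simple_box
{
    std::array<float, 2> x; // {x1, x2}, sorted
    std::array<float, 2> y; // {y1, y2}, sorted
};

bool exceeds_iou(const simple_box& a, const simple_box& b, float iou_threshold)
{
    float ix0 = std::max(a.x[0], b.x[0]);
    float ix1 = std::min(a.x[1], b.x[1]);
    float iy0 = std::max(a.y[0], b.y[0]);
    float iy1 = std::min(a.y[1], b.y[1]);
    if(ix1 <= ix0 or iy1 <= iy0) // no overlap, nothing to suppress
        return false;
    float inter  = (ix1 - ix0) * (iy1 - iy0);
    float area_a = (a.x[1] - a.x[0]) * (a.y[1] - a.y[0]);
    float area_b = (b.x[1] - b.x[0]) * (b.y[1] - b.y[0]);
    float uni    = area_a + area_b - inter;
    return uni > 0.0f and inter / uni > iou_threshold;
}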
......@@ -26,19 +26,17 @@ struct pointwise
auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end());
check_shapes{inputs, *this}.has(pnames.size()).same_dims();
for(auto i : range(pnames.size()))
{
auto s1 = pm->get_parameter(pnames[i])->get_shape();
auto s2 = inputs[i];
if(s1.type() != s2.type())
MIGRAPHX_THROW("Mismatch type");
}
if(pm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("submodule should have only one output.");
auto type = pm->get_output_shapes().front().type();
// Scalar output if all inputs are scalar
if(inputs.front().elements() == 1 and
all_of(inputs, [](const auto& s) { return s.scalar(); }))
return shape{type};
return shape::from_permutation(type, inputs.front().lens(), find_permutation(inputs));
}
......
......@@ -9,6 +9,7 @@ namespace op {
struct prelu : binary<prelu>
{
std::string point_op() const { return "(${0} < 0) ? (${0} * ${1}) : ${0}"; }
auto apply() const
{
return [](auto x, auto slope) { return ((x < 0) ? (x * slope) : x); };
......
......@@ -9,6 +9,7 @@ namespace op {
struct recip : unary<recip>
{
std::string point_op() const { return "1 / ${0}"; }
auto apply() const
{
return [](auto x) { return 1 / x; };
......
#ifndef MIGRAPHX_GUARD_OPERATORS_ROIALIGN_HPP
#define MIGRAPHX_GUARD_OPERATORS_ROIALIGN_HPP
#include <limits>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/shape_for_each.hpp>
#include <cmath>
#include <numeric>
#include <utility>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct roialign
{
std::string coord_trans_mode = "half_pixel";
std::string mode = "avg";
int64_t output_height = 1;
int64_t output_width = 1;
int64_t sampling_ratio = 0;
float spatial_scale = 1.0f;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.coord_trans_mode, "coordinate_transformation_mode"),
f(self.mode, "mode"),
f(self.output_height, "output_height"),
f(self.output_width, "output_width"),
f(self.sampling_ratio, "sampling_ratio"),
f(self.spatial_scale, "spatial_scale"));
}
std::string name() const { return "roialign"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3).standard();
auto x_lens = inputs.at(0).lens();
auto roi_lens = inputs.at(1).lens();
auto bi_lens = inputs.at(2).lens();
auto type = inputs.at(0).type();
// validate the input shapes
if(bi_lens.size() != 1)
{
MIGRAPHX_THROW("ROIALIGN: batch indices should be 1 dimension!");
}
if(roi_lens.size() != 2 or roi_lens.at(1) != 4)
{
MIGRAPHX_THROW(
"ROIALIGN: rois should be 2 dimensions, and the second dim should be 4!");
}
if(roi_lens.front() != bi_lens.front())
{
MIGRAPHX_THROW("ROIALIGN: rois and batch indices inputs should have the same number!");
}
std::vector<std::size_t> out_lens = x_lens;
out_lens[0] = roi_lens[0];
out_lens[2] = output_height;
out_lens[3] = output_width;
return {type, out_lens};
}
struct pos_weight
{
// neighbor indices for the bilinear interpolation
std::array<std::size_t, 4> pos = {0, 0, 0, 0};
// neighbor weights for the bilinear interpolation
std::array<float, 4> w = {0.0f, 0.0f, 0.0f, 0.0f};
};
auto calc_pos_weight(const std::array<std::size_t, 2>& dims,
const shape& comp_s,
const std::array<float, 2>& roi_start,
const std::array<float, 2>& bin_size,
const std::array<std::size_t, 2>& bin_grid_size) const
{
std::vector<pos_weight> results(bin_grid_size[0] * bin_grid_size[1] * output_height *
output_width);
shape_for_each(comp_s, [&](auto idx) {
std::array<std::size_t, 2> p = {idx[0], idx[1]};
std::array<std::size_t, 2> i = {idx[2], idx[3]};
auto index = comp_s.index(idx);
std::array<float, 2> xy{};
std::array<int64_t, 2> low{};
std::array<int64_t, 2> high{};
for(auto ii : range(p.size()))
{
xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] +
(i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii];
xy[ii] = (coord_trans_mode == "output_half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
if(xy[ii] < -1.0 or xy[ii] > dims[ii])
{
results[index] = pos_weight{};
return;
}
xy[ii] = std::max(xy[ii], 0.0f);
low[ii] = xy[ii];
high[ii] = low[ii] + 1;
if(low[ii] >= dims[ii] - 1)
{
xy[ii] = high[ii] = low[ii] = dims[ii] - 1;
}
}
results[index].pos = {low[0] * dims[1] + low[1],
low[0] * dims[1] + high[1],
high[0] * dims[1] + low[1],
high[0] * dims[1] + high[1]};
float ly = xy[0] - low[0];
float lx = xy[1] - low[1];
float hy = 1.0f - ly;
float hx = 1.0f - lx;
// save weights and indices
results[index].w = {hy * hx, hy * lx, ly * hx, ly * lx};
});
return results;
}
struct max_pool
{
double init() { return std::numeric_limits<double>::lowest(); }
double operator()(double x, double y) { return std::max(x, y); }
double final(double x, std::size_t) { return (x); }
};
struct avg_pool
{
double init() { return 0.0; }
double operator()(double x, double y) { return x + y; }
double final(double x, std::size_t y) { return (y == 0) ? 0.0 : (x / y); }
};
template <class T, class Op>
std::tuple<double, int64_t> calc_pooling(const T& data,
const std::array<std::size_t, 2>& bin_grid_size,
const std::vector<pos_weight>& pos_weights,
int64_t index,
Op op) const
{
double output_val = op.init();
const int64_t count = bin_grid_size[0] * bin_grid_size[1];
dfor(bin_grid_size[0], bin_grid_size[1])([&](auto, auto) {
const auto& pc = pos_weights[index];
std::array<double, 4> wv;
std::transform(
pc.w.begin(), pc.w.end(), pc.pos.begin(), wv.begin(), [&](auto w, auto pos) {
return *(data + pos) * w;
});
output_val = std::accumulate(wv.begin(), wv.end(), output_val, op);
index += 1;
});
output_val = op.final(output_val, count);
return {output_val, index};
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
const auto& out_lens = output_shape.lens();
int64_t n_rois = out_lens[0];
std::size_t channels = out_lens[1];
// output dims of height and width; in all two-element arrays below, the first entry
// is height and the second is width
std::array<std::size_t, 2> out_dims = {out_lens[2], out_lens[3]};
const auto& x_lens = args.at(0).get_shape().lens();
// input dims of height and width
std::array<std::size_t, 2> in_dims = {x_lens[2], x_lens[3]};
auto roi_s = args.at(1).get_shape();
visit_all(result, args.at(0), args.at(1))([&](auto output, auto x, auto roi) {
const auto* batch_indices = args.at(2).cast<int64_t>();
par_for(n_rois, [&](auto n) {
const auto bottom_data = x.begin();
const auto roi_batch_ind = batch_indices[n];
// Do not use rounding; this implementation detail is critical
std::array<float, 2> roi_starts = {
static_cast<float>(roi[roi_s.index({n, 1})] * spatial_scale),
static_cast<float>(roi[roi_s.index({n, 0})] * spatial_scale)};
std::array<float, 2> roi_ends = {
static_cast<float>(roi[roi_s.index({n, 3})] * spatial_scale),
static_cast<float>(roi[roi_s.index({n, 2})] * spatial_scale)};
// Force malformed ROIs to be 1x1
std::array<float, 2> roi_size{};
std::array<float, 2> bin_size{};
std::array<std::size_t, 2> bin_grid_size{};
for(auto ii : range(roi_size.size()))
{
roi_size[ii] = roi_ends[ii] - roi_starts[ii];
roi_size[ii] = std::max(roi_size[ii], 1.0f);
bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] = (sampling_ratio > 0)
? sampling_ratio
: std::ceil(roi_size[ii] / out_dims[ii]);
}
// precalculate the indices and weights shared by all channels;
// this is the key optimization
std::vector<std::size_t> comp_lens = {
out_dims[0], out_dims[1], bin_grid_size[0], bin_grid_size[1]};
shape comp_s{shape::float_type, comp_lens};
auto pre_calc =
this->calc_pos_weight(in_dims, comp_s, roi_starts, bin_size, bin_grid_size);
std::vector<std::size_t> comp_lens1 = {channels, out_dims[0], out_dims[1]};
shape comp_s1{migraphx::shape::float_type, comp_lens1};
std::vector<int64_t> vec_index(channels, 0);
shape_for_each(comp_s1, [&](auto idx) {
auto c = idx[0];
auto ph = idx[1];
auto pw = idx[2];
const auto offset_bottom_data =
bottom_data + static_cast<int64_t>((roi_batch_ind * channels + c) *
in_dims[0] * in_dims[1]);
double output_val;
std::tie(output_val, vec_index[c]) =
(mode == "avg") ? this->calc_pooling(offset_bottom_data,
bin_grid_size,
pre_calc,
vec_index[c],
avg_pool{})
: this->calc_pooling(offset_bottom_data,
bin_grid_size,
pre_calc,
vec_index[c],
max_pool{});
output(n, c, ph, pw) = output_val;
});
});
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
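Most of the work in roialign::compute is the bilinear interpolation that calc_pos_weight and calc_pooling split between them: each sample point (y, x) inside a bin is blended from its four neighbouring feature-map cells with weights {hy*hx, hy*lx, ly*hx, ly*lx}. A minimal standalone sketch of that interpolation for one point in a row-major height x width grid, assuming 0 <= y < height and 0 <= x < width (the function name is illustrative, not part of this commit):

#include <algorithm>
#include <cstddef>
#include <vector>

float bilinear_sample(
    const std::vector<float>& data, std::size_t height, std::size_t width, float y, float x)
{
    std::size_t y_low  = static_cast<std::size_t>(y);
    std::size_t x_low  = static_cast<std::size_t>(x);
    std::size_t y_high = std::min(y_low + 1, height - 1);
    std::size_t x_high = std::min(x_low + 1, width - 1);
    float ly = y - y_low; // fractional offsets within the cell
    float lx = x - x_low;
    float hy = 1.0f - ly;
    float hx = 1.0f - lx;
    return hy * hx * data[y_low * width + x_low] + hy * lx * data[y_low * width + x_high] +
           ly * hx * data[y_high * width + x_low] + ly * lx * data[y_high * width + x_high];
}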
......@@ -18,6 +18,7 @@ namespace op {
struct sigmoid : unary<sigmoid>
{
std::string point_op() const { return "1.f / (1.f + ${function:exp}(-${0}))"; }
auto apply() const
{
return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
......