Commit f303624d authored by charlie

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into refactor_dyn_fixed_compare
parents f4a3a182 48cc33e4
@@ -87,7 +87,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@c0723a7e50043d973cb73ae51dc30d36679ee7e5 -DBUILD_MIXR_TARGET=On
+RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@0f38fb33f518b53b94b541feb9b079668c5518e8 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
......
@@ -29,6 +29,7 @@ See below for a comprehensive list of commands and option arguments, as well as
 | --tf | Load file as a tensorflow graph |
 | --migraphx | Load file as a migraphx graph |
 | --migraphx-json | Load file as a migraphx JSON graph |
+| --batch | Set batch size for the model |
 | --nhwc | Treat tensorflow format as nhwc |
 | --nchw | Treat tensorflow format as nchw |
 | --skip-unknown-operators | Skip unknown operators when parsing and continue to parse |
......
@@ -21,6 +21,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-tensorflow==2.7.2
+tensorflow==2.9.3
 onnxruntime
 tokenizers
\ No newline at end of file
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_INT_DIVIDE_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT_DIVIDE_HPP
#include <migraphx/config.hpp>
#include <cmath>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
template <class R, class T, class U>
R floor_divide(T x, U y)
{
return R(std::floor(double(x) / double(y)));
}
template <class R, class T, class U>
R ceil_divide(T x, U y)
{
return R(std::ceil(double(x) / double(y)));
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
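
A minimal standalone sketch (not from this commit) of how these helpers behave. The stand-in definitions below mirror the header above; the real ones live in namespace migraphx. Because they round through double with std::floor/std::ceil, negative operands round toward negative infinity rather than truncating toward zero.

#include <cassert>
#include <cmath>

// Stand-ins for the helpers above, copied for illustration.
template <class R, class T, class U>
R floor_divide(T x, U y) { return R(std::floor(double(x) / double(y))); }
template <class R, class T, class U>
R ceil_divide(T x, U y) { return R(std::ceil(double(x) / double(y))); }

int main()
{
    assert(floor_divide<int>(7, 2) == 3);   // 3.5 rounded down
    assert(ceil_divide<int>(7, 2) == 4);    // 3.5 rounded up
    assert(floor_divide<int>(-7, 2) == -4); // floor, not truncation (which would give -3)
    return 0;
}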
@@ -80,6 +80,7 @@ struct literal : raw_data<literal>
         fill(start, end);
     }
 
+    // Directly copies buffer of x
     template <class T, MIGRAPHX_REQUIRES(sizeof(T) == 1)>
     literal(const shape& s, T* x) : buffer(make_shared_array<char>(s.bytes())), m_shape(s)
     {
@@ -107,25 +108,15 @@ struct literal : raw_data<literal>
     std::shared_ptr<char> buffer;
     shape m_shape;
 
+    // Keeps the same data ordering as the given container
     template <class Iterator>
     void fill(Iterator start, Iterator end)
     {
         assert(std::distance(start, end) == m_shape.elements());
-        if(m_shape.standard())
-        {
-            m_shape.visit_type([&](auto as) { std::copy(start, end, as.from(buffer.get())); });
-        }
-        else
-        {
-            auto it = start;
-            m_shape.visit_type([&](auto as) {
-                auto output = make_view(m_shape, as.from(buffer.get()));
-                shape_for_each(output.get_shape(), [&](const auto& idx) {
-                    output(idx.begin(), idx.end()) = *it; // NOLINT(bugprone-signed-char-misuse)
-                    it++;
-                });
-            });
-        }
+        m_shape.visit_type([&](auto as) {
+            auto output = make_view(m_shape, as.from(buffer.get()));
+            std::copy(start, end, output.begin());
+        });
     }
 };
......
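
For a non-standard (strided) shape, fill() writes through a tensor view, so the buffer layout follows the shape's strides while iteration order follows the container. A standalone sketch of that index-to-offset mapping, using the same {1, 3, 2, 2} / {12, 1, 6, 3} example as the literal_nstd_shape_vector test added further down:

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<std::size_t> lens    = {1, 3, 2, 2};
    std::vector<std::size_t> strides = {12, 1, 6, 3};
    std::vector<float> data(12);
    for(std::size_t i = 0; i < data.size(); i++)
        data[i] = static_cast<float>(i);

    // Walk logical indices in row-major order (what the tensor view iteration does)
    // and scatter each element to its strided offset in the underlying buffer.
    std::vector<float> buffer(12);
    std::size_t element = 0;
    for(std::size_t n = 0; n < lens[0]; n++)
        for(std::size_t c = 0; c < lens[1]; c++)
            for(std::size_t h = 0; h < lens[2]; h++)
                for(std::size_t w = 0; w < lens[3]; w++)
                {
                    std::size_t offset =
                        n * strides[0] + c * strides[1] + h * strides[2] + w * strides[3];
                    buffer[offset] = data[element++];
                }

    std::vector<float> expected = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
    assert(buffer == expected);
    return 0;
}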
@@ -31,7 +31,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/shape_for_each.hpp>
-#include <migraphx/int_divide.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <cmath>
 #include <utility>
@@ -49,6 +49,9 @@ struct pooling
     bool ceil_mode = false;
     int lp_order   = 2;
+    // Global pooling with dynamic shape input
+    bool dyn_global = false;
 
     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
@@ -57,7 +60,8 @@ struct pooling
                     f(self.stride, "stride"),
                     f(self.lengths, "lengths"),
                     f(self.ceil_mode, "ceil_mode"),
-                    f(self.lp_order, "lp_order"));
+                    f(self.lp_order, "lp_order"),
+                    f(self.dyn_global, "dyn_global"));
     }
 
     std::string name() const { return "pooling"; }
@@ -65,51 +69,111 @@ struct pooling
     void check_attribute_size() const
     {
         if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
-           stride.size() != lengths.size())
+           (not dyn_global and stride.size() != lengths.size()))
         {
             MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
         }
     }
 
+    size_t kdims() const
+    {
+        check_attribute_size();
+        return stride.size();
+    }
+
     value attributes() const { return {{"normalize_padding", "padding"}}; }
 
+    std::vector<std::size_t> calc_spatial_dim_out(const std::vector<std::size_t>& input_lens,
+                                                  std::size_t kdims) const
+    {
+        std::vector<std::size_t> output_lens{};
+        for(size_t i = 0; i < kdims; ++i)
+        {
+            if(input_lens[i + 2] == 0)
+            {
+                // handle opt = 0
+                output_lens.push_back(0);
+            }
+            else
+            {
+                std::size_t padding_factor = 2 * padding[i];
+                if(padding.size() == 2 * kdims)
+                    padding_factor = padding[i] + padding[i + kdims];
+                assert(input_lens[i + 2] + padding_factor >= lengths[i]);
+                std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+                std::size_t len =
+                    (ceil_mode)
+                        ? dim_size / stride[i] +
+                              static_cast<std::size_t>((dim_size % stride[i] != 0)) // ceil uint divide
+                        : dim_size / stride[i]; // floor divide
+                output_lens.push_back(len + 1);
+            }
+        }
+        return output_lens;
+    }
+
     shape normalize_compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1);
+        check_attribute_size();
         const shape& input = inputs.at(0);
-        auto input_lens    = input.lens();
-        size_t kdims       = input_lens.size() - 2;
-        auto input_size    = inputs[0].lens().size();
         auto padding_size  = padding.size();
-        if(input_size != padding_size / 2 + 2 and input_size != padding_size + 2)
+        size_t kdims       = input.ndim() - 2;
+        if(input.ndim() != padding_size / 2 + 2 and input.ndim() != padding_size + 2)
         {
             MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
         }
 
-        std::vector<std::size_t> output_lens(input_lens.begin(), input_lens.begin() + 2);
-        for(size_t i = 0; i < kdims; i++)
-        {
-            std::ptrdiff_t dim_size;
-            auto padding_factor = 2 * padding[i];
-            if(padding_size == 2 * kdims)
-                padding_factor = padding[i] + padding[i + kdims];
-            dim_size = input_lens[i + 2] + padding_factor - lengths[i];
-            assert(dim_size >= 0);
-            std::size_t len = (ceil_mode) ? ceil_divide<std::ptrdiff_t>(dim_size, stride[i])
-                                          : floor_divide<std::ptrdiff_t>(dim_size, stride[i]);
-            output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(1, len + 1)));
-        }
-        return inputs[0].with_lens(output_lens);
-    }
-
-    size_t kdims() const
-    {
-        check_attribute_size();
-        return stride.size();
+        if(input.dynamic())
+        {
+            auto input_dyn_dims = input.dyn_dims();
+            std::vector<shape::dynamic_dimension> output_dyn_dims(input_dyn_dims.begin(),
+                                                                  input_dyn_dims.begin() + 2);
+            if(dyn_global)
+            {
+                for(size_t i = 0; i < kdims; ++i)
+                {
+                    output_dyn_dims.push_back(shape::dynamic_dimension{1, 1, 1});
+                }
+                return {input.type(), output_dyn_dims};
+            }
+            else
+            {
+                auto min_spatial_dims = calc_spatial_dim_out(input.min_lens(), kdims);
+                auto max_spatial_dims = calc_spatial_dim_out(input.max_lens(), kdims);
+                auto opt_spatial_dims = calc_spatial_dim_out(input.opt_lens(), kdims);
+                for(size_t i = 0; i < kdims; ++i)
+                {
+                    output_dyn_dims.push_back(shape::dynamic_dimension{
+                        min_spatial_dims[i], max_spatial_dims[i], opt_spatial_dims[i]});
+                }
+                return {input.type(), output_dyn_dims};
+            }
+        }
+        else
+        {
+            auto input_lens = input.lens();
+            std::vector<std::size_t> output_lens(input_lens.begin(), input_lens.begin() + 2);
+            // Used for when normalize_compute_shape() is called again at model eval time
+            // for an originally dynamic shape. Since kernel shape is not used with dyn_global.
+            if(dyn_global)
+            {
+                for(size_t i = 0; i < kdims; ++i)
+                {
+                    output_lens.push_back(1);
+                }
+                return {input.type(), output_lens};
+            }
+            else
+            {
+                auto output_spatial_lens = calc_spatial_dim_out(input_lens, kdims);
+                output_lens.insert(
+                    output_lens.end(), output_spatial_lens.begin(), output_spatial_lens.end());
+                return inputs[0].with_lens(output_lens);
+            }
+        }
     }
 
     struct lpnorm_pool
@@ -158,7 +222,11 @@ struct pooling
     };
 
     template <class Type, class Out, class In, class Op>
-    void calc_pooling(const shape& output_shape, Out& output, const In& input, Op op) const
+    void calc_pooling(const shape& output_shape,
+                      Out& output,
+                      const In& input,
+                      const std::vector<std::size_t>& kernel_dims,
+                      Op op) const
     {
         auto in_s    = input.get_shape();
         auto in_lens = in_s.lens();
@@ -172,7 +240,7 @@ struct pooling
             auto d_2 = dim - 2;
             int start =
                 static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
-            int end = std::min(start + lengths[d_2], in_lens[dim]);
+            int end = std::min(start + kernel_dims[d_2], in_lens[dim]);
             start = std::max(start, 0);
             win_start.push_back(start);
             win_size.push_back(end - start);
@@ -198,21 +266,32 @@ struct pooling
         });
     }
 
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
+        auto input_lens = args[0].get_shape().lens();
+        std::vector<std::size_t> kernel_dims;
+        if(dyn_global)
+        {
+            kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
+        }
+        else
+        {
+            kernel_dims = this->lengths;
+        }
         visit_all(result, args[0])([&](auto output, auto input) {
             using type = typename decltype(output)::value_type;
             switch(mode)
             {
             case migraphx::op::pooling_mode::average:
-                calc_pooling<type>(output_shape, output, input, avg_pool{});
+                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, avg_pool{});
                 break;
             case migraphx::op::pooling_mode::max:
-                calc_pooling<type>(output_shape, output, input, max_pool{});
+                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, max_pool{});
                 break;
             case migraphx::op::pooling_mode::lpnorm:
-                calc_pooling<type>(output_shape, output, input, lpnorm_pool{lp_order});
+                calc_pooling<type>(
+                    dyn_out.computed_shape, output, input, kernel_dims, lpnorm_pool{lp_order});
                 break;
             }
         });
......
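
A small standalone check of the per-dimension arithmetic that calc_spatial_dim_out performs with unsigned integer math (the old code routed it through floor_divide/ceil_divide): out = (in + pad_total - kernel) / stride + 1, with the division rounded up when ceil_mode is set. The helper name below is made up for the sketch.

#include <cassert>
#include <cstddef>

// Mirrors the per-dimension formula from pooling::calc_spatial_dim_out above.
std::size_t pool_out_dim(
    std::size_t in, std::size_t pad_total, std::size_t kernel, std::size_t stride, bool ceil_mode)
{
    std::size_t dim_size = in + pad_total - kernel;
    std::size_t len      = ceil_mode
                               ? dim_size / stride + static_cast<std::size_t>(dim_size % stride != 0)
                               : dim_size / stride;
    return len + 1;
}

int main()
{
    // 7-wide input, 3-wide kernel, stride 2, padding 1 per side: (7 + 2 - 3) / 2 + 1 = 4
    assert(pool_out_dim(7, 2, 3, 2, false) == 4);
    // ceil_mode only matters when the windows do not tile evenly:
    assert(pool_out_dim(7, 0, 2, 2, false) == 3); // floor(5 / 2) + 1
    assert(pool_out_dim(7, 0, 2, 2, true) == 4);  // ceil(5 / 2) + 1
    return 0;
}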
@@ -69,14 +69,10 @@ struct squeeze
         std::vector<shape::dynamic_dimension> dyn_dims = {};
         if(axes.empty())
         {
-            for(auto i : range(input_shape.ndim()))
-            {
-                auto dd = input_shape.dyn_dims()[i];
-                if(dd != 1)
-                {
-                    dyn_dims.push_back(dd);
-                }
-            }
+            std::copy_if(input_shape.dyn_dims().cbegin(),
+                         input_shape.dyn_dims().cend(),
+                         std::back_inserter(dyn_dims),
+                         [&](auto dd) { return dd != 1; });
         }
         else
         {
......
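
The loop-to-copy_if change is behavior preserving; a minimal sketch of the same pattern on plain integers, standing in for the dynamic dimensions:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

int main()
{
    // Keep every dimension that is not fixed at 1, as squeeze does for dyn_dims().
    std::vector<int> dims = {1, 3, 1, 224, 224};
    std::vector<int> kept;
    std::copy_if(dims.cbegin(), dims.cend(), std::back_inserter(kept),
                 [](auto dd) { return dd != 1; });
    assert((kept == std::vector<int>{3, 224, 224}));
    return 0;
}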
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/dyn_output.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -45,17 +46,15 @@ struct transpose
     }
     std::string name() const { return "transpose"; }
 
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1);
         auto input = inputs.at(0);
-        auto input_lens    = input.lens();
-        auto input_strides = input.strides();
-        auto t             = input.type();
 
-        if(dims.size() != input_lens.size())
+        if(dims.size() != input.ndim())
         {
-            MIGRAPHX_THROW("Permutation has wrong number of axes");
+            MIGRAPHX_THROW("TRANSPOSE: Permutation has wrong number of axes");
         }
         std::vector<int64_t> axes(dims.size());
         std::iota(axes.begin(), axes.end(), 0);
@@ -63,19 +62,36 @@ struct transpose
         {
             MIGRAPHX_THROW("TRANSPOSE: Invalid permutation");
         }
-        std::vector<size_t> output_lens(input_lens.size());
-        std::vector<size_t> output_strides(input_lens.size());
-        for(std::size_t i = 0; i < output_lens.size(); i++)
+
+        if(input.dynamic())
+        {
+            std::vector<shape::dynamic_dimension> output_dyn_dims(input.ndim());
+            std::transform(dims.cbegin(), dims.cend(), output_dyn_dims.begin(), [&](auto dim) {
+                return input.dyn_dims()[dim];
+            });
+            return {input.type(), output_dyn_dims};
+        }
+        else
+        {
+            auto input_lens    = input.lens();
+            auto input_strides = input.strides();
+            std::vector<size_t> output_lens(input.ndim());
+            std::vector<size_t> output_strides(input.ndim());
+            for(std::size_t i = 0; i < input.ndim(); i++)
             {
                 output_lens[i]    = input_lens[dims[i]];
                 output_strides[i] = input_strides[dims[i]];
             }
-        return {t, output_lens, output_strides};
+            return {input.type(), output_lens, output_strides};
+        }
     }
 
-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
......
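
In the static branch, transpose only permutes lengths and strides and then aliases the input buffer (compute() just reshapes). A standalone sketch of that permutation, with hypothetical numbers for an NCHW to NHWC reordering:

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    // Standard-layout {1, 2, 3, 4} shape, permuted with perm = {0, 2, 3, 1}.
    std::vector<std::size_t> lens    = {1, 2, 3, 4};
    std::vector<std::size_t> strides = {24, 12, 4, 1};
    std::vector<std::size_t> perm    = {0, 2, 3, 1};

    std::vector<std::size_t> out_lens(perm.size());
    std::vector<std::size_t> out_strides(perm.size());
    for(std::size_t i = 0; i < perm.size(); i++)
    {
        out_lens[i]    = lens[perm[i]];
        out_strides[i] = strides[perm[i]];
    }

    assert((out_lens == std::vector<std::size_t>{1, 3, 4, 2}));
    assert((out_strides == std::vector<std::size_t>{24, 4, 1, 12}));
    return 0;
}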
@@ -31,6 +31,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
+/**
+ * Iterates the given function over the indices from the shape in order.
+ */
 template <class F>
 void shape_for_each(const migraphx::shape& s, F f)
 {
@@ -51,7 +54,6 @@ void shape_for_each(const migraphx::shape& s, F f)
             call(indices);
         }
     }
-
 }
 
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
......
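
A hedged usage sketch of the documented helper: it invokes the callback once per multi-index of the shape, in row-major order. The callback argument is assumed here to be an index container (as in the literal fill code above); this is a sketch, not part of the commit.

#include <migraphx/shape.hpp>
#include <migraphx/shape_for_each.hpp>
#include <iostream>

int main()
{
    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
    // Expected to print (0,0) (0,1) (0,2) (1,0) (1,1) (1,2)
    migraphx::shape_for_each(s, [](const auto& idx) {
        std::cout << "(" << idx[0] << "," << idx[1] << ") ";
    });
    std::cout << std::endl;
    return 0;
}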
@@ -77,14 +77,14 @@ static void update_pooling(const instruction_ref& input, const instruction_ref&
     {
         return;
     }
-    auto kdims = input->get_shape().lens().size() - 2;
+    auto kdims = input->get_shape().ndim() - 2;
     if(std::equal(op.padding.begin(),
                   op.padding.begin() + kdims,
                   op.padding.begin() + kdims,
                   op.padding.end()))
         return;
 
-    std::vector<int64_t> padding(input->get_shape().lens().size() * 2, 0);
+    std::vector<int64_t> padding(input->get_shape().ndim() * 2, 0);
     std::vector<size_t> pads_l(op.padding.begin(), op.padding.begin() + kdims);
     std::vector<size_t> pads_r(op.padding.begin() + kdims, op.padding.end());
     op.padding = std::vector<size_t>(kdims * 2, 0);
......
@@ -47,52 +47,42 @@ struct parse_pooling : op_parser<parse_pooling>
                 {"GlobalLpPool", "lpnorm"}};
     }
 
-    instruction_ref parse(const op_desc& opd,
-                          const onnx_parser& /*parser*/,
-                          onnx_parser::node_info info,
-                          std::vector<instruction_ref> args) const
+    value handle_values(const op_desc& opd,
+                        onnx_parser::node_info info,
+                        const shape& in_shape,
+                        value values) const
     {
-        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
-            {"max", op::pooling_mode::max},
-            {"average", op::pooling_mode::average},
-            {"lpnorm", op::pooling_mode::lpnorm}};
-        std::string mode = opd.op_name;
-        if(not contains(mode_map, mode))
-        {
-            MIGRAPHX_THROW("onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
-        }
-        operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
-        value values = op.to_value();
-        auto l0      = args[0];
-        auto in_lens = l0->get_shape().lens();
-        assert(in_lens.size() > 2);
-        auto kdims = in_lens.size() - 2;
+        auto kdims = in_shape.ndim() - 2;
         if(starts_with(opd.onnx_name, "Global"))
         {
-            values["lengths"] = std::vector<size_t>(in_lens.begin() + 2, in_lens.end());
+            // if spatial dimensions are dynamic use dyn_global flag
+            if(in_shape.dynamic() and std::any_of(in_shape.dyn_dims().cbegin() + 2,
+                                                  in_shape.dyn_dims().cend(),
+                                                  [](auto dd) { return not dd.is_fixed(); }))
+            {
+                values["dyn_global"] = true;
+                values["lengths"]    = std::vector<size_t>();
+            }
+            else
+            {
+                // works with static and fixed dynamic shape
+                auto m_lens       = in_shape.max_lens();
+                values["lengths"] = std::vector<size_t>(m_lens.begin() + 2, m_lens.end());
+            }
         }
 
+        // does not support ceil_mode
         if(contains(info.attributes, "ceil_mode"))
         {
             values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
         }
 
-        // count include padding, if count include pad is 1, we always use
-        // explicit pad
-        int count_include_pad = 0;
-        if(contains(info.attributes, "count_include_pad"))
-        {
-            count_include_pad = info.attributes.at("count_include_pad").i();
-        }
-
         if(contains(info.attributes, "strides"))
         {
             values["stride"].clear();
             copy(info.attributes["strides"].ints(), std::back_inserter(values["stride"]));
             check_attr_sizes(kdims, values["stride"].size(), "PARSE_POOLING: inconsistent strides");
         }
         if(contains(info.attributes, "kernel_shape"))
         {
             values["lengths"].clear();
@@ -110,6 +100,46 @@ struct parse_pooling : op_parser<parse_pooling>
         // ensure pads availabe only when auto_pad is "NOT_SET"
         check_padding_mode(info, "POOLING");
 
+        return values;
+    }
+
+    instruction_ref parse(const op_desc& opd,
+                          const onnx_parser& /*parser*/,
+                          onnx_parser::node_info info,
+                          std::vector<instruction_ref> args) const
+    {
+        std::string mode = opd.op_name;
+        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
+            {"max", op::pooling_mode::max},
+            {"average", op::pooling_mode::average},
+            {"lpnorm", op::pooling_mode::lpnorm}};
+        if(not contains(mode_map, mode))
+        {
+            MIGRAPHX_THROW(
+                "PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
+        }
+        operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
+        value values = op.to_value();
+        auto l0       = args[0];
+        auto in_shape = l0->get_shape();
+        assert(in_shape.ndim() > 2);
+        auto kdims = in_shape.ndim() - 2;
+
+        values = handle_values(opd, info, in_shape, values);
+
+        // count include padding, if count include pad is 1, we always use
+        // explicit pad
+        int count_include_pad = 0;
+        if(contains(info.attributes, "count_include_pad"))
+        {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
+                               "dynamic input shape");
+            }
+            count_include_pad = info.attributes.at("count_include_pad").i();
+        }
+
         std::vector<int64_t> paddings;
         float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);
@@ -122,6 +152,13 @@ struct parse_pooling : op_parser<parse_pooling>
         }
 
         if(contains(info.attributes, "auto_pad"))
+        {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_POOLING: Auto padding pooling with dynamic input shape not supported");
+            }
+            else
             {
                 values["padding"].clear();
                 // return paddings could be empty, then setting to 0 for no padding
@@ -129,9 +166,10 @@ struct parse_pooling : op_parser<parse_pooling>
                                values,
                                values["lengths"].to_vector<std::size_t>(),
                                {1, 1},
-                               in_lens,
+                               in_shape.lens(),
                                paddings);
             }
+        }
 
         if(paddings.size() != 2 * kdims)
         {
@@ -150,6 +188,7 @@ struct parse_pooling : op_parser<parse_pooling>
             values["stride"].resize(kdims);
             std::fill_n(values["stride"].begin(), kdims, 1);
         }
 
+        // used to calculate the supposed output shape
         std::vector<int64_t> orig_padding = paddings;
@@ -159,6 +198,11 @@ struct parse_pooling : op_parser<parse_pooling>
 
         if(not slice_start.empty())
         {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
+            }
             // calculate expected output shape
             orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
             orig_padding.insert(orig_padding.begin(), 2, 0);
......
@@ -47,7 +47,7 @@ struct parse_transpose : op_parser<parse_transpose>
         }
 
         // if perm is empty, use the default value
-        auto n_dim = args.front()->get_shape().lens().size();
+        auto n_dim = args.front()->get_shape().ndim();
         if(perm.empty())
         {
             perm.resize(n_dim);
......
@@ -24,7 +24,6 @@
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/mlir.hpp>
 
 namespace migraphx {
......
@@ -32,7 +32,13 @@
 #include <mlir-c/Dialect/MIGraphX.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
-#include <mlir-c/Registration.h>
+#include <mutex>
+#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
+#warning "Incompatible version of rocMLIR library used, disabling"
+#undef MIGRAPHX_MLIR
+#else
+#include <mlir-c/RegisterRocMLIR.h>
+#endif
 #endif
 
 #include <migraphx/env.hpp>
@@ -50,10 +56,6 @@
 #include <deque>
 #include <variant>
 
-#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
-#define MIGRAPHX_MLIR_BARE_POINTER
-#endif
-
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -168,9 +170,11 @@ struct mlir_program
           location(mlirLocationUnknownGet(ctx.get())),
           mmodule(mlirModuleCreateEmpty(location))
     {
-        MlirDialectHandle mixr_handle = mlirGetDialectHandle__migraphx__();
-        mlirDialectHandleRegisterDialect(mixr_handle, ctx.get());
-        mlirRegisterAllDialects(ctx.get());
+        MlirDialectRegistry registry = mlirDialectRegistryCreate();
+        mlirRegisterRocMLIRDialects(registry);
+        mlirContextAppendDialectRegistry(ctx.get(), registry);
+        mlirContextLoadAllAvailableDialects(ctx.get());
+        mlirDialectRegistryDestroy(registry);
         mlirContextSetAllowUnregisteredDialects(ctx.get(), true /*allow*/);
     }
@@ -452,7 +456,8 @@ struct mlir_program
         auto ops = create_operation_state("func.func");
         ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
                             {"sym_name", std::string("main")},
-                            {"kernel", std::string("mixr")}});
+                            {"kernel", std::string("mixr")},
+                            {"arch", target_arch}});
         ops.add_region(std::move(region));
         insert(body, std::move(ops));
@@ -512,7 +517,8 @@ struct mlir_program
         pp =
             problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
         // check if HW supports xdlops
-        bool xdlops = contains(get_xdlops_archs(), target_name);
+        auto target_chip = trim(split_string(target_arch, ':').front());
+        bool xdlops      = contains(get_xdlops_archs(), target_chip);
         std::string tuned = get_tune_params(xdlops);
         if(not tuned.empty())
             ops.add_attributes({{"perf_config", tuned}});
@@ -540,7 +546,7 @@ struct mlir_program
         // 1st pipeline to call
         mlirMIGraphXAddHighLevelPipeline(pm.get());
         // 2nd pipeline to call
-        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
         mlirPassManagerRun(pm.get(), mmodule.get());
 
         code_object_op op{};
@@ -550,16 +556,7 @@ struct mlir_program
         return op;
     }
 
-    void find_target()
-    {
-        std::string tname = get_device_name();
-        // HACK: Since MLIR can't handle the full target name
-        target_name = trim(split_string(tname, ':').front());
-        if(tname.size() != target_name.size())
-            std::cout
-                << "*************** WARNING: MLIR may not compile the correct target features for: "
-                << tname << std::endl;
-    }
+    void find_target() { target_arch = get_device_name(); }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const
     {
@@ -588,7 +585,7 @@ struct mlir_program
     mlir_module mmodule;
     problem_params pp;
     std::deque<std::string> strings{};
-    std::string target_name;
+    std::string target_arch;
 };
 
 std::string dump_mlir(const module& m)
@@ -650,6 +647,10 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
     const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
     if(trace)
         std::cout << m << std::endl;
+
+    // set mutex while llvm thread support is disabled.
+    static std::mutex g_mlirc_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
     mlir_program mp;
     mp.find_target();
     mp.parse(m);
@@ -669,46 +670,9 @@ instruction_ref insert_mlir(module& m,
 
     std::vector<instruction_ref> refs;
     std::size_t last = 0;
-#ifdef MIGRAPHX_MLIR_BARE_POINTER
     refs.reserve(inputs.size());
     std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
     last = refs.size() - 1;
-#else
-    refs.reserve(inputs.size() * 15);
-    std::unordered_map<uint64_t, instruction_ref> literal_map{};
-    auto get_literal = [&](uint64_t value) {
-        auto fi = literal_map.find(value);
-        if(fi != literal_map.end())
-            return fi->second;
-        auto lit = m.add_literal(value);
-        literal_map.emplace(value, lit);
-        return lit;
-    };
-
-    for(auto input : inputs)
-    {
-        const size_t offset = 0;
-        auto s = input->get_shape();
-        last   = refs.size();
-        refs.push_back(input);
-        refs.push_back(input);
-        refs.push_back(get_literal(offset)); // offset
-
-        // dim sizes
-        std::transform(s.lens().begin(),
-                       s.lens().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-        // dim strides
-        std::transform(s.strides().begin(),
-                       s.strides().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-    }
-#endif
 
     co.expected_inputs = to_shapes(refs);
     co.output_arg      = last;
     return m.insert_instruction(ins, co, refs);
......
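
The MLIR backend now keeps the full device architecture string and only splits off the chip name when checking xdlops support, via trim(split_string(target_arch, ':').front()). A standalone sketch of that split, with a hypothetical arch string of the form returned by get_device_name():

#include <cassert>
#include <string>

// Stand-in for the chip-name extraction used in mlir.cpp.
std::string chip_name(const std::string& arch)
{
    return arch.substr(0, arch.find(':'));
}

int main()
{
    // Hypothetical full target string: chip name followed by feature flags.
    assert(chip_name("gfx90a:sramecc+:xnack-") == "gfx90a");
    assert(chip_name("gfx1030") == "gfx1030"); // no features attached
    return 0;
}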
@@ -27,6 +27,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/permutation.hpp>
 #include <fstream>
+#include <mutex>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
 
 auto query_miopen_db(const std::string& query)
 {
+    static std::mutex g_db_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_db_mutex);
+
     // TODO: Store db as a static variable
     const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
     // Check if db file exists.
......
@@ -146,8 +146,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         pack_int8_args{},
         dead_code_elimination{},
-        adjust_allocation{gpu_allocation_model{}},
-        dead_code_elimination{},
         fuse_ops{&ctx, options.fast_math},
         dead_code_elimination{},
         replace_allocate{gpu_allocation_model{}, options.offload_copy},
......
@@ -140,7 +140,7 @@ TEST_CASE(conv)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
     %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     return %0 : tensor<1x2x2x2xf32>
   }
@@ -163,7 +163,7 @@ TEST_CASE(conv_add_relu)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
     %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
......
@@ -49,6 +49,25 @@ TEST_CASE(literal_test)
     EXPECT(l4.empty());
 }
 
+TEST_CASE(literal_nstd_shape_vector)
+{
+    migraphx::shape nstd_shape{migraphx::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}};
+    std::vector<float> data(12);
+    std::iota(data.begin(), data.end(), 0);
+    auto l0 = migraphx::literal{nstd_shape, data};
+
+    // check data buffer is read in correctly
+    std::vector<float> expected_buffer = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
+    const auto* start = reinterpret_cast<const float*>(l0.data());
+    std::vector<float> l0_data{start, start + 12};
+    EXPECT(l0_data == expected_buffer);
+
+    // check that using visit() (that uses a tensor view) gives data in correct order
+    std::vector<float> results_vector(12);
+    l0.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(results_vector == data);
+}
+
 TEST_CASE(literal_os1)
 {
     migraphx::literal l{1};
......