Commit 5a14c0bf authored by umangyadav's avatar umangyadav
Browse files

Merge branch 'develop' into workspace_size

parents cb01e280 5fa42993
......@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
if(conflict_table.find(vn) != conflict_table.end())
{
std::set<int>& vn_set = conflict_table[vn];
const std::set<int>& vn_set = conflict_table[vn];
for(const auto& iter : vn_set)
{
live_range* range = live_ranges[iter];
......@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
{
for(int i = 0; i < num_of_lives; ++i)
{
live_interval& interval = live_intervals[i];
live_range& segment = interval.segment;
const live_interval& interval = live_intervals[i];
const live_range& segment = interval.segment;
if(segment.begin == invalid_offset)
{
......@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
int vn = segment.vn;
if(conflict_table.find(vn) != conflict_table.end())
{
std::set<int>& vn_set = conflict_table[vn];
const std::set<int>& vn_set = conflict_table[vn];
for(const auto& iter : vn_set)
{
live_range* range = live_ranges[iter];
......@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
{
std::cout << " segment:" << i;
std::cout << " =>";
std::set<int>& table = conflict_table[i];
for(auto& iter : table)
const std::set<int>& table = conflict_table[i];
for(const auto& iter : table)
{
std::cout << (iter) << ",";
}
......@@ -357,7 +357,7 @@ void live_interval::dump()
std::cout << "id:" << id;
segment.dump();
std::cout << " uses:";
for(auto& iter : use_points)
for(const auto& iter : use_points)
{
std::cout << " " << get_ins_enum(iter) << ",";
}
......
......@@ -52,19 +52,21 @@ void calculate_padding(int64_t idx,
}
}
std::vector<std::size_t> calc_dyn_auto_pad(std::vector<std::size_t> tensor_lens,
std::vector<std::size_t> k_lens,
std::vector<std::size_t> strides,
std::vector<std::size_t> dilations,
std::vector<std::size_t> calc_dyn_auto_pad(const std::vector<std::size_t>& input_lens,
const std::vector<std::size_t>& wei_lens,
const std::vector<std::size_t>& strides,
const std::vector<std::size_t>& dilations,
bool use_upper)
{
std::vector<std::size_t> padding;
padding.resize(2 * k_lens.size());
for(std::size_t i = 0; i < padding.size() / 2; i++)
assert(input_lens.size() >= 3);
std::size_t num_spatial_dims = input_lens.size() - 2;
padding.resize(2 * num_spatial_dims);
for(std::size_t i = 0; i < num_spatial_dims; i++)
{
std::ptrdiff_t input_dim = tensor_lens[i];
std::ptrdiff_t input_dim = input_lens[i + 2];
std::ptrdiff_t stride = strides[i];
std::ptrdiff_t weight_dim = k_lens[i];
std::ptrdiff_t weight_dim = wei_lens[i + 2];
std::ptrdiff_t dilation = dilations[i];
std::ptrdiff_t output_dim = (input_dim + stride - 1) / stride; // round up result
std::ptrdiff_t new_weight_dim = weight_dim + (weight_dim - 1) * (dilation - 1);
......@@ -86,5 +88,28 @@ std::vector<std::size_t> calc_dyn_auto_pad(std::vector<std::size_t> tensor_lens,
return padding;
}
shape compute_padded_shape(const shape& input,
const shape& weights,
const std::vector<std::size_t>& padding,
const std::vector<std::size_t>& stride,
const std::vector<std::size_t>& dilation)
{
const size_t num_spatial_dims = input.lens().size() - 2;
std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};
// calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
for(size_t i = 0; i < num_spatial_dims; ++i)
{
auto padding_factor = padding[i] + padding[i + num_spatial_dims];
output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[i + 2] - (1 + dilation[i] * (weights.lens()[i + 2] - 1)) +
padding_factor) /
stride[i] +
1)));
}
return input.with_lens(output_lens);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -398,7 +398,7 @@ std::vector<argument> generic_eval(const program& p,
return generic_eval(mm, ctx, params, {}, make_trace);
}
std::vector<argument> program::eval(parameter_map params) const
std::vector<argument> program::eval(parameter_map params, execution_environment exec_env) const
{
auto& ctx = this->impl->ctx;
#ifndef NDEBUG
......@@ -423,6 +423,12 @@ std::vector<argument> program::eval(parameter_map params) const
#endif
auto trace_level = value_of(MIGRAPHX_TRACE_EVAL{});
std::vector<argument> ret;
if(exec_env.async)
{
ctx.wait_for(exec_env.queue);
}
if(trace_level > 0)
{
......@@ -434,49 +440,56 @@ std::vector<argument> program::eval(parameter_map params) const
ins_out[x] = ss.str();
});
return generic_eval(*this,
ctx,
std::move(params),
with_check_context([&](auto& ins, auto f, auto&& check_context) {
ctx.finish();
std::cout << "Run instruction: " << ins_out.at(ins) << std::endl;
timer t{};
auto result = check_context(f);
double t1 = t.record<milliseconds>();
ctx.finish();
double t2 = t.record<milliseconds>();
std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
if(trace_level > 1 and ins->name().front() != '@' and
ins->name() != "load" and not result.empty())
{
target tgt = make_target(this->impl->target_name);
auto buffer = tgt.copy_from(result);
if(trace_level == 2)
{
std::cout << "Output has "
<< to_string_range(classify_argument(buffer))
<< std::endl;
std::cout << "Output: ";
preview_argument(std::cout, buffer);
std::cout << std::endl;
}
else
{
std::cout << "Output: " << buffer << std::endl;
}
}
return result;
}));
ret = generic_eval(*this,
ctx,
std::move(params),
with_check_context([&](auto& ins, auto f, auto&& check_context) {
ctx.finish();
std::cout << "Run instruction: " << ins_out.at(ins) << std::endl;
timer t{};
auto result = check_context(f);
double t1 = t.record<milliseconds>();
ctx.finish();
double t2 = t.record<milliseconds>();
std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
if(trace_level > 1 and ins->name().front() != '@' and
ins->name() != "load" and not result.empty())
{
target tgt = make_target(this->impl->target_name);
auto buffer = tgt.copy_from(result);
if(trace_level == 2)
{
std::cout << "Output has "
<< to_string_range(classify_argument(buffer))
<< std::endl;
std::cout << "Output: ";
preview_argument(std::cout, buffer);
std::cout << std::endl;
}
else
{
std::cout << "Output: " << buffer << std::endl;
}
}
return result;
}));
}
else
{
return generic_eval(*this,
ctx,
std::move(params),
with_check_context([&](auto&, auto f, auto&& check_context) {
return check_context(f);
}));
ret = generic_eval(*this,
ctx,
std::move(params),
with_check_context([&](auto&, auto f, auto&& check_context) {
return check_context(f);
}));
}
if(exec_env.async)
{
ctx.finish_on(exec_env.queue);
}
return ret;
}
const int program_file_version = 5;
......
......@@ -264,12 +264,13 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py::class_<migraphx::argument>(m, "argument", py::buffer_protocol())
.def_buffer([](migraphx::argument& x) -> py::buffer_info { return to_buffer_info(x); })
.def("__init__",
[](migraphx::argument& x, py::buffer b) {
py::buffer_info info = b.request();
new(&x) migraphx::argument(to_shape(info), info.ptr);
})
.def(py::init([](py::buffer b) {
py::buffer_info info = b.request();
return migraphx::argument(to_shape(info), info.ptr);
}))
.def("get_shape", &migraphx::argument::get_shape)
.def("data_ptr",
[](migraphx::argument& x) { return reinterpret_cast<std::uintptr_t>(x.data()); })
.def("tolist",
[](migraphx::argument& x) {
py::list l{x.get_shape().elements()};
......@@ -354,6 +355,23 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
}
return p.eval(pm);
})
.def("run_async",
[](migraphx::program& p,
py::dict params,
std::uintptr_t stream,
std::string stream_name) {
migraphx::parameter_map pm;
for(auto x : params)
{
std::string key = x.first.cast<std::string>();
py::buffer b = x.second.cast<py::buffer>();
py::buffer_info info = b.request();
pm[key] = migraphx::argument(to_shape(info), info.ptr);
}
migraphx::execution_environment exec_env{
migraphx::any_ptr(reinterpret_cast<void*>(stream), stream_name), true};
return p.eval(pm, exec_env);
})
.def("sort", &migraphx::program::sort)
.def("print", [](const migraphx::program& p) { std::cout << p << std::endl; })
.def("__eq__", std::equal_to<migraphx::program>{})
......
......@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
name_shapes.insert(ps.begin(), ps.end());
}
for(auto& pn : name_shapes)
for(const auto& pn : name_shapes)
{
const auto& s = pn.second;
instruction_ref output{};
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/dfor.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void rewrite_batchnorm::apply(module& m) const
{
for(auto ins : iterator_for(m))
{
if(ins->name() != "batch_norm_inference")
continue;
// Get scale, bias, mean, variance from inputs
auto gamma = ins->inputs()[1]->eval();
auto bias = ins->inputs()[2]->eval();
auto mean = ins->inputs()[3]->eval();
auto variance = ins->inputs()[4]->eval();
if(any_of({gamma, bias, mean, variance}, [](auto arg) { return arg.empty(); }))
continue;
std::vector<std::size_t> lens = ins->inputs()[1]->get_shape().lens();
shape s{ins->get_shape().type(), lens};
// Get epsilon
auto bn_op = any_cast<op::batch_norm_inference>(ins->get_operator());
auto epsilon = bn_op.epsilon;
argument a{s};
argument b{s};
visit_all(gamma, bias, mean, variance, a, b)(
[&](auto gamma2, auto bias2, auto mean2, auto variance2, auto a2, auto b2) {
dfor(a.get_shape().elements())(
[&](std::size_t c) { a2[c] = gamma2[c] / std::sqrt(variance2[c] + epsilon); });
dfor(b.get_shape().elements())([&](std::size_t c) {
b2[c] = bias2[c] - (gamma2[c] * mean2[c] / std::sqrt(variance2[c] + epsilon));
});
});
auto broadcast = op::broadcast{1, ins->get_shape().lens()};
auto a_ins = m.add_literal({a.get_shape(), a.data()});
auto a_broadcast = m.insert_instruction(ins, broadcast, a_ins);
auto mul = m.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
auto b_ins = m.add_literal({b.get_shape(), b.data()});
auto b_broadcast = m.insert_instruction(ins, broadcast, b_ins);
auto add = m.insert_instruction(ins, make_op("add"), mul, b_broadcast);
m.replace_instruction(ins, add);
}
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -57,12 +57,14 @@ auto conv_const_weights()
auto reduction() { return match::name_contains("reduce"); }
// conv(x, w) * a => conv(x, a * w)
struct find_mul_conv
{
auto matcher() const
{
return match::name("mul")(match::either_arg(0, 1)(conv_const_weights().bind("conv"),
match::name("broadcast").bind("a")));
return match::name("mul")(
match::either_arg(0, 1)(conv_const_weights().bind("conv"),
match::name("broadcast", "multibroadcast").bind("a")));
}
void apply(module& m, const match::matcher_result& r) const
......@@ -72,14 +74,35 @@ struct find_mul_conv
auto a_ins = r.instructions["a"];
auto w_ins = r.instructions["w"];
auto broadcast_op = any_cast<op::broadcast>(a_ins->get_operator());
if(broadcast_op.axis != 1)
const auto& a_input_lens = a_ins->inputs().front()->get_shape().lens();
std::size_t num_not_one_dims = std::count_if(
a_input_lens.cbegin(), a_input_lens.cend(), [](auto dim) { return dim != 1; });
if(num_not_one_dims > 1)
return;
// check broadcasted along channels
const auto& a_lens = a_ins->get_shape().lens();
const auto& a_strides = a_ins->get_shape().strides();
auto is_broadcasted_axis = [](auto len, auto stride) { return len == 1 or stride == 0; };
if(a_strides.at(1) != 1)
return;
if(not is_broadcasted_axis(a_lens.front(), a_strides.front()))
return;
if(not std::equal(a_lens.begin() + 2,
a_lens.end(),
a_strides.begin() + 2,
a_strides.end(),
is_broadcasted_axis))
return;
auto sq = m.insert_instruction(ins, make_op("squeeze"), a_ins->inputs().front());
auto new_a = m.insert_instruction(
ins,
make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}),
a_ins->inputs().front());
ins, make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}), sq);
auto new_mul = m.insert_instruction(ins, make_op("mul"), new_a, w_ins);
auto new_conv = m.insert_instruction(
ins, conv_ins->get_operator(), conv_ins->inputs().front(), new_mul);
......@@ -412,6 +435,24 @@ struct find_concat_op
}
};
void move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss)
{
auto start = range(m.begin(), pos);
for(auto ins : iterator_for(start))
{
auto it = std::find(inss.begin(), inss.end(), ins);
if(it != inss.end())
inss.erase(it);
}
for(auto ins : inss)
{
if(not m.has_instruction(ins))
continue;
move_instructions_back(m, pos, ins->inputs());
m.move_instruction(ins, pos);
}
}
std::vector<instruction_ref> get_splits(instruction_ref ins)
{
std::vector<instruction_ref> result;
......@@ -587,8 +628,7 @@ struct find_splits
}))
return;
for(auto data : data_args)
m.move_instructions(data, ins);
move_instructions_back(m, ins, data_args);
auto slice_op = any_cast<op::slice>(splits.front()->get_operator());
assert(not slice_op.axes.empty());
......@@ -841,8 +881,7 @@ struct find_conv_dot_horiz_fusion
concat_axis = axis;
}
for(auto arg : args)
m.move_instructions(arg, input);
move_instructions_back(m, input, args);
// TODO: Check if axes match
auto concat =
m.insert_instruction(input, make_op("concat", {{"axis", concat_axis}}), args);
......@@ -894,6 +933,73 @@ struct find_div_const
}
};
struct find_unit_ops
{
auto matcher() const
{
auto mul_1 = match::name("mul")(
match::either_arg(0, 1)(match::has_value(1.0f), match::any().bind("x")));
auto div_1 =
match::name("div")(match::args(match::any().bind("x"), match::has_value(1.0f)));
auto add_0 = match::name("add")(
match::either_arg(0, 1)(match::has_value(0.0f, 1e-12), match::any().bind("x")));
auto sub_0 =
match::name("sub")(match::args(match::any().bind("x"), match::has_value(0.0f)));
return match::any_of(mul_1, div_1, add_0, sub_0);
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto c_in = r.instructions["x"];
m.replace_instruction(ins, c_in);
}
};
struct find_neg_unit_ops
{
auto matcher() const
{
auto mul_neg_1 = match::name("mul")(
match::either_arg(0, 1)(match::has_value(-1.0f), match::any().bind("x")));
auto div_neg_1 =
match::name("div")(match::args(match::any().bind("x"), match::has_value(-1.0f)));
auto sub_0 =
match::name("sub")(match::args(match::has_value(0.0f), match::any().bind("x")));
return match::any_of(mul_neg_1, div_neg_1, sub_0);
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto c_in = r.instructions["x"];
auto neg = m.add_instruction(make_op("neg"), c_in);
m.replace_instruction(ins, neg);
}
};
struct find_zero_ops
{
auto matcher() const
{
auto mul_zero = match::name("mul")(
match::either_arg(0, 1)(match::has_value(0.0f).bind("x"), match::any()));
auto div_zero =
match::name("div")(match::args(match::has_value(0.0f).bind("x"), match::any()));
return match::any_of(mul_zero, div_zero);
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto zero_ins = r.instructions["x"];
m.replace_instruction(ins, zero_ins);
}
};
struct find_sub_const
{
auto matcher() const
......@@ -985,20 +1091,35 @@ struct find_split_reshape
auto rsp_lens = rsp->get_shape().lens();
auto rsp_strides = rsp->get_shape().strides();
rsp_strides.insert(rsp_strides.begin(), rsp_strides[0] * rsp_lens[0]);
auto ait = std::find(rsp_strides.begin(), rsp_strides.end(), slc_dim_size);
auto ait = std::find(rsp_strides.begin(), rsp_strides.end(), slc_dim_size);
int rsp_axis = -1;
if(ait == rsp_strides.end())
{
return;
}
int rsp_axis = std::distance(rsp_strides.begin(), ait);
else if(ait == rsp_strides.end() - 1)
{
// edge case
// slice_dim == 1, in that case it could match with last stride of 1.
// it should accumulate lengths from last dim in that case. discount 1 to avoid going
// out of bounds.
assert(slc_dim_size == 1);
rsp_axis = std::distance(rsp_strides.begin(), ait) - 1;
}
else
{
rsp_axis = std::distance(rsp_strides.begin(), ait);
}
// calculate reshape output shape
std::vector<int64_t> vec_dims(vec_rsp.size());
std::transform(vec_rsp.begin(), vec_rsp.end(), vec_dims.begin(), [&](auto is) {
return is->get_shape().lens()[rsp_axis];
});
std::vector<int64_t> rsp_out_lens(rsp_lens.begin(), rsp_lens.end());
rsp_out_lens[rsp_axis] = std::accumulate(vec_dims.begin(), vec_dims.end(), std::int64_t{0});
// insert the reshape instruction and add contiguous if needed
......@@ -1095,6 +1216,9 @@ void simplify_algebra::apply(module& m) const
find_mul_conv{},
find_mul_slice_conv{},
find_mul_add{},
find_unit_ops{},
find_neg_unit_ops{},
find_zero_ops{},
find_dot_add{},
find_div_const{},
find_sub_const{},
......
......@@ -271,6 +271,44 @@ struct find_nested_slice
}
};
struct find_concat_multibroadcasts
{
auto matcher() const
{
return match::name("concat")(match::all_of[match::inputs()](match::name("multibroadcast")));
}
void apply(module& m, const match::matcher_result& mr) const
{
auto ins = mr.result;
auto op = any_cast<op::concat>(ins->get_operator());
auto out_lens = ins->get_shape().lens();
auto inputs = ins->inputs();
auto in_strides = inputs.front()->get_shape().strides();
// Only apply when concat axis is not a broadcasted dimension
if(std::any_of(inputs.begin(), inputs.end(), [&](auto i) {
return i->get_shape().strides()[op.axis] == 0;
}))
{
return;
}
// Use inputs of multibroadcast ops as inputs to new concat op
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [](auto i) {
return i->inputs().front();
});
// Reduce axis by number of leading broadcasted dimensions
if(inputs.front()->get_shape().lens().size() < out_lens.size())
op.axis -= std::count(in_strides.begin(), in_strides.begin() + op.axis, 0);
auto concat = m.insert_instruction(ins, op, inputs);
m.replace_instruction(
ins, migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), concat);
}
};
struct find_concat_transpose
{
auto matcher() const
......@@ -764,6 +802,7 @@ void simplify_reshapes::apply(module& m) const
find_reshaper{},
find_transpose{},
find_concat_transpose{},
find_concat_multibroadcasts{},
find_nested_convert{},
find_nested_slice{},
find_nested_concat{},
......
......@@ -35,6 +35,7 @@ add_library(migraphx_cpu
dnnl.cpp
eltwise.cpp
erf.cpp
fmod.cpp
fuse_ops.cpp
gather.cpp
gemm.cpp
......@@ -42,6 +43,7 @@ add_library(migraphx_cpu
logsoftmax.cpp
lowering.cpp
lrn.cpp
mod.cpp
preallocate.cpp
pooling.cpp
reduction.cpp
......
......@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/config.hpp>
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/fmod.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace cpu {
struct hip_cos : unary_device<hip_cos, device::cos>
{
};
template struct cpu_binary<op::fmod>;
} // namespace gpu
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -26,7 +26,6 @@
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
......@@ -43,6 +42,8 @@
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/op/fmod.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
......
......@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_EXP_HPP
#define MIGRAPHX_GUARD_RTGLIB_EXP_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/config.hpp>
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/mod.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace cpu {
struct hip_exp : unary_device<hip_exp, device::exp>
{
};
template struct cpu_binary<op::mod>;
} // namespace gpu
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -37,7 +37,6 @@
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
......@@ -78,8 +77,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
eliminate_identity{},
eliminate_pad{},
dead_code_elimination{},
rewrite_batchnorm{},
dead_code_elimination{},
rewrite_rnn{},
dead_code_elimination{},
eliminate_common_subexpression{},
......
......@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
add_embed_library(migraphx_kernels ${KERNEL_FILES})
add_library(migraphx_device
device/acos.cpp
device/acosh.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
device/add_sigmoid.cpp
device/add_tanh.cpp
device/argmax.cpp
device/argmin.cpp
device/asin.cpp
device/asinh.cpp
device/atan.cpp
device/atanh.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
device/contiguous.cpp
device/convert.cpp
device/cos.cpp
device/cosh.cpp
device/div.cpp
device/equal.cpp
device/erf.cpp
device/exp.cpp
device/fill.cpp
device/floor.cpp
device/gather.cpp
device/gelu.cpp
device/greater.cpp
device/int8_gemm_pack.cpp
device/layernorm.cpp
device/less.cpp
device/log.cpp
device/logical_and.cpp
device/logical_or.cpp
device/logical_xor.cpp
device/logsoftmax.cpp
device/max.cpp
device/min.cpp
device/mul.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/prefix_scan_sum.cpp
device/recip.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_sum.cpp
device/reduce_prod.cpp
device/relu.cpp
device/reverse.cpp
device/rnn_variable_seq_lens.cpp
device/round.cpp
device/rsqrt.cpp
device/scatter.cpp
device/sigmoid.cpp
device/sign.cpp
device/sin.cpp
device/sinh.cpp
device/softmax.cpp
device/sqdiff.cpp
device/sqrt.cpp
device/sub.cpp
device/tan.cpp
device/tanh.cpp
device/topk.cpp
device/unary_not.cpp
device/where.cpp
)
file(GLOB DEVICE_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
add_library(migraphx_device ${DEVICE_GPU_SRCS})
add_library(compile_for_gpu INTERFACE)
target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
......@@ -150,18 +78,12 @@ add_library(migraphx_gpu
allocation_model.cpp
argmax.cpp
argmin.cpp
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
compile_ops.cpp
compile_gen.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compiler.cpp
concat.cpp
convert.cpp
convolution.cpp
deconvolution.cpp
device_name.cpp
elu.cpp
fuse_mlir.cpp
......@@ -186,13 +108,11 @@ add_library(migraphx_gpu
pad.cpp
perfdb.cpp
pooling.cpp
quant_convolution.cpp
reverse.cpp
rnn_variable_seq_lens.cpp
rocblas.cpp
scatter.cpp
schedule_model.cpp
softmax.cpp
sync_device.cpp
target.cpp
topk.cpp
......@@ -207,81 +127,27 @@ function(register_migraphx_gpu_ops PREFIX)
endforeach()
endfunction()
register_migraphx_gpu_ops(hip_
acosh
acos
add
argmax
argmin
asinh
asin
atanh
atan
ceil
clip
concat
convert
cosh
cos
div
equal
erf
exp
floor
gather
greater
less
log
logsoftmax
logical_and
logical_or
logical_xor
loop
max
min
mul
multinomial
nonzero
pad
pow
prelu
prefix_scan_sum
recip
reduce_max
reduce_mean
reduce_min
reduce_prod
reduce_sum
relu
reverse
round
rsqrt
scatter
sigmoid
sign
sinh
sin
softmax
sqdiff
sqrt
sub
tanh
tan
topk
unary_not
where
)
register_migraphx_gpu_ops(miopen_
abs
batch_norm_inference
contiguous
convolution
deconvolution
elu
int8_conv_pack
leaky_relu
lrn
pooling
quant_convolution
)
register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
......@@ -295,6 +161,9 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp)
rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_gpu)
......@@ -322,26 +191,11 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "")
if(MIGRAPHX_ENABLE_MLIR)
find_library(MLIRAPI_LIBRARY MLIRMIOpen
PATH_SUFFIXES
# Workaournd broken mlir install
lib/ lib/lib)
# REQUIRED is not supported before cmake 3.18
if(NOT MLIRAPI_LIBRARY)
message(FATAL_ERROR "libMLIRMIOpen not found")
else()
message(STATUS "Build with libMLIRMIOpen: " ${MLIRAPI_LIBRARY})
endif()
find_path(MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h)
# Workaround MLIR broken installation
find_path(MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
PATH_SUFFIXES
include/external/include external/include)
# Find package rocMLIR
find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}")
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
target_include_directories(migraphx_gpu SYSTEM PRIVATE ${MLIRAPI_HEADERS} ${MLIRAPI_HEADERS2})
target_link_libraries(migraphx_gpu PUBLIC ${MLIRAPI_LIBRARY})
target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler)
endif()
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")
......@@ -380,9 +234,18 @@ endif()
include(CheckLibraryExists)
get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
if(HAS_FIND_2_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
else()
message(STATUS "MIOpen does not have Find-2.0 API")
endif()
if(HAS_FIND_MODE_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
message(STATUS "MIOpen has find mode api")
message(STATUS "MIGraphx is using Find Mode API of MIOpen")
else()
message(STATUS "MIOpen does not have find mode api")
endif()
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(6);
check_shapes{inputs.data(), inputs.data() + 1, *this}.same_ndims().max_ndims(5);
return op.compute_shape({inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
}
inline shape reshape_to_2d(const shape& input)
{
auto dims = input.lens();
if(dims.size() >= 4)
return input;
std::vector<size_t> new_dims(dims.begin(), dims.end());
std::size_t num = 4 - dims.size();
new_dims.insert(new_dims.end(), num, 1);
return {input.type(), new_dims};
}
argument miopen_batch_norm_inference::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
shape x_shape = args[0].get_shape();
shape y_shape = output_shape;
shape bn_shape = args[3].get_shape();
auto x_desc = make_tensor(reshape_to_2d(x_shape));
auto y_desc = make_tensor(reshape_to_2d(y_shape));
auto bn_desc = make_tensor(reshape_to_2d(bn_shape));
float alpha = 1.0;
float beta = 0.0f;
miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
miopenBatchNormMode_t(op.bn_mode),
&alpha,
&beta,
x_desc.get(),
args[0].implicit(),
y_desc.get(),
args[5].implicit(),
bn_desc.get(),
args[1].implicit(),
args[2].implicit(),
args[3].implicit(),
args[4].implicit(),
op.epsilon);
return args[5];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::clip(ctx.get_stream().get(), args.back(), args.front(), args.at(1), args.at(2));
return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp>
......@@ -48,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
return {4, 2};
}
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
vectorize vectorize::elements(std::size_t axis,
const std::vector<shape>& inputs,
const std::vector<std::size_t>& sizes)
{
if(std::all_of(
inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
return {1, axis};
auto sizes = vector_sizes(inputs);
std::vector<std::size_t> max_vec_size;
std::transform(inputs.begin(),
inputs.end(),
......@@ -81,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis};
}
vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)
{
if(inputs.empty())
return {1, axis};
std::size_t n = std::max_element(inputs.begin(),
inputs.end(),
by(std::less<>{}, [](const auto& s) { return s.elements(); }))
->elements();
std::size_t max_global = ctx.get_current_device().get_cu_count() *
ctx.get_current_device().get_max_workitems_per_cu();
std::size_t over = n / max_global;
bool broadcasted =
std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); });
std::vector<std::size_t> sizes;
if(broadcasted and over > 8)
sizes.push_back(8);
if(over > 4)
sizes.push_back(4);
sizes.push_back(2);
return elements(axis, inputs, sizes);
}
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
{
return elements(axis, inputs, vector_sizes(inputs));
}
std::string vectorize::str() const
{
return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()";
......@@ -102,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
std::size_t bytes = 0;
for(auto i : preloaded)
{
auto input = inputs[i];
const auto& input = inputs[i];
bytes += input.bytes();
if(bytes > max_lds_bytes)
break;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment