Commit 00d90ca8 authored by Khalique Ahmed
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into mi100_opts

parents 439c0838 ee79e9b7
......@@ -25,6 +25,8 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
using milliseconds = std::chrono::duration<double, std::milli>;
struct program_impl
{
// A map is used to keep references to modules of the program
......@@ -293,10 +295,14 @@ std::vector<argument> program::eval(parameter_map params) const
ctx.finish();
std::cout << "Run instruction: ";
this->debug_print(ins);
timer t{};
auto result = check_context(f);
double t1 = t.record<milliseconds>();
ctx.finish();
double t2 = t.record<milliseconds>();
std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
if(trace_level > 1 and ins->name().front() != '@' and ins->name() != "load")
std::cout << "Ouput: " << result << std::endl;
std::cout << "Output: " << result << std::endl;
return result;
});
}
......@@ -480,8 +486,7 @@ double common_average(const std::vector<double>& v)
void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
{
using milliseconds = std::chrono::duration<double, std::milli>;
auto& ctx = this->impl->ctx;
auto& ctx = this->impl->ctx;
// Run once by itself
eval(params);
ctx.finish();
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -6,6 +6,7 @@
#include <migraphx/par_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/dom_info.hpp>
#include <unordered_map>
#include <unordered_set>
#include <queue>
......@@ -16,6 +17,7 @@
#include <set>
#include <deque>
#include <chrono>
#include <iomanip>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -88,7 +90,7 @@ struct stream_info
return args.end();
}
const std::size_t min_partition_threshold = 1;
const std::size_t min_partition_threshold = 2;
sort_args_by_weight(args, std::greater<>{});
auto it = std::lower_bound(std::next(args.begin()),
......@@ -353,6 +355,7 @@ struct stream_info
{
std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
dominator_info di = compute_dominator(p);
result.reserve(p.size());
merge_from.reserve(p.size());
for(auto ins : reverse_iterator_for(p))
......@@ -366,8 +369,13 @@ struct stream_info
merge_from[ins].insert(merge_from[arg].begin(), merge_from[arg].end());
}
auto streams = this->get_streams(ins);
if(is_split_point(ins))
{
erase_if(merge_from[ins],
[&](auto merge) { return di.strictly_dominate(ins, merge); });
}
auto streams = this->get_streams(ins);
// Collect concur instructions for each merge point.
for(const auto& merge : merge_from[ins])
{
......@@ -396,11 +404,18 @@ struct stream_info
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>
get_conflicts(module& p)
{
using conflict_table_type =
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>;
conflict_table_type conflict_table;
auto concur_ins = this->find_concurrent_instructions(p);
// Compute an index for each instruction
std::unordered_map<instruction_ref, std::size_t> ins2index;
std::size_t index_total = 0;
for(auto ins : iterator_for(p))
ins2index[ins] = index_total++;
std::vector<conflict_table_type> thread_conflict_tables(
std::thread::hardware_concurrency());
std::vector<instruction_ref> index_to_ins;
......@@ -442,14 +457,13 @@ struct stream_info
for(auto ins1 : ins1_set)
{
auto p1 = std::distance(ins1, merge_first);
auto p1 = ins2index.at(ins1);
for(auto ins2 : ins2_set)
{
if(ins1 == ins2)
continue;
auto p2 = std::distance(ins2, merge_first);
// The smaller distance means the instruction occurs later
if(p1 > p2)
auto p2 = ins2index.at(ins2);
if(p2 > p1)
thrd_table[ins2].insert(ins1);
else
thrd_table[ins1].insert(ins2);
......
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/program.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/concat.hpp>
#include <migraphx/op/slice.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/contiguous.hpp>
#include <migraphx/op/as_shape.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/neg.hpp>
#include <migraphx/op/recip.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/rsqrt.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/literal.hpp>
......@@ -20,6 +13,7 @@
#include <migraphx/serialize.hpp>
#include <migraphx/algorithm.hpp>
#include <unordered_set>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -403,8 +397,27 @@ struct find_splits
match::any_of[match::outputs()](match::pointwise(), reduction()))));
}
static bool is_dependent(const module& m, instruction_ref ins1, instruction_ref ins2)
{
std::unordered_set<instruction_ref> traversed;
return fix<bool>([&](auto self, auto ins) -> bool {
if(ins == ins2)
return true;
if(contains(traversed, ins))
return false;
traversed.insert(ins);
const auto& inputs = ins->inputs();
return std::any_of(inputs.begin(), inputs.end(), [&](auto in) {
return m.has_instruction(in) and self(in);
});
})(ins1);
}
static std::vector<std::vector<instruction_ref>>
get_split_groups(const std::vector<instruction_ref>& splits)
get_split_groups(const module& m, const std::vector<instruction_ref>& splits)
{
std::vector<std::vector<instruction_ref>> groups;
for(auto out : splits.front()->outputs())
......@@ -421,9 +434,16 @@ struct find_splits
if(it == split->outputs().end())
break;
assert((*it)->name() != "slice");
// If there is a duplicate bail
if(contains(group, *it))
// There should be no dependency between instructions in the group
if(std::any_of(group.begin(), group.end(), [&](auto i) {
return is_dependent(m, *it, i) or is_dependent(m, i, *it);
}))
{
return {};
}
group.push_back(*it);
}
if(group.size() != splits.size())
......@@ -460,13 +480,12 @@ struct find_splits
void apply(module& p, const match::matcher_result& r) const
{
auto ins = r.result;
auto ins = r.result;
auto splits = get_splits(ins);
if(splits.empty())
return;
for(const auto& group : get_split_groups(splits))
for(const auto& group : get_split_groups(p, splits))
{
auto start = group.front();
auto split_front = splits.front();
......@@ -644,19 +663,6 @@ struct find_add_convs
return x.stride[0] / y.stride[0];
}
// Builds a shape that views `input` with a step of n along dimensions 2 and 3:
// the first two lengths and strides are kept, the last two strides are
// multiplied by n and the last two lengths become (len - 1) / n + 1, clamped to
// at least 1 (for len >= 1 that is ceil(len / n)).
// NOTE(review): indexes lens()/strides() at 0..3, so a 4-d input is assumed.
static shape compute_stride_shape(const shape& input, std::size_t n)
{
    const auto& lens    = input.lens();
    const auto& strides = input.strides();
    auto stepped_len    = [&](auto len) {
        return std::size_t(std::max<std::ptrdiff_t>(1, (len - 1) / n + 1));
    };
    return {input.type(),
            {lens[0], lens[1], stepped_len(lens[2]), stepped_len(lens[3])},
            {strides[0], strides[1], strides[2] * n, strides[3] * n}};
}
void apply(module& p, match::matcher_result r) const
{
auto ins = r.result;
......@@ -687,11 +693,7 @@ struct find_add_convs
return;
new_op = a_op;
b_input = p.insert_instruction(
ins,
make_op(
"as_shape",
{{"shape", to_value(compute_stride_shape(b_input->get_shape(), n))}}),
b_input);
ins, make_op("step", {{"axes", {2, 3}}, {"steps", {n, n}}}), b_input);
}
else if(b_op.stride < a_op.stride)
{
......@@ -700,11 +702,7 @@ struct find_add_convs
return;
new_op = b_op;
a_input = p.insert_instruction(
ins,
make_op(
"as_shape",
{{"shape", to_value(compute_stride_shape(a_input->get_shape(), n))}}),
a_input);
ins, make_op("step", {{"axes", {2, 3}}, {"steps", {n, n}}}), a_input);
}
else
return;
......
......@@ -376,9 +376,7 @@ struct find_resize
return;
}
arg_ind.visit([&](auto v) { vec_ind.assign(v.begin(), v.end()); });
std::vector<int> index(out_shape.elements());
std::iota(index.begin(), index.end(), 0);
if(not std::all_of(index.begin(), index.end(), [&](auto i) {
if(not all_of(range(out_shape.elements()), [&](auto i) {
auto out_idx = out_shape.multi(i);
auto in_idx = out_idx;
std::transform(out_idx.begin(),
......
#include <migraphx/dom_info.hpp>
#include <migraphx/program.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
// Checks strictly_dominate() on a small diamond-shaped module:
//
//   entry -> fork -> left  -> join
//                 -> right -> join
//                 -> side
TEST_CASE(dom1)
{
    migraphx::module mm;
    auto entry = mm.add_parameter("entry", {migraphx::shape::float_type});
    auto fork  = mm.add_instruction(pass_op{}, entry);
    auto left  = mm.add_instruction(pass_op{}, fork);
    auto right = mm.add_instruction(pass_op{}, fork);
    auto join  = mm.add_instruction(pass_op{}, left, right);
    auto side  = mm.add_instruction(pass_op{}, fork);

    auto dom = migraphx::compute_dominator(mm);

    // Every path to the later instructions goes through entry and fork
    EXPECT(dom.strictly_dominate(entry, fork));
    EXPECT(dom.strictly_dominate(fork, left));
    EXPECT(dom.strictly_dominate(fork, right));
    EXPECT(dom.strictly_dominate(fork, join));
    EXPECT(dom.strictly_dominate(fork, side));

    // Neither branch of the diamond dominates the side user or the join
    EXPECT(not dom.strictly_dominate(left, side));
    EXPECT(not dom.strictly_dominate(right, side));
    EXPECT(not dom.strictly_dominate(left, join));
    EXPECT(not dom.strictly_dominate(right, join));
}
// Test-driver entry point: forwards the command-line arguments to test::run.
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -79,8 +79,7 @@ struct pass_op
return {};
return inputs.front();
}
int output_alias(const std::vector<migraphx::shape>&) const { return 0; }
int output_alias(const std::vector<migraphx::shape>& s) const { return s.empty() ? -1 : 0; }
};
struct mod_pass_op
......
This diff is collapsed.
......@@ -1549,4 +1549,23 @@ TEST_CASE(prefix_scan_sum)
}
}
// Shape computation checks for the "step" operator on a {1, 2, 4} float input.
TEST_CASE(step_test)
{
    migraphx::shape s1{migraphx::shape::float_type, {1, 2, 4}};
    {
        // Stepping axes 1 and 2 by 2 and 3 is expected to produce lens {1, 1, 2}
        // with strides {8, 8, 3}.
        migraphx::shape s2{migraphx::shape::float_type, {1, 1, 2}, {8, 8, 3}};
        expect_shape(s2, migraphx::make_op("step", {{"axes", {1, 2}}, {"steps", {2, 3}}}), s1);
    }

    {
        // Two axes but only one step value: shape computation must throw.
        throws_shape(migraphx::make_op("step", {{"axes", {1, 2}}, {"steps", {1}}}), s1);
    }

    {
        // Axis 3 does not exist in the 3-d input: shape computation must throw.
        throws_shape(migraphx::make_op("step", {{"axes", {2, 3}}, {"steps", {2, 3}}}), s1);
    }
}
// Test-driver entry point: forwards the command-line arguments to test::run.
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -3847,6 +3847,42 @@ TEST_CASE(squeeze_test)
}
}
// Runs the "step" operator on the ref target and checks the shape of the
// result, once directly on a literal and once on a transposed view of it.
TEST_CASE(step_test)
{
    // step applied directly to a {2, 1, 4, 6} literal
    {
        migraphx::program prog;
        auto* main_mod = prog.get_main_module();
        migraphx::shape input_shape{migraphx::shape::float_type, {2, 1, 4, 6}};
        std::vector<float> values(2 * 4 * 6);
        std::iota(values.begin(), values.end(), 2);
        auto input   = main_mod->add_literal(migraphx::literal{input_shape, values});
        auto stepped = main_mod->add_instruction(
            migraphx::make_op("step", {{"axes", {0, 2, 3}}, {"steps", {2, 2, 3}}}), input);
        main_mod->add_return({stepped});
        prog.compile(migraphx::ref::target{});
        auto output = prog.eval({}).back();
        migraphx::shape expected{migraphx::shape::float_type, {1, 1, 2, 2}};
        EXPECT(output.get_shape() == expected);
    }

    // step applied after transposing the literal with dims {0, 2, 3, 1}
    {
        migraphx::program prog;
        auto* main_mod = prog.get_main_module();
        migraphx::shape input_shape{migraphx::shape::float_type, {2, 1, 4, 6}};
        std::vector<float> values(2 * 4 * 6);
        std::iota(values.begin(), values.end(), 2);
        auto input = main_mod->add_literal(migraphx::literal{input_shape, values});
        auto transposed =
            main_mod->add_instruction(migraphx::make_op("transpose", {{"dims", {0, 2, 3, 1}}}), input);
        auto stepped = main_mod->add_instruction(
            migraphx::make_op("step", {{"axes", {0, 1, 2}}, {"steps", {2, 2, 3}}}), transposed);
        main_mod->add_return({stepped});
        prog.compile(migraphx::ref::target{});
        auto output = prog.eval({}).back();
        migraphx::shape expected{migraphx::shape::float_type, {1, 2, 2, 1}};
        EXPECT(output.get_shape() == expected);
    }
}
TEST_CASE(sub_test)
{
migraphx::program p;
......
......@@ -774,6 +774,31 @@ TEST_CASE(inception_resnet)
t.check_conflicts(m, {c1, {i1}});
}
// Two back-to-back diamonds: (left1, right1) merge into join1, whose result
// feeds (left2, right2), which merge into join2. The branches inside each
// diamond must land on different streams and conflict with each other.
// NOTE(review): the check_conflicts calls that pass `false` presumably assert
// that NO conflict is recorded between the two diamonds - confirm against the
// scheduler test helper.
TEST_CASE(dominate_conflicts)
{
    scheduler t{};
    migraphx::module m;
    auto root   = m.add_literal(1);
    auto left1  = m.add_instruction(unary_op{}, root);
    auto right1 = m.add_instruction(unary_op{}, root);
    auto join1  = m.add_instruction(nary_op{}, left1, right1);
    auto left2  = m.add_instruction(unary_op{}, join1);
    auto right2 = m.add_instruction(unary_op{}, join1);
    auto join2  = m.add_instruction(nary_op{}, left2, right2);
    t.run_pass(m);

    // Sibling branches run on distinct streams
    EXPECT(t.get_stream(left1) != t.get_stream(right1));
    EXPECT(t.get_stream(left2) != t.get_stream(right2));

    // The first merge point waits on both of its branches
    EXPECT(get_wait_for(join1) ==
           get_wait_for(t.get_stream(join1), {t.get_stream(left1), t.get_stream(right1)}));

    t.check_conflicts(m, {{left1}, {right1}});
    t.check_conflicts(m, {{left2}, {right2}});
    t.check_conflicts(m, {{left1, right1}, {left2, right2}}, false);
    t.check_conflicts(m, {{join1}, {join2}}, false);
}
TEST_CASE(inception1)
{
scheduler t{};
......
......@@ -2132,4 +2132,32 @@ TEST_CASE(reorder_slice_trans_diff_perm)
test(4);
}
// The two slices of `input` feed instructions that depend on each other
// (the first add consumes neg(first_half) together with second_half, and the
// final add reuses first_half). The test expects run_pass to leave such a
// module unchanged.
TEST_CASE(reorder_slice_ins_deps)
{
    auto build = [] {
        migraphx::module mod;
        migraphx::shape input_shape{migraphx::shape::float_type, {4, 2}};
        migraphx::shape scale_shape{migraphx::shape::float_type, {2, 2}};
        std::vector<float> input_data = {0, 1, 2, 3, 4, 5, 6, 7};
        std::vector<float> scale_data = {0, 1, 2, 3};
        auto input = mod.add_literal(migraphx::literal(input_shape, input_data));
        auto scale = mod.add_literal(migraphx::literal(scale_shape, scale_data));

        // Split the input into rows [0, 2) and [2, 4)
        auto first_half = mod.add_instruction(
            migraphx::make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {2}}}), input);
        auto second_half = mod.add_instruction(
            migraphx::make_op("slice", {{"axes", {0}}, {"starts", {2}}, {"ends", {4}}}), input);

        auto negated = mod.add_instruction(migraphx::make_op("neg"), first_half);
        auto summed  = mod.add_instruction(migraphx::make_op("add"), negated, second_half);
        auto scaled  = mod.add_instruction(migraphx::make_op("mul"), summed, scale);
        auto result  = mod.add_instruction(migraphx::make_op("add"), scaled, first_half);
        mod.add_return({result});
        return mod;
    };

    auto m = build();
    run_pass(m);
    EXPECT(m == build());
}
// Test-driver entry point: forwards the command-line arguments to test::run.
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
// Verification program: applies "step" with steps {2, 2, 3} on axes {0, 2, 3}
// to a {2, 1, 4, 6} float parameter "x" and returns the result.
struct test_step : verify_program<test_step>
{
    migraphx::program create_program() const
    {
        migraphx::program prog;
        auto* main_mod = prog.get_main_module();
        migraphx::shape input_shape{migraphx::shape::float_type, {2, 1, 4, 6}};
        auto x       = main_mod->add_parameter("x", input_shape);
        auto stepped = main_mod->add_instruction(
            migraphx::make_op("step", {{"axes", {0, 2, 3}}, {"steps", {2, 2, 3}}}), x);
        main_mod->add_return({stepped});
        return prog;
    }
};
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
// Verification program: multibroadcasts a {1, 1, 1, 6} float parameter "x" to
// {2, 1, 4, 6}, transposes it with dims {0, 2, 3, 1}, then applies "step" with
// steps {2, 2, 3} on axes {0, 1, 2}.
struct test_step_broadcast_transpose : verify_program<test_step_broadcast_transpose>
{
    migraphx::program create_program() const
    {
        migraphx::program prog;
        auto* main_mod = prog.get_main_module();
        migraphx::shape input_shape{migraphx::shape::float_type, {1, 1, 1, 6}};
        auto x           = main_mod->add_parameter("x", input_shape);
        auto broadcasted = main_mod->add_instruction(
            migraphx::make_op("multibroadcast", {{"output_lens", {2, 1, 4, 6}}}), x);
        auto transposed = main_mod->add_instruction(
            migraphx::make_op("transpose", {{"dims", {0, 2, 3, 1}}}), broadcasted);
        auto stepped = main_mod->add_instruction(
            migraphx::make_op("step", {{"axes", {0, 1, 2}}, {"steps", {2, 2, 3}}}), transposed);
        main_mod->add_return({stepped});
        return prog;
    }
};
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
// Verification program: transposes a {2, 1, 4, 6} float parameter "x" with
// dims {0, 2, 3, 1}, then applies "step" with steps {2, 2, 3} on axes {0, 1, 2}.
struct test_step_transpose : verify_program<test_step_transpose>
{
    migraphx::program create_program() const
    {
        migraphx::program prog;
        auto* main_mod = prog.get_main_module();
        migraphx::shape input_shape{migraphx::shape::float_type, {2, 1, 4, 6}};
        auto x = main_mod->add_parameter("x", input_shape);
        auto transposed =
            main_mod->add_instruction(migraphx::make_op("transpose", {{"dims", {0, 2, 3, 1}}}), x);
        auto stepped = main_mod->add_instruction(
            migraphx::make_op("step", {{"axes", {0, 1, 2}}, {"steps", {2, 2, 3}}}), transposed);
        main_mod->add_return({stepped});
        return prog;
    }
};
......@@ -12,23 +12,15 @@ PREFIX=/usr/local
REQ_FILE_DIR=""
if [ "$#" -ge 2 ]; then
PREFIX=$1
REQ_FILE_DIR=$2
cd $2
elif [ "$#" -eq 1 ]; then
PREFIX=$1
fi
echo "Dependencies are install at $PREFIX"
# Manually ignore rocm dependencies
cget -p $PREFIX ignore \
RadeonOpenCompute/clang-ocl \
ROCm-Developer-Tools/HIP \
ROCmSoftwarePlatform/MIOpen \
ROCmSoftwarePlatform/MIOpenGEMM \
ROCmSoftwarePlatform/rocBLAS
cget -p $PREFIX init --cxx /opt/rocm/llvm/bin/clang++ --cc /opt/rocm/llvm/bin/clang
cget -p $PREFIX install -f ${REQ_FILE_DIR}dev-requirements.txt
cget -p $PREFIX install oneapi-src/oneDNN@v1.7
# Install deps with rbuild
rbuild prepare -d $PREFIX -s develop
# install onnx package for unit tests
pip3 install onnx==1.8.1 numpy==1.18.5 typing==3.7.4 pytest==6.0.1 packaging==16.8
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment