Unverified Commit 1d98fbb4 authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Add parallel stream analysis (#629)

* Add intial multi stream analysis

* Formatting

* Add more tests

* Formatting

* Remove comment

* Analyze streams on the gpu

* Formatting

* Fix nstream

* Formatting

* Add test for return

* Formatting

* Make sure return has a stream assignment

* Formatting

* Fix asserts and checks

* Improve error message for out-of-order sequence

* Formatting
parent a5065265
......@@ -4,6 +4,7 @@ include(ROCMPackageConfigHelpers)
include(RegisterOp)
add_library(migraphx
analyze_streams.cpp
auto_contiguous.cpp
eliminate_common_subexpression.cpp
decompose.cpp
......
#include <migraphx/analyze_streams.hpp>
#include <migraphx/program.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/errors.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
bool happens_before(const std::vector<std::size_t>& e1, const std::vector<std::size_t>& e2)
{
return std::equal(e1.begin(), e1.end(), e2.begin(), e2.end(), std::less_equal<>{}) and
not std::equal(e1.begin(), e1.end(), e2.begin(), e2.end(), std::greater_equal<>{});
}
std::vector<stream_race> analyze_streams(const program& p, const stream_model& m)
{
using vector_clock = std::vector<std::size_t>;
std::vector<stream_race> races;
auto nstream = m.get_nstream();
std::vector<vector_clock> vclock(nstream, vector_clock(nstream));
std::unordered_map<instruction_ref, vector_clock> timestamp;
std::unordered_map<std::size_t, vector_clock> events;
for(auto ins : iterator_for(p))
{
if(not m.has_stream(ins))
continue;
std::size_t s = m.get_stream(ins);
assert(s < nstream);
assert(vclock.size() == nstream);
assert(vclock[s].size() == nstream);
if(m.is_record(ins))
{
vclock[s][s]++;
auto event = m.get_event_id(ins);
events[event] = vclock[s];
}
else if(m.is_wait(ins))
{
auto event = m.get_event_id(ins);
if(not contains(events, event))
MIGRAPHX_THROW("Event is waited on before being recorded: " +
std::to_string(event));
auto payload = events.at(event);
assert(vclock[s].size() == payload.size());
std::transform(vclock[s].begin(),
vclock[s].end(),
payload.begin(),
vclock[s].begin(),
[&](auto x, auto y) { return std::max(x, y); });
vclock[s][s]++;
}
else
{
vclock[s][s]++;
}
timestamp[ins] = vclock[s];
}
for(auto ins : iterator_for(p))
{
if(not m.has_stream(ins))
continue;
if(ins->inputs().empty())
continue;
std::size_t s = m.get_stream(ins);
// Find inputs from different streams
std::vector<instruction_ref> inputs;
fix([&](auto self, auto start) {
for(auto input : start->inputs())
{
if(not m.has_stream(input))
self(input);
else if(m.get_stream(input) != s)
inputs.push_back(input);
}
})(ins);
auto it = std::find_if(inputs.begin(), inputs.end(), [&](auto input) {
return not happens_before(timestamp.at(input), timestamp.at(ins));
});
if(it != inputs.end())
{
races.push_back({ins, *it});
}
}
return races;
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_ANALYZE_STREAMS_HPP
#define MIGRAPHX_GUARD_RTGLIB_ANALYZE_STREAMS_HPP
#include <migraphx/config.hpp>
#include <migraphx/stream_model.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
struct stream_race
{
instruction_ref ins;
// The instruction that should before
instruction_ref before;
};
std::vector<stream_race> analyze_streams(const program& p, const stream_model& m);
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_STREAM_MODEL_HPP
#define MIGRAPHX_GUARD_STREAM_MODEL_HPP
#include <cassert>
#include <string>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include <migraphx/config.hpp>
#include <migraphx/instruction_ref.hpp>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
#ifdef DOXYGEN
/// An interface for target-dependent model for the scheduler
struct stream_model
{
/// Get the number of streams used in the program
std::size_t get_nstream() const;
/// Get stream for instruction
std::size_t get_stream(instruction_ref ins) const;
/// Get unique event id for instruction
std::size_t get_event_id(instruction_ref ins) const;
/// Returns true if instruction has a stream assignment
bool has_stream(instruction_ref ins) const;
/// Returns true if the instruction records the event
bool is_record(instruction_ref ins) const;
/// Returns true if the instruction wait on the event
bool is_wait(instruction_ref ins) const;
};
#else
/*
* Type-erased interface for:
*
* struct stream_model
* {
* std::size_t get_nstream() const;
* std::size_t get_stream(instruction_ref ins) const;
* std::size_t get_event_id(instruction_ref ins) const;
* bool has_stream(instruction_ref ins) const;
* bool is_record(instruction_ref ins) const;
* bool is_wait(instruction_ref ins) const;
* };
*
*/
struct stream_model
{
// Constructors
stream_model() = default;
template <typename PrivateDetailTypeErasedT>
stream_model(PrivateDetailTypeErasedT value)
: private_detail_te_handle_mem_var(
std::make_shared<private_detail_te_handle_type<
typename std::remove_reference<PrivateDetailTypeErasedT>::type>>(
std::forward<PrivateDetailTypeErasedT>(value)))
{
}
// Assignment
template <typename PrivateDetailTypeErasedT>
stream_model& operator=(PrivateDetailTypeErasedT value)
{
using std::swap;
auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
if(derived and private_detail_te_handle_mem_var.unique())
{
*derived = std::forward<PrivateDetailTypeErasedT>(value);
}
else
{
stream_model rhs(value);
swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
}
return *this;
}
// Cast
template <typename PrivateDetailTypeErasedT>
PrivateDetailTypeErasedT* any_cast()
{
return this->type_id() == typeid(PrivateDetailTypeErasedT)
? std::addressof(static_cast<private_detail_te_handle_type<
typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
private_detail_te_get_handle())
.private_detail_te_value)
: nullptr;
}
template <typename PrivateDetailTypeErasedT>
const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
{
return this->type_id() == typeid(PrivateDetailTypeErasedT)
? std::addressof(static_cast<const private_detail_te_handle_type<
typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
private_detail_te_get_handle())
.private_detail_te_value)
: nullptr;
}
const std::type_info& type_id() const
{
if(private_detail_te_handle_empty())
return typeid(std::nullptr_t);
else
return private_detail_te_get_handle().type();
}
std::size_t get_nstream() const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().get_nstream();
}
std::size_t get_stream(instruction_ref ins) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().get_stream(ins);
}
std::size_t get_event_id(instruction_ref ins) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().get_event_id(ins);
}
bool has_stream(instruction_ref ins) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().has_stream(ins);
}
bool is_record(instruction_ref ins) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().is_record(ins);
}
bool is_wait(instruction_ref ins) const
{
assert((*this).private_detail_te_handle_mem_var);
return (*this).private_detail_te_get_handle().is_wait(ins);
}
friend bool is_shared(const stream_model& private_detail_x,
const stream_model& private_detail_y)
{
return private_detail_x.private_detail_te_handle_mem_var ==
private_detail_y.private_detail_te_handle_mem_var;
}
private:
struct private_detail_te_handle_base_type
{
virtual ~private_detail_te_handle_base_type() {}
virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
virtual const std::type_info& type() const = 0;
virtual std::size_t get_nstream() const = 0;
virtual std::size_t get_stream(instruction_ref ins) const = 0;
virtual std::size_t get_event_id(instruction_ref ins) const = 0;
virtual bool has_stream(instruction_ref ins) const = 0;
virtual bool is_record(instruction_ref ins) const = 0;
virtual bool is_wait(instruction_ref ins) const = 0;
};
template <typename PrivateDetailTypeErasedT>
struct private_detail_te_handle_type : private_detail_te_handle_base_type
{
template <typename PrivateDetailTypeErasedU = PrivateDetailTypeErasedT>
private_detail_te_handle_type(
PrivateDetailTypeErasedT value,
typename std::enable_if<std::is_reference<PrivateDetailTypeErasedU>::value>::type* =
nullptr)
: private_detail_te_value(value)
{
}
template <typename PrivateDetailTypeErasedU = PrivateDetailTypeErasedT>
private_detail_te_handle_type(
PrivateDetailTypeErasedT value,
typename std::enable_if<!std::is_reference<PrivateDetailTypeErasedU>::value,
int>::type* = nullptr) noexcept
: private_detail_te_value(std::move(value))
{
}
std::shared_ptr<private_detail_te_handle_base_type> clone() const override
{
return std::make_shared<private_detail_te_handle_type>(private_detail_te_value);
}
const std::type_info& type() const override { return typeid(private_detail_te_value); }
std::size_t get_nstream() const override { return private_detail_te_value.get_nstream(); }
std::size_t get_stream(instruction_ref ins) const override
{
return private_detail_te_value.get_stream(ins);
}
std::size_t get_event_id(instruction_ref ins) const override
{
return private_detail_te_value.get_event_id(ins);
}
bool has_stream(instruction_ref ins) const override
{
return private_detail_te_value.has_stream(ins);
}
bool is_record(instruction_ref ins) const override
{
return private_detail_te_value.is_record(ins);
}
bool is_wait(instruction_ref ins) const override
{
return private_detail_te_value.is_wait(ins);
}
PrivateDetailTypeErasedT private_detail_te_value;
};
template <typename PrivateDetailTypeErasedT>
struct private_detail_te_handle_type<std::reference_wrapper<PrivateDetailTypeErasedT>>
: private_detail_te_handle_type<PrivateDetailTypeErasedT&>
{
private_detail_te_handle_type(std::reference_wrapper<PrivateDetailTypeErasedT> ref)
: private_detail_te_handle_type<PrivateDetailTypeErasedT&>(ref.get())
{
}
};
bool private_detail_te_handle_empty() const
{
return private_detail_te_handle_mem_var == nullptr;
}
const private_detail_te_handle_base_type& private_detail_te_get_handle() const
{
assert(private_detail_te_handle_mem_var != nullptr);
return *private_detail_te_handle_mem_var;
}
private_detail_te_handle_base_type& private_detail_te_get_handle()
{
assert(private_detail_te_handle_mem_var != nullptr);
if(!private_detail_te_handle_mem_var.unique())
private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
return *private_detail_te_handle_mem_var;
}
std::shared_ptr<private_detail_te_handle_base_type> private_detail_te_handle_mem_var;
};
template <typename ValueType>
inline const ValueType* any_cast(const stream_model* x)
{
return x->any_cast<ValueType>();
}
template <typename ValueType>
inline ValueType* any_cast(stream_model* x)
{
return x->any_cast<ValueType>();
}
template <typename ValueType>
inline ValueType& any_cast(stream_model& x)
{
auto* y = x.any_cast<typename std::remove_reference<ValueType>::type>();
if(y == nullptr)
throw std::bad_cast();
return *y;
}
template <typename ValueType>
inline const ValueType& any_cast(const stream_model& x)
{
const auto* y = x.any_cast<typename std::remove_reference<ValueType>::type>();
if(y == nullptr)
throw std::bad_cast();
return *y;
}
#endif
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -46,6 +46,9 @@ struct stream_info
auto&& op = ins->get_operator();
if(not is_context_free(op) and op.name()[0] != '@')
weight = model.weight(op);
// This will ensure a stream will be assigned to return
if(op.name() == "@return")
weight = 1;
iweights[ins] = weight;
weights[ins] =
std::accumulate(ins->inputs().begin(),
......
......@@ -94,6 +94,7 @@ target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURR
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(migraphx_gpu
analyze_streams.cpp
argmax.cpp
argmin.cpp
eliminate_workspace.cpp
......
#include <migraphx/gpu/analyze_streams.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/value.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct hip_stream_model
{
std::size_t max_stream = 0;
std::unordered_map<migraphx::instruction_ref, std::size_t> ins2stream{};
std::size_t get_nstream() const { return max_stream + 1; }
std::size_t get_stream(migraphx::instruction_ref ins) const { return ins2stream.at(ins); }
std::size_t get_event_id(migraphx::instruction_ref ins) const
{
auto v = ins->get_operator().to_value();
return v["event"].to<std::size_t>();
}
bool has_stream(migraphx::instruction_ref ins) const { return ins2stream.count(ins) > 0; }
bool is_record(migraphx::instruction_ref ins) const
{
return ins->name() == "gpu::record_event";
}
bool is_wait(migraphx::instruction_ref ins) const { return ins->name() == "gpu::wait_event"; }
};
stream_model make_stream_model(const program& p)
{
hip_stream_model m;
std::size_t stream = 0;
for(auto ins : iterator_for(p))
{
if(ins->name() == "gpu::set_stream")
{
auto v = ins->get_operator().to_value();
stream = v["stream"].to<std::size_t>();
m.max_stream = std::max(stream, m.max_stream);
}
if(ins->get_operator().is_context_free())
continue;
if(contains({"hip::hip_allocate_memory", "hip::hip_copy_literal", "@param"}, ins->name()))
continue;
m.ins2stream[ins] = stream;
}
return m;
}
std::vector<stream_race> analyze_streams(const program& p)
{
return migraphx::analyze_streams(p, make_stream_model(p));
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
#include <migraphx/config.hpp>
#include <migraphx/analyze_streams.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
std::vector<stream_race> analyze_streams(const program& p);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/analyze_streams.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/context.hpp>
#include "test.hpp"
#include "basic_ops.hpp"
struct record_event
{
std::size_t event = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.event, "event"));
}
std::string name() const { return "record_event"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>&) const { return {}; }
migraphx::argument compute(const migraphx::shape&, const std::vector<migraphx::argument>&) const
{
return {};
}
};
struct wait_event
{
std::size_t event = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.event, "event"));
}
std::string name() const { return "wait_event"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>&) const { return {}; }
migraphx::argument compute(const migraphx::shape&, const std::vector<migraphx::argument>&) const
{
return {};
}
};
struct set_stream
{
std::size_t stream = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::pack(f(self.stream, "stream"));
}
std::string name() const { return "set_stream"; }
migraphx::shape compute_shape(const std::vector<migraphx::shape>&) const { return {}; }
migraphx::argument compute(const migraphx::shape&, const std::vector<migraphx::argument>&) const
{
return {};
}
};
struct test_stream_model
{
std::size_t max_stream = 0;
std::unordered_map<migraphx::instruction_ref, std::size_t> ins2stream{};
std::size_t get_nstream() const { return max_stream + 1; }
std::size_t get_stream(migraphx::instruction_ref ins) const { return ins2stream.at(ins); }
std::size_t get_event_id(migraphx::instruction_ref ins) const
{
auto v = ins->get_operator().to_value();
return v["event"].to<std::size_t>();
}
bool has_stream(migraphx::instruction_ref ins) const { return ins2stream.count(ins) > 0; }
bool is_record(migraphx::instruction_ref ins) const { return ins->name() == "record_event"; }
bool is_wait(migraphx::instruction_ref ins) const { return ins->name() == "wait_event"; }
};
struct program_model
{
migraphx::program p;
std::unordered_map<migraphx::instruction_ref, std::size_t> ins2stream{};
std::size_t max_stream = 0;
template <class... Ts>
migraphx::instruction_ref add_literal(Ts... xs)
{
return p.add_literal(xs...);
}
template <class... Ts>
migraphx::instruction_ref add_instruction(Ts... xs)
{
return p.add_instruction(xs...);
}
template <class... Ts>
migraphx::instruction_ref add_instruction_stream(std::size_t n, Ts... xs)
{
max_stream = std::max(max_stream, n);
auto ins = p.add_instruction(xs...);
ins2stream[ins] = n;
return ins;
}
template <class... Ts>
migraphx::instruction_ref add_return(Ts... xs)
{
return p.add_return({xs...});
}
template <class... Ts>
migraphx::instruction_ref add_return_stream(std::size_t n, Ts... xs)
{
max_stream = std::max(max_stream, n);
auto ins = p.add_return({xs...});
ins2stream[ins] = n;
return ins;
}
test_stream_model get_stream_model() const { return {max_stream, ins2stream}; }
std::vector<migraphx::stream_race> analyze() const
{
return migraphx::analyze_streams(p, get_stream_model());
}
void debug_print() const { p.debug_print(); }
void debug_print(const std::vector<migraphx::stream_race>& races) const
{
for(auto&& race : races)
{
std::cout << "Race:\n";
p.debug_print(race.ins);
p.debug_print(race.before);
}
}
};
TEST_CASE(simple_race1)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(simple_race2)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
auto pass21 = pm.add_instruction_stream(1, pass_op{}, pass2);
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass1, pass21);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass21});
}
TEST_CASE(simple_race3)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass11 = pm.add_instruction_stream(0, pass_op{}, pass1);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass11, pass2);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(simple_race4)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass11 = pm.add_instruction_stream(0, pass_op{}, pass1);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
auto pass21 = pm.add_instruction_stream(1, pass_op{}, pass2);
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass11, pass21);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass21});
}
TEST_CASE(simple_race5)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
auto pass11 = pm.add_instruction_stream(0, pass_op{}, pass1);
auto pass21 = pm.add_instruction_stream(1, pass_op{}, pass2);
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass11, pass21);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass21});
}
TEST_CASE(simple_race_record_wait_wrong_stream)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(0, record_event{1});
pm.add_instruction_stream(1, wait_event{1});
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(simple_race_record_wait_same_stream1)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(1, wait_event{1});
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(simple_race_record_wait_same_stream2)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(0, record_event{1});
pm.add_instruction_stream(0, wait_event{1});
auto pass3 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass3});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(simple_race_sync)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(0, wait_event{1});
pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.empty());
}
TEST_CASE(race_return)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
auto r = pm.add_return_stream(0, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == r});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(race_return_sync)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(0, wait_event{1});
pm.add_return_stream(0, pass1, pass2);
auto races = pm.analyze();
EXPECT(races.empty());
}
TEST_CASE(race_double_wait1)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one);
pm.add_instruction_stream(2, wait_event{1});
auto pass4 = pm.add_instruction_stream(2, pass_op{}, pass3, pass2);
pm.add_instruction_stream(2, record_event{2});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
pm.add_instruction_stream(0, record_event{3});
pm.add_instruction_stream(1, wait_event{3});
pm.add_instruction_stream(1, wait_event{2});
pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass5});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(race_double_wait2)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one);
auto pass4 = pm.add_instruction_stream(2, pass_op{}, pass3, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(0, wait_event{1});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
pm.add_instruction_stream(0, record_event{3});
pm.add_instruction_stream(1, wait_event{3});
pm.add_instruction_stream(1, wait_event{2});
pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass4});
EXPECT(bool{races.front().before == pass2});
}
TEST_CASE(race_double_wait3)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one);
pm.add_instruction_stream(2, wait_event{1});
auto pass4 = pm.add_instruction_stream(2, pass_op{}, pass3, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(0, wait_event{1});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
pm.add_instruction_stream(1, wait_event{2});
auto pass6 = pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass6});
EXPECT(bool{races.front().before == pass5});
}
TEST_CASE(race_double_wait4)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one);
pm.add_instruction_stream(2, wait_event{1});
auto pass4 = pm.add_instruction_stream(2, pass_op{}, pass3, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(0, wait_event{1});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
pm.add_instruction_stream(0, record_event{3});
pm.add_instruction_stream(1, wait_event{3});
auto pass6 = pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass6});
EXPECT(bool{races.front().before == pass4});
}
TEST_CASE(race_double_wait_sync)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one);
pm.add_instruction_stream(2, wait_event{1});
auto pass4 = pm.add_instruction_stream(2, pass_op{}, pass3, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(0, wait_event{1});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass1, pass2);
pm.add_instruction_stream(0, record_event{3});
pm.add_instruction_stream(1, wait_event{3});
pm.add_instruction_stream(1, wait_event{2});
pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.empty());
}
TEST_CASE(race_multi_wait1)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
pm.add_instruction_stream(0, record_event{5});
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(2, wait_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(3, wait_event{5});
auto pass4 = pm.add_instruction_stream(3, pass_op{}, one, pass1);
pm.add_instruction_stream(3, record_event{3});
pm.add_instruction_stream(0, wait_event{2});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass3, pass1);
pm.add_instruction_stream(0, record_event{4});
pm.add_instruction_stream(1, wait_event{3});
auto pass6 = pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass6});
EXPECT(bool{races.front().before == pass5});
}
TEST_CASE(race_multi_wait2)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
pm.add_instruction_stream(0, record_event{5});
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(2, wait_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(3, wait_event{5});
auto pass4 = pm.add_instruction_stream(3, pass_op{}, one, pass1);
pm.add_instruction_stream(3, record_event{3});
pm.add_instruction_stream(0, wait_event{2});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass3, pass1);
pm.add_instruction_stream(0, record_event{4});
pm.add_instruction_stream(1, wait_event{4});
auto pass6 = pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass6});
EXPECT(bool{races.front().before == pass4});
}
TEST_CASE(race_multi_wait3)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(2, wait_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one, pass2);
pm.add_instruction_stream(2, record_event{2});
auto pass4 = pm.add_instruction_stream(3, pass_op{}, one, pass1);
pm.add_instruction_stream(3, record_event{3});
pm.add_instruction_stream(0, wait_event{2});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass3, pass1);
pm.add_instruction_stream(0, record_event{4});
pm.add_instruction_stream(1, wait_event{3});
pm.add_instruction_stream(1, wait_event{4});
pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.size() == 1);
EXPECT(bool{races.front().ins == pass4});
EXPECT(bool{races.front().before == pass1});
}
TEST_CASE(race_multi_wait_sync)
{
program_model pm;
auto one = pm.add_literal(1);
auto pass1 = pm.add_instruction_stream(0, pass_op{}, one);
pm.add_instruction_stream(0, record_event{5});
auto pass2 = pm.add_instruction_stream(1, pass_op{}, one);
pm.add_instruction_stream(1, record_event{1});
pm.add_instruction_stream(2, wait_event{1});
auto pass3 = pm.add_instruction_stream(2, pass_op{}, one, pass2);
pm.add_instruction_stream(2, record_event{2});
pm.add_instruction_stream(3, wait_event{5});
auto pass4 = pm.add_instruction_stream(3, pass_op{}, one, pass1);
pm.add_instruction_stream(3, record_event{3});
pm.add_instruction_stream(0, wait_event{2});
auto pass5 = pm.add_instruction_stream(0, pass_op{}, pass3, pass1);
pm.add_instruction_stream(0, record_event{4});
pm.add_instruction_stream(1, wait_event{3});
pm.add_instruction_stream(1, wait_event{4});
pm.add_instruction_stream(1, pass_op{}, pass4, pass5);
auto races = pm.analyze();
EXPECT(races.empty());
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -8,6 +8,7 @@
#include <migraphx/ranges.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/analyze_streams.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/manage_ptr.hpp>
......@@ -130,6 +131,19 @@ inline void compile_check(migraphx::program& p, const migraphx::target& t, bool
}
}
inline void check_gpu_streams(const migraphx::program& p)
{
auto races = migraphx::gpu::analyze_streams(p);
for(auto&& race : races)
{
std::cout << "FAILED: " << std::endl;
std::cout << "Race condition detected for: ";
p.debug_print(race.ins);
std::cout << "Should happen after: ";
p.debug_print(race.before);
}
}
template <class V>
std::vector<migraphx::argument> run_cpu(migraphx::program& p)
{
......@@ -152,6 +166,7 @@ std::vector<migraphx::argument> run_gpu(migraphx::program& p)
p = v.create_program();
auto_print pp{p, 1};
compile_check(p, migraphx::gpu::target{}, migraphx::enabled(MIGRAPHX_TRACE_GPU_COMPILE{}));
check_gpu_streams(p);
migraphx::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
......
#ifndef MIGRAPHX_GUARD_STREAM_MODEL_HPP
#define MIGRAPHX_GUARD_STREAM_MODEL_HPP
#include <cassert>
#include <string>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include <migraphx/config.hpp>
#include <migraphx/instruction_ref.hpp>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
#ifdef DOXYGEN
/// An interface for target-dependent model for the scheduler
struct stream_model
{
/// Get the number of streams used in the program
std::size_t get_nstream() const;
/// Get stream for instruction
std::size_t get_stream(instruction_ref ins) const;
/// Get unique event id for instruction
std::size_t get_event_id(instruction_ref ins) const;
/// Returns true if instruction has a stream assignment
bool has_stream(instruction_ref ins) const;
/// Returns true if the instruction records the event
bool is_record(instruction_ref ins) const;
/// Returns true if the instruction wait on the event
bool is_wait(instruction_ref ins) const;
};
#else
<%
interface('stream_model',
virtual('get_nstream', returns='std::size_t', const=True),
virtual('get_stream', ins='instruction_ref', returns='std::size_t', const=True),
virtual('get_event_id', ins='instruction_ref', returns='std::size_t', const=True),
virtual('has_stream', ins='instruction_ref', returns='bool', const=True),
virtual('is_record', ins='instruction_ref', returns='bool', const=True),
virtual('is_wait', ins='instruction_ref', returns='bool', const=True)
)
%>
#endif
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment